Offload KQV by default
This commit is contained in:
parent
6bfe98bd80
commit
48c3b77e6f
2 changed files with 2 additions and 2 deletions
|
@@ -77,7 +77,7 @@ class Llama:
|
||||||
mul_mat_q: bool = True,
|
mul_mat_q: bool = True,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
offload_kqv: bool = False,
|
offload_kqv: bool = True,
|
||||||
# Sampling Params
|
# Sampling Params
|
||||||
last_n_tokens_size: int = 64,
|
last_n_tokens_size: int = 64,
|
||||||
# LoRA Params
|
# LoRA Params
|
||||||
|
|
|
@@ -90,7 +90,7 @@ class ModelSettings(BaseSettings):
|
||||||
logits_all: bool = Field(default=True, description="Whether to return logits.")
|
logits_all: bool = Field(default=True, description="Whether to return logits.")
|
||||||
embedding: bool = Field(default=True, description="Whether to use embeddings.")
|
embedding: bool = Field(default=True, description="Whether to use embeddings.")
|
||||||
offload_kqv: bool = Field(
|
offload_kqv: bool = Field(
|
||||||
default=False, description="Whether to offload kqv to the GPU."
|
default=True, description="Whether to offload kqv to the GPU."
|
||||||
)
|
)
|
||||||
# Sampling Params
|
# Sampling Params
|
||||||
last_n_tokens_size: int = Field(
|
last_n_tokens_size: int = Field(
|
||||||
|
|
Loading…
Reference in a new issue