Add mul_mat_q option
commit d015bdb4f8
parent f6a7850e1a
2 changed files with 8 additions and 0 deletions
@@ -227,6 +227,7 @@ class Llama:
         rope_freq_scale: float = 1.0,
         n_gqa: Optional[int] = None,  # (TEMPORARY) must be 8 for llama2 70b
         rms_norm_eps: Optional[float] = None,  # (TEMPORARY)
+        mul_mat_q: Optional[bool] = None,  # (TEMPORARY)
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -293,6 +294,9 @@ class Llama:
         if rms_norm_eps is not None:
             self.params.rms_norm_eps = rms_norm_eps

+        if mul_mat_q is not None:
+            self.params.mul_mat_q = mul_mat_q
+
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)

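Taken together, the two hunks above simply thread the new flag through: when the caller passes mul_mat_q, it is copied onto the underlying llama.cpp context params. A minimal usage sketch in Python (the model path is hypothetical, and True is only there to illustrate the flag; leaving it as None keeps the llama.cpp default):

from llama_cpp import Llama

# mul_mat_q is optional; None means the llama.cpp default is left untouched.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # hypothetical path
    mul_mat_q=True,  # (TEMPORARY) forwarded to self.params.mul_mat_q
)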
@@ -103,6 +103,10 @@ class Settings(BaseSettings):
         default=None,
         description="TEMPORARY",
     )
+    mul_mat_q: Optional[bool] = Field(
+        default=None,
+        description="TEMPORARY",
+    )


 class ErrorResponse(TypedDict):
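The server-side counterpart is an ordinary pydantic Field on the Settings class, so it should be settable the same way as the neighbouring options. A hedged sketch, assuming Settings is importable from the server app module and that model is its usual required path field (the import path and file path below are assumptions, not part of this diff):

from llama_cpp.server.app import Settings  # import path assumed

# Constructing Settings directly; a pydantic BaseSettings would also pick the
# value up from a MUL_MAT_Q environment variable if no env_prefix is set.
settings = Settings(
    model="./models/7B/ggml-model.bin",  # hypothetical path
    mul_mat_q=True,
)
print(settings.mul_mat_q)  # True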