Update server params
This commit is contained in:
parent
3720c739d4
commit
d9bce17794
1 changed file with 9 additions and 7 deletions
|
@@ -27,6 +27,7 @@ import numpy as np
|
||||||
import numpy.typing as npt
|
import numpy.typing as npt
|
||||||
|
|
||||||
|
|
||||||
|
# Disable warning for model and model_alias settings
|
||||||
BaseSettings.model_config['protected_namespaces'] = ()
|
BaseSettings.model_config['protected_namespaces'] = ()
|
||||||
|
|
||||||
|
|
||||||
|
@@ -58,14 +59,10 @@ class Settings(BaseSettings):
|
||||||
description="Split layers across multiple GPUs in proportion.",
|
description="Split layers across multiple GPUs in proportion.",
|
||||||
)
|
)
|
||||||
rope_freq_base: float = Field(
|
rope_freq_base: float = Field(
|
||||||
default=10000, ge=1, description="RoPE base frequency"
|
default=0.0, description="RoPE base frequency"
|
||||||
)
|
)
|
||||||
rope_freq_scale: float = Field(
|
rope_freq_scale: float = Field(
|
||||||
default=1.0, description="RoPE frequency scaling factor"
|
default=0.0, description="RoPE frequency scaling factor"
|
||||||
)
|
|
||||||
low_vram: bool = Field(
|
|
||||||
default=False,
|
|
||||||
description="Whether to use less VRAM. This will reduce performance.",
|
|
||||||
)
|
)
|
||||||
mul_mat_q: bool = Field(
|
mul_mat_q: bool = Field(
|
||||||
default=True, description="if true, use experimental mul_mat_q kernels"
|
default=True, description="if true, use experimental mul_mat_q kernels"
|
||||||
|
@@ -106,6 +103,10 @@ class Settings(BaseSettings):
|
||||||
default=False,
|
default=False,
|
||||||
description="Enable NUMA support.",
|
description="Enable NUMA support.",
|
||||||
)
|
)
|
||||||
|
chat_format: str = Field(
|
||||||
|
default="llama-2",
|
||||||
|
description="Chat format to use.",
|
||||||
|
)
|
||||||
cache: bool = Field(
|
cache: bool = Field(
|
||||||
default=False,
|
default=False,
|
||||||
description="Use a cache to reduce processing times for evaluated prompts.",
|
description="Use a cache to reduce processing times for evaluated prompts.",
|
||||||
|
@@ -349,7 +350,6 @@ def create_app(settings: Optional[Settings] = None):
|
||||||
tensor_split=settings.tensor_split,
|
tensor_split=settings.tensor_split,
|
||||||
rope_freq_base=settings.rope_freq_base,
|
rope_freq_base=settings.rope_freq_base,
|
||||||
rope_freq_scale=settings.rope_freq_scale,
|
rope_freq_scale=settings.rope_freq_scale,
|
||||||
low_vram=settings.low_vram,
|
|
||||||
mul_mat_q=settings.mul_mat_q,
|
mul_mat_q=settings.mul_mat_q,
|
||||||
f16_kv=settings.f16_kv,
|
f16_kv=settings.f16_kv,
|
||||||
logits_all=settings.logits_all,
|
logits_all=settings.logits_all,
|
||||||
|
@@ -361,6 +361,8 @@ def create_app(settings: Optional[Settings] = None):
|
||||||
last_n_tokens_size=settings.last_n_tokens_size,
|
last_n_tokens_size=settings.last_n_tokens_size,
|
||||||
lora_base=settings.lora_base,
|
lora_base=settings.lora_base,
|
||||||
lora_path=settings.lora_path,
|
lora_path=settings.lora_path,
|
||||||
|
numa=settings.numa,
|
||||||
|
chat_format=settings.chat_format,
|
||||||
verbose=settings.verbose,
|
verbose=settings.verbose,
|
||||||
)
|
)
|
||||||
if settings.cache:
|
if settings.cache:
|
||||||
|
|
Loading…
Reference in a new issue