Update n_batch for server

This commit is contained in:
Andrei Betlen 2023-04-25 09:11:32 -04:00
parent cc706fb944
commit 3cab3ef4cb

View file

@@ -28,7 +28,7 @@ from sse_starlette.sse import EventSourceResponse
class Settings(BaseSettings):
model: str
n_ctx: int = 2048
-n_batch: int = 8
+n_batch: int = 512
n_threads: int = max((os.cpu_count() or 2) // 2, 1)
f16_kv: bool = True
use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows); it took forever to figure out...