From 3cab3ef4cb1ae39ad19ffe2b58cdf6671dd82e43 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 25 Apr 2023 09:11:32 -0400
Subject: [PATCH] Update n_batch for server

---
 llama_cpp/server/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index b2ec4de..af6cc38 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -28,7 +28,7 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 8
+    n_batch: int = 512
     n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...