From e4647c75ec49e21fa2146844c6b91faba58c6699 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 19 Apr 2023 15:57:46 -0400
Subject: [PATCH] Add use_mmap flag to server

---
 llama_cpp/server/__main__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index 48481c6..b2ec4de 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -29,9 +29,10 @@ class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
     n_batch: int = 8
-    n_threads: int = ((os.cpu_count() or 2) // 2) or 1
+    n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
+    use_mmap: bool = True
     embedding: bool = True
     last_n_tokens_size: int = 64
     logits_all: bool = False
@@ -54,6 +55,7 @@ llama = llama_cpp.Llama(
     settings.model,
     f16_kv=settings.f16_kv,
     use_mlock=settings.use_mlock,
+    use_mmap=settings.use_mmap,
     embedding=settings.embedding,
     logits_all=settings.logits_all,
     n_threads=settings.n_threads,
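
Note (not part of the patch): a minimal sketch of how the new flag behaves
under pydantic v1's BaseSettings, which the server uses for configuration.
The standalone Settings class below is illustrative only, not the server's
actual module; field names mirror the patch, and the model path is a
placeholder.

    import os
    from pydantic import BaseSettings  # pydantic v1: BaseSettings lives in the core package

    class Settings(BaseSettings):
        model: str
        use_mmap: bool = True  # the flag added by this patch

    # BaseSettings reads fields from environment variables (matching is
    # case-insensitive by default in pydantic v1), so mmap can be toggled
    # at launch time without code changes:
    os.environ["USE_MMAP"] = "false"
    settings = Settings(model="./models/7B/ggml-model.bin")
    assert settings.use_mmap is False

The default of True matches llama.cpp, where memory-mapped model loading is
the default; disabling it should make the loader read the entire model into
memory up front instead.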