From e4647c75ec49e21fa2146844c6b91faba58c6699 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 19 Apr 2023 15:57:46 -0400
Subject: [PATCH] Add use_mmap flag to server

---
 llama_cpp/server/__main__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index 48481c6..b2ec4de 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -29,9 +29,10 @@ class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
     n_batch: int = 8
-    n_threads: int = ((os.cpu_count() or 2) // 2) or 1
+    n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
+    use_mmap: bool = True
     embedding: bool = True
     last_n_tokens_size: int = 64
     logits_all: bool = False
@@ -54,6 +55,7 @@ llama = llama_cpp.Llama(
     settings.model,
     f16_kv=settings.f16_kv,
     use_mlock=settings.use_mlock,
+    use_mmap=settings.use_mmap,
     embedding=settings.embedding,
     logits_all=settings.logits_all,
     n_threads=settings.n_threads,
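
Note (not part of the patch): a minimal sketch of how the new flag behaves
under pydantic v1's BaseSettings, which the server uses for configuration.
The standalone Settings class below is illustrative only, not the server's
actual module; field names mirror the patch, and the model path is a
placeholder.

    import os
    from pydantic import BaseSettings  # pydantic v1: BaseSettings lives in the core package

    class Settings(BaseSettings):
        model: str
        use_mmap: bool = True  # the flag added by this patch

    # BaseSettings reads fields from environment variables (matching is
    # case-insensitive by default in pydantic v1), so mmap can be toggled
    # at launch time without code changes:
    os.environ["USE_MMAP"] = "false"
    settings = Settings(model="./models/7B/ggml-model.bin")
    assert settings.use_mmap is False

The default of True matches llama.cpp, where memory-mapped model loading is
the default; disabling it should make the loader read the entire model into
memory up front instead.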