Add use_mmap flag to server

This commit is contained in:
Andrei Betlen 2023-04-19 15:57:46 -04:00
parent 207ebbc8dc
commit e4647c75ec

View file

@@ -29,9 +29,10 @@ class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
     n_batch: int = 8
-    n_threads: int = ((os.cpu_count() or 2) // 2) or 1
+    n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
+    use_mmap: bool = True
     embedding: bool = True
     last_n_tokens_size: int = 64
     logits_all: bool = False
@@ -54,6 +55,7 @@ llama = llama_cpp.Llama(
     settings.model,
     f16_kv=settings.f16_kv,
     use_mlock=settings.use_mlock,
+    use_mmap=settings.use_mmap,
     embedding=settings.embedding,
     logits_all=settings.logits_all,
     n_threads=settings.n_threads,