Add low_vram parameter

Andrei Betlen 2023-06-14 22:12:33 -04:00
parent f7c5cfaf50
commit 44b83cada5


@@ -219,6 +219,7 @@ class Llama:
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -260,6 +261,7 @@ class Llama:
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram

         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
@@ -1447,6 +1449,7 @@ class Llama:
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1473,7 @@ class Llama:
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
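
For context: the new flag is stored on `self.params` (the llama.cpp context params) and is round-tripped through `__getstate__`/`__setstate__`, so pickled `Llama` instances preserve the setting. A minimal usage sketch of the new parameter; the model path below is a placeholder, not part of this commit:

    from llama_cpp import Llama

    # Enable llama.cpp's low-VRAM mode at construction time.
    # low_vram is forwarded to the llama.cpp context params (see diff above).
    llm = Llama(
        model_path="./models/7B/ggml-model.bin",  # placeholder path
        low_vram=True,  # new in this commit; defaults to False
    )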