Add low_vram parameter

Andrei Betlen 2023-06-14 22:12:33 -04:00
parent f7c5cfaf50
commit 44b83cada5


@@ -219,6 +219,7 @@ class Llama:
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -260,6 +261,7 @@ class Llama:
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram
 
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
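For reference, a minimal usage sketch of the new parameter (not part of this commit; the model path and n_gpu_layers value are hypothetical). low_vram is forwarded to llama.cpp's context params, where it trades some speed for a smaller VRAM footprint when layers are offloaded to the GPU:

    from llama_cpp import Llama

    llm = Llama(
        model_path="./models/7B/ggml-model.bin",  # hypothetical path
        n_gpu_layers=32,  # only meaningful when offloading layers to a GPU
        low_vram=True,    # the flag added in this commit
    )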
@@ -1447,6 +1449,7 @@ class Llama:
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1473,7 @@ class Llama:
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
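The last two hunks thread the flag through __getstate__ and __setstate__, so it survives pickling. A sketch of that round trip, assuming the llm object from the example above (unpickling reloads the model from model_path via __setstate__):

    import pickle

    restored = pickle.loads(pickle.dumps(llm))
    assert restored.params.low_vram  # restored along with the rest of the saved state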