Add low_vram parameter
This commit is contained in:
parent
f7c5cfaf50
commit
44b83cada5
1 changed file with 4 additions and 0 deletions
|
@@ -219,6 +219,7 @@ class Llama:
|
|||
last_n_tokens_size: int = 64,
|
||||
lora_base: Optional[str] = None,
|
||||
lora_path: Optional[str] = None,
|
||||
low_vram: bool = False,
|
||||
verbose: bool = True,
|
||||
):
|
||||
"""Load a llama.cpp model from `model_path`.
|
||||
|
@@ -260,6 +261,7 @@ class Llama:
|
|||
self.params.use_mmap = use_mmap if lora_path is None else False
|
||||
self.params.use_mlock = use_mlock
|
||||
self.params.embedding = embedding
|
||||
self.params.low_vram = low_vram
|
||||
|
||||
self.last_n_tokens_size = last_n_tokens_size
|
||||
self.n_batch = min(n_ctx, n_batch)
|
||||
|
@@ -1447,6 +1449,7 @@ class Llama:
|
|||
use_mmap=self.params.use_mmap,
|
||||
use_mlock=self.params.use_mlock,
|
||||
embedding=self.params.embedding,
|
||||
low_vram=self.params.low_vram,
|
||||
last_n_tokens_size=self.last_n_tokens_size,
|
||||
n_batch=self.n_batch,
|
||||
n_threads=self.n_threads,
|
||||
|
@@ -1470,6 +1473,7 @@ class Llama:
|
|||
use_mmap=state["use_mmap"],
|
||||
use_mlock=state["use_mlock"],
|
||||
embedding=state["embedding"],
|
||||
low_vram=state["low_vram"],
|
||||
n_threads=state["n_threads"],
|
||||
n_batch=state["n_batch"],
|
||||
last_n_tokens_size=state["last_n_tokens_size"],
|
||||
|
|
Loading…
Reference in a new issue