Add low_vram parameter
This commit is contained in:
parent
f7c5cfaf50
commit
44b83cada5
1 changed file with 4 additions and 0 deletions
|
@@ -219,6 +219,7 @@ class Llama:
|
||||||
last_n_tokens_size: int = 64,
|
last_n_tokens_size: int = 64,
|
||||||
lora_base: Optional[str] = None,
|
lora_base: Optional[str] = None,
|
||||||
lora_path: Optional[str] = None,
|
lora_path: Optional[str] = None,
|
||||||
|
low_vram: bool = False,
|
||||||
verbose: bool = True,
|
verbose: bool = True,
|
||||||
):
|
):
|
||||||
"""Load a llama.cpp model from `model_path`.
|
"""Load a llama.cpp model from `model_path`.
|
||||||
|
@@ -260,6 +261,7 @@ class Llama:
|
||||||
self.params.use_mmap = use_mmap if lora_path is None else False
|
self.params.use_mmap = use_mmap if lora_path is None else False
|
||||||
self.params.use_mlock = use_mlock
|
self.params.use_mlock = use_mlock
|
||||||
self.params.embedding = embedding
|
self.params.embedding = embedding
|
||||||
|
self.params.low_vram = low_vram
|
||||||
|
|
||||||
self.last_n_tokens_size = last_n_tokens_size
|
self.last_n_tokens_size = last_n_tokens_size
|
||||||
self.n_batch = min(n_ctx, n_batch)
|
self.n_batch = min(n_ctx, n_batch)
|
||||||
|
@@ -1447,6 +1449,7 @@ class Llama:
|
||||||
use_mmap=self.params.use_mmap,
|
use_mmap=self.params.use_mmap,
|
||||||
use_mlock=self.params.use_mlock,
|
use_mlock=self.params.use_mlock,
|
||||||
embedding=self.params.embedding,
|
embedding=self.params.embedding,
|
||||||
|
low_vram=self.params.low_vram,
|
||||||
last_n_tokens_size=self.last_n_tokens_size,
|
last_n_tokens_size=self.last_n_tokens_size,
|
||||||
n_batch=self.n_batch,
|
n_batch=self.n_batch,
|
||||||
n_threads=self.n_threads,
|
n_threads=self.n_threads,
|
||||||
|
@@ -1470,6 +1473,7 @@ class Llama:
|
||||||
use_mmap=state["use_mmap"],
|
use_mmap=state["use_mmap"],
|
||||||
use_mlock=state["use_mlock"],
|
use_mlock=state["use_mlock"],
|
||||||
embedding=state["embedding"],
|
embedding=state["embedding"],
|
||||||
|
low_vram=state["low_vram"],
|
||||||
n_threads=state["n_threads"],
|
n_threads=state["n_threads"],
|
||||||
n_batch=state["n_batch"],
|
n_batch=state["n_batch"],
|
||||||
last_n_tokens_size=state["last_n_tokens_size"],
|
last_n_tokens_size=state["last_n_tokens_size"],
|
||||||
|
|
Loading…
Reference in a new issue