Add use_mmap option
This commit is contained in:
parent
d41cb0ecf7
commit
1f67ad2a0b
1 changed file with 5 additions and 0 deletions
|
@@ -23,6 +23,7 @@ class Llama:
|
||||||
f16_kv: bool = False,
|
f16_kv: bool = False,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
vocab_only: bool = False,
|
vocab_only: bool = False,
|
||||||
|
use_mmap: bool = True,
|
||||||
use_mlock: bool = False,
|
use_mlock: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
n_threads: Optional[int] = None,
|
n_threads: Optional[int] = None,
|
||||||
|
@@ -40,6 +41,7 @@ class Llama:
|
||||||
f16_kv: Use half-precision for key/value cache.
|
f16_kv: Use half-precision for key/value cache.
|
||||||
logits_all: Return logits for all tokens, not just the last token.
|
logits_all: Return logits for all tokens, not just the last token.
|
||||||
vocab_only: Only load the vocabulary no weights.
|
vocab_only: Only load the vocabulary no weights.
|
||||||
|
use_mmap: Use mmap if possible.
|
||||||
use_mlock: Force the system to keep the model in RAM.
|
use_mlock: Force the system to keep the model in RAM.
|
||||||
embedding: Embedding mode only.
|
embedding: Embedding mode only.
|
||||||
n_threads: Number of threads to use. If None, the number of threads is automatically determined.
|
n_threads: Number of threads to use. If None, the number of threads is automatically determined.
|
||||||
|
@@ -63,6 +65,7 @@ class Llama:
|
||||||
self.params.f16_kv = f16_kv
|
self.params.f16_kv = f16_kv
|
||||||
self.params.logits_all = logits_all
|
self.params.logits_all = logits_all
|
||||||
self.params.vocab_only = vocab_only
|
self.params.vocab_only = vocab_only
|
||||||
|
self.params.use_mmap = use_mmap
|
||||||
self.params.use_mlock = use_mlock
|
self.params.use_mlock = use_mlock
|
||||||
self.params.embedding = embedding
|
self.params.embedding = embedding
|
||||||
|
|
||||||
|
@@ -661,6 +664,7 @@ class Llama:
|
||||||
f16_kv=self.params.f16_kv,
|
f16_kv=self.params.f16_kv,
|
||||||
logits_all=self.params.logits_all,
|
logits_all=self.params.logits_all,
|
||||||
vocab_only=self.params.vocab_only,
|
vocab_only=self.params.vocab_only,
|
||||||
|
use_mmap=self.params.use_mmap,
|
||||||
use_mlock=self.params.use_mlock,
|
use_mlock=self.params.use_mlock,
|
||||||
embedding=self.params.embedding,
|
embedding=self.params.embedding,
|
||||||
last_n_tokens_size=self.last_n_tokens_size,
|
last_n_tokens_size=self.last_n_tokens_size,
|
||||||
|
@@ -679,6 +683,7 @@ class Llama:
|
||||||
f16_kv=state["f16_kv"],
|
f16_kv=state["f16_kv"],
|
||||||
logits_all=state["logits_all"],
|
logits_all=state["logits_all"],
|
||||||
vocab_only=state["vocab_only"],
|
vocab_only=state["vocab_only"],
|
||||||
|
use_mmap=state["use_mmap"],
|
||||||
use_mlock=state["use_mlock"],
|
use_mlock=state["use_mlock"],
|
||||||
embedding=state["embedding"],
|
embedding=state["embedding"],
|
||||||
n_threads=state["n_threads"],
|
n_threads=state["n_threads"],
|
||||||
|
|
Loading…
Reference in a new issue