Merge branch 'custom_rope' into main

This commit is contained in:
Andrei Betlen 2023-07-15 15:11:43 -04:00
commit d0572f4fca
2 changed files with 8 additions and 0 deletions

View file

@@ -205,6 +205,8 @@ class Llama:
model_path: str,
# NOTE: These parameters are likely to change in the future.
n_ctx: int = 512,
rope_freq_base: float = 10000.0,
rope_freq_scale: float = 1.0,
n_parts: int = -1,
n_gpu_layers: int = 0,
seed: int = 1337,
@@ -228,6 +230,8 @@ class Llama:
Args:
model_path: Path to the model.
n_ctx: Maximum context size.
rope_freq_base: RoPE base frequency.
rope_freq_scale: RoPE frequency scale.
n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
seed: Random seed. -1 for random.
f16_kv: Use half-precision for key/value cache.
@@ -256,6 +260,8 @@ class Llama:
self.params = llama_cpp.llama_context_default_params()
self.params.n_ctx = n_ctx
self.params.rope_freq_base = rope_freq_base
self.params.rope_freq_scale = rope_freq_scale
self.params.n_gpu_layers = n_gpu_layers
self.params.seed = seed
self.params.f16_kv = f16_kv

View file

@@ -188,6 +188,8 @@ class llama_context_params(Structure):
_fields_ = [
("seed", c_uint32),
("n_ctx", c_int32),
("rope_freq_base", c_float),
("rope_freq_scale", c_float),
("n_batch", c_int32),
("n_gpu_layers", c_int32),
("main_gpu", c_int32),