Update llama.cpp
This commit is contained in:
parent
985d559971
commit
d8a3ddbb1c
2 changed files with 3 additions and 1 deletions
|
@@ -163,6 +163,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
|
|||
# int32_t n_ctx; // text context
|
||||
# int32_t n_batch; // prompt processing batch size
|
||||
# int32_t n_gqa; // grouped-query attention (TEMP - will be moved to model hparams)
|
||||
# float rms_norm_eps; // rms norm epsilon (TEMP - will be moved to model hparams)
|
||||
# int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
# int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
#
|
||||
|
@@ -193,6 +194,7 @@ class llama_context_params(Structure):
|
|||
("n_ctx", c_int32),
|
||||
("n_batch", c_int32),
|
||||
("n_gqa", c_int32),
|
||||
("rms_norm_eps", c_float),
|
||||
("n_gpu_layers", c_int32),
|
||||
("main_gpu", c_int32),
|
||||
("tensor_split", POINTER(c_float)),
|
||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@@ -1 +1 @@
|
|||
-Subproject commit 84e09a7d8bc4ab6d658b5cd81295ac0add60be78
|
||||
+Subproject commit 41c674161fb2459bdf7806d1eebead15bc5d046e
|
Loading…
Reference in a new issue