Update llama.cpp

2023-07-24 13:08:06 -04:00 · 2023-07-24 13:08:06 -04:00 · d8a3ddbb1c
commit d8a3ddbb1c
parent 985d559971
2 changed files with 3 additions and 1 deletion
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -163,6 +163,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 #     int32_t  n_ctx;        // text context
 #     int32_t  n_batch;      // prompt processing batch size
 #     int32_t  n_gqa;        // grouped-query attention (TEMP - will be moved to model hparams)
+#     float    rms_norm_eps; // rms norm epsilon (TEMP - will be moved to model hparams)
 #     int32_t  n_gpu_layers; // number of layers to store in VRAM
 #     int32_t  main_gpu;     // the GPU that is used for scratch and small tensors
 #
@@ -193,6 +194,7 @@ class llama_context_params(Structure):
        ("n_ctx", c_int32),
        ("n_batch", c_int32),
        ("n_gqa", c_int32),
+        ("rms_norm_eps", c_float),
        ("n_gpu_layers", c_int32),
        ("main_gpu", c_int32),
        ("tensor_split", POINTER(c_float)),
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 84e09a7d8bc4ab6d658b5cd81295ac0add60be78
+Subproject commit 41c674161fb2459bdf7806d1eebead15bc5d046e