Update llama.cpp
parent 15e0e0a937
commit f72b6e9b73
2 changed files with 8 additions and 2 deletions
@@ -165,12 +165,16 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)

 # int32_t n_gpu_layers; // number of layers to store in VRAM
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
 # float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs

+# // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+# float rope_freq_base; // RoPE base frequency
+# float rope_freq_scale; // RoPE frequency scaling factor
+
 # // called with a progress value between 0 and 1, pass NULL to disable
 # llama_progress_callback progress_callback;
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;

 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
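The two new commented fields mirror the C struct change from llama.cpp PR 2054: rope_freq_base is the base used to derive the rotary-embedding frequencies (10000.0 by default), and rope_freq_scale linearly scales token positions before rotation, which is how "linear RoPE scaling" setups stretch a model past its trained context length. The sketch below is illustrative only, not code from this repository; the function name and the exact formulation are assumptions based on the usual RoPE definition.

def rope_angles(pos, head_dim, freq_base=10000.0, freq_scale=1.0):
    # Rotation angle for each frequency pair at a given token position.
    # freq_scale < 1.0 compresses positions (linear scaling), so the model
    # can address contexts longer than the one it was trained on.
    scaled_pos = pos * freq_scale
    return [scaled_pos * freq_base ** (-2.0 * i / head_dim)
            for i in range(head_dim // 2)]

# With freq_scale=0.5, position 4096 is rotated like position 2048:
assert rope_angles(4096, 128, freq_scale=0.5) == rope_angles(2048, 128)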
@@ -188,6 +192,8 @@ class llama_context_params(Structure):
     ("n_gpu_layers", c_int32),
     ("main_gpu", c_int32),
     ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+    ("rope_freq_base", c_float),
+    ("rope_freq_scale", c_float),
     ("progress_callback", llama_progress_callback),
     ("progress_callback_user_data", c_void_p),
     ("low_vram", c_bool),
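For reference, a short usage sketch of the new fields from Python. It assumes the module's existing llama_context_default_params() helper and the llama_progress_callback CFUNCTYPE shown above; the import path and the surrounding loading code are assumptions, not part of this diff.

from llama_cpp import llama_cpp  # assumed import path for these bindings

params = llama_cpp.llama_context_default_params()
params.rope_freq_base = 10000.0   # default RoPE base frequency
params.rope_freq_scale = 0.5      # linear scaling: roughly doubles the usable context

def on_progress(progress, user_data):
    # llama.cpp reports a value between 0 and 1 while the model loads.
    print(f"loading: {progress * 100:.0f}%")

# Keep a reference to the ctypes callback object so it is not garbage collected.
progress_cb = llama_cpp.llama_progress_callback(on_progress)
params.progress_callback = progress_cb
params.progress_callback_user_data = None  # NULL; no user context needed here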
vendor/llama.cpp (vendored)
@@ -1 +1 @@
-Subproject commit a6803cab946c817fb7aaf2a40b317f5d3e373bd1
+Subproject commit 6e7cca404748dd4b1a3affd0d1296e37f4ac0a6f
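The submodule bump moves vendor/llama.cpp from a6803ca to 6e7cca4, presumably picking up the upstream change referenced above (ggerganov/llama.cpp#2054) so that the new rope_freq_base and rope_freq_scale fields line up with the C-side llama_context_params layout in the updated revision.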