diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 431a99f..da2a7f3 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -445,7 +445,7 @@ class llama_model_params(Structure):
 # uint32_t n_batch; // prompt processing maximum batch size
 # uint32_t n_threads; // number of threads to use for generation
 # uint32_t n_threads_batch; // number of threads to use for batch processing
-# int8_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
+# int32_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 
 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -502,7 +502,7 @@ class llama_context_params(Structure):
         ("n_batch", c_uint32),
         ("n_threads", c_uint32),
         ("n_threads_batch", c_uint32),
-        ("rope_scaling_type", c_int8),
+        ("rope_scaling_type", c_int32),
         ("rope_freq_base", c_float),
         ("rope_freq_scale", c_float),
         ("yarn_ext_factor", c_float),
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 1912211..78b00dd 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 191221178f51b6e81122c5bda0fd79620e547d07
+Subproject commit 78b00dda6c0d62c34f5371d47718defff6ed2b22