diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 3e8cf58..8c69210 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -233,7 +233,7 @@ class Llama: rope_scaling_type: Optional[int] = llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base: float = 0.0, rope_freq_scale: float = 0.0, - yarn_ext_factor: float = float("nan"), + yarn_ext_factor: float = -1.0, yarn_attn_factor: float = 1.0, yarn_beta_fast: float = 32.0, yarn_beta_slow: float = 1.0, diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index b6216a5..48958e5 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -315,11 +315,11 @@ class llama_model_params(Structure): # struct llama_context_params { -# uint32_t seed; // RNG seed, -1 for random -# uint32_t n_ctx; // text context, 0 = from model -# uint32_t n_batch; // prompt processing maximum batch size -# uint32_t n_threads; // number of threads to use for generation -# uint32_t n_threads_batch; // number of threads to use for batch processing +# uint32_t seed; // RNG seed, -1 for random +# uint32_t n_ctx; // text context, 0 = from model +# uint32_t n_batch; // prompt processing maximum batch size +# uint32_t n_threads; // number of threads to use for generation +# uint32_t n_threads_batch; // number of threads to use for batch processing # int8_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type` # // ref: https://github.com/ggerganov/llama.cpp/pull/2054 diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index bec9561..93afc3e 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -93,7 +93,7 @@ class Settings(BaseSettings): default=0.0, description="RoPE frequency scaling factor" ) yarn_ext_factor: float = Field( - default=float("nan") + default=-1.0 ) yarn_attn_factor: float = Field( default=1.0 diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 629f917..abb77e7 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 629f917cd6b96ba1274c49a8aab163b1b189229d +Subproject commit abb77e7319aabc0b5cfb7c22da690a692489b6b7