diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index d31a5da..c4256dd 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -93,9 +93,7 @@ c_size_t_p = POINTER(c_size_t)
 
 # from ggml-backend.h
 # typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
-ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(
-    c_bool, c_void_p, c_bool, c_void_p
-)
+ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(c_bool, c_void_p, c_bool, c_void_p)
 
 # llama.h bindings
 
@@ -2174,6 +2172,34 @@ _lib.llama_sample_typical.argtypes = [
 _lib.llama_sample_typical.restype = None
 
 
+# /// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.
+# LLAMA_API void llama_sample_entropy(
+#     struct llama_context * ctx,
+#     llama_token_data_array * candidates_p,
+#     float min_temp,
+#     float max_temp,
+#     float exponent_val);
+def llama_sample_entropy(
+    ctx: llama_context_p,
+    candidates,  # type: _Pointer[llama_token_data_array]
+    min_temp: Union[c_float, float],
+    max_temp: Union[c_float, float],
+    exponent_val: Union[c_float, float],
+):
+    """Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772."""
+    return _lib.llama_sample_entropy(ctx, candidates, min_temp, max_temp, exponent_val)
+
+
+_lib.llama_sample_entropy.argtypes = [
+    llama_context_p,
+    llama_token_data_array_p,
+    c_float,
+    c_float,
+    c_float,
+]
+_lib.llama_sample_entropy.restype = None
+
+
 # LLAMA_API void llama_sample_temp(
 #     struct llama_context * ctx,
 #     llama_token_data_array * candidates,
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index faa3526..5f1925a 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit faa3526a1eba458120987ed8269e5616385a76f4
+Subproject commit 5f1925a8cef81eb9b372faaae34b0dd76d5361d4
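For context, here is a rough sketch of how the new `llama_sample_entropy` binding could be driven through the low-level API, following the candidate-array pattern the other `llama_sample_*` wrappers use. The model path, the sampling values, and the elided prompt-evaluation step are placeholders for illustration, not part of this change.

```python
import ctypes
import llama_cpp

# Illustrative setup only; the model path below is a placeholder.
llama_cpp.llama_backend_init(False)
model = llama_cpp.llama_load_model_from_file(
    b"./models/model.gguf", llama_cpp.llama_model_default_params()
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

# ... evaluate a prompt with llama_decode() here so the context holds logits ...

# Build the llama_token_data_array that the llama_sample_* functions mutate.
n_vocab = llama_cpp.llama_n_vocab(model)
logits = llama_cpp.llama_get_logits(ctx)
candidates = (llama_cpp.llama_token_data * n_vocab)(
    *[
        llama_cpp.llama_token_data(token_id, logits[token_id], 0.0)
        for token_id in range(n_vocab)
    ]
)
candidates_p = llama_cpp.llama_token_data_array(candidates, n_vocab, False)

# Dynamic temperature: the entropy of the candidate distribution selects an
# effective temperature between min_temp and max_temp. The values here are
# illustrative, not recommendations.
llama_cpp.llama_sample_entropy(
    ctx, ctypes.byref(candidates_p), min_temp=0.1, max_temp=2.0, exponent_val=1.0
)
token = llama_cpp.llama_sample_token(ctx, ctypes.byref(candidates_p))
```

Because the entropy step only rescales the candidate logits in place, it composes with the existing samplers: it can be applied before `llama_sample_token` wherever a fixed `llama_sample_temp` call would otherwise go.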