From 8908f4614cdb2c30a8415de47f70fcf42ec33016 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 28 Mar 2023 21:10:23 -0400
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 20 ++++++++++----------
 vendor/llama.cpp       |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 3dbe570..724126e 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -3,7 +3,6 @@ import ctypes
 from ctypes import (
     c_int,
     c_float,
-    c_double,
     c_char_p,
     c_void_p,
     c_bool,
@@ -40,7 +39,7 @@ class llama_token_data(Structure):
 llama_token_data_p = POINTER(llama_token_data)


-llama_progress_callback = ctypes.CFUNCTYPE(None, c_double, c_void_p)
+llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)


 class llama_context_params(Structure):
@@ -48,6 +47,7 @@
         ("n_ctx", c_int),  # text context
         ("n_parts", c_int),  # -1 for default
         ("seed", c_int),  # RNG seed, 0 for random
+        ("f16_kv", c_bool),  # use fp16 for KV cache
         (
             "logits_all",
@@ -56,6 +56,7 @@
         ("vocab_only", c_bool),  # only load the vocabulary, no weights
         ("use_mlock", c_bool),  # force system to keep model in RAM
         ("embedding", c_bool),  # embedding mode only
+        # called with a progress value between 0 and 1, pass NULL to disable
         ("progress_callback", llama_progress_callback),
         # context pointer passed to the progress callback
@@ -70,8 +71,7 @@ llama_context_params_p = POINTER(llama_context_params)


 def llama_context_default_params() -> llama_context_params:
-    params = _lib.llama_context_default_params()
-    return params
+    return _lib.llama_context_default_params()


 _lib.llama_context_default_params.argtypes = []
@@ -229,9 +229,9 @@ def llama_sample_top_p_top_k(
     last_n_tokens_data: llama_token_p,
     last_n_tokens_size: c_int,
     top_k: c_int,
-    top_p: c_double,
-    temp: c_double,
-    repeat_penalty: c_double,
+    top_p: c_float,
+    temp: c_float,
+    repeat_penalty: c_float,
 ) -> llama_token:
     return _lib.llama_sample_top_p_top_k(
         ctx, last_n_tokens_data, last_n_tokens_size, top_k, top_p, temp, repeat_penalty
@@ -243,9 +243,9 @@ _lib.llama_sample_top_p_top_k.argtypes = [
     llama_token_p,
     c_int,
     c_int,
-    c_double,
-    c_double,
-    c_double,
+    c_float,
+    c_float,
+    c_float,
 ]
 _lib.llama_sample_top_p_top_k.restype = llama_token
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 7e53955..5a5f8b1 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 7e5395575a3360598f2565c73c8a2ec0c0abbdb8
+Subproject commit 5a5f8b1501fbb34367225544010ddfc306d6d2fe
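
For callers of these bindings, the user-visible part of the c_double to c_float change is the progress callback, which must now be built from CFUNCTYPE(None, c_float, c_void_p); the sampling parameters (top_p, temp, repeat_penalty) still take plain Python floats, which ctypes converts to single precision. What follows is a minimal sketch, not part of the patch, of registering a callback against the new signature. The import path assumes llama-cpp-python's low-level llama_cpp module, and the model path is a placeholder:

from llama_cpp import llama_cpp

# Wrapping a Python function in the patched CFUNCTYPE; progress now arrives
# as a C float in [0, 1] rather than a double.
@llama_cpp.llama_progress_callback
def on_progress(progress, user_data):
    print(f"loading: {progress * 100:.1f}%")

params = llama_cpp.llama_context_default_params()
params.progress_callback = on_progress  # assigning None passes NULL and disables it

# Keep on_progress referenced for as long as the context may invoke it:
# ctypes holds no reference of its own, and a collected callback crashes
# at call time. The model path below is hypothetical.
ctx = llama_cpp.llama_init_from_file(b"models/7B/ggml-model.bin", params)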