diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 2ffc4c5..2b5af66 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -201,25 +201,6 @@ _lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p,
 _lib.llama_apply_lora_from_file.restype = c_int
 
 
-# Returns the KV cache that will contain the context for the
-# ongoing prediction with the model.
-def llama_get_kv_cache(ctx: llama_context_p):
-    return _lib.llama_get_kv_cache(ctx)
-
-
-_lib.llama_get_kv_cache.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache.restype = POINTER(c_uint8)
-
-
-# Returns the size of the KV cache
-def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
-    return _lib.llama_get_kv_cache_size(ctx)
-
-
-_lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache_size.restype = c_size_t
-
-
 # Returns the number of tokens in the KV cache
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
     return _lib.llama_get_kv_cache_token_count(ctx)
@@ -229,17 +210,6 @@ _lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
 
-# Sets the KV cache containing the current context for the model
-def llama_set_kv_cache(
-    ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int
-):
-    return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
-
-
-_lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
-_lib.llama_set_kv_cache.restype = None
-
-
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 0e018fe..c4fe84f 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 0e018fe008eacebdbcfa2d61b6c988c245c961cd
+Subproject commit c4fe84fb0d28851a5c10e5a633f82ae2ba3b7fae