From c67f7863604eedbcb3e17884d1aa3354f7857cf9 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Thu, 29 Jun 2023 01:08:15 -0400
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 32 +++++++++++++++++++++++++++-----
 vendor/llama.cpp       |  2 +-
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 23643e2..52fc14e 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -290,13 +290,14 @@ _lib.llama_mlock_supported.restype = c_bool
 
 # // TODO: not great API - very likely to change
 # // Initialize the llama + ggml backend
+# // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
-# LLAMA_API void llama_init_backend();
-def llama_init_backend():
-    return _lib.llama_init_backend()
+# LLAMA_API void llama_init_backend(bool numa);
+def llama_init_backend(numa: c_bool):
+    return _lib.llama_init_backend(numa)
 
 
-_lib.llama_init_backend.argtypes = []
+_lib.llama_init_backend.argtypes = [c_bool]
 _lib.llama_init_backend.restype = None
 
 
@@ -565,6 +566,27 @@ _lib.llama_eval.argtypes = [llama_context_p, llama_token_p, c_int, c_int, c_int]
 _lib.llama_eval.restype = c_int
 
 
+# // Same as llama_eval, but use float matrix input directly.
+# LLAMA_API int llama_eval_embd(
+#     struct llama_context * ctx,
+#     const float * embd,
+#     int n_tokens,
+#     int n_past,
+#     int n_threads);
+def llama_eval_embd(
+    ctx: llama_context_p,
+    embd,  # type: Array[c_float]
+    n_tokens: c_int,
+    n_past: c_int,
+    n_threads: c_int,
+) -> int:
+    return _lib.llama_eval_embd(ctx, embd, n_tokens, n_past, n_threads)
+
+
+_lib.llama_eval_embd.argtypes = [llama_context_p, c_float_p, c_int, c_int, c_int]
+_lib.llama_eval_embd.restype = c_int
+
+
 # Convert the provided text into tokens.
 # The tokens pointer must be large enough to hold the resulting tokens.
 # Returns the number of tokens on success, no more than n_max_tokens
@@ -998,5 +1020,5 @@ _lib.llama_print_system_info.restype = c_char_p
 
 _llama_initialized = False
 if not _llama_initialized:
-    llama_init_backend()
+    llama_init_backend(c_bool(False))
     _llama_initialized = True
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 447ccbe..96a712c 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 447ccbe8c39332fcdd0d98a041b6e2ff6f06219d
+Subproject commit 96a712ca1b7f427e3bd7ffc0c70b2105cfc7fbf1
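
Usage note (not part of the patch): a minimal sketch of how the updated bindings might be called, assuming the loaded shared library exposes llama_init_backend(bool) and llama_eval_embd as declared above. The context handle, embedding size, and thread count below are placeholders, not values taken from this change.

    import ctypes
    from llama_cpp import llama_cpp

    # Initialize the backend once; pass True to request NUMA optimizations.
    # Note the module itself now calls this with c_bool(False) at import time.
    llama_cpp.llama_init_backend(ctypes.c_bool(True))

    # Hypothetical llama_eval_embd call: feed n_tokens embedding vectors
    # (n_tokens * n_embd floats) directly instead of token ids.
    n_tokens, n_embd = 1, 4096                        # illustrative sizes
    embd = (ctypes.c_float * (n_tokens * n_embd))()   # zero-filled input buffer
    # rc = llama_cpp.llama_eval_embd(ctx, embd, n_tokens, 0, 4)  # needs a real ctx

The import-time call keeps the previous default behavior (NUMA disabled); callers who want NUMA would need to re-run llama_init_backend themselves before creating a context.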