feat: Update llama.cpp

Andrei Betlen 2024-02-17 00:37:51 -05:00
parent c2a234a086
commit fdce078cb9
4 changed files with 44 additions and 10 deletions


@@ -98,7 +98,7 @@ class Llama:
         lora_scale: float = 1.0,
         lora_path: Optional[str] = None,
         # Backend Params
-        numa: bool = False,
+        numa: Union[bool, int] = False,
         # Chat Format Params
         chat_format: Optional[str] = None,
         chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
@@ -166,7 +166,7 @@ class Llama:
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
-            numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
+            numa: numa policy
             chat_format: String specifying the chat format to use when calling create_chat_completion.
             chat_handler: Optional chat handler to use when calling create_chat_completion.
             draft_model: Optional draft model to use for speculative decoding.
@@ -183,12 +183,18 @@ class Llama:
         set_verbose(verbose)
 
-        self.numa = numa
         if not Llama.__backend_initialized:
             with suppress_stdout_stderr(disable=verbose):
-                llama_cpp.llama_backend_init(self.numa)
+                llama_cpp.llama_backend_init()
             Llama.__backend_initialized = True
 
+        if isinstance(numa, bool):
+            self.numa = llama_cpp.GGML_NUMA_STRATEGY_DISTRIBUTE if numa else llama_cpp.GGML_NUMA_STRATEGY_DISABLED
+
+        if self.numa != llama_cpp.GGML_NUMA_STRATEGY_DISABLED:
+            with suppress_stdout_stderr(disable=verbose):
+                llama_cpp.llama_numa_init(self.numa)
+
         self.model_path = model_path
 
         # Model Params
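For orientation, a minimal usage sketch of the widened constructor parameter, not part of the commit: the Llama class is the package-level export and the model path is a placeholder. With this change, numa can stay a plain bool (True maps to the distribute strategy, False disables NUMA) or be given as a ggml_numa_strategy integer per the new Union[bool, int] annotation.

from llama_cpp import Llama

# Legacy-style boolean: True -> GGML_NUMA_STRATEGY_DISTRIBUTE, False -> disabled.
llm = Llama(model_path="models/model.gguf", numa=True)  # model path is a placeholder

# Integer form: pass a ggml_numa_strategy value directly
# (e.g. 2 == GGML_NUMA_STRATEGY_ISOLATE), as the Union[bool, int] annotation suggests.
llm_isolate = Llama(model_path="models/model.gguf", numa=2)  # model path is a placeholder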


@@ -697,17 +697,45 @@ _lib.llama_model_quantize_default_params.restype = llama_model_quantize_params
 # // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
 # LLAMA_API void llama_backend_init(bool numa);
-def llama_backend_init(numa: Union[c_bool, bool]):
+# LLAMA_API void llama_backend_init(void);
+def llama_backend_init():
     """Initialize the llama + ggml backend
     If numa is true, use NUMA optimizations
     Call once at the start of the program"""
-    return _lib.llama_backend_init(numa)
+    return _lib.llama_backend_init()
 
 
-_lib.llama_backend_init.argtypes = [c_bool]
+_lib.llama_backend_init.argtypes = []
 _lib.llama_backend_init.restype = None
 
 
+# // numa strategies
+# enum ggml_numa_strategy {
+#     GGML_NUMA_STRATEGY_DISABLED = 0,
+#     GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
+#     GGML_NUMA_STRATEGY_ISOLATE = 2,
+#     GGML_NUMA_STRATEGY_NUMACTL = 3,
+#     GGML_NUMA_STRATEGY_MIRROR = 4,
+#     GGML_NUMA_STRATEGY_COUNT
+# };
+GGML_NUMA_STRATEGY_DISABLED = 0
+GGML_NUMA_STRATEGY_DISTRIBUTE = 1
+GGML_NUMA_STRATEGY_ISOLATE = 2
+GGML_NUMA_STRATEGY_NUMACTL = 3
+GGML_NUMA_STRATEGY_MIRROR = 4
+GGML_NUMA_STRATEGY_COUNT = 5
+
+
+# //optional:
+# LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);
+def llama_numa_init(numa: int):
+    return _lib.llama_numa_init(numa)
+
+
+_lib.llama_numa_init.argtypes = [c_int]
+_lib.llama_numa_init.restype = None
+
+
 # // Call once at the end of the program - currently only used for MPI
 # LLAMA_API void llama_backend_free(void);
 def llama_backend_free():
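For reference, a short sketch of the new low-level initialization order these bindings imply: llama_backend_init() is now argument-free, NUMA is enabled separately (and optionally) through llama_numa_init() with one of the strategy constants added above, and llama_backend_free() is unchanged. This is illustrative only and uses just the names defined in this module.

from llama_cpp import llama_cpp

# Initialize the backend once at program start (no numa flag anymore).
llama_cpp.llama_backend_init()

# Optionally pick a NUMA strategy; DISABLED (0) means this call can be skipped.
llama_cpp.llama_numa_init(llama_cpp.GGML_NUMA_STRATEGY_DISTRIBUTE)

# ... create contexts and run inference here ...

# Tear the backend down once at program exit.
llama_cpp.llama_backend_free()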


@@ -2,7 +2,7 @@ from __future__ import annotations
 import multiprocessing
 
-from typing import Optional, List, Literal
+from typing import Optional, List, Literal, Union
 
 from pydantic import Field
 from pydantic_settings import BaseSettings
 
@@ -108,7 +108,7 @@ class ModelSettings(BaseSettings):
         description="Path to a LoRA file to apply to the model.",
     )
     # Backend Params
-    numa: bool = Field(
+    numa: Union[bool, int] = Field(
         default=False,
         description="Enable NUMA support.",
     )
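And a brief sketch of the corresponding server-side setting. The import path (llama_cpp.server.settings), the required model field, and the model path are assumptions for illustration; only the numa field and its Union[bool, int] type come from the diff.

from llama_cpp.server.settings import ModelSettings  # assumed module path

# Boolean on/off, as before.
settings = ModelSettings(model="models/model.gguf", numa=True)  # model path is a placeholder

# Or an explicit ggml_numa_strategy integer, matching the Union[bool, int] field type.
settings = ModelSettings(model="models/model.gguf", numa=2)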

vendor/llama.cpp (vendored)

@@ -1 +1 @@
-Subproject commit 4524290e87b8e107cc2b56e1251751546f4b9051
+Subproject commit 5bf2b94dd4fb74378b78604023b31512fec55f8f