Add bindings for LoRA adapters. Closes #88

2023-04-18 01:30:04 -04:00 · 2023-04-18 01:30:04 -04:00 · 35abf89552
commit 35abf89552
parent 3f68e95097
1 changed files with 19 additions and 1 deletions
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -114,7 +114,9 @@ LLAMA_FTYPE_ALL_F32 = ctypes.c_int(0)
 LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
+    4
+)  # tok_embeddings.weight and output.weight are F16

 # Functions

@@ -175,6 +177,22 @@ _lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int]
 _lib.llama_model_quantize.restype = c_int


+# Apply a LoRA adapter to a loaded model
+# path_base_model is the path to a higher quality model to use as a base for
+# the layers modified by the adapter. Can be NULL (None in Python) to use the currently loaded model.
+# The model needs to be reloaded before applying a new adapter; otherwise the adapter
+# will be applied on top of the previous one.
+# Returns 0 on success
+def llama_apply_lora_from_file(
+    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
+) -> c_int:
+    return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)
+
+
+_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
+_lib.llama_apply_lora_from_file.restype = c_int
+
+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):