Add bindings for LoRA adapters. Closes #88

This commit is contained in:
Andrei Betlen 2023-04-18 01:30:04 -04:00
parent 3f68e95097
commit 35abf89552

View file

@ -114,7 +114,9 @@ LLAMA_FTYPE_ALL_F32 = ctypes.c_int(0)
LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1) # except 1d tensors LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1) # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2) # except 1d tensors LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2) # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3) # except 1d tensors LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3) # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4) # tok_embeddings.weight and output.weight are F16 LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
4
) # tok_embeddings.weight and output.weight are F16
# Functions # Functions
@ -175,6 +177,22 @@ _lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int]
_lib.llama_model_quantize.restype = c_int _lib.llama_model_quantize.restype = c_int
# Applies a LoRA adapter to a model that has already been loaded.
# path_base_model optionally names a higher quality model to use as the base
# for the layers the adapter modifies; NULL (None from Python) means the
# currently loaded model is used instead.
# Adapters stack: reload the model before applying a different adapter,
# otherwise the new adapter is applied on top of the previous one.
# Returns 0 on success.
def llama_apply_lora_from_file(
    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
) -> c_int:
    status = _lib.llama_apply_lora_from_file(
        ctx, path_lora, path_base_model, n_threads
    )
    return status
# Declare the foreign function's signature for ctypes:
# (context, LoRA adapter path, base model path, thread count) -> int status.
_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
_lib.llama_apply_lora_from_file.restype = c_int
# Returns the KV cache that will contain the context for the # Returns the KV cache that will contain the context for the
# ongoing prediction with the model. # ongoing prediction with the model.
def llama_get_kv_cache(ctx: llama_context_p): def llama_get_kv_cache(ctx: llama_context_p):