Update llama.cpp

parent 058b134ab6
commit b994296c75

2 changed files with 17 additions and 13 deletions
llama_cpp/llama_cpp.py

@@ -5,6 +5,8 @@ from ctypes import (
     c_int,
     c_float,
     c_char_p,
+    c_int32,
+    c_uint32,
     c_void_p,
     c_bool,
     POINTER,
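
Aside, not part of the diff itself: c_int mirrors the platform compiler's int, while c_int32 and c_uint32 are exact-width types, which is what the upstream header's move to int32_t/uint32_t calls for. A minimal illustration:

    import ctypes

    # c_int tracks the platform's C int; c_int32/c_uint32 are exactly 32 bits.
    print(ctypes.sizeof(ctypes.c_int))     # usually 4, but platform-defined
    print(ctypes.sizeof(ctypes.c_int32))   # always 4
    print(ctypes.sizeof(ctypes.c_uint32))  # always 4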
@@ -105,6 +107,9 @@ LLAMA_FILE_MAGIC_UNVERSIONED = LLAMA_FILE_MAGIC_GGML
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_VERSION = c_int(1)

+# #define LLAMA_DEFAULT_SEED 0xFFFFFFFF
+LLAMA_DEFAULT_SEED = c_int(0xFFFFFFFF)
+
 # struct llama_model;
 llama_model_p = c_void_p

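An observation on the new constant: ctypes integer types truncate silently, so wrapping 0xFFFFFFFF in a signed c_int stores -1 on the usual platforms where int is 32 bits, which lines up with the header's "-1 for random" convention. A quick check:

    import ctypes

    # 0xFFFFFFFF does not fit in a signed 32-bit int, so c_int wraps it to -1;
    # a c_uint32 keeps the value unchanged.
    print(ctypes.c_int(0xFFFFFFFF).value)     # -1 where int is 32-bit
    print(ctypes.c_uint32(0xFFFFFFFF).value)  # 4294967295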
@@ -153,18 +158,17 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)


 # struct llama_context_params {
-# int seed; // RNG seed, -1 for random
-# int n_ctx; // text context
-# int n_batch; // prompt processing batch size
-# int n_gpu_layers; // number of layers to store in VRAM
-# int main_gpu; // the GPU that is used for scratch and small tensors
+# uint32_t seed; // RNG seed, -1 for random
+# int32_t n_ctx; // text context
+# int32_t n_batch; // prompt processing batch size
+# int32_t n_gpu_layers; // number of layers to store in VRAM
+# int32_t main_gpu; // the GPU that is used for scratch and small tensors
 # float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
 # // called with a progress value between 0 and 1, pass NULL to disable
 # llama_progress_callback progress_callback;
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;

-
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
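Why the comment (and the fields below) moved to exact-width types: a ctypes Structure is laid out field by field, so on any platform where int is not 32 bits every subsequent field's offset would shift and break the copy-by-value ABI the C header warns about. A sketch using a hypothetical two-field struct, not the real one:

    import ctypes

    class ParamsSketch(ctypes.Structure):
        # Exact-width fields: offsets are identical on every platform.
        _fields_ = [("seed", ctypes.c_uint32), ("n_ctx", ctypes.c_int32)]

    print(ParamsSketch.n_ctx.offset)    # 4, regardless of the platform's int
    print(ctypes.sizeof(ParamsSketch))  # 8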
@@ -176,11 +180,11 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 # };
 class llama_context_params(Structure):
     _fields_ = [
-        ("seed", c_int),
-        ("n_ctx", c_int),
-        ("n_batch", c_int),
-        ("n_gpu_layers", c_int),
-        ("main_gpu", c_int),
+        ("seed", c_uint32),
+        ("n_ctx", c_int32),
+        ("n_batch", c_int32),
+        ("n_gpu_layers", c_int32),
+        ("main_gpu", c_int32),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", c_void_p),
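A hypothetical usage sketch for the updated struct, assuming the module is importable as llama_cpp (the import path is an assumption, not something shown in this diff):

    import llama_cpp  # assumption: package name for this module

    params = llama_cpp.llama_context_params()
    params.seed = 0xFFFFFFFF  # uint32 field now: stays 4294967295, no wrap to -1
    params.n_ctx = 2048       # int32 field
    print(params.seed, params.n_ctx)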
@@ -453,7 +457,7 @@ _lib.llama_get_kv_cache_token_count.restype = c_int


 # Sets the current rng seed.
 # LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
-def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
+def llama_set_rng_seed(ctx: llama_context_p, seed: c_uint32):
     return _lib.llama_set_rng_seed(ctx, seed)

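One caveat, hedged because the relevant lines sit outside this hunk: the Python annotation on seed is documentation only; what ctypes actually enforces is the argtypes list set on the foreign function. Presumably the binding pairs the new annotation with something like the following, where _lib is the loaded shared library:

    # Hypothetical wiring, in the style this module uses for other functions:
    _lib.llama_set_rng_seed.argtypes = [llama_context_p, c_uint32]
    _lib.llama_set_rng_seed.restype = None

    # Callers can then pass a plain Python int or a c_uint32:
    # llama_set_rng_seed(ctx, 0xFFFFFFFF)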
vendor/llama.cpp (vendored)

@@ -1 +1 @@
-Subproject commit 96a712ca1b7f427e3bd7ffc0c70b2105cfc7fbf1
+Subproject commit 7f0e9a775ecc4c6ade271c217f63d6dc93e79eaa