Update llama.cpp
This commit is contained in:
parent
b8fc1c7d83
commit
89cce50f8c
2 changed files with 14 additions and 1 deletions
|
@@ -91,6 +91,12 @@ c_float_p = POINTER(c_float)
|
||||||
c_uint8_p = POINTER(c_uint8)
|
c_uint8_p = POINTER(c_uint8)
|
||||||
c_size_t_p = POINTER(c_size_t)
|
c_size_t_p = POINTER(c_size_t)
|
||||||
|
|
||||||
|
# from ggml-backend.h
|
||||||
|
# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||||
|
ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(
|
||||||
|
c_bool, c_void_p, c_bool, c_void_p
|
||||||
|
)
|
||||||
|
|
||||||
# llama.h bindings
|
# llama.h bindings
|
||||||
|
|
||||||
_lib.llama_max_devices.argtypes = []
|
_lib.llama_max_devices.argtypes = []
|
||||||
|
@@ -448,6 +454,9 @@ class llama_model_params(Structure):
|
||||||
# float yarn_beta_slow; // YaRN high correction dim
|
# float yarn_beta_slow; // YaRN high correction dim
|
||||||
# uint32_t yarn_orig_ctx; // YaRN original context size
|
# uint32_t yarn_orig_ctx; // YaRN original context size
|
||||||
|
|
||||||
|
# ggml_backend_sched_eval_callback cb_eval;
|
||||||
|
# void * cb_eval_user_data;
|
||||||
|
|
||||||
# enum ggml_type type_k; // data type for K cache
|
# enum ggml_type type_k; // data type for K cache
|
||||||
# enum ggml_type type_v; // data type for V cache
|
# enum ggml_type type_v; // data type for V cache
|
||||||
|
|
||||||
|
@@ -475,6 +484,8 @@ class llama_context_params(Structure):
|
||||||
yarn_beta_fast (float): YaRN low correction dim
|
yarn_beta_fast (float): YaRN low correction dim
|
||||||
yarn_beta_slow (float): YaRN high correction dim
|
yarn_beta_slow (float): YaRN high correction dim
|
||||||
yarn_orig_ctx (int): YaRN original context size
|
yarn_orig_ctx (int): YaRN original context size
|
||||||
|
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
|
||||||
|
cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
|
||||||
type_k (int): data type for K cache
|
type_k (int): data type for K cache
|
||||||
type_v (int): data type for V cache
|
type_v (int): data type for V cache
|
||||||
mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
||||||
|
@@ -497,6 +508,8 @@ class llama_context_params(Structure):
|
||||||
("yarn_beta_fast", c_float),
|
("yarn_beta_fast", c_float),
|
||||||
("yarn_beta_slow", c_float),
|
("yarn_beta_slow", c_float),
|
||||||
("yarn_orig_ctx", c_uint32),
|
("yarn_orig_ctx", c_uint32),
|
||||||
|
("cb_eval", ggml_backend_sched_eval_callback),
|
||||||
|
("cb_eval_user_data", c_void_p),
|
||||||
("type_k", c_int),
|
("type_k", c_int),
|
||||||
("type_v", c_int),
|
("type_v", c_int),
|
||||||
("mul_mat_q", c_bool),
|
("mul_mat_q", c_bool),
|
||||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@@ -1 +1 @@
|
||||||
Subproject commit 4f4bf35f46600441dec2f941e667291eeb9a18d8
|
Subproject commit 2d5419d08ab1131623e6a1d554607b7663435e87
|
Loading…
Reference in a new issue