feat: Update llama.cpp
This commit is contained in:
parent
266abfc1a3
commit
7f52335c50
2 changed files with 5 additions and 1 deletions
|
@@ -811,6 +811,7 @@ It might not exist for progress report where '.' is output repeatedly."""
|
||||||
# bool quantize_output_tensor; // quantize output.weight
|
# bool quantize_output_tensor; // quantize output.weight
|
||||||
# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||||
# bool pure; // quantize all tensors to the default type
|
# bool pure; // quantize all tensors to the default type
|
||||||
|
# bool keep_split; // quantize to the same number of shards
|
||||||
# void * imatrix; // pointer to importance matrix data
|
# void * imatrix; // pointer to importance matrix data
|
||||||
# void * kv_overrides; // pointer to vector containing overrides
|
# void * kv_overrides; // pointer to vector containing overrides
|
||||||
# } llama_model_quantize_params;
|
# } llama_model_quantize_params;
|
||||||
|
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
|
||||||
quantize_output_tensor (bool): quantize output.weight
|
quantize_output_tensor (bool): quantize output.weight
|
||||||
only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||||
pure (bool): quantize all tensors to the default type
|
pure (bool): quantize all tensors to the default type
|
||||||
|
keep_split (bool): quantize to the same number of shards
|
||||||
imatrix (ctypes.c_void_p): pointer to importance matrix data
|
imatrix (ctypes.c_void_p): pointer to importance matrix data
|
||||||
kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
|
kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
|
||||||
"""
|
"""
|
||||||
|
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
|
||||||
quantize_output_tensor: bool
|
quantize_output_tensor: bool
|
||||||
only_copy: bool
|
only_copy: bool
|
||||||
pure: bool
|
pure: bool
|
||||||
|
keep_split: bool
|
||||||
imatrix: ctypes.c_void_p
|
imatrix: ctypes.c_void_p
|
||||||
kv_overrides: ctypes.c_void_p
|
kv_overrides: ctypes.c_void_p
|
||||||
|
|
||||||
|
@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
|
||||||
("quantize_output_tensor", ctypes.c_bool),
|
("quantize_output_tensor", ctypes.c_bool),
|
||||||
("only_copy", ctypes.c_bool),
|
("only_copy", ctypes.c_bool),
|
||||||
("pure", ctypes.c_bool),
|
("pure", ctypes.c_bool),
|
||||||
|
("keep_split", ctypes.c_bool),
|
||||||
("imatrix", ctypes.c_void_p),
|
("imatrix", ctypes.c_void_p),
|
||||||
("kv_overrides", ctypes.c_void_p),
|
("kv_overrides", ctypes.c_void_p),
|
||||||
]
|
]
|
||||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@@ -1 +1 @@
|
||||||
Subproject commit 784e11dea1f5ce9638851b2b0dddb107e2a609c8
|
Subproject commit 46e12c4692a37bdd31a0432fc5153d7d22bc7f72
|
Loading…
Reference in a new issue