feat: Update llama.cpp

This commit is contained in:
Andrei Betlen 2024-04-25 21:21:29 -04:00
parent 266abfc1a3
commit 7f52335c50
2 changed files with 5 additions and 1 deletion

View file

@@ -811,6 +811,7 @@ It might not exist for progress report where '.' is output repeatedly."""
# bool quantize_output_tensor; // quantize output.weight # bool quantize_output_tensor; // quantize output.weight
# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
# bool pure; // quantize all tensors to the default type # bool pure; // quantize all tensors to the default type
# bool keep_split; // quantize to the same number of shards
# void * imatrix; // pointer to importance matrix data # void * imatrix; // pointer to importance matrix data
# void * kv_overrides; // pointer to vector containing overrides # void * kv_overrides; // pointer to vector containing overrides
# } llama_model_quantize_params; # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
quantize_output_tensor (bool): quantize output.weight quantize_output_tensor (bool): quantize output.weight
only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
pure (bool): quantize all tensors to the default type pure (bool): quantize all tensors to the default type
keep_split (bool): quantize to the same number of shards
imatrix (ctypes.c_void_p): pointer to importance matrix data imatrix (ctypes.c_void_p): pointer to importance matrix data
kv_overrides (ctypes.c_void_p): pointer to vector containing overrides kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
""" """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
quantize_output_tensor: bool quantize_output_tensor: bool
only_copy: bool only_copy: bool
pure: bool pure: bool
keep_split: bool
imatrix: ctypes.c_void_p imatrix: ctypes.c_void_p
kv_overrides: ctypes.c_void_p kv_overrides: ctypes.c_void_p
@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
("quantize_output_tensor", ctypes.c_bool), ("quantize_output_tensor", ctypes.c_bool),
("only_copy", ctypes.c_bool), ("only_copy", ctypes.c_bool),
("pure", ctypes.c_bool), ("pure", ctypes.c_bool),
("keep_split", ctypes.c_bool),
("imatrix", ctypes.c_void_p), ("imatrix", ctypes.c_void_p),
("kv_overrides", ctypes.c_void_p), ("kv_overrides", ctypes.c_void_p),
] ]

2
vendor/llama.cpp vendored

@@ -1 +1 @@
-Subproject commit 784e11dea1f5ce9638851b2b0dddb107e2a609c8
+Subproject commit 46e12c4692a37bdd31a0432fc5153d7d22bc7f72