feat: Update llama.cpp
parent 748c0ce057
commit 6225f027e5

2 changed files with 54 additions and 1 deletion
llama_cpp/llama_cpp.py:

@@ -664,6 +664,18 @@ class llama_timings(Structure):
    ]


# // used in chat template
# typedef struct llama_chat_message {
#     const char * role;
#     const char * content;
# } llama_chat_message;
class llama_chat_message(Structure):
    _fields_ = [
        ("role", c_char_p),
        ("content", c_char_p),
    ]
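Since llama_chat_message is a plain ctypes Structure, callers can build the chat array for the new template API directly. A minimal sketch, assuming the bindings above are in scope; the roles and contents here are illustrative only, not part of the commit:

# Build a contiguous ctypes array of llama_chat_message.
messages = [
    (b"system", b"You are a helpful assistant."),
    (b"user", b"Hello!"),
]
chat = (llama_chat_message * len(messages))(
    *(llama_chat_message(role=r, content=c) for r, c in messages)
)
n_msg = len(messages)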
|
||||||
|
|
||||||
|
|
||||||
# // Helpers for getting default parameters
|
# // Helpers for getting default parameters
|
||||||
# LLAMA_API struct llama_model_params llama_model_default_params(void);
|
# LLAMA_API struct llama_model_params llama_model_default_params(void);
|
||||||
def llama_model_default_params() -> llama_model_params:
|
def llama_model_default_params() -> llama_model_params:
|
||||||
@@ -1956,6 +1968,47 @@ _lib.llama_token_to_piece.argtypes = [llama_model_p, llama_token, c_char_p, c_int32]
_lib.llama_token_to_piece.restype = c_int32


# /// Apply chat template. Inspired by hf apply_chat_template() on python.
# /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
# /// NOTE: This function only supports some known jinja templates. It is not a jinja parser.
# /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model's default chat template will be used instead.
# /// @param chat Pointer to a list of multiple llama_chat_message
# /// @param n_msg Number of llama_chat_message in this chat
# /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
# /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
# /// @param length The size of the allocated buffer
# /// @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
# LLAMA_API int32_t llama_chat_apply_template(
#           const struct llama_model * model,
#                         const char * tmpl,
#    const struct llama_chat_message * chat,
#                             size_t   n_msg,
#                               bool   add_ass,
#                               char * buf,
#                            int32_t   length);
def llama_chat_apply_template(
    model: llama_model_p,
    tmpl: bytes,
    chat: "ctypes._Pointer[llama_chat_message]",
    n_msg: int,
    add_ass: bool,
    buf: bytes,
    length: int,
) -> int:
    return _lib.llama_chat_apply_template(
        model, tmpl, chat, n_msg, add_ass, buf, length
    )


_lib.llama_chat_apply_template.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.POINTER(llama_chat_message),
    ctypes.c_size_t,
    ctypes.c_bool,
    ctypes.c_char_p,
    ctypes.c_int32,
]
_lib.llama_chat_apply_template.restype = ctypes.c_int32
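Per the comments above, buf is sized at roughly twice the total message length, and the call is repeated with a larger buffer whenever the return value exceeds it. A hedged sketch of that pattern, assuming a loaded model handle plus the chat array and messages list from the earlier sketch; passing None for tmpl selects the model's built-in template:

import ctypes

# Recommended alloc size: 2 * (total number of characters of all messages).
alloc = 2 * sum(len(r) + len(c) for r, c in messages)
buf = ctypes.create_string_buffer(alloc)
# add_ass=True ends the prompt with the assistant-start token(s).
n = llama_chat_apply_template(model, None, chat, n_msg, True, buf, alloc)
if n > alloc:
    # Output did not fit: re-alloc to the returned size and re-apply.
    buf = ctypes.create_string_buffer(n)
    n = llama_chat_apply_template(model, None, chat, n_msg, True, buf, n)
prompt = buf.raw[:n].decode("utf-8")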

# //
# // Grammar
# //

vendor/llama.cpp (vendored):
@@ -1 +1 @@
-Subproject commit a0c2dad9d43456c677e205c6240a5f8afb0121ac
+Subproject commit f53119cec4f073b6d214195ecbe1fad3abdf2b34