feat: Update llama.cpp

parent 5a595f035a
commit 087cc0b036

2 changed files with 17 additions and 1 deletion
llama_cpp/llama_cpp.py
@@ -2265,6 +2265,22 @@ def llama_set_n_threads(
     ...
 
 
+# // Get the number of threads used for generation of a single token.
+# LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+@ctypes_function("llama_n_threads", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for generation of a single token"""
+    ...
+
+
+# // Get the number of threads used for prompt and batch processing (multiple token).
+# LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+@ctypes_function("llama_n_threads_batch", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for prompt and batch processing (multiple token)"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
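
As a quick smoke test of the two new getters, a minimal sketch along these lines should work against the low-level API (the model path and thread counts below are illustrative assumptions, not part of this commit):

import llama_cpp

llama_cpp.llama_backend_init()

# Load any local GGUF model; the path here is a placeholder.
model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", model_params)

# Configure distinct thread counts so the getters are easy to verify.
ctx_params = llama_cpp.llama_context_default_params()
ctx_params.n_threads = 4        # threads for single-token generation
ctx_params.n_threads_batch = 8  # threads for prompt/batch processing
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

print(llama_cpp.llama_n_threads(ctx))        # expected: 4
print(llama_cpp.llama_n_threads_batch(ctx))  # expected: 8

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()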
vendor/llama.cpp (vendored)
@@ -1 +1 @@
-Subproject commit 201cc11afa0a1950e1f632390b2ac6c937a0d8f0
+Subproject commit 0df0aa8e43c3378975269a51f9b876c8692e70da
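
For context on the binding style used above: @ctypes_function is llama-cpp-python's helper for wiring a Python stub to a symbol exported by the compiled llama.cpp library. A rough, simplified stand-in for the pattern (an illustration, not the library's actual implementation; the library path is a placeholder assumption) looks like:

import ctypes

# Load the shared library built from vendor/llama.cpp (path is a placeholder).
libllama = ctypes.CDLL("./libllama.so")

def ctypes_function(name, argtypes, restype):
    """Bind the named C symbol, declare its signature, and return the
    ctypes function object in place of the decorated Python stub."""
    def decorator(stub):
        fn = getattr(libllama, name)
        fn.argtypes = argtypes
        fn.restype = restype
        fn.__doc__ = stub.__doc__  # keep the stub's docstring for help()
        return fn
    return decorator

The stub bodies in the diff are just `...` because the decorator returns the bound C function in the stub's place; the docstring and type annotations remain for documentation and static typing.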