feat: Update llama_cpp.py bindings
parent 35c980eb2e
commit 04959f1884
1 changed file with 12 additions and 1 deletion
@@ -468,11 +468,13 @@ LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN
 # LLAMA_POOLING_TYPE_NONE = 0,
 # LLAMA_POOLING_TYPE_MEAN = 1,
 # LLAMA_POOLING_TYPE_CLS = 2,
+# LLAMA_POOLING_TYPE_LAST = 3,
 # };
 LLAMA_POOLING_TYPE_UNSPECIFIED = -1
 LLAMA_POOLING_TYPE_NONE = 0
 LLAMA_POOLING_TYPE_MEAN = 1
 LLAMA_POOLING_TYPE_CLS = 2
+LLAMA_POOLING_TYPE_LAST = 3

 # enum llama_split_mode {
 # LLAMA_SPLIT_MODE_NONE = 0, // single GPU
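Not part of the diff: a minimal sketch of how the new LLAMA_POOLING_TYPE_LAST constant might be passed through the low-level API, assuming the existing llama_context_default_params() helper and the pooling_type field of llama_context_params referenced in the next hunk.

import llama_cpp

# Hedged example only, not from this commit: request last-token pooling
# on the context parameters before creating a context.
ctx_params = llama_cpp.llama_context_default_params()
ctx_params.pooling_type = llama_cpp.LLAMA_POOLING_TYPE_LAST  # pool embeddings on the last token
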
@@ -761,7 +763,6 @@ class llama_model_params(ctypes.Structure):

 # enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 # enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
-# // (ignored if no pooling layer)

 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -2316,6 +2317,16 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...


+# // Set whether the model is in embeddings model or not
+# // If true, embeddings will be returned but logits will not
+# LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
+@ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
+def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
+    """Set whether the model is in embeddings model or not
+    If true, embeddings will be returned but logits will not"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
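Not part of the diff: a minimal usage sketch of the new binding, assuming ctx is a llama_context_p already created with llama_new_context_with_model() and that batches are decoded elsewhere.

import llama_cpp

# Hypothetical context handle; creating one requires a loaded model:
# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

llama_cpp.llama_set_embeddings(ctx, True)   # subsequent decodes return embeddings, not logits
# ... call llama_cpp.llama_decode(ctx, batch) and read the embeddings ...
llama_cpp.llama_set_embeddings(ctx, False)  # restore logits output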