feat: Update llama_cpp.py bindings
parent 35c980eb2e
commit 04959f1884

1 changed file with 12 additions and 1 deletion

@@ -468,11 +468,13 @@ LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN
 # LLAMA_POOLING_TYPE_NONE = 0,
 # LLAMA_POOLING_TYPE_MEAN = 1,
 # LLAMA_POOLING_TYPE_CLS = 2,
+# LLAMA_POOLING_TYPE_LAST = 3,
 # };
 LLAMA_POOLING_TYPE_UNSPECIFIED = -1
 LLAMA_POOLING_TYPE_NONE = 0
 LLAMA_POOLING_TYPE_MEAN = 1
 LLAMA_POOLING_TYPE_CLS = 2
+LLAMA_POOLING_TYPE_LAST = 3
 
 # enum llama_split_mode {
 # LLAMA_SPLIT_MODE_NONE = 0, // single GPU

@@ -761,7 +763,6 @@ class llama_model_params(ctypes.Structure):
 
 # enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 # enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
-#                                       // (ignored if no pooling layer)
 
 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency, 0 = from model
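
The `LLAMA_POOLING_TYPE_LAST` constant added above pairs with the `pooling_type` field of `llama_context_params` referenced in this hunk. A minimal sketch (not part of this commit) of how the new value might be set through the existing low-level helpers:

```python
import llama_cpp

# llama_context_default_params() and the pooling_type / embeddings fields
# already exist in these bindings; only LLAMA_POOLING_TYPE_LAST is new here.
ctx_params = llama_cpp.llama_context_default_params()
ctx_params.embeddings = True                                 # return embeddings
ctx_params.pooling_type = llama_cpp.LLAMA_POOLING_TYPE_LAST  # pool each sequence by its last token
```
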
@@ -2316,6 +2317,16 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...
 
 
+# // Set whether the model is in embeddings mode or not
+# // If true, embeddings will be returned but logits will not
+# LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
+@ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
+def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
+    """Set whether the model is in embeddings mode or not
+    If true, embeddings will be returned but logits will not"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
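
A hedged usage sketch for the new `llama_set_embeddings` binding (not part of this commit); the model path is a placeholder and the surrounding calls are the existing low-level helpers in these bindings:

```python
import llama_cpp

llama_cpp.llama_backend_init()
model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"model.gguf", model_params)  # placeholder path
ctx = llama_cpp.llama_new_context_with_model(model, llama_cpp.llama_context_default_params())

llama_cpp.llama_set_embeddings(ctx, True)   # embeddings will be returned, logits will not
# ... call llama_decode() and read the embeddings here ...
llama_cpp.llama_set_embeddings(ctx, False)  # switch back to producing logits

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```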