Update llama.cpp

parent 68fb71b6a2
commit f7cdf78788

2 changed files with 19 additions and 1 deletion
18 llama_cpp/llama_cpp.py

@@ -470,6 +470,7 @@ class llama_model_params(Structure):
 # bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding; // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
+# bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
 # };
 class llama_context_params(Structure):
     """Parameters for llama_context
@@ -496,6 +497,7 @@ class llama_context_params(Structure):
         logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embedding (bool): embedding mode only
         offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
+        do_pooling (bool): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
     """

     _fields_ = [
@@ -520,6 +522,7 @@ class llama_context_params(Structure):
         ("logits_all", c_bool),
         ("embedding", c_bool),
         ("offload_kqv", c_bool),
+        ("do_pooling", c_bool),
     ]

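Taken together, the three hunks above thread a new do_pooling flag through llama_context_params. A minimal sketch of how a caller would set it through these bindings, assuming the llama_context_default_params helper already present in this module (the rest is illustrative, not part of this diff):

    import llama_cpp

    # Enable pooled (summed-by-sequence-id) embeddings in the context params.
    params = llama_cpp.llama_context_default_params()
    params.embedding = True    # embedding mode only
    params.do_pooling = True   # new field added in this commit
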
@@ -1699,6 +1702,21 @@ _lib.llama_get_embeddings.argtypes = [llama_context_p]
 _lib.llama_get_embeddings.restype = c_float_p


+# // Get the embeddings for the ith sequence
+# // llama_get_embeddings(ctx) + i*n_embd
+# LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
+def llama_get_embeddings_ith(
+    ctx: llama_context_p, i: Union[c_int32, int]
+):  # type: (...) -> Array[float] # type: ignore
+    """Get the embeddings for the ith sequence
+    llama_get_embeddings(ctx) + i*n_embd"""
+    return _lib.llama_get_embeddings_ith(ctx, i)
+
+
+_lib.llama_get_embeddings_ith.argtypes = [llama_context_p, c_int32]
+_lib.llama_get_embeddings_ith.restype = c_float_p
+
+
 # //
 # // Vocab
 # //

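A minimal usage sketch for the new llama_get_embeddings_ith binding. It assumes ctx is an embedding-mode llama_context_p on which llama_decode has already run, and that n_seqs sequences were submitted; both are assumptions for illustration, not part of this diff. llama_get_model and llama_n_embd are existing bindings in this module.

    import llama_cpp

    # Hypothetical: ctx and n_seqs come from earlier model setup and decoding.
    n_embd = llama_cpp.llama_n_embd(llama_cpp.llama_get_model(ctx))
    for i in range(n_seqs):
        # Pointer into the context's embedding buffer:
        # llama_get_embeddings(ctx) + i*n_embd
        ptr = llama_cpp.llama_get_embeddings_ith(ctx, i)
        embedding = [ptr[j] for j in range(n_embd)]  # copy out the i-th sequence embedding
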
2 vendor/llama.cpp vendored

@@ -1 +1 @@
-Subproject commit 895407f31b358e3d9335e847d13f033491ec8a5b
+Subproject commit ea9c8e11436ad50719987fa23a289c74b7b40d40