Update llama.cpp
parent e9bc4c4baf
commit c0fc0a1e82
2 changed files with 3 additions and 3 deletions
@@ -405,7 +405,7 @@ class llama_model_params(Structure):
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool mul_mat_q;  // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-# bool logits_all; // the llama_eval() call computes all logits, not just the last one
+# bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding;  // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
 # };
@@ -430,7 +430,7 @@ class llama_context_params(Structure):
     type_v (int): data type for V cache
     mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
     f16_kv (bool): use fp16 for KV cache, fp32 otherwise
-    logits_all (bool): the llama_eval() call computes all logits, not just the last one
+    logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
     embedding (bool): embedding mode only"""
     _fields_ = [
         ("seed", c_uint32),
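The deprecation note above points callers away from the context-wide logits_all flag and toward per-token logits requests on llama_batch. Below is a minimal sketch of that usage, assuming the low-level ctypes bindings shipped with this revision (llama_batch_init, llama_batch_free, and the token/pos/n_seq_id/seq_id/logits fields of llama_batch); the token ids and sizes are hypothetical, and llama_decode with a real context is left out.

    import llama_cpp

    n_tokens = 8
    # (n_tokens, embd, n_seq_max) per the llama.cpp C API bound here
    batch = llama_cpp.llama_batch_init(n_tokens, 0, 1)

    tokens = [1, 15043, 3186, 29991, 13, 13, 13, 13]  # hypothetical token ids
    for i in range(n_tokens):
        batch.token[i] = tokens[i]
        batch.pos[i] = i
        batch.n_seq_id[i] = 1
        batch.seq_id[i][0] = 0
        batch.logits[i] = 0       # skip logits for this position
    batch.logits[n_tokens - 1] = 1  # request logits only for the last position
    batch.n_tokens = n_tokens

    # ... llama_cpp.llama_decode(ctx, batch) would run here on a real context ...

    llama_cpp.llama_batch_free(batch)

Compared with logits_all=True, which computes logits for every position, flipping individual entries of batch.logits keeps the old "only the last token" behaviour while still allowing logits at arbitrary positions when needed.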
vendor/llama.cpp (vendored submodule, 2 lines changed)
@@ -1 +1 @@
-Subproject commit 8a7b2fa528f130631a5f43648481596ab320ed5a
+Subproject commit 948ff137ec37f1ec74c02905917fa0afc9b97514