From c0fc0a1e824a84d8e3af99d6fc5ed5299673c848 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 13 Dec 2023 21:43:16 -0500
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 4 ++--
 vendor/llama.cpp       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 6c442d0..1911b26 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -405,7 +405,7 @@ class llama_model_params(Structure):
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-# bool logits_all;  // the llama_eval() call computes all logits, not just the last one
+# bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding;   // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
 # };
@@ -430,7 +430,7 @@ class llama_context_params(Structure):
         type_v (int): data type for V cache
         mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
         f16_kv (bool): use fp16 for KV cache, fp32 otherwise
-        logits_all (bool): the llama_eval() call computes all logits, not just the last one
+        logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embedding (bool): embedding mode only"""
     _fields_ = [
         ("seed", c_uint32),
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 8a7b2fa..948ff13 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 8a7b2fa528f130631a5f43648481596ab320ed5a
+Subproject commit 948ff137ec37f1ec74c02905917fa0afc9b97514
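The substantive change here is the doc-comment update: `logits_all` in `llama_context_params` is now marked deprecated in favor of setting `llama_batch.logits` per position. Below is a minimal sketch (not part of the patch) of what that replacement pattern looks like through the low-level `llama_cpp` bindings. It assumes an already-initialized context `ctx` and a list of token ids `tokens` obtained elsewhere; the function name `decode_last_logits_only` is illustrative, and exact signatures of the wrapped C functions can vary between versions.

```python
import llama_cpp

def decode_last_logits_only(ctx, tokens):
    """Hypothetical helper: decode `tokens`, requesting logits only for the
    final position via llama_batch.logits instead of logits_all=True."""
    n = len(tokens)
    # n_tokens=n, embd=0 (token ids, not embeddings), n_seq_max=1
    batch = llama_cpp.llama_batch_init(n, 0, 1)
    batch.n_tokens = n
    for i, tok in enumerate(tokens):
        batch.token[i] = tok
        batch.pos[i] = i
        batch.n_seq_id[i] = 1
        batch.seq_id[i][0] = 0
        batch.logits[i] = False  # skip logits for this position
    batch.logits[n - 1] = True   # compute logits only for the last token
    if llama_cpp.llama_decode(ctx, batch) != 0:
        llama_cpp.llama_batch_free(batch)
        raise RuntimeError("llama_decode failed")
    # Pointer to n_vocab floats for position n - 1; owned by the context.
    logits = llama_cpp.llama_get_logits_ith(ctx, n - 1)
    llama_cpp.llama_batch_free(batch)
    return logits
```

The design point mirrors the deprecation note: where `logits_all=True` forced the output head to run for every position in the batch, the per-token `logits` flags let the caller pay for only the positions actually read back, which is the common case when sampling the next token.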