From cb791716b42eb897acf66b8b78c4a67b6e026a74 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Mon, 12 Feb 2024 16:19:05 -0500
Subject: [PATCH] fix: Always set logits_all = True when using speculative
 decoding

---
 llama_cpp/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 3efd95d..4869a9d 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -281,7 +281,7 @@ class Llama:
         )
         self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
         self.context_params.mul_mat_q = mul_mat_q
-        self.context_params.logits_all = logits_all
+        self.context_params.logits_all = logits_all if draft_model is None else True  # Must be set to True for speculative decoding
         self.context_params.embedding = embedding
         self.context_params.offload_kqv = offload_kqv
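
Note: a minimal sketch of how the patched behavior is exercised, assuming the library's
LlamaPromptLookupDecoding draft model and a hypothetical local model path; logits_all is
left at its default and is forced to True internally because draft_model is not None.

    # Sketch only: model path is hypothetical, adjust to a real GGUF file.
    from llama_cpp import Llama
    from llama_cpp.llama_speculative import LlamaPromptLookupDecoding

    llm = Llama(
        model_path="models/llama-2-7b.Q4_K_M.gguf",               # hypothetical path
        draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10),
        # logits_all is now set to True by the constructor whenever draft_model is given
    )

    out = llm("Q: Name the planets in the solar system. A: ", max_tokens=64)
    print(out["choices"][0]["text"])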