From 3b5a4a77f3a191e368af3412e5de9b38b4f80771 Mon Sep 17 00:00:00 2001
From: royjhan <65097070+royjhan@users.noreply.github.com>
Date: Wed, 3 Jul 2024 13:46:23 -0700
Subject: [PATCH] Return Correct Prompt Eval Count Regardless of Cache Prompt
 (#5371)

* openai compatibility

* Revert "openai compatibility"

This reverts commit d3f98a811e00fc497d889c8c45b0cfec5b64690c.

* remove erroneous subtraction of prompt cache
---
 llm/ext_server/server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index 3bc01252..09970599 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1732,7 +1732,7 @@ struct llama_server_context
                 slot.n_past -= 1;
             }
 
-            slot.n_prompt_tokens_processed = slot.n_prompt_tokens - slot.n_past;
+            slot.n_prompt_tokens_processed = slot.n_prompt_tokens;
 
             if (slot.ga_n != 1) {
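
Note: the following is an illustrative, standalone sketch of what the one-line change above does. The Slot struct and the numbers are assumptions chosen for demonstration; they are not code from llm/ext_server/server.cpp. With prompt caching, slot.n_past counts the prompt tokens reused from the KV cache, so the old expression counted only the freshly evaluated tokens, under-reporting prompt_eval_count on a warm cache, while the new expression reports the full prompt size regardless of cache hits.

// Illustrative sketch only; field names and values are assumptions.
#include <cstdio>

struct Slot {
    int n_prompt_tokens;  // total tokens in the incoming prompt
    int n_past;           // prompt tokens already present in the KV cache
};

int main() {
    // Example: a 100-token prompt where 90 tokens are reused from the cache.
    Slot slot{/*n_prompt_tokens=*/100, /*n_past=*/90};

    // Old behaviour: only freshly evaluated tokens were counted,
    // so the reported count dropped to 10 whenever the cache was warm.
    int old_count = slot.n_prompt_tokens - slot.n_past;

    // New behaviour: report the full prompt size regardless of the cache.
    int new_count = slot.n_prompt_tokens;

    printf("old prompt_eval_count: %d, new prompt_eval_count: %d\n",
           old_count, new_count);
    return 0;
}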