Return Correct Prompt Eval Count Regardless of Cache Prompt (#5371)
* openai compatibility

* Revert "openai compatibility"

This reverts commit d3f98a811e00fc497d889c8c45b0cfec5b64690c.

* remove erroneous subtraction of prompt cache
parent daed0634a9
commit 3b5a4a77f3

1 changed file with 1 addition and 1 deletion
llm/ext_server/server.cpp (vendored)
@@ -1732,7 +1732,7 @@ struct llama_server_context
                        slot.n_past -= 1;
                    }

-                   slot.n_prompt_tokens_processed = slot.n_prompt_tokens - slot.n_past;
+                   slot.n_prompt_tokens_processed = slot.n_prompt_tokens;

                    if (slot.ga_n != 1)
                    {
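For context, the one-line change affects the prompt eval count the server reports when the prompt cache reuses a prefix: subtracting slot.n_past (the tokens taken from the cache) meant a mostly cached prompt reported only the uncached suffix rather than the full prompt size. The sketch below is illustrative only, assuming a simplified stand-in struct; slot_t and the token counts are hypothetical, not the server's actual types.

// Minimal sketch (hypothetical struct and values) of the before/after behaviour.
#include <cstdio>

struct slot_t {
    int n_prompt_tokens = 0;           // tokens in the incoming prompt
    int n_past          = 0;           // tokens reused from the prompt cache
    int n_prompt_tokens_processed = 0; // value surfaced as the prompt eval count
};

int main() {
    slot_t slot;
    slot.n_prompt_tokens = 512; // full prompt length
    slot.n_past          = 500; // prefix already in the cache

    // Old behaviour: only the uncached suffix was counted.
    slot.n_prompt_tokens_processed = slot.n_prompt_tokens - slot.n_past;
    std::printf("before fix: prompt_eval_count = %d\n", slot.n_prompt_tokens_processed); // 12

    // New behaviour: report the full prompt size regardless of the cache.
    slot.n_prompt_tokens_processed = slot.n_prompt_tokens;
    std::printf("after fix:  prompt_eval_count = %d\n", slot.n_prompt_tokens_processed); // 512

    return 0;
}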