Apply 01-cache.diff

This commit is contained in:
Daniel Hiltgen 2024-03-19 09:49:24 +01:00
parent 58d95cc9bd
commit 0a0e9f3e0f

View file

@@ -1007,13 +1007,15 @@ struct llama_server_context
slot.n_sent_text += result.text_to_send.size(); slot.n_sent_text += result.text_to_send.size();
// add the token to slot queue and cache // add the token to slot queue and cache
} }
slot.add_token_string(result);
if (slot.params.stream) if (slot.params.stream)
{ {
send_partial_response(slot, result); send_partial_response(slot, result);
} }
} }
slot.add_token_string(result);
if (incomplete) if (incomplete)
{ {
slot.has_next_token = true; slot.has_next_token = true;