Apply 01-cache.diff

This commit is contained in:
Daniel Hiltgen 2024-03-19 09:49:24 +01:00
parent 58d95cc9bd
commit 0a0e9f3e0f

View file

@@ -1007,13 +1007,15 @@ struct llama_server_context
slot.n_sent_text += result.text_to_send.size();
// add the token to slot queue and cache
}
slot.add_token_string(result);
if (slot.params.stream)
{
send_partial_response(slot, result);
}
}
slot.add_token_string(result);
if (incomplete)
{
slot.has_next_token = true;