diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8fe5e0b1..3e82acb9 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -997,13 +997,15 @@ struct llama_server_context slot.n_sent_text += result.text_to_send.size(); // add the token to slot queue and cache } - slot.add_token_string(result); + if (slot.params.stream) { send_partial_response(slot, result); } } + slot.add_token_string(result); + if (incomplete) { slot.has_next_token = true;