Merge pull request #6767 from ollama/jessegross/bug_6707

runner: Flush pending responses before returning
Jesse Gross 2024-09-11 17:20:22 -07:00 committed by GitHub
commit c354e87809

@@ -913,7 +913,9 @@ struct llama_server_context
         slot.sampled = result.tok;
 
         // search stop word and delete it
-        slot.generated_text += token_str;
+        if (!llama_token_is_eog(model, result.tok)) {
+            slot.generated_text += token_str;
+        }
         slot.has_next_token = true;
 
         if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
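This hunk makes the append conditional: the sampled token's text is only added to the slot's buffer when it is not an end-of-generation token, so an end-of-generation token no longer contributes its printable form to generated_text. A minimal sketch of that guard, with a plain bool standing in for the llama_token_is_eog(model, result.tok) check and a trimmed-down slot type (both illustrative, not the server's actual definitions):

// Sketch only: Slot holds just the fields visible in the diff, and is_eog
// stands in for llama_token_is_eog(model, result.tok).
#include <string>

struct Slot {
    std::string generated_text;      // decoded text buffered so far
    bool        has_next_token = false;
};

void buffer_token(Slot & slot, const std::string & token_str, bool is_eog) {
    // Only non-terminal tokens contribute text; an end-of-generation token's
    // own string is never added to the response buffer.
    if (!is_eog) {
        slot.generated_text += token_str;
    }
    slot.has_next_token = true;
}

int main() {
    Slot slot;
    buffer_token(slot, "Hello", false);
    buffer_token(slot, "", true); // end of generation: buffer left untouched
}
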
@@ -954,6 +956,8 @@ struct llama_server_context
         if (!incomplete)
         {
             size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
+
+            if (!llama_token_is_eog(model, result.tok)) {
             const std::string str_test = slot.generated_text.substr(pos);
             bool is_stop_full = false;
             size_t stop_pos = find_stopping_strings(str_test, token_str.size(), STOP_FULL, slot);
@@ -979,6 +983,10 @@ struct llama_server_context
                 slot.n_sent_text += result.text_to_send.size();
                 // add the token to slot queue and cache
             }
+            } else {
+                result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
+                slot.n_sent_text += result.text_to_send.size();
+            }
 
             if (slot.params.stream)
             {
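These two hunks route end-of-generation tokens around the stop-string search: for a normal token the existing search still decides how much of generated_text can be streamed, while for an end-of-generation token the search is skipped and everything buffered past n_sent_text is copied into text_to_send, so the streaming branch below still delivers it before the runner returns. A standalone sketch of that flush, assuming a reduced slot type and an emit() helper in place of send_partial_response (both hypothetical):

// Sketch of the flush-on-EOG branch added above. Slot and emit() are
// illustrative stand-ins; only the bookkeeping mirrors the diff.
#include <algorithm>
#include <iostream>
#include <string>

struct Slot {
    std::string generated_text;  // all text decoded so far
    size_t      n_sent_text = 0; // how much of it has already been streamed
};

// Stand-in for queuing a partial response to the client.
void emit(const std::string & text) {
    std::cout << "stream: \"" << text << "\"\n";
}

// Called for an end-of-generation token: no stop-string search, just flush
// whatever has been buffered but not yet sent.
void flush_pending(Slot & slot) {
    size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
    std::string text_to_send = slot.generated_text.substr(pos, std::string::npos);
    slot.n_sent_text += text_to_send.size();
    emit(text_to_send);
}

int main() {
    Slot slot;
    slot.generated_text = "Hello, world";
    slot.n_sent_text    = 7;   // "Hello, " was already streamed
    flush_pending(slot);       // flushes the pending "world"
}
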
@@ -1117,9 +1125,7 @@ struct llama_server_context
             {"multimodal", multimodal}
         };
 
-        if (!llama_token_is_eog(model, tkn.tok)) {
             res.result_json["content"] = tkn.text_to_send;
-        }
 
         if (slot.sparams.n_probs > 0)
        {
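The last hunk drops the end-of-generation check around the content field, so the JSON built for the final token carries the text flushed above instead of omitting it. A rough sketch of the resulting payload, assuming nlohmann::json (the json type this server code uses) and made-up values; only the multimodal and content keys come from the diff:

// Sketch of the final hunk's effect on the per-token result payload.
// Assumes nlohmann::json; everything beyond the two field names is illustrative.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

json build_token_result(const std::string & text_to_send, bool multimodal) {
    json result_json = {
        {"multimodal", multimodal},
    };

    // Previously this assignment was skipped for end-of-generation tokens,
    // so text flushed on the final token never reached the client. Now the
    // content field is always populated (possibly with an empty string).
    result_json["content"] = text_to_send;

    return result_json;
}

int main() {
    std::cout << build_token_result("pending tail", false).dump() << "\n";
}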