update llama.cpp submodule to f364eb6 (#4060)
parent 8488388cbd
commit 18d9a7e1f1
2 changed files with 6 additions and 3 deletions
llm/ext_server/server.cpp (vendored) | 7 +++++--
@@ -1032,7 +1032,7 @@ struct llama_server_context
                 slot.has_next_token = false;
             }

-            if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+            if (llama_token_is_eog(model, result.tok))
             {
                 slot.stopped_eos = true;
                 slot.has_next_token = false;
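
Note: this hunk tracks an upstream llama.cpp API change. The single-token
EOS comparison (and its !slot.cache_tokens.empty() guard) is replaced by
llama_token_is_eog(), which is true for any end-of-generation token the
model's vocabulary declares (EOS, end-of-turn, etc.), not just the one EOS
id. A minimal C++ sketch of the difference, assuming a valid
llama_model *model and a freshly sampled llama_token tok (the function
name should_stop is illustrative, not from this diff):

    #include "llama.h"

    // Returns true when generation should stop on this token.
    bool should_stop(const struct llama_model *model, llama_token tok) {
        // Old check: only the single EOS id stops generation, so models
        // whose chat template ends turns with a different token never stop:
        //   return tok == llama_token_eos(model);

        // New check: matches every end-of-generation token the
        // vocabulary declares.
        return llama_token_is_eog(model, tok);
    }
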
@@ -1144,12 +1144,15 @@ struct llama_server_context

             res.result_json = json
             {
-                {"content", tkn.text_to_send},
                 {"stop",       false},
                 {"slot_id",    slot.id},
                 {"multimodal", multimodal}
             };

+            if (!llama_token_is_eog(model, tkn.tok)) {
+                res.result_json["content"] = tkn.text_to_send;
+            }
+
             if (slot.sparams.n_probs > 0)
             {
                 std::vector<completion_token_output> probs_output = {};
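
Note: after this hunk, the streamed chunk only carries "content" when the
sampled token is not an end-of-generation token, presumably so a stop
token's text (e.g. an end-of-turn marker) is not leaked into the response.
A minimal sketch of the resulting payload shape, assuming nlohmann::json
(the json alias server.cpp uses) and stand-in variables for the server's
real state:

    // Base fields are always present; "content" is conditional.
    json chunk = {
        {"stop",       false},
        {"slot_id",    slot.id},
        {"multimodal", multimodal},
    };

    // Suppress the EOG token's text; ordinary tokens stream as before.
    if (!llama_token_is_eog(model, tkn.tok)) {
        chunk["content"] = tkn.text_to_send;
    }
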
llm/llama.cpp | 2 +-
@@ -1 +1 @@
-Subproject commit f4ab2a41476600a98067a9474ea8f9e6db41bcfa
+Subproject commit f364eb6fb5d46118a76fa045f487318de4c24961