llm: always add bos token to prompt (#4941)
* fix embedding by adding fixes from llama.cpp upstream
* remove assert

---------

Co-authored-by: Jesper Ek <deadbeef84@gmail.com>
parent 46a7f1e74a
commit 34f142797a
1 changed file with 2 additions and 2 deletions
llm/ext_server/server.cpp (vendored)
@@ -835,7 +835,7 @@ struct llama_server_context
         system_tokens.clear();
 
         if (!system_prompt.empty()) {
-            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, true);
 
             llama_batch_clear(batch);
 
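Note on the first hunk: previously the system prompt was tokenized with the model's add_bos_token metadata flag, so a model whose metadata disables BOS produced a sequence with no leading BOS token, which matches the embedding breakage described in the commit message. A minimal standalone sketch of the conditional being removed (toy tokenizer and token ids, not the llama.cpp API):

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-ins for illustration only; this is not the llama.cpp API.
// BOS ("beginning of sequence") is modeled here as token id 1.
using toy_token = int;
constexpr toy_token TOY_BOS = 1;

static std::vector<toy_token> toy_tokenize(const std::string & text, bool add_bos) {
    std::vector<toy_token> out;
    if (add_bos) {
        out.push_back(TOY_BOS);                   // prepend BOS only when asked
    }
    for (char c : text) {
        out.push_back(static_cast<toy_token>(c)); // one "token" per byte
    }
    return out;
}

int main() {
    const std::string system_prompt = "You are a helpful assistant.";

    // Old behavior: BOS depended on the model's add_bos_token metadata,
    // which some models set to false, so the sequence could start bare.
    const bool add_bos_token = false;
    const auto old_tokens = toy_tokenize(system_prompt, add_bos_token);

    // New behavior (this commit): the system prompt always gets a BOS.
    const auto new_tokens = toy_tokenize(system_prompt, /*add_bos=*/true);

    std::printf("old starts with BOS: %s\n", old_tokens.front() == TOY_BOS ? "yes" : "no");
    std::printf("new starts with BOS: %s\n", new_tokens.front() == TOY_BOS ? "yes" : "no");
    return 0;
}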
@@ -1656,7 +1656,7 @@ struct llama_server_context
                 slot.t_start_process_prompt = ggml_time_us();
                 slot.t_start_genereration = 0;
 
-                prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
+                prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
 
                 slot.n_prompt_tokens = prompt_tokens.size();
 
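Note on the second hunk: the per-slot user prompt now receives a BOS token exactly when no system prompt precedes it, independent of the metadata flag. A hypothetical one-line helper (not in the source) condensing the rule from both hunks:

#include <string>

// Hypothetical helper, not part of server.cpp: after this commit the BOS
// token is attached to whichever segment starts the sequence.
static bool prompt_gets_bos(const std::string & system_prompt) {
    // The system prompt, when present, always carries BOS (first hunk),
    // so the user prompt gets it only when nothing precedes it (second hunk).
    return system_prompt.empty();
}

int main() {
    return prompt_gets_bos("") ? 0 : 1; // no system prompt -> user prompt gets BOS
}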