llm: always add bos token to prompt (#4941)

* fix embeddings by porting fixes from llama.cpp upstream

* remove assert

---------

Co-authored-by: Jesper Ek <deadbeef84@gmail.com>
Author: Jeffrey Morgan
Date: 2024-06-08 18:47:10 -07:00 (committed by GitHub)
Commit: 34f142797a
Parent: 46a7f1e74a

@@ -835,7 +835,7 @@ struct llama_server_context
     system_tokens.clear();
     if (!system_prompt.empty()) {
-        system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+        system_tokens = ::llama_tokenize(ctx, system_prompt, true);
         llama_batch_clear(batch);
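
The first hunk makes the server tokenize the system prompt with the BOS token unconditionally, rather than deferring to the model's add_bos_token metadata. A minimal sketch of the resulting call, assuming the llama.cpp common-library wrapper ::llama_tokenize(ctx, text, add_special) that this code path uses:

    // Sketch only: the system prompt is now always tokenized with the
    // special BOS token (add_special=true), instead of threading the
    // per-model add_bos_token flag through.
    std::vector<llama_token> system_tokens =
        ::llama_tokenize(ctx, system_prompt, /*add_special=*/true);
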
@@ -1656,7 +1656,7 @@ struct llama_server_context
     slot.t_start_process_prompt = ggml_time_us();
     slot.t_start_genereration = 0;
-    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
+    prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
     slot.n_prompt_tokens = prompt_tokens.size();
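
The second hunk applies the complementary rule on the per-request path: the user prompt gets a BOS token exactly when no system prompt is set, since a non-empty system prompt (tokenized above with BOS) already supplies it. A sketch of that condition, using the server's own tokenize helper from the diff (the boolean parameter name below is assumed from the surrounding comment):

    // Sketch only: add BOS to the user prompt iff there is no system
    // prompt; otherwise BOS already sits at the front of system_tokens.
    const bool add_bos = system_prompt.empty();
    prompt_tokens = tokenize(slot.prompt, /*add_bos=*/add_bos);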