diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index e342d5f1..e0424a92 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -738,7 +738,7 @@ struct llama_server_context
                     sampler_names.emplace_back(sampler_name);
                 }
             }
-            slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+            slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
         }
         else
         {
@@ -1096,7 +1096,7 @@ struct llama_server_context
         std::vector<std::string> samplers_sequence;
         for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }
 
         return json {
diff --git a/llm/llama.cpp b/llm/llama.cpp
index 614d3b91..74f33adf 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d
+Subproject commit 74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f
diff --git a/llm/patches/03-load_exception.diff b/llm/patches/03-load_exception.diff
index 9e838fa9..eb245c2a 100644
--- a/llm/patches/03-load_exception.diff
+++ b/llm/patches/03-load_exception.diff
@@ -1,8 +1,17 @@
+From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Thu, 23 May 2024 11:18:45 -0700
+Subject: [PATCH] throw exception on load errors
+
+---
+ llama.cpp | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
 diff --git a/llama.cpp b/llama.cpp
-index 4225f955..7b762f86 100644
+index 15c66077..8ba90b6a 100644
 --- a/llama.cpp
 +++ b/llama.cpp
-@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
+@@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
           }
       } catch (const std::exception & err) {
           LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
      }
  
      return 0;
-@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
-         };
+@@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
+         }
+         model->rpc_servers.push_back(servers);
      }
- 
 -    int status = llama_model_load(path_model, *model, params);
 -    GGML_ASSERT(status <= 0);
 -    if (status < 0) {
 -        if (status == -1) {
 -            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 -        } else if (status == -2) {
 -            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++
 +    try {
 +        int status = llama_model_load(path_model, *model, params);
 +        GGML_ASSERT(status <= 0);
 +        if (status < 0) {
 +            if (status == -1) {
 +                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 +            } else if (status == -2) {
 +                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
 +            }
 +
 +            llama_free_model(model);
 +            return nullptr;
 +        }
 +    } catch (const std::exception & err) {
 +        LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 +        llama_free_model(model);
 +        return nullptr;
      }
  
      return model;
+--
+2.45.1
+
diff --git a/llm/patches/05-default-pretokenizer.diff b/llm/patches/05-default-pretokenizer.diff
new file mode 100644
index 00000000..0d0bf05d
--- /dev/null
+++ b/llm/patches/05-default-pretokenizer.diff
@@ -0,0 +1,35 @@
+From d02a06f3f45a09255ace8684a66590e06ce44605 Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Thu, 23 May 2024 11:33:20 -0700
+Subject: [PATCH] default pretokenizer on unrecognized type
+
+---
+ llama.cpp | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/llama.cpp b/llama.cpp
+index 15c66077..af1aede3 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4504,9 +4504,6 @@ static void llm_load_vocab(
+                 LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
+                 LLAMA_LOG_WARN("%s:                                             \n", __func__);
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-            } else if (
+-                tokenizer_pre == "default") {
+-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             } else if (
+                 tokenizer_pre == "llama3"   ||
+                 tokenizer_pre == "llama-v3" ||
+@@ -4553,7 +4550,7 @@ static void llm_load_vocab(
+                 tokenizer_pre == "dbrx") {
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
+             } else {
+-                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
++                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             }
+         } else {
+             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+--
+2.45.1
+