This commit is contained in:
Michael Yang 2024-05-23 14:16:26 -07:00 committed by GitHub
parent 95b1133d0c
commit 714adb8bd1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 56 additions and 8 deletions

View file

@@ -738,7 +738,7 @@ struct llama_server_context
sampler_names.emplace_back(sampler_name); sampler_names.emplace_back(sampler_name);
} }
} }
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false); slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
} }
else else
{ {
@@ -1096,7 +1096,7 @@ struct llama_server_context
std::vector<std::string> samplers_sequence; std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence) for (const auto &sampler_type : slot.sparams.samplers_sequence)
{ {
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type)); samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
} }
return json { return json {

@@ -1 +1 @@
Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d Subproject commit 74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f

View file

@@ -1,8 +1,17 @@
From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 23 May 2024 11:18:45 -0700
Subject: [PATCH] throw exception on load errors
---
llama.cpp | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/llama.cpp b/llama.cpp diff --git a/llama.cpp b/llama.cpp
index 4225f955..7b762f86 100644 index 15c66077..8ba90b6a 100644
--- a/llama.cpp --- a/llama.cpp
+++ b/llama.cpp +++ b/llama.cpp
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam @@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
} }
} catch (const std::exception & err) { } catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what()); LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
} }
return 0; return 0;
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file( @@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
}; }
model->rpc_servers.push_back(servers);
} }
- int status = llama_model_load(path_model, *model, params); - int status = llama_model_load(path_model, *model, params);
- GGML_ASSERT(status <= 0); - GGML_ASSERT(status <= 0);
- if (status < 0) { - if (status < 0) {
@@ -22,6 +31,7 @@ index 4225f955..7b762f86 100644
- LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); - LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
- } else if (status == -2) { - } else if (status == -2) {
- LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); - LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+
+ try { + try {
+ int status = llama_model_load(path_model, *model, params); + int status = llama_model_load(path_model, *model, params);
+ GGML_ASSERT(status <= 0); + GGML_ASSERT(status <= 0);
@@ -42,3 +52,6 @@ index 4225f955..7b762f86 100644
} }
return model; return model;
--
2.45.1

View file

@@ -0,0 +1,35 @@
From d02a06f3f45a09255ace8684a66590e06ce44605 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 23 May 2024 11:33:20 -0700
Subject: [PATCH] default pretokenizer on unrecognized type
---
llama.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 15c66077..af1aede3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4504,9 +4504,6 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
- } else if (
- tokenizer_pre == "default") {
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
} else if (
tokenizer_pre == "llama3" ||
tokenizer_pre == "llama-v3" ||
@@ -4553,7 +4550,7 @@ static void llm_load_vocab(
tokenizer_pre == "dbrx") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
} else {
- throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
}
} else {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
--
2.45.1