bump (#4597)

parent 95b1133d0c
commit 714adb8bd1

4 changed files with 56 additions and 8 deletions

llm/ext_server/server.cpp (vendored, 4 changes)
@@ -738,7 +738,7 @@ struct llama_server_context
                         sampler_names.emplace_back(sampler_name);
                     }
                 }
-                slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+                slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
             }
             else
             {
@@ -1096,7 +1096,7 @@ struct llama_server_context
         std::vector<std::string> samplers_sequence;
         for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }

         return json {
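
Both hunks track a rename of the llama.cpp common sampling helpers: sampler_types_from_names becomes llama_sampling_types_from_names, and sampler_type_to_name_string becomes llama_sampling_type_to_str. A minimal self-contained sketch of the name-to-type round-trip the server performs; the enum and lookup table here are hypothetical stand-ins, not the real llama.cpp API:

// Sketch of the name <-> type round-trip server.cpp performs when parsing a
// request's sampler sequence and serializing it back into the JSON response.
// sampler_type and k_sampler_names are hypothetical stand-ins.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

enum class sampler_type { TOP_K, TOP_P, MIN_P, TFS_Z, TYPICAL_P, TEMPERATURE };

static const std::unordered_map<std::string, sampler_type> k_sampler_names = {
    {"top_k", sampler_type::TOP_K},         {"top_p", sampler_type::TOP_P},
    {"min_p", sampler_type::MIN_P},         {"tfs_z", sampler_type::TFS_Z},
    {"typical_p", sampler_type::TYPICAL_P}, {"temperature", sampler_type::TEMPERATURE},
};

// Mirrors the role of llama_sampling_types_from_names: unknown names are skipped.
std::vector<sampler_type> types_from_names(const std::vector<std::string> & names) {
    std::vector<sampler_type> out;
    for (const auto & name : names) {
        auto it = k_sampler_names.find(name);
        if (it != k_sampler_names.end()) {
            out.push_back(it->second);
        }
    }
    return out;
}

// Mirrors the role of llama_sampling_type_to_str: reverse lookup for serialization.
std::string type_to_str(sampler_type t) {
    for (const auto & kv : k_sampler_names) {
        if (kv.second == t) return kv.first;
    }
    return "unknown";
}

int main() {
    // Parse a sampler sequence, then serialize it back, as the two hunks do.
    auto seq = types_from_names({"top_k", "bogus", "temperature"});
    for (auto t : seq) std::cout << type_to_str(t) << '\n'; // top_k, temperature
}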
llm/llama.cpp (submodule)

@@ -1 +1 @@
-Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d
+Subproject commit 74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f
llm/patches/03-load_exception.diff

@@ -1,8 +1,17 @@
+From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Thu, 23 May 2024 11:18:45 -0700
+Subject: [PATCH] throw exception on load errors
+
+---
+ llama.cpp | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
 diff --git a/llama.cpp b/llama.cpp
-index 4225f955..7b762f86 100644
+index 15c66077..8ba90b6a 100644
 --- a/llama.cpp
 +++ b/llama.cpp
-@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
+@@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
          }
      } catch (const std::exception & err) {
          LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
      }
 
      return 0;
-@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
+@@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
-         };
+         model->rpc_servers.push_back(servers);
      }
 
 -    int status = llama_model_load(path_model, *model, params);
 -    GGML_ASSERT(status <= 0);
 -    if (status < 0) {
@@ -22,6 +31,7 @@ index 4225f955..7b762f86 100644
 -        LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 -    } else if (status == -2) {
 -        LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++
 +    try {
 +        int status = llama_model_load(path_model, *model, params);
 +        GGML_ASSERT(status <= 0);
@@ -42,3 +52,6 @@ index 4225f955..7b762f86 100644
      }
 
      return model;
+-- 
+2.45.1
+
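
The rebased patch moves the llama_model_load call inside a try/catch in llama_load_model_from_file, so loader exceptions are logged and turned into a null return instead of escaping (or aborting) the C-style API. A self-contained sketch of that shape; model_t, load_internal, and the status codes are hypothetical stand-ins for the llama.cpp internals:

// Sketch of the error handling the patch gives llama_load_model_from_file:
// status codes and exceptions both collapse to "log and return nullptr".
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>

struct model_t { std::string path; };

// Stand-in for llama_model_load: 0 on success, -1 on failure, -2 when the
// caller cancelled the load; may also throw on malformed files.
static int load_internal(const std::string & path, model_t & model) {
    if (path.empty()) {
        throw std::runtime_error("no model path given");
    }
    model.path = path;
    return 0;
}

model_t * load_model_from_file(const std::string & path) {
    auto model = std::make_unique<model_t>();
    try {
        int status = load_internal(path, *model);
        if (status == -1) {
            std::fprintf(stderr, "%s: failed to load model\n", __func__);
            return nullptr;
        }
        if (status == -2) {
            std::fprintf(stderr, "%s: cancelled model load\n", __func__);
            return nullptr;
        }
    } catch (const std::exception & err) {
        // New in the patch: exceptions from the loader no longer cross the
        // API boundary; they become an error log plus a null return.
        std::fprintf(stderr, "%s: error loading model: %s\n", __func__, err.what());
        return nullptr;
    }
    return model.release();
}

int main() {
    model_t * m = load_model_from_file(""); // exercises the throw path
    std::printf("model: %p\n", static_cast<void *>(m)); // prints a null pointer
    delete m;
}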
llm/patches/05-default-pretokenizer.diff (new file, 35 lines)
@@ -0,0 +1,35 @@
+From d02a06f3f45a09255ace8684a66590e06ce44605 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Thu, 23 May 2024 11:33:20 -0700
+Subject: [PATCH] default pretokenizer on unrecognized type
+
+---
+ llama.cpp | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/llama.cpp b/llama.cpp
+index 15c66077..af1aede3 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4504,9 +4504,6 @@ static void llm_load_vocab(
+         LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
+         LLAMA_LOG_WARN("%s:                                             \n", __func__);
+         vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-    } else if (
+-        tokenizer_pre == "default") {
+-        vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+     } else if (
+         tokenizer_pre == "llama3"   ||
+         tokenizer_pre == "llama-v3" ||
+@@ -4553,7 +4550,7 @@ static void llm_load_vocab(
+         tokenizer_pre == "dbrx") {
+         vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
+     } else {
+-        throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
++        vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+     }
+ } else {
+     vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-- 
+2.45.1
+
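
This new patch changes llm_load_vocab so an unrecognized tokenizer.ggml.pre value falls back to the default pre-tokenizer instead of throwing, which lets such models load at the cost of possibly incorrect tokenization. A self-contained sketch of the fallback; vocab_pre_type and the recognized names are hypothetical stand-ins for llama_vocab_pre_type and the full string chain in llm_load_vocab:

// Sketch of the fallback the patch introduces: unknown pre-tokenizer names
// map to DEFAULT with a warning instead of raising std::runtime_error.
#include <cstdio>
#include <string>

enum class vocab_pre_type { DEFAULT, LLAMA3, DBRX };

vocab_pre_type pre_type_from_name(const std::string & name) {
    if (name == "llama3" || name == "llama-v3" || name == "llama-bpe") {
        return vocab_pre_type::LLAMA3;
    }
    if (name == "dbrx") {
        return vocab_pre_type::DBRX;
    }
    // Before the patch this branch threw; now it warns and defaults.
    std::fprintf(stderr, "warning: unknown pre-tokenizer type '%s', using default\n",
                 name.c_str());
    return vocab_pre_type::DEFAULT;
}

int main() {
    pre_type_from_name("llama3");       // recognized type
    pre_type_from_name("some-new-bpe"); // falls back to DEFAULT with a warning
}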