relay load model errors to the client (#3065)
commit b80661e8c7
parent 6d3adfbea2

3 changed files with 51 additions and 11 deletions
@@ -149,7 +149,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 	slog.Info("Initializing llama server")
 	slog.Debug(fmt.Sprintf("server params: %+v", sparams))
-	initResp := newExtServerResp(128)
+	initResp := newExtServerResp(512)
 	defer freeExtServerResp(initResp)
 	C.dyn_llama_server_init(llm.s, &sparams, &initResp)
 	if initResp.id < 0 {
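The only Go-side change is the response buffer growing from 128 to 512 bytes, so longer loader error messages survive the trip across cgo intact. As a minimal sketch (assumed shape, not the actual declaration), the contract implied by the fields used above is:

    #include <stddef.h>

    // Hedged sketch of ext_server_resp_t: the field names (id, msg, msg_len)
    // are taken from this diff; the real declaration lives in the ext server
    // headers. The Go caller preallocates msg with msg_len bytes, and the
    // C++ server snprintf's error text into it, so a 512-byte buffer keeps
    // detailed model-load messages from being truncated at 128 bytes.
    typedef struct ext_server_resp {
      int id;          // >= 0 on success; < 0 on error (the initResp.id < 0 check)
      size_t msg_len;  // capacity of msg, set by the caller
      char *msg;       // error text, written via snprintf(err->msg, err->msg_len, ...)
    } ext_server_resp_t;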
@@ -114,16 +114,12 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    // load the model
-    if (!llama->load_model(params)) {
-      // TODO - consider modifying the logging logic or patching load_model so
-      // we can capture more detailed error messages and pass them back to the
-      // caller for better UX
-      err->id = -1;
-      snprintf(err->msg, err->msg_len, "error loading model %s",
-               params.model.c_str());
-      return;
-    }
+    if (!llama->load_model(params)) {
+      // an error occured that was not thrown
+      err->id = -1;
+      snprintf(err->msg, err->msg_len, "error loading model %s", params.model.c_str());
+      return;
+    }
 
     llama->initialize();
   } catch (std::exception &e) {
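The hunk ends at the catch clause, so the handler body is not shown here. A hedged sketch of what that relay step plausibly does — the helper name relay_exception is hypothetical, and only the struct fields and the snprintf pattern come from the diff:

    #include <cstddef>
    #include <cstdio>
    #include <exception>

    struct ext_server_resp_t { int id; size_t msg_len; char *msg; }; // assumed shape, see above

    // Hypothetical helper: copy a rethrown loader exception into the
    // caller-owned response so the Go side can relay it to the client.
    static void relay_exception(const std::exception &e, ext_server_resp_t *err) {
      err->id = -1;                                               // the Go side checks initResp.id < 0
      snprintf(err->msg, err->msg_len, "exception %s", e.what()); // truncates safely at msg_len
    }

With the llama.cpp patch below, e.what() now carries the loader's detailed error text instead of the generic "error loading model" fallback.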
llm/patches/03-load_exception.diff (new file, 44 lines)

@@ -0,0 +1,44 @@
diff --git a/llama.cpp b/llama.cpp
index 4225f955..7b762f86 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
-        return -1;
+        throw;
     }
 
     return 0;
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
         };
     }
 
-    int status = llama_model_load(path_model, *model, params);
-    GGML_ASSERT(status <= 0);
-    if (status < 0) {
-        if (status == -1) {
-            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
-        } else if (status == -2) {
-            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+    try {
+        int status = llama_model_load(path_model, *model, params);
+        GGML_ASSERT(status <= 0);
+        if (status < 0) {
+            if (status == -1) {
+                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+            } else if (status == -2) {
+                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+            }
+            delete model;
+            return nullptr;
         }
+    } catch (...) {
+        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
         delete model;
-        return nullptr;
+        throw;
     }
 
     return model;
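This patch is what makes the relay possible: llama.cpp previously logged a load failure and collapsed it into -1 and then nullptr, so the error text never left the library. Rethrowing preserves the original exception for the ext server to catch. A self-contained sketch of the same log-then-rethrow pattern (the failure message is invented for illustration):

    #include <cstdio>
    #include <stdexcept>

    // Stand-in for the patched llama_model_load: log at the failure site,
    // then rethrow so the caller sees the real message, not just -1.
    static int load_model_file(const char *path) {
      (void)path; // unused in this sketch
      try {
        // simulated loader failure; real messages come from llama.cpp internals
        throw std::runtime_error("invalid model file (bad magic)");
      } catch (const std::exception &err) {
        std::fprintf(stderr, "error loading model: %s\n", err.what());
        throw; // bare throw rethrows the original exception object
      }
      return 0;
    }

    int main() {
      try {
        load_model_file("model.gguf");
      } catch (const std::exception &e) {
        // the ext server's catch block lands here and can copy e.what()
        // into the response that the Go client relays onward
        std::fprintf(stderr, "relayed to client: %s\n", e.what());
      }
      return 0;
    }

Note the bare throw; in the catch blocks: it propagates the original exception rather than constructing a new one. The outer catch (...) added to llama_load_model_from_file still deletes the partially constructed model before rethrowing, so the error path no longer leaks.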