relay load model errors to the client (#3065)

2024-03-11 16:48:27 -04:00 · 2024-03-11 16:48:27 -04:00 · b80661e8c7
commit b80661e8c7
parent 6d3adfbea2
3 changed files with 51 additions and 11 deletions
--- a/llm/dyn_ext_server.go
+++ b/llm/dyn_ext_server.go
@ -149,7 +149,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 	slog.Info("Initializing llama server")
 	slog.Debug(fmt.Sprintf("server params: %+v", sparams))
-	initResp := newExtServerResp(128)
+	initResp := newExtServerResp(512)
 	defer freeExtServerResp(initResp)
 	C.dyn_llama_server_init(llm.s, &sparams, &initResp)
 	if initResp.id < 0 {
--- a/llm/ext_server/ext_server.cpp
+++ b/llm/ext_server/ext_server.cpp
@ -114,14 +114,10 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
    llama_backend_init();
    llama_numa_init(params.numa);
    // load the model
  if (!llama->load_model(params)) { 
-      // TODO - consider modifying the logging logic or patching load_model so
+    // an error occurred that was not thrown
      // we can capture more detailed error messages and pass them back to the
      // caller for better UX
    err->id = -1;
-      snprintf(err->msg, err->msg_len, "error loading model %s",
+    snprintf(err->msg, err->msg_len, "error loading model %s", params.model.c_str());
               params.model.c_str());
    return;
  }
--- a/llm/patches/03-load_exception.diff
+++ b/llm/patches/03-load_exception.diff
@ -0,0 +1,44 @@
 diff --git a/llama.cpp b/llama.cpp
 index 4225f955..7b762f86 100644
 --- a/llama.cpp
 +++ b/llama.cpp
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
 -        return -1;
 +        throw;
     }
     return 0;
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
         };
     }
 -    int status = llama_model_load(path_model, *model, params);
 -    GGML_ASSERT(status <= 0);
 -    if (status < 0) {
 -        if (status == -1) {
 -            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 -        } else if (status == -2) {
 -            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
 +    try {
 +        int status = llama_model_load(path_model, *model, params);
 +        GGML_ASSERT(status <= 0);
 +        if (status < 0) {
 +            if (status == -1) {
 +                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 +            } else if (status == -2) {
 +                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
 +            }
 +            delete model;
 +            return nullptr;
         }
 +    } catch (...) {
 +        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
         delete model;
 -        return nullptr;
 +        throw;
     }
     return model;