ollama/llm/patches/01-load-progress.diff

diff --git a/common/common.cpp b/common/common.cpp
index 73ff0e85..6adb1a92 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2447,6 +2447,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
     mparams.use_mmap        = params.use_mmap;
     mparams.use_mlock       = params.use_mlock;
     mparams.check_tensors   = params.check_tensors;
+    mparams.progress_callback = params.progress_callback;
+    mparams.progress_callback_user_data = params.progress_callback_user_data;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
diff --git a/common/common.h b/common/common.h
index 58ed72f4..0bb2605e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -180,6 +180,13 @@ struct gpt_params {
     std::string mmproj = "";        // path to multimodal projector
     std::vector<std::string> image; // path to image file(s)

+    // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+    // If the provided progress_callback returns true, model loading continues.
+    // If it returns false, model loading is immediately aborted.
+    llama_progress_callback progress_callback = NULL;
+    // context pointer passed to the progress callback
+    void * progress_callback_user_data;
+
     // server params
     int32_t port           = 8080;         // server listens on this network port
     int32_t timeout_read   = 600;          // http read timeout in seconds