ollama/llm/patches/02-shutdown.diff


diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 11dd82c3..311495a8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,7 @@
 #include <chrono>
 #include <condition_variable>
 #include <atomic>
+#include <signal.h>
 
 using json = nlohmann::json;
 
@@ -2394,6 +2395,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
     }
 }
 
+std::function<void(int)> shutdown_handler;
+inline void signal_handler(int signal) { shutdown_handler(signal); }
+
 int main(int argc, char **argv)
 {
 #if SERVER_VERBOSE != 1
@@ -3014,8 +3018,14 @@ int main(int argc, char **argv)
                     std::placeholders::_2,
                     std::placeholders::_3
     ));
-    llama.queue_tasks.start_loop();
 
+    shutdown_handler = [&](int) {
+        llama.queue_tasks.terminate();
+    };
+    signal(SIGTERM, signal_handler);
+    signal(SIGINT, signal_handler);
+    llama.queue_tasks.start_loop();
+    svr.stop();
     t.join();
 
     llama_backend_free();
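
The server.cpp half of the patch routes signal delivery through a global std::function because signal() accepts only a plain function pointer, so the capturing lambda in main cannot be registered directly. A minimal, self-contained sketch of that trampoline pattern follows; the stop flag and busy-wait loop are illustrative stand-ins for the server's task loop, not llama.cpp code.

#include <atomic>
#include <csignal>
#include <cstdio>
#include <functional>

// Global trampoline: std::signal() wants a plain function pointer, so the
// capturing logic lives in a std::function that the pointer forwards to.
std::function<void(int)> shutdown_handler;
inline void signal_handler(int sig) { shutdown_handler(sig); }

std::atomic<bool> stop{false}; // stand-in for the server's queue state

int main() {
    shutdown_handler = [](int) { stop.store(true); };
    std::signal(SIGINT,  signal_handler);
    std::signal(SIGTERM, signal_handler);
    while (!stop.load()) {
        // stand-in for llama.queue_tasks.start_loop()
    }
    std::puts("shutting down cleanly");
    return 0;
}

Strictly speaking, running arbitrary code from a signal handler (here, ultimately a mutex lock and notify_all() inside terminate()) is not async-signal-safe; the patch accepts that trade-off for a best-effort clean shutdown on SIGINT/SIGTERM.
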
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 70cce072..9124869a 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
 struct llama_server_queue {
     int id = 0;
     std::mutex mutex_tasks;
+    bool running;
     // queues
     std::vector<task_server> queue_tasks;
     std::vector<task_server> queue_tasks_deferred;
@@ -248,9 +249,18 @@ struct llama_server_queue {
         queue_tasks_deferred.clear();
     }
 
-    // Start the main loop. This call is blocking
-    [[noreturn]]
+    // end the start_loop routine
+    void terminate() {
+        {
+            std::unique_lock<std::mutex> lock(mutex_tasks);
+            running = false;
+        }
+        condition_tasks.notify_all();
+    }
+
+    // Start the main loop.
     void start_loop() {
+        running = true;
         while (true) {
             // new task arrived
             LOG_VERBOSE("have new task", {});
@@ -294,8 +304,12 @@ struct llama_server_queue {
             {
                 std::unique_lock<std::mutex> lock(mutex_tasks);
                 if (queue_tasks.empty()) {
+                    if (!running) {
+                        LOG_VERBOSE("ending start_loop", {});
+                        return;
+                    }
                     condition_tasks.wait(lock, [&]{
-                        return !queue_tasks.empty();
+                        return (!queue_tasks.empty() || !running);
                     });
                 }
             }
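
Taken together, the utils.hpp hunks implement a standard condition-variable shutdown pattern: a running flag written under the queue mutex, a terminate() that clears it and calls notify_all(), and a wait predicate extended so the sleeping loop also wakes when the flag drops. Below is a reduced, self-contained sketch of that pattern; the task_queue type and post() helper are illustrative stand-ins, not the llama.cpp API.

#include <condition_variable>
#include <cstdio>
#include <deque>
#include <functional>
#include <mutex>

struct task_queue {
    std::mutex mutex_tasks;
    std::condition_variable condition_tasks;
    std::deque<std::function<void()>> queue_tasks;
    bool running = false;

    void post(std::function<void()> task) {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            queue_tasks.push_back(std::move(task));
        }
        condition_tasks.notify_one();
    }

    // Mirrors the patch: clear the flag under the lock, then wake every
    // waiter so a sleeping loop re-evaluates its predicate.
    void terminate() {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            running = false;
        }
        condition_tasks.notify_all();
    }

    // Blocking loop. Without the running check it would wait forever on an
    // empty queue; with it, the loop drains the queue and then returns.
    void start_loop() {
        running = true;
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            if (queue_tasks.empty()) {
                if (!running) {
                    std::puts("ending start_loop");
                    return;
                }
                condition_tasks.wait(lock, [&] {
                    return !queue_tasks.empty() || !running;
                });
                if (queue_tasks.empty()) {
                    continue; // woken by terminate(); recheck at loop top
                }
            }
            std::function<void()> task = std::move(queue_tasks.front());
            queue_tasks.pop_front();
            lock.unlock(); // run the task without holding the queue lock
            task();
        }
    }
};

int main() {
    task_queue q;
    q.post([] { std::puts("task 1"); });
    q.post([&q] { q.terminate(); }); // what a SIGTERM handler would trigger
    q.start_loop(); // runs both tasks, prints "ending start_loop", returns
}

One caveat worth noting: as in the patch, start_loop() writes running = true without holding the lock, so in principle a signal delivered between signal() registration and the start of the loop could have its terminate() overwritten and the shutdown request lost; in practice that window is a few instructions on the main thread.
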