ollama/llm/patches/02-shutdown.diff


diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 11dd82c3..311495a8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,7 @@
 #include <chrono>
 #include <condition_variable>
 #include <atomic>
+#include <signal.h>
 
 using json = nlohmann::json;
 
@@ -2394,6 +2395,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
     }
 }
 
+std::function<void(int)> shutdown_handler;
+inline void signal_handler(int signal) { shutdown_handler(signal); }
+
 int main(int argc, char **argv)
 {
 #if SERVER_VERBOSE != 1
@@ -3014,8 +3018,14 @@ int main(int argc, char **argv)
                     std::placeholders::_2,
                     std::placeholders::_3
     ));
-    llama.queue_tasks.start_loop();
 
+    shutdown_handler = [&](int) {
+        llama.queue_tasks.terminate();
+    };
+    signal(SIGTERM, signal_handler);
+    signal(SIGINT, signal_handler);
+    llama.queue_tasks.start_loop();
+    svr.stop();
     t.join();
 
     llama_backend_free();
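
The server.cpp half of the patch routes signal delivery through a global std::function because signal() accepts only a plain function pointer, so the capturing lambda in main cannot be registered directly. A minimal, self-contained sketch of that trampoline pattern follows; the stop flag and busy-wait loop are illustrative stand-ins for the server's task loop, not llama.cpp code.

#include <atomic>
#include <csignal>
#include <cstdio>
#include <functional>

// Global trampoline: std::signal() wants a plain function pointer, so the
// capturing logic lives in a std::function that the pointer forwards to.
std::function<void(int)> shutdown_handler;
inline void signal_handler(int sig) { shutdown_handler(sig); }

std::atomic<bool> stop{false}; // stand-in for the server's queue state

int main() {
    shutdown_handler = [](int) { stop.store(true); };
    std::signal(SIGINT,  signal_handler);
    std::signal(SIGTERM, signal_handler);
    while (!stop.load()) {
        // stand-in for llama.queue_tasks.start_loop()
    }
    std::puts("shutting down cleanly");
    return 0;
}

Strictly speaking, running arbitrary code from a signal handler (here, ultimately a mutex lock and notify_all() inside terminate()) is not async-signal-safe; the patch accepts that trade-off for a best-effort clean shutdown on SIGINT/SIGTERM.
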
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 70cce072..9124869a 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
 struct llama_server_queue {
     int id = 0;
     std::mutex mutex_tasks;
+    bool running;
     // queues
     std::vector<task_server> queue_tasks;
     std::vector<task_server> queue_tasks_deferred;
@@ -248,9 +249,18 @@ struct llama_server_queue {
         queue_tasks_deferred.clear();
     }
 
-    // Start the main loop. This call is blocking
-    [[noreturn]]
+    // end the start_loop routine
+    void terminate() {
+        {
+            std::unique_lock<std::mutex> lock(mutex_tasks);
+            running = false;
+        }
+        condition_tasks.notify_all();
+    }
+
+    // Start the main loop.
     void start_loop() {
+        running = true;
         while (true) {
             // new task arrived
             LOG_VERBOSE("have new task", {});
@@ -294,8 +304,12 @@ struct llama_server_queue {
             {
                 std::unique_lock<std::mutex> lock(mutex_tasks);
                 if (queue_tasks.empty()) {
+                    if (!running) {
+                        LOG_VERBOSE("ending start_loop", {});
+                        return;
+                    }
                     condition_tasks.wait(lock, [&]{
-                        return !queue_tasks.empty();
+                        return (!queue_tasks.empty() || !running);
                     });
                 }
             }
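
Taken together, the utils.hpp hunks implement a standard condition-variable shutdown pattern: a running flag written under the queue mutex, a terminate() that clears it and calls notify_all(), and a wait predicate extended so the sleeping loop also wakes when the flag drops. Below is a reduced, self-contained sketch of that pattern; the task_queue type and post() helper are illustrative stand-ins, not the llama.cpp API.

#include <condition_variable>
#include <cstdio>
#include <deque>
#include <functional>
#include <mutex>

struct task_queue {
    std::mutex mutex_tasks;
    std::condition_variable condition_tasks;
    std::deque<std::function<void()>> queue_tasks;
    bool running = false;

    void post(std::function<void()> task) {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            queue_tasks.push_back(std::move(task));
        }
        condition_tasks.notify_one();
    }

    // Mirrors the patch: clear the flag under the lock, then wake every
    // waiter so a sleeping loop re-evaluates its predicate.
    void terminate() {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            running = false;
        }
        condition_tasks.notify_all();
    }

    // Blocking loop. Without the running check it would wait forever on an
    // empty queue; with it, the loop drains the queue and then returns.
    void start_loop() {
        running = true;
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            if (queue_tasks.empty()) {
                if (!running) {
                    std::puts("ending start_loop");
                    return;
                }
                condition_tasks.wait(lock, [&] {
                    return !queue_tasks.empty() || !running;
                });
                if (queue_tasks.empty()) {
                    continue; // woken by terminate(); recheck at loop top
                }
            }
            std::function<void()> task = std::move(queue_tasks.front());
            queue_tasks.pop_front();
            lock.unlock(); // run the task without holding the queue lock
            task();
        }
    }
};

int main() {
    task_queue q;
    q.post([] { std::puts("task 1"); });
    q.post([&q] { q.terminate(); }); // what a SIGTERM handler would trigger
    q.start_loop(); // runs both tasks, prints "ending start_loop", returns
}

One caveat worth noting: as in the patch, start_loop() writes running = true without holding the lock, so in principle a signal delivered between signal() registration and the start of the loop could have its terminate() overwritten and the shutdown request lost; in practice that window is a few instructions on the main thread.
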