diff --git a/llm/ext_server/ext_server.cpp b/llm/ext_server/ext_server.cpp index d0a036b3..c30d4220 100644 --- a/llm/ext_server/ext_server.cpp +++ b/llm/ext_server/ext_server.cpp @@ -111,6 +111,10 @@ void llama_server_stop() { // TODO - too verbose, remove once things are solid LOG_TEE("requesting llama server shutdown\n"); ext_server_running = false; + + // unblocks the update_slots() loop so it can clean up and exit + llama->request_cancel(0); + ext_server_thread.join(); delete llama; llama = NULL; diff --git a/llm/llama.cpp b/llm/llama.cpp index 328b83de..6efb8eb3 160000 --- a/llm/llama.cpp +++ b/llm/llama.cpp @@ -1 +1 @@ -Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1 +Subproject commit 6efb8eb30e7025b168f3fda3ff83b9b386428ad6