From de76b95dd454798d041cdec18c927ae7c5f1e7a3 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 6 Feb 2024 12:06:43 -0800
Subject: [PATCH] Bump llama.cpp to b2081

---
 llm/llama.cpp                |  2 +-
 llm/patches/01-cache.diff    | 10 +++++-----
 llm/patches/02-shutdown.diff | 27 +++++++++++----------------
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/llm/llama.cpp b/llm/llama.cpp
index d2f650cb..f57fadc0 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit d2f650cb5b04ee2726663e79b47da5efe196ce00
+Subproject commit f57fadc009cbff741a1961cb7896c47d73978d2c
diff --git a/llm/patches/01-cache.diff b/llm/patches/01-cache.diff
index 79f8d002..9565d60b 100644
--- a/llm/patches/01-cache.diff
+++ b/llm/patches/01-cache.diff
@@ -1,8 +1,8 @@
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index a48582ad..9fffffd8 100644
+index d86d7e04..7d71c766 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1564,12 +1564,6 @@ struct llama_server_context
+@@ -1598,12 +1598,6 @@ struct llama_server_context
                  LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
              }
 
@@ -15,7 +15,7 @@ index a48582ad..9fffffd8 100644
              if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
              {
                  // we have to evaluate at least 1 token to generate logits.
-@@ -1581,6 +1575,12 @@ struct llama_server_context
+@@ -1615,6 +1609,12 @@ struct llama_server_context
                  }
              }
 
@@ -26,5 +26,5 @@ index a48582ad..9fffffd8 100644
 +            slot.cache_tokens = prompt_tokens;
 +
 +            LOG_VERBOSE("prompt ingested", {
-                                                {"n_past", slot.n_past},
-                                                {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
+                                                {"n_past", slot.n_past},
+                                                {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
diff --git a/llm/patches/02-shutdown.diff b/llm/patches/02-shutdown.diff
index 4c247cc0..c588dcfb 100644
--- a/llm/patches/02-shutdown.diff
+++ b/llm/patches/02-shutdown.diff
@@ -37,26 +37,18 @@ index 11dd82c3..311495a8 100644
 
      llama_backend_free();
 diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
-index 70cce072..2acb1eab 100644
+index 70cce072..9124869a 100644
 --- a/examples/server/utils.hpp
 +++ b/examples/server/utils.hpp
-@@ -6,6 +6,7 @@
- #include
- #include
- #include
-+#include <atomic>
- 
- #include "json.hpp"
- 
-@@ -190,6 +191,7 @@ inline std::string format_chatml(std::vector<json> messages)
+@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
  struct llama_server_queue {
      int id = 0;
      std::mutex mutex_tasks;
-+    std::atomic<bool> running;
++    bool running;
      // queues
      std::vector<task_server> queue_tasks;
      std::vector<task_server> queue_tasks_deferred;
-@@ -248,9 +250,15 @@ struct llama_server_queue {
+@@ -248,9 +249,18 @@ struct llama_server_queue {
          queue_tasks_deferred.clear();
      }
@@ -64,7 +56,10 @@ index 70cce072..2acb1eab 100644
 -    [[noreturn]]
 +    // end the start_loop routine
 +    void terminate() {
-+        running = false;
++        {
++            std::unique_lock<std::mutex> lock(mutex_tasks);
++            running = false;
++        }
 +        condition_tasks.notify_all();
 +    }
 +
@@ -74,17 +69,17 @@ index 70cce072..2acb1eab 100644
          while (true) {
              // new task arrived
              LOG_VERBOSE("have new task", {});
-@@ -294,8 +302,12 @@ struct llama_server_queue {
+@@ -294,8 +304,12 @@ struct llama_server_queue {
              {
                  std::unique_lock<std::mutex> lock(mutex_tasks);
                  if (queue_tasks.empty()) {
-+                    if (!running.load()) {
++                    if (!running) {
 +                        LOG_VERBOSE("ending start_loop", {});
 +                        return;
 +                    }
                      condition_tasks.wait(lock, [&]{
 -                        return !queue_tasks.empty();
-+                        return (!queue_tasks.empty() || !running);
++                        return (!queue_tasks.empty() || !running);
                      });
                  }
              }
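
For context, the reworked 02-shutdown.diff drops the std::atomic<bool> flag in favor of a plain bool that is only read and written while holding mutex_tasks, the same mutex the condition variable waits on: terminate() flips the flag under the lock and then calls notify_all(), and the wait predicate re-checks the flag so start_loop() can return cleanly. Below is a minimal standalone sketch of that cooperative-shutdown pattern; the names used here (worker_queue, post, run_loop) are illustrative placeholders, not the llama.cpp identifiers.

#include <condition_variable>
#include <deque>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>

// Minimal worker queue: run_loop() drains tasks until terminate() is called.
// The bool flag is only touched while holding the same mutex the condition
// variable waits on, so no separate atomic is needed.
struct worker_queue {
    std::mutex mutex_tasks;
    std::condition_variable condition_tasks;
    std::deque<std::function<void()>> tasks;
    bool running = true;

    void post(std::function<void()> fn) {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            tasks.push_back(std::move(fn));
        }
        condition_tasks.notify_one();
    }

    // End run_loop(): flip the flag under the lock, then wake all waiters.
    void terminate() {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            running = false;
        }
        condition_tasks.notify_all();
    }

    void run_loop() {
        while (true) {
            std::function<void()> task;
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                // Wake up when there is work or when shutdown was requested.
                condition_tasks.wait(lock, [&] {
                    return !tasks.empty() || !running;
                });
                if (!running && tasks.empty()) {
                    return;  // drained and asked to stop
                }
                task = std::move(tasks.front());
                tasks.pop_front();
            }
            task();  // run outside the lock
        }
    }
};

int main() {
    worker_queue q;
    std::thread worker([&] { q.run_loop(); });
    q.post([] { std::cout << "task 1\n"; });
    q.post([] { std::cout << "task 2\n"; });
    q.terminate();
    worker.join();
}

Because the flag is only ever modified under the lock the condition variable uses, a waiting run_loop() cannot miss the shutdown signal: either it observes the updated predicate when it re-evaluates it, or it is woken by notify_all() after the flag has been set.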