Use slot with cached prompt instead of least recently used (#5492)

* Use common prefix to select slot

* actually report `longest`
This commit is contained in:
Jeffrey Morgan 2024-07-05 12:32:47 -04:00 committed by GitHub
parent af28b94533
commit d89454de80
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1382,12 +1382,50 @@ struct llama_server_context
} }
} }
std::string common_prefix(const std::string& str1, const std::string& str2) {
auto mismatch_pair = std::mismatch(str1.begin(), str1.end(), str2.begin());
return std::string(str1.begin(), mismatch_pair.first);
}
// Find the slot that has the greatest common prefix
server_slot *prefix_slot(const json &prompt) {
if (!prompt.is_string()) {
return nullptr;
}
std::string prompt_str = prompt.get<std::string>();
server_slot *slot = nullptr;
size_t longest = 0;
for (server_slot &s : slots) {
if (s.available() && s.prompt.is_string()) {
std::string s_prompt = s.prompt.get<std::string>();
std::string prefix = common_prefix(s_prompt, prompt_str);
if (prefix.size() > longest) {
slot = &s;
longest = prefix.size();
}
}
}
if (!slot) {
return get_slot(-1);
}
LOG_INFO("slot with common prefix found", {{
"slot_id", slot->id,
"characters", longest
}});
return slot;
}
void process_single_task(task_server& task) void process_single_task(task_server& task)
{ {
switch (task.type) switch (task.type)
{ {
case TASK_TYPE_COMPLETION: { case TASK_TYPE_COMPLETION: {
server_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); server_slot *slot = prefix_slot(task.data["prompt"]);
if (slot == nullptr) if (slot == nullptr)
{ {
// if no slot is available, we defer this task for processing later // if no slot is available, we defer this task for processing later