Use slot with cached prompt instead of least recently used (#5492)
* Use common prefix to select slot * actually report `longest`
This commit is contained in:
parent
af28b94533
commit
d89454de80
1 changed file with 39 additions and 1 deletion
40
llm/ext_server/server.cpp
vendored
40
llm/ext_server/server.cpp
vendored
|
@ -1382,12 +1382,50 @@ struct llama_server_context
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the longest leading run of characters shared by str1 and str2.
// The result is empty when either string is empty or when their first
// characters differ.
std::string common_prefix(const std::string& str1, const std::string& str2) {
    // Only compare as many characters as the shorter string holds. The
    // three-iterator form of std::mismatch assumes the second range is at
    // least as long as the first, so an unbounded call would read past the
    // end of str2 whenever str2 is the shorter string.
    const auto limit = static_cast<std::string::difference_type>(
        std::min(str1.size(), str2.size()));
    auto mismatch_pair = std::mismatch(str1.begin(), str1.begin() + limit, str2.begin());
    return std::string(str1.begin(), mismatch_pair.first);
}
|
||||
|
||||
// Find the slot that has the greatest common prefix
|
||||
server_slot *prefix_slot(const json &prompt) {
|
||||
if (!prompt.is_string()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string prompt_str = prompt.get<std::string>();
|
||||
server_slot *slot = nullptr;
|
||||
size_t longest = 0;
|
||||
|
||||
for (server_slot &s : slots) {
|
||||
if (s.available() && s.prompt.is_string()) {
|
||||
std::string s_prompt = s.prompt.get<std::string>();
|
||||
std::string prefix = common_prefix(s_prompt, prompt_str);
|
||||
|
||||
if (prefix.size() > longest) {
|
||||
slot = &s;
|
||||
longest = prefix.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!slot) {
|
||||
return get_slot(-1);
|
||||
}
|
||||
|
||||
LOG_INFO("slot with common prefix found", {{
|
||||
"slot_id", slot->id,
|
||||
"characters", longest
|
||||
}});
|
||||
return slot;
|
||||
}
|
||||
|
||||
void process_single_task(task_server& task)
|
||||
{
|
||||
switch (task.type)
|
||||
{
|
||||
case TASK_TYPE_COMPLETION: {
|
||||
server_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
|
||||
server_slot *slot = prefix_slot(task.data["prompt"]);
|
||||
if (slot == nullptr)
|
||||
{
|
||||
// if no slot is available, we defer this task for processing later
|
||||
|
|
Loading…
Reference in a new issue