Use slot with cached prompt instead of least recently used (#5492)
* Use common prefix to select slot
* Actually report `longest`
This commit is contained in:
parent
af28b94533
commit
d89454de80
1 changed file with 39 additions and 1 deletion
40
llm/ext_server/server.cpp
vendored
40
llm/ext_server/server.cpp
vendored
|
@ -1382,12 +1382,50 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the longest leading run of characters shared by str1 and str2.
//
// The result is never longer than the shorter of the two inputs. Either
// input may be empty, in which case the result is empty.
std::string common_prefix(const std::string& str1, const std::string& str2) {
    // Bound the scan by the shorter string. The three-iterator form of
    // std::mismatch only knows where the FIRST range ends, so scanning all of
    // str1 would read past the end of str2 whenever str2 is the shorter input
    // and matches str1 up to its own end.
    const std::string::difference_type limit =
        static_cast<std::string::difference_type>(std::min(str1.size(), str2.size()));

    auto mismatch_pair = std::mismatch(str1.begin(), str1.begin() + limit, str2.begin());
    return std::string(str1.begin(), mismatch_pair.first);
}
|
||||||
|
|
||||||
|
// Find the slot that has the greatest common prefix
|
||||||
|
server_slot *prefix_slot(const json &prompt) {
|
||||||
|
if (!prompt.is_string()) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string prompt_str = prompt.get<std::string>();
|
||||||
|
server_slot *slot = nullptr;
|
||||||
|
size_t longest = 0;
|
||||||
|
|
||||||
|
for (server_slot &s : slots) {
|
||||||
|
if (s.available() && s.prompt.is_string()) {
|
||||||
|
std::string s_prompt = s.prompt.get<std::string>();
|
||||||
|
std::string prefix = common_prefix(s_prompt, prompt_str);
|
||||||
|
|
||||||
|
if (prefix.size() > longest) {
|
||||||
|
slot = &s;
|
||||||
|
longest = prefix.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!slot) {
|
||||||
|
return get_slot(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFO("slot with common prefix found", {{
|
||||||
|
"slot_id", slot->id,
|
||||||
|
"characters", longest
|
||||||
|
}});
|
||||||
|
return slot;
|
||||||
|
}
|
||||||
|
|
||||||
void process_single_task(task_server& task)
|
void process_single_task(task_server& task)
|
||||||
{
|
{
|
||||||
switch (task.type)
|
switch (task.type)
|
||||||
{
|
{
|
||||||
case TASK_TYPE_COMPLETION: {
|
case TASK_TYPE_COMPLETION: {
|
||||||
server_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
|
server_slot *slot = prefix_slot(task.data["prompt"]);
|
||||||
if (slot == nullptr)
|
if (slot == nullptr)
|
||||||
{
|
{
|
||||||
// if no slot is available, we defer this task for processing later
|
// if no slot is available, we defer this task for processing later
|
||||||
|
|
Loading…
Add table
Reference in a new issue