llm: fix seed value not being applied to requests (#4986)

This commit is contained in:
Jeffrey Morgan 2024-06-11 14:24:41 -07:00 committed by GitHub
parent 2ff45d571d
commit ead259d877
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 3 additions and 9 deletions

View file

@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
#### Request (Reproducible outputs) #### Request (Reproducible outputs)
For reproducible outputs, set `temperature` to 0 and `seed` to a number: For reproducible outputs, set `seed` to a number:
##### Request ##### Request
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
"model": "mistral", "model": "mistral",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"options": { "options": {
"seed": 123, "seed": 123
"temperature": 0
} }
}' }'
``` ```

View file

@@ -359,7 +359,6 @@ struct llama_server_context
// slots / clients // slots / clients
std::vector<server_slot> slots; std::vector<server_slot> slots;
json default_generation_settings_for_props;
llama_server_queue queue_tasks; llama_server_queue queue_tasks;
llama_server_response queue_results; llama_server_response queue_results;
@@ -483,9 +482,6 @@ struct llama_server_context
slots.push_back(slot); slots.push_back(slot);
} }
default_generation_settings_for_props = get_formated_generation(slots.front());
default_generation_settings_for_props["seed"] = -1;
batch = llama_batch_init(n_ctx, 0, params.n_parallel); batch = llama_batch_init(n_ctx, 0, params.n_parallel);
} }
@@ -584,7 +580,7 @@ struct llama_server_context
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->params.seed = json_value(data, "seed", default_params.seed); slot->sparams.seed = json_value(data, "seed", default_params.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
@@ -811,7 +807,6 @@ struct llama_server_context
llama_sampling_free(slot->ctx_sampling); llama_sampling_free(slot->ctx_sampling);
} }
slot->ctx_sampling = llama_sampling_init(slot->sparams); slot->ctx_sampling = llama_sampling_init(slot->sparams);
llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT; slot->command = LOAD_PROMPT;
all_slots_are_idle = false; all_slots_are_idle = false;