Fixed startup sequence to report model loading

This commit is contained in:
ManniX-ITA 2024-04-17 17:40:32 +02:00 committed by GitHub
parent bd54b08261
commit c942e4a07b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content)
static void log_server_request(const httplib::Request &req, const httplib::Response &res) static void log_server_request(const httplib::Request &req, const httplib::Response &res)
{ {
// skip GH copilot requests when using default port // skip GH copilot requests when using default port
if (req.path == "/v1/health" || req.path == "/v1/completions") if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions")
{ {
return; return;
} }
@@ -3053,6 +3053,26 @@ int main(int argc, char **argv) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
} }
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
// load the model // load the model
if (!llama.load_model(params)) if (!llama.load_model(params))
{ {
@@ -3257,26 +3277,6 @@ int main(int argc, char **argv) {
}*/ }*/
//); //);
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
llama.queue_tasks.on_new_task(std::bind( llama.queue_tasks.on_new_task(std::bind(
&llama_server_context::process_single_task, &llama, std::placeholders::_1)); &llama_server_context::process_single_task, &llama, std::placeholders::_1));
llama.queue_tasks.on_finish_multitask(std::bind( llama.queue_tasks.on_finish_multitask(std::bind(