diff --git a/llm/dyn_ext_server.go b/llm/dyn_ext_server.go index a4e78313..45e2dc72 100644 --- a/llm/dyn_ext_server.go +++ b/llm/dyn_ext_server.go @@ -136,6 +136,12 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts sparams.n_threads = C.uint(opts.NumThread) + if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + sparams.verbose_logging = C.bool(true) + } else { + sparams.verbose_logging = C.bool(false) + } + slog.Info("Initializing llama server") initResp := newExtServerResp(128) defer freeExtServerResp(initResp) diff --git a/llm/ext_server/ext_server.cpp b/llm/ext_server/ext_server.cpp index ab6fa7e8..635a1f68 100644 --- a/llm/ext_server/ext_server.cpp +++ b/llm/ext_server/ext_server.cpp @@ -30,16 +30,17 @@ std::atomic ext_server_running(false); std::thread ext_server_thread; void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) { -#if SERVER_VERBOSE != 1 - log_disable(); -#endif - LOG_TEE("system info: %s\n", llama_print_system_info()); assert(err != NULL && sparams != NULL); + log_set_target(stderr); + if (!sparams->verbose_logging) { + log_disable(); + } + + LOG_TEE("system info: %s\n", llama_print_system_info()); err->id = 0; err->msg[0] = '\0'; try { llama = new llama_server_context; - log_set_target(stdout); gpt_params params; params.n_ctx = sparams->n_ctx; params.n_batch = sparams->n_batch; diff --git a/llm/ext_server/ext_server.h b/llm/ext_server/ext_server.h index 0a584de0..8eefb3cc 100644 --- a/llm/ext_server/ext_server.h +++ b/llm/ext_server/ext_server.h @@ -45,6 +45,7 @@ typedef struct ext_server_params { bool embedding; // get only sentence embedding ext_server_lora_adapter_t *lora_adapters; char *mmproj; + bool verbose_logging; // Enable verbose logging of the server } ext_server_params_t; typedef struct ext_server_task_result {