silence warm up log
This commit is contained in:
parent
ee1c994d15
commit
058d0cd04b
6 changed files with 30 additions and 0 deletions
1
.gitmodules
vendored
1
.gitmodules
vendored
|
@ -6,4 +6,5 @@
|
||||||
[submodule "llm/llama.cpp/gguf"]
|
[submodule "llm/llama.cpp/gguf"]
|
||||||
path = llm/llama.cpp/gguf
|
path = llm/llama.cpp/gguf
|
||||||
url = https://github.com/ggerganov/llama.cpp.git
|
url = https://github.com/ggerganov/llama.cpp.git
|
||||||
|
ignore = dirty
|
||||||
shallow = true
|
shallow = true
|
||||||
|
|
|
@ -11,5 +11,6 @@ package llm
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
|
|
@ -11,5 +11,6 @@ package llm
|
||||||
//go:generate cmake --build ggml/build/metal --target server --config Release
|
//go:generate cmake --build ggml/build/metal --target server --config Release
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build gguf/build/metal --target server --config Release
|
//go:generate cmake --build gguf/build/metal --target server --config Release
|
||||||
|
|
|
@ -12,6 +12,7 @@ package llm
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
||||||
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
|
||||||
|
|
|
@ -9,5 +9,6 @@ package llm
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
|
25
llm/llama.cpp/patches/0001-remove-warm-up-logging.patch
Normal file
25
llm/llama.cpp/patches/0001-remove-warm-up-logging.patch
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
From 07993bdc35345b67b27aa649a7c099ad42d80c4c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Michael Yang <mxyng@pm.me>
|
||||||
|
Date: Thu, 21 Sep 2023 14:43:21 -0700
|
||||||
|
Subject: [PATCH] remove warm up logging
|
||||||
|
|
||||||
|
---
|
||||||
|
common/common.cpp | 2 --
|
||||||
|
1 file changed, 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/common/common.cpp b/common/common.cpp
|
||||||
|
index 2597ba0..b56549b 100644
|
||||||
|
--- a/common/common.cpp
|
||||||
|
+++ b/common/common.cpp
|
||||||
|
@@ -780,8 +780,6 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
- LOG("warming up the model with an empty run\n");
|
||||||
|
-
|
||||||
|
const std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), };
|
||||||
|
llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads);
|
||||||
|
llama_reset_timings(lctx);
|
||||||
|
--
|
||||||
|
2.42.0
|
||||||
|
|
Loading…
Reference in a new issue