diff --git a/docs/development.md b/docs/development.md index cf75101d..594787d7 100644 --- a/docs/development.md +++ b/docs/development.md @@ -14,7 +14,13 @@ Install required tools: brew install go cmake gcc ``` -Get the required libraries: +Optionally enable debugging and more verbose logging: + +```bash +export CGO_CFLAGS="-g" +``` + +Get the required libraries and build the native LLM code: ```bash go generate ./... diff --git a/llm/llama.cpp/gen_common.sh b/llm/llama.cpp/gen_common.sh index ad5d12e2..0bfd8d8f 100644 --- a/llm/llama.cpp/gen_common.sh +++ b/llm/llama.cpp/gen_common.sh @@ -3,14 +3,14 @@ init_vars() { LLAMACPP_DIR=gguf PATCHES="0001-Expose-callable-API-for-server.patch" - CMAKE_DEFS="-DLLAMA_ACCELERATE=on -DLLAMA_SERVER_VERBOSE=off" + CMAKE_DEFS="-DLLAMA_ACCELERATE=on" # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static" if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then - CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}" + CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}" else # TODO - add additional optimization flags... 
- CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release ${CMAKE_DEFS}" + CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}" fi } diff --git a/llm/llama.cpp/gen_windows.ps1 b/llm/llama.cpp/gen_windows.ps1 index 2c77d4ab..2f2f856d 100644 --- a/llm/llama.cpp/gen_windows.ps1 +++ b/llm/llama.cpp/gen_windows.ps1 @@ -7,9 +7,10 @@ function init_vars { $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-DLLAMA_K_QUANTS=on", "-DLLAMA_ACCELERATE=on", "-A","x64") if ($env:CGO_CFLAGS -contains "-g") { - $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on") + $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") $script:config = "RelWithDebInfo" } else { + $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off") $script:config = "Release" } } diff --git a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch index ac3fc12a..e1c1b141 100644 --- a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch +++ b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch @@ -1,22 +1,22 @@ -From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001 +From 90c332fe2ef61149b38561d02836e66715df214d Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 13 Nov 2023 12:25:58 -0800 Subject: [PATCH] Expose callable API for server This adds an extern "C" interface within the example server --- - examples/server/CMakeLists.txt | 24 +++ - examples/server/server.cpp | 279 +++++++++++++++++++++++++++++++++ + examples/server/CMakeLists.txt | 27 ++++ + examples/server/server.cpp | 280 +++++++++++++++++++++++++++++++++ examples/server/server.h | 89 +++++++++++ ggml-cuda.cu | 1 + - 4 files changed, 393 insertions(+) + 4 files changed, 397 insertions(+) create mode 100644 examples/server/server.h diff --git a/examples/server/CMakeLists.txt 
b/examples/server/CMakeLists.txt -index 859cd12..4ea47a7 100644 +index 859cd12..da2b9bf 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt -@@ -11,3 +11,27 @@ if (WIN32) +@@ -11,3 +11,30 @@ if (WIN32) TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) endif() target_compile_features(${TARGET} PRIVATE cxx_std_11) @@ -29,6 +29,9 @@ index 859cd12..4ea47a7 100644 +target_compile_features(${TARGET} PRIVATE cxx_std_11) +target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1) +target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT}) ++target_compile_definitions(${TARGET} PRIVATE ++ SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}> ++) + +if (BUILD_SHARED_LIBS) + set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -46,7 +49,7 @@ index 859cd12..4ea47a7 100644 +endif() \ No newline at end of file diff --git a/examples/server/server.cpp b/examples/server/server.cpp -index 0403853..5e78e4d 100644 +index 0403853..07fb05c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -5,6 +5,9 @@ @@ -67,7 +70,7 @@ index 0403853..5e78e4d 100644 int main(int argc, char **argv) { #if SERVER_VERBOSE != 1 -@@ -3123,3 +3127,278 @@ int main(int argc, char **argv) +@@ -3123,3 +3127,279 @@ int main(int argc, char **argv) llama_backend_free(); return 0; } @@ -81,6 +84,7 @@ index 0403853..5e78e4d 100644 +void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) +{ +#if SERVER_VERBOSE != 1 ++ LOG_TEE("disabling verbose llm logging\n"); + log_disable(); +#endif + assert(err != NULL && sparams != NULL); diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 06a2ae1c..8a5d86a5 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -8,7 +8,7 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version mkdir -p dist for TARGETARCH in amd64 arm64; do - docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION 
--build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH . + docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH . docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH docker rm builder-$TARGETARCH