diff --git a/Dockerfile.build b/Dockerfile.build
index 6b7e3c4d..c8170919 100644
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -4,6 +4,7 @@ ARG CUDA_VERSION=11.3.1-1
 ARG CMAKE_VERSION=3.22.1
 # ROCm only supports amd64
 ARG ROCM_VERSION=6.0
+ARG CLBLAST_VER=1.6.1
 
 # Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
 RUN apt-get update && \
@@ -23,6 +24,10 @@ RUN apt-get update && \
     apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
 
+# CLBlast
+RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
+    cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
+
 ENV ROCM_PATH=/opt/rocm
 
 # Ubuntu 22.04 arm64 dependencies
@@ -45,7 +50,6 @@ FROM base-${TARGETARCH}
 ARG TARGETARCH
 ARG GOFLAGS="'-ldflags -w -s'"
 ARG CGO_CFLAGS
-ARG CLBLAST_VER=1.6.1
 ARG GOLANG_VERSION=1.21.3
 
 # Common toolchain
@@ -53,10 +57,6 @@ RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
     update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
 
-# CLBlast
-RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
-    cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
-
 # install go
 ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
 RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
/dev/null; then
diff --git a/llm/llama.cpp/gen_linux.sh b/llm/llama.cpp/gen_linux.sh
index e3cb87a8..3d659fff 100755
--- a/llm/llama.cpp/gen_linux.sh
+++ b/llm/llama.cpp/gen_linux.sh
@@ -22,13 +22,14 @@ if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
 fi
 COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
 OLLAMA_DYN_LIB_DIR="gguf/build/lib"
-mkdir -p ${OLLAMA_DYN_LIB_DIR}
-touch ${OLLAMA_DYN_LIB_DIR}/.generated
 source $(dirname $0)/gen_common.sh
 init_vars
 git_module_setup
 apply_patches
+mkdir -p ${OLLAMA_DYN_LIB_DIR}
+touch ${OLLAMA_DYN_LIB_DIR}/.generated
+
 #
 # CPU first for the default library
 #
diff --git a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
index 07e42972..ac3fc12a 100644
--- a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
+++ b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
@@ -1,4 +1,4 @@
-From 7184ae16e8fd0e9e91cac4c81daa323057fa992b Mon Sep 17 00:00:00 2001
+From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
@@ -6,10 +6,10 @@ Subject: [PATCH] Expose callable API for server
 This adds an extern "C" interface within the example server
 ---
  examples/server/CMakeLists.txt |  24 +++
- examples/server/server.cpp     | 276 +++++++++++++++++++++++++++++++++
+ examples/server/server.cpp     | 279 +++++++++++++++++++++++++++++++++
  examples/server/server.h       |  89 +++++++++++
  ggml-cuda.cu                   |   1 +
- 4 files changed, 390 insertions(+)
+ 4 files changed, 393 insertions(+)
  create mode 100644 examples/server/server.h
 
 diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
@@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index 0403853..065420c 100644
+index 0403853..5e78e4d 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
 @@ -5,6 +5,9 @@
  int main(int argc, char **argv)
  {
  #if SERVER_VERBOSE != 1
-@@ -3123,3 +3127,275 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,278 @@ int main(int argc, char **argv)
  llama_backend_free();
  return 0;
  }
 +
 +void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err)
 +{
++#if SERVER_VERBOSE != 1
++    log_disable();
++#endif
 +    assert(err != NULL && sparams != NULL);
 +    err->id = 0;
 +    err->msg[0] = '\0';
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 836de6ac..06a2ae1c 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -8,14 +8,8 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
 mkdir -p dist
 
 for TARGETARCH in amd64 arm64; do
-    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t gpubuilder:$TARGETARCH .
-    docker create --platform linux/$TARGETARCH --name gpubuilder-$TARGETARCH gpubuilder:$TARGETARCH
-    docker cp gpubuilder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
-    docker rm gpubuilder-$TARGETARCH
-
-    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.cpu -t cpubuilder:$TARGETARCH .
-    docker create --platform linux/$TARGETARCH --name cpubuilder-$TARGETARCH cpubuilder:$TARGETARCH
-    docker cp cpubuilder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH-cpu
-    docker rm cpubuilder-$TARGETARCH
-
+    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
+    docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
+    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+    docker rm builder-$TARGETARCH
 done