Merge pull request #2007 from dhiltgen/cpu_fallback

Add multiple CPU variants for Intel Mac
This commit is contained in:
Daniel Hiltgen 2024-01-18 11:32:29 -08:00 committed by GitHub
commit df40b11d03
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 321 additions and 186 deletions

View file

@ -79,13 +79,16 @@ jobs:
strategy: strategy:
matrix: matrix:
os: [ubuntu-latest, macos-latest, windows-latest] os: [ubuntu-latest, macos-latest, windows-latest]
arch: [amd64, arm64] arch: [amd64]
exclude: exclude:
- os: ubuntu-latest - os: ubuntu-latest
arch: arm64 arch: arm64
- os: windows-latest - os: windows-latest
arch: arm64 arch: arm64
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: "1"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:

View file

@ -10,6 +10,7 @@ COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@ -72,7 +78,7 @@ RUN sh gen_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOFLAGS ARG GOFLAGS
ARG CGO_FLAGS ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG GOFLAGS ARG GOFLAGS
ARG CGO_FLAGS ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/

View file

@ -5,7 +5,7 @@
#ifdef __linux__ #ifdef __linux__
#include <dlfcn.h> #include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags | RTLD_DEEPBIND) #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() strdup(dlerror()) #define LOAD_ERR() strdup(dlerror())
#define UNLOAD_LIBRARY(handle) dlclose(handle) #define UNLOAD_LIBRARY(handle) dlclose(handle)
@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
{"", NULL}, {"", NULL},
}; };
printf("loading %s library\n", libPath); printf("loading library %s\n", libPath);
s->handle = LOAD_LIBRARY(libPath, RTLD_NOW); s->handle = LOAD_LIBRARY(libPath, RTLD_GLOBAL|RTLD_NOW);
if (!s->handle) { if (!s->handle) {
err->id = -1; err->id = -1;
char *msg = LOAD_ERR(); char *msg = LOAD_ERR();

View file

@ -372,15 +372,6 @@ func updatePath(dir string) {
newPath := strings.Join(append([]string{dir}, pathComponents...), ";") newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
log.Printf("Updating PATH to %s", newPath) log.Printf("Updating PATH to %s", newPath)
os.Setenv("PATH", newPath) os.Setenv("PATH", newPath)
} else {
pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
for _, comp := range pathComponents {
if comp == dir {
return
}
}
newPath := strings.Join(append([]string{dir}, pathComponents...), ":")
log.Printf("Updating LD_LIBRARY_PATH to %s", newPath)
os.Setenv("LD_LIBRARY_PATH", newPath)
} }
// linux and darwin rely on rpath
} }

View file

@ -2,28 +2,24 @@
set(TARGET ext_server) set(TARGET ext_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp) if (WIN32)
add_library(${TARGET} SHARED ../../../ext_server/ext_server.cpp ../../llama.cpp)
else()
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp ../../llama.cpp)
endif()
target_include_directories(${TARGET} PRIVATE ../../common) target_include_directories(${TARGET} PRIVATE ../../common)
target_include_directories(${TARGET} PRIVATE ../..) target_include_directories(${TARGET} PRIVATE ../..)
target_include_directories(${TARGET} PRIVATE ../../..) target_include_directories(${TARGET} PRIVATE ../../..)
target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_11)
target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1) target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE ggml llava common )
target_compile_definitions(${TARGET} PRIVATE set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}> target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>)
) install(TARGETS ext_server LIBRARY)
if (BUILD_SHARED_LIBS)
set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD)
add_library(ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>)
target_link_libraries(ext_server_shared PRIVATE ggml llama llava common ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ext_server_shared LIBRARY)
endif()
if (CUDAToolkit_FOUND) if (CUDAToolkit_FOUND)
target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
if (WIN32) if (WIN32)
target_link_libraries(ext_server_shared PRIVATE nvml) target_link_libraries(${TARGET} PRIVATE nvml)
endif() endif()
endif() endif()

View file

@ -1,15 +1,44 @@
# common logic accross linux and darwin # common logic accross linux and darwin
init_vars() { init_vars() {
case "${GOARCH}" in
"amd64")
ARCH="x86_64"
;;
"arm64")
ARCH="arm64"
;;
*)
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
esac
LLAMACPP_DIR=../llama.cpp LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS="" CMAKE_DEFS=""
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static" CMAKE_TARGETS="--target ext_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on" CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
else else
# TODO - add additional optimization flags... # TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off" CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi fi
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}"
;;
"Linux")
LIB_EXT="so"
WHOLE_ARCHIVE="-Wl,--whole-archive"
NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
# Cross compiling not supported on linux - Use docker
GCC_ARCH=""
;;
*)
;;
esac
} }
git_module_setup() { git_module_setup() {
@ -40,25 +69,29 @@ apply_patches() {
build() { build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS} cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8 cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
mkdir -p ${BUILD_DIR}/lib/
g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
${GCC_ARCH} \
${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
${BUILD_DIR}/common/libcommon.a \
${BUILD_DIR}/libllama.a \
-Wl,-rpath,\$ORIGIN \
-lpthread -ldl -lm \
${EXTRA_LIBS}
} }
install() { compress_libs() {
rm -rf ${BUILD_DIR}/lib echo "Compressing payloads to reduce overall binary size..."
mkdir -p ${BUILD_DIR}/lib pids=""
cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib bzip2 -v9 ${lib} &
cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib pids+=" $!"
cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib done
} echo
for pid in ${pids}; do
link_server_lib() { wait $pid
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ done
-Wl,--whole-archive \ echo "Finished compression"
${BUILD_DIR}/lib/libext_server.a \
-Wl,--no-whole-archive \
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
-lstdc++
} }
# Keep the local tree clean after we're done with the build # Keep the local tree clean after we're done with the build

View file

@ -9,16 +9,52 @@ set -o pipefail
echo "Starting darwin generate script" echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh source $(dirname $0)/gen_common.sh
init_vars init_vars
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}" git_module_setup
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal" apply_patches
COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=off"
case "${GOARCH}" in case "${GOARCH}" in
"amd64") "amd64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"
ARCH="x86_64"
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu"
echo "Building LCD CPU"
build
compress_libs
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
compress_libs
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
build
compress_libs
;; ;;
"arm64") "arm64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
ARCH="arm64" BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/metal"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
compress_libs
;; ;;
*) *)
echo "GOARCH must be set" echo "GOARCH must be set"
@ -27,21 +63,4 @@ case "${GOARCH}" in
;; ;;
esac esac
git_module_setup
apply_patches
build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-arch ${ARCH} \
-Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
${BUILD_DIR}/lib/libggml_static.a \
-lpthread -ldl -lm -lc++ \
-framework Accelerate \
-framework Foundation \
-framework Metal \
-framework MetalKit \
-framework MetalPerformanceShaders
cleanup cleanup

View file

@ -2,16 +2,14 @@
# This script is intended to run inside the go generate # This script is intended to run inside the go generate
# working directory must be llm/generate/ # working directory must be llm/generate/
# First we build our default built-in library which will be linked into the CGO # First we build one or more CPU based LLM libraries
# binary as a normal dependency. This default build is CPU based.
# #
# Then we build a CUDA dynamic library (although statically linked with the CUDA # Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies for maximum portability) # library dependencies
# #
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. ROCm is particularly # Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
# important to be a dynamic lib even if it's the only GPU library detected because # libraries are quite large, and also dynamically load data files at runtime
# we can't redistribute the objectfiles but must rely on dynamic libraries at # which in turn are large, so we don't attempt to cary them as payload
# runtime, which could lead the server not to start if not present.
set -ex set -ex
set -o pipefail set -o pipefail
@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building custom CPU" echo "Building custom CPU"
build build
install compress_libs
link_server_lib
else else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
# #
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building LCD CPU" echo "Building LCD CPU"
build build
install compress_libs
link_server_lib
# #
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# #
init_vars init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU" echo "Building AVX CPU"
build build
install compress_libs
link_server_lib
# #
# ~2013 CPU Dynamic library # ~2013 CPU Dynamic library
@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# #
init_vars init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU" echo "Building AVX2 CPU"
build build
install compress_libs
link_server_lib
fi fi
else else
echo "Skipping CPU generation step as requested" echo "Skipping CPU generation step as requested"
@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR} CUDA_VARIANT=_v${CUDA_MAJOR}
fi fi
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
build build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ # Cary the CUDA libs as payloads to help reduce dependency burden on users
-Wl,--whole-archive \ #
${BUILD_DIR}/lib/libext_server.a \ # TODO - in the future we may shift to packaging these separately and conditionally
${BUILD_DIR}/lib/libcommon.a \ # downloading them in the install script.
${BUILD_DIR}/lib/libllama.a \ DEPS="$(ldd ${BUILD_DIR}/lib/libext_server.so )"
-Wl,--no-whole-archive \ for lib in libcudart.so libcublas.so libcublasLt.so ; do
${CUDA_LIB_DIR}/libcudart_static.a \ DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
${CUDA_LIB_DIR}/libcublas_static.a \ if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
${CUDA_LIB_DIR}/libcublasLt_static.a \ cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/lib/"
${CUDA_LIB_DIR}/libcudadevrt.a \ elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
${CUDA_LIB_DIR}/libculibos.a \ cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/lib/"
-lcuda \ else
-lrt -lpthread -ldl -lstdc++ -lm cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/lib/"
fi
done
compress_libs
fi fi
if [ -z "${ROCM_PATH}" ]; then if [ -z "${ROCM_PATH}" ]; then
@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
fi fi
init_vars init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-Wl,--whole-archive \ # them being present at runtime on the host
${BUILD_DIR}/lib/libext_server.a \ compress_libs
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
-Wl,--no-whole-archive \
-lrt -lpthread -ldl -lstdc++ -lm \
-L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
-Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
-lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi fi
cleanup cleanup

View file

@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
function init_vars { function init_vars {
$script:llamacppDir = "../llama.cpp" $script:llamacppDir = "../llama.cpp"
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64") $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64")
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static") $script:cmakeTargets = @("ext_server")
$script:ARCH = "amd64" # arm not yet supported.
if ($env:CGO_CFLAGS -contains "-g") { if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
$script:config = "RelWithDebInfo" $script:config = "RelWithDebInfo"
@ -13,6 +14,17 @@ function init_vars {
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off") $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
$script:config = "Release" $script:config = "Release"
} }
# Try to find the CUDA dir
if ($env:CUDA_LIB_DIR -eq $null) {
$d=(get-command -ea 'silentlycontinue' nvcc).path
if ($d -ne $null) {
$script:CUDA_LIB_DIR=($d| split-path -parent)
}
} else {
$script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
}
$script:BZIP2=(get-command -ea 'silentlycontinue' bzip2).path
$script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
} }
function git_module_setup { function git_module_setup {
@ -47,11 +59,25 @@ function build {
function install { function install {
rm -ea 0 -recurse -force -path "${script:buildDir}/lib" rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null md "${script:buildDir}/lib" -ea 0 > $null
cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib" cp "${script:buildDir}/bin/${script:config}/ext_server.dll" "${script:buildDir}/lib"
cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib" cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
# Display the dll dependencies in the build log # Display the dll dependencies in the build log
dumpbin /dependents "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" | select-string ".dll" if ($script:DUMPBIN -ne $null) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
}
}
function compress_libs {
if ($script:BZIP2 -eq $null) {
write-host "bzip2 not installed, not compressing files"
return
}
write-host "Compressing dlls..."
$libs = dir "${script:buildDir}/lib/*.dll"
foreach ($file in $libs) {
& "$script:BZIP2" -v9 $file
}
} }
function cleanup { function cleanup {
@ -71,33 +97,47 @@ apply_patches
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off") $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
write-host "Building LCD CPU" write-host "Building LCD CPU"
build build
install install
compress_libs
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
write-host "Building AVX CPU" write-host "Building AVX CPU"
build build
install install
compress_libs
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx2" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
write-host "Building AVX2 CPU" write-host "Building AVX2 CPU"
build build
install install
compress_libs
# Then build cuda as a dynamically loaded library if ($null -ne $script:CUDA_LIB_DIR) {
# TODO figure out how to detect cuda version # Then build cuda as a dynamically loaded library
init_vars $nvcc = (get-command -ea 'silentlycontinue' nvcc)
$script:buildDir="${script:llamacppDir}/build/windows/cuda" if ($null -ne $nvcc) {
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on") $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
build }
install if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
build
install
cp "${script:CUDA_LIB_DIR}/cudart64_*.dll" "${script:buildDir}/lib"
cp "${script:CUDA_LIB_DIR}/cublas64_*.dll" "${script:buildDir}/lib"
cp "${script:CUDA_LIB_DIR}/cublasLt64_*.dll" "${script:buildDir}/lib"
compress_libs
}
# TODO - actually implement ROCm support on windows # TODO - actually implement ROCm support on windows
$script:buildDir="${script:llamacppDir}/build/windows/rocm" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
rm -ea 0 -recurse -force -path "${script:buildDir}/lib" rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null md "${script:buildDir}/lib" -ea 0 > $null

View file

@ -1,9 +1,9 @@
package llm package llm
import ( import (
"compress/bzip2"
"errors" "errors"
"fmt" "fmt"
"golang.org/x/exp/slices"
"io" "io"
"io/fs" "io/fs"
"log" "log"
@ -12,6 +12,9 @@ import (
"runtime" "runtime"
"strings" "strings"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"github.com/jmorganca/ollama/gpu" "github.com/jmorganca/ollama/gpu"
) )
@ -20,7 +23,7 @@ import (
// Any library without a variant is the lowest common denominator // Any library without a variant is the lowest common denominator
var availableDynLibs = map[string]string{} var availableDynLibs = map[string]string{}
const pathComponentCount = 6 const pathComponentCount = 7
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
func getDynLibs(gpuInfo gpu.GpuInfo) []string { func getDynLibs(gpuInfo gpu.GpuInfo) []string {
@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
} }
func nativeInit(workdir string) error { func nativeInit(workdir string) error {
log.Printf("Extracting dynamic libraries...")
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
if err != nil { if err != nil {
@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
} }
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
if err != nil { if err != nil {
if err == payloadMissing { if err == payloadMissing {
log.Printf("%s", payloadMissing) log.Printf("%s", payloadMissing)
@ -151,45 +155,61 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
} }
libs := []string{} libs := []string{}
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
// and tracking by version so we don't reexpand the files every time
// Also maybe consider lazy loading only what is needed
g := new(errgroup.Group)
for _, file := range files { for _, file := range files {
pathComps := strings.Split(file, "/") pathComps := strings.Split(file, "/")
if len(pathComps) != pathComponentCount { if len(pathComps) != pathComponentCount {
log.Printf("unexpected payload components: %v", pathComps) log.Printf("unexpected payload components: %v", pathComps)
continue continue
} }
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
srcFile, err := libEmbed.Open(file)
if err != nil {
return nil, fmt.Errorf("read payload %s: %v", file, err)
}
defer srcFile.Close()
if err := os.MkdirAll(targetDir, 0o755); err != nil {
return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
}
destFile := filepath.Join(targetDir, filepath.Base(file)) file := file
if strings.Contains(destFile, "server") { g.Go(func() error {
libs = append(libs, destFile) // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
} // Include the variant in the path to avoid conflicts between multiple server libs
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
_, err = os.Stat(destFile) srcFile, err := libEmbed.Open(file)
switch {
case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil { if err != nil {
return nil, fmt.Errorf("write payload %s: %v", file, err) return fmt.Errorf("read payload %s: %v", file, err)
} }
defer destFile.Close() defer srcFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil { if err := os.MkdirAll(targetDir, 0o755); err != nil {
return nil, fmt.Errorf("copy payload %s: %v", file, err) return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
} }
case err != nil: src := io.Reader(srcFile)
return nil, fmt.Errorf("stat payload %s: %v", file, err) filename := file
} if strings.HasSuffix(file, ".bz2") {
src = bzip2.NewReader(src)
filename = strings.TrimSuffix(filename, ".bz2")
}
destFile := filepath.Join(targetDir, filepath.Base(filename))
if strings.Contains(destFile, "server") {
libs = append(libs, destFile)
}
_, err = os.Stat(destFile)
switch {
case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
return fmt.Errorf("write payload %s: %v", file, err)
}
defer destFile.Close()
if _, err := io.Copy(destFile, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err)
}
case err != nil:
return fmt.Errorf("stat payload %s: %v", file, err)
}
return nil
})
} }
return libs, nil return libs, g.Wait()
} }
func extractPayloadFiles(workDir, glob string) error { func extractPayloadFiles(workDir, glob string) error {
@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
if err := os.MkdirAll(workDir, 0o755); err != nil { if err := os.MkdirAll(workDir, 0o755); err != nil {
return fmt.Errorf("create payload temp dir %s: %v", workDir, err) return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
} }
src := io.Reader(srcFile)
filename := file
if strings.HasSuffix(file, ".bz2") {
src = bzip2.NewReader(src)
filename = strings.TrimSuffix(filename, ".bz2")
}
destFile := filepath.Join(workDir, filepath.Base(file)) destFile := filepath.Join(workDir, filepath.Base(filename))
_, err = os.Stat(destFile) _, err = os.Stat(destFile)
switch { switch {
case errors.Is(err, os.ErrNotExist): case errors.Is(err, os.ErrNotExist):
@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
return fmt.Errorf("write payload %s: %v", file, err) return fmt.Errorf("write payload %s: %v", file, err)
} }
defer destFile.Close() defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil { if _, err := io.Copy(destFile, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err) return fmt.Errorf("copy payload %s: %v", file, err)
} }
case err != nil: case err != nil:

View file

@ -1,8 +0,0 @@
package llm
import (
"embed"
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.so
var libEmbed embed.FS

View file

@ -0,0 +1,8 @@
package llm
import (
"embed"
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
var libEmbed embed.FS

View file

@ -0,0 +1,8 @@
package llm
import (
"embed"
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/arm64/*/lib/*.dylib*
var libEmbed embed.FS

View file

@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/build/linux/*/lib/*.so //go:embed llama.cpp/build/linux/*/*/lib/*.so*
var libEmbed embed.FS var libEmbed embed.FS

View file

@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/build/windows/*/lib/*.dll //go:embed llama.cpp/build/windows/*/*/lib/*.dll*
var libEmbed embed.FS var libEmbed embed.FS

View file

@ -1,6 +1,6 @@
#!/bin/sh #!/bin/sh
set -eu set -e
export VERSION=${VERSION:-0.0.0} export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
rm -rf llm/llama.cpp/build rm -rf llm/llama.cpp/build
GOOS=darwin GOARCH=$TARGETARCH go generate ./... GOOS=darwin GOARCH=$TARGETARCH go generate ./...
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -cover -o dist/ollama-darwin-$TARGETARCH-cov
done done
lipo -create -output dist/ollama dist/ollama-darwin-* lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm -f dist/ollama-darwin-* rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
else
echo "Skipping code signing - set APPLE_IDENTITY"
fi
chmod +x dist/ollama chmod +x dist/ollama
# build and sign the mac app # build and optionally sign the mac app
npm install --prefix app npm install --prefix app
npm run --prefix app make:sign if [ -n "$APPLE_IDENTITY" ]; then
npm run --prefix app make:sign
else
npm run --prefix app make
fi
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it # sign the binary and rename it
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
else
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto -c -k --keepParent dist/ollama dist/temp.zip ditto -c -k --keepParent dist/ollama dist/temp.zip
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
mv dist/ollama dist/ollama-darwin mv dist/ollama dist/ollama-darwin
rm -f dist/temp.zip rm -f dist/temp.zip

View file

@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
print("Building") print("Building")
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.']) subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
print("Copying built result")
subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe", './dist/'])

View file

@ -28,6 +28,7 @@ fi
if [ -n "${CMAKE_VERSION}" ]; then if [ -n "${CMAKE_VERSION}" ]; then
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
dnf install -y bzip2
fi fi
if [ -n "${GOLANG_VERSION}" ]; then if [ -n "${GOLANG_VERSION}" ]; then