Merge pull request #2007 from dhiltgen/cpu_fallback
Add multiple CPU variants for Intel Mac
This commit is contained in:
commit
df40b11d03
18 changed files with 321 additions and 186 deletions
5
.github/workflows/test.yaml
vendored
5
.github/workflows/test.yaml
vendored
|
@ -79,13 +79,16 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||||
arch: [amd64, arm64]
|
arch: [amd64]
|
||||||
exclude:
|
exclude:
|
||||||
- os: ubuntu-latest
|
- os: ubuntu-latest
|
||||||
arch: arm64
|
arch: arm64
|
||||||
- os: windows-latest
|
- os: windows-latest
|
||||||
arch: arm64
|
arch: arm64
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
env:
|
||||||
|
GOARCH: ${{ matrix.arch }}
|
||||||
|
CGO_ENABLED: "1"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
|
|
|
@ -10,6 +10,7 @@ COPY llm llm
|
||||||
|
|
||||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
|
@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
@ -72,7 +78,7 @@ RUN sh gen_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_FLAGS
|
ARG CGO_CFLAGS
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_FLAGS
|
ARG CGO_CFLAGS
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags | RTLD_DEEPBIND)
|
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
|
||||||
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
|
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
|
||||||
#define LOAD_ERR() strdup(dlerror())
|
#define LOAD_ERR() strdup(dlerror())
|
||||||
#define UNLOAD_LIBRARY(handle) dlclose(handle)
|
#define UNLOAD_LIBRARY(handle) dlclose(handle)
|
||||||
|
@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
|
||||||
{"", NULL},
|
{"", NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
printf("loading %s library\n", libPath);
|
printf("loading library %s\n", libPath);
|
||||||
s->handle = LOAD_LIBRARY(libPath, RTLD_NOW);
|
s->handle = LOAD_LIBRARY(libPath, RTLD_GLOBAL|RTLD_NOW);
|
||||||
if (!s->handle) {
|
if (!s->handle) {
|
||||||
err->id = -1;
|
err->id = -1;
|
||||||
char *msg = LOAD_ERR();
|
char *msg = LOAD_ERR();
|
||||||
|
|
|
@ -372,15 +372,6 @@ func updatePath(dir string) {
|
||||||
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
||||||
log.Printf("Updating PATH to %s", newPath)
|
log.Printf("Updating PATH to %s", newPath)
|
||||||
os.Setenv("PATH", newPath)
|
os.Setenv("PATH", newPath)
|
||||||
} else {
|
|
||||||
pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
|
|
||||||
for _, comp := range pathComponents {
|
|
||||||
if comp == dir {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
newPath := strings.Join(append([]string{dir}, pathComponents...), ":")
|
|
||||||
log.Printf("Updating LD_LIBRARY_PATH to %s", newPath)
|
|
||||||
os.Setenv("LD_LIBRARY_PATH", newPath)
|
|
||||||
}
|
}
|
||||||
|
// linux and darwin rely on rpath
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,28 +2,24 @@
|
||||||
|
|
||||||
set(TARGET ext_server)
|
set(TARGET ext_server)
|
||||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||||
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
|
if (WIN32)
|
||||||
|
add_library(${TARGET} SHARED ../../../ext_server/ext_server.cpp ../../llama.cpp)
|
||||||
|
else()
|
||||||
|
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp ../../llama.cpp)
|
||||||
|
endif()
|
||||||
target_include_directories(${TARGET} PRIVATE ../../common)
|
target_include_directories(${TARGET} PRIVATE ../../common)
|
||||||
target_include_directories(${TARGET} PRIVATE ../..)
|
target_include_directories(${TARGET} PRIVATE ../..)
|
||||||
target_include_directories(${TARGET} PRIVATE ../../..)
|
target_include_directories(${TARGET} PRIVATE ../../..)
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
|
target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
|
||||||
target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE ggml llava common )
|
||||||
target_compile_definitions(${TARGET} PRIVATE
|
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>)
|
||||||
)
|
install(TARGETS ext_server LIBRARY)
|
||||||
|
|
||||||
if (BUILD_SHARED_LIBS)
|
|
||||||
set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
||||||
target_compile_definitions(ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD)
|
|
||||||
add_library(ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>)
|
|
||||||
target_link_libraries(ext_server_shared PRIVATE ggml llama llava common ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
install(TARGETS ext_server_shared LIBRARY)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (CUDAToolkit_FOUND)
|
if (CUDAToolkit_FOUND)
|
||||||
target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
target_link_libraries(ext_server_shared PRIVATE nvml)
|
target_link_libraries(${TARGET} PRIVATE nvml)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
|
@ -1,15 +1,44 @@
|
||||||
# common logic across linux and darwin
|
# common logic across linux and darwin
|
||||||
|
|
||||||
init_vars() {
|
init_vars() {
|
||||||
|
case "${GOARCH}" in
|
||||||
|
"amd64")
|
||||||
|
ARCH="x86_64"
|
||||||
|
;;
|
||||||
|
"arm64")
|
||||||
|
ARCH="arm64"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
|
||||||
|
esac
|
||||||
|
|
||||||
LLAMACPP_DIR=../llama.cpp
|
LLAMACPP_DIR=../llama.cpp
|
||||||
CMAKE_DEFS=""
|
CMAKE_DEFS=""
|
||||||
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
|
CMAKE_TARGETS="--target ext_server"
|
||||||
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
||||||
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"
|
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
|
||||||
else
|
else
|
||||||
# TODO - add additional optimization flags...
|
# TODO - add additional optimization flags...
|
||||||
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"
|
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
|
||||||
fi
|
fi
|
||||||
|
case $(uname -s) in
|
||||||
|
"Darwin")
|
||||||
|
LIB_EXT="dylib"
|
||||||
|
WHOLE_ARCHIVE="-Wl,-force_load"
|
||||||
|
NO_WHOLE_ARCHIVE=""
|
||||||
|
GCC_ARCH="-arch ${ARCH}"
|
||||||
|
;;
|
||||||
|
"Linux")
|
||||||
|
LIB_EXT="so"
|
||||||
|
WHOLE_ARCHIVE="-Wl,--whole-archive"
|
||||||
|
NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
|
||||||
|
|
||||||
|
# Cross compiling not supported on linux - Use docker
|
||||||
|
GCC_ARCH=""
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
git_module_setup() {
|
git_module_setup() {
|
||||||
|
@ -40,25 +69,29 @@ apply_patches() {
|
||||||
build() {
|
build() {
|
||||||
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
|
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
|
||||||
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
|
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
|
||||||
|
mkdir -p ${BUILD_DIR}/lib/
|
||||||
|
g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
|
||||||
|
${GCC_ARCH} \
|
||||||
|
${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
|
||||||
|
${BUILD_DIR}/common/libcommon.a \
|
||||||
|
${BUILD_DIR}/libllama.a \
|
||||||
|
-Wl,-rpath,\$ORIGIN \
|
||||||
|
-lpthread -ldl -lm \
|
||||||
|
${EXTRA_LIBS}
|
||||||
}
|
}
|
||||||
|
|
||||||
install() {
|
compress_libs() {
|
||||||
rm -rf ${BUILD_DIR}/lib
|
echo "Compressing payloads to reduce overall binary size..."
|
||||||
mkdir -p ${BUILD_DIR}/lib
|
pids=""
|
||||||
cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib
|
for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
|
||||||
cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib
|
bzip2 -v9 ${lib} &
|
||||||
cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib
|
pids+=" $!"
|
||||||
cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
|
done
|
||||||
}
|
echo
|
||||||
|
for pid in ${pids}; do
|
||||||
link_server_lib() {
|
wait $pid
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
done
|
||||||
-Wl,--whole-archive \
|
echo "Finished compression"
|
||||||
${BUILD_DIR}/lib/libext_server.a \
|
|
||||||
-Wl,--no-whole-archive \
|
|
||||||
${BUILD_DIR}/lib/libcommon.a \
|
|
||||||
${BUILD_DIR}/lib/libllama.a \
|
|
||||||
-lstdc++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Keep the local tree clean after we're done with the build
|
# Keep the local tree clean after we're done with the build
|
||||||
|
|
|
@ -9,16 +9,52 @@ set -o pipefail
|
||||||
echo "Starting darwin generate script"
|
echo "Starting darwin generate script"
|
||||||
source $(dirname $0)/gen_common.sh
|
source $(dirname $0)/gen_common.sh
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
git_module_setup
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
|
apply_patches
|
||||||
|
|
||||||
|
COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=off"
|
||||||
|
|
||||||
case "${GOARCH}" in
|
case "${GOARCH}" in
|
||||||
"amd64")
|
"amd64")
|
||||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"
|
||||||
ARCH="x86_64"
|
|
||||||
|
#
|
||||||
|
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
||||||
|
#
|
||||||
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
|
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu"
|
||||||
|
echo "Building LCD CPU"
|
||||||
|
build
|
||||||
|
compress_libs
|
||||||
|
|
||||||
|
#
|
||||||
|
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
||||||
|
# Approximately 400% faster than LCD on same CPU
|
||||||
|
#
|
||||||
|
init_vars
|
||||||
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
|
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx"
|
||||||
|
echo "Building AVX CPU"
|
||||||
|
build
|
||||||
|
compress_libs
|
||||||
|
|
||||||
|
#
|
||||||
|
# ~2013 CPU Dynamic library
|
||||||
|
# Approximately 10% faster than AVX on same CPU
|
||||||
|
#
|
||||||
|
init_vars
|
||||||
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
||||||
|
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx2"
|
||||||
|
echo "Building AVX2 CPU"
|
||||||
|
build
|
||||||
|
compress_libs
|
||||||
;;
|
;;
|
||||||
"arm64")
|
"arm64")
|
||||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
||||||
ARCH="arm64"
|
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/metal"
|
||||||
|
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
|
||||||
|
build
|
||||||
|
compress_libs
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "GOARCH must be set"
|
echo "GOARCH must be set"
|
||||||
|
@ -27,21 +63,4 @@ case "${GOARCH}" in
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
git_module_setup
|
|
||||||
apply_patches
|
|
||||||
build
|
|
||||||
install
|
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
|
||||||
-arch ${ARCH} \
|
|
||||||
-Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
|
|
||||||
${BUILD_DIR}/lib/libcommon.a \
|
|
||||||
${BUILD_DIR}/lib/libllama.a \
|
|
||||||
${BUILD_DIR}/lib/libggml_static.a \
|
|
||||||
-lpthread -ldl -lm -lc++ \
|
|
||||||
-framework Accelerate \
|
|
||||||
-framework Foundation \
|
|
||||||
-framework Metal \
|
|
||||||
-framework MetalKit \
|
|
||||||
-framework MetalPerformanceShaders
|
|
||||||
|
|
||||||
cleanup
|
cleanup
|
||||||
|
|
|
@ -2,16 +2,14 @@
|
||||||
# This script is intended to run inside the go generate
|
# This script is intended to run inside the go generate
|
||||||
# working directory must be llm/generate/
|
# working directory must be llm/generate/
|
||||||
|
|
||||||
# First we build our default built-in library which will be linked into the CGO
|
# First we build one or more CPU based LLM libraries
|
||||||
# binary as a normal dependency. This default build is CPU based.
|
|
||||||
#
|
#
|
||||||
# Then we build a CUDA dynamic library (although statically linked with the CUDA
|
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
|
||||||
# library dependencies for maximum portability)
|
# library dependencies
|
||||||
#
|
#
|
||||||
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. ROCm is particularly
|
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
|
||||||
# important to be a dynamic lib even if it's the only GPU library detected because
|
# libraries are quite large, and also dynamically load data files at runtime
|
||||||
# we can't redistribute the objectfiles but must rely on dynamic libraries at
|
# which in turn are large, so we don't attempt to carry them as payload
|
||||||
# runtime, which could lead the server not to start if not present.
|
|
||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
|
@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
|
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
|
||||||
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
|
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
|
||||||
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
||||||
echo "Building custom CPU"
|
echo "Building custom CPU"
|
||||||
build
|
build
|
||||||
install
|
compress_libs
|
||||||
link_server_lib
|
|
||||||
else
|
else
|
||||||
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
|
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
|
||||||
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
||||||
|
@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
||||||
#
|
#
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
||||||
echo "Building LCD CPU"
|
echo "Building LCD CPU"
|
||||||
build
|
build
|
||||||
install
|
compress_libs
|
||||||
link_server_lib
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
||||||
|
@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
#
|
#
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
|
||||||
echo "Building AVX CPU"
|
echo "Building AVX CPU"
|
||||||
build
|
build
|
||||||
install
|
compress_libs
|
||||||
link_server_lib
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# ~2013 CPU Dynamic library
|
# ~2013 CPU Dynamic library
|
||||||
|
@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
#
|
#
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
|
||||||
echo "Building AVX2 CPU"
|
echo "Building AVX2 CPU"
|
||||||
build
|
build
|
||||||
install
|
compress_libs
|
||||||
link_server_lib
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Skipping CPU generation step as requested"
|
echo "Skipping CPU generation step as requested"
|
||||||
|
@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
|
||||||
CUDA_VARIANT=_v${CUDA_MAJOR}
|
CUDA_VARIANT=_v${CUDA_MAJOR}
|
||||||
fi
|
fi
|
||||||
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
|
||||||
|
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
|
||||||
build
|
build
|
||||||
install
|
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
# Carry the CUDA libs as payloads to help reduce dependency burden on users
|
||||||
-Wl,--whole-archive \
|
#
|
||||||
${BUILD_DIR}/lib/libext_server.a \
|
# TODO - in the future we may shift to packaging these separately and conditionally
|
||||||
${BUILD_DIR}/lib/libcommon.a \
|
# downloading them in the install script.
|
||||||
${BUILD_DIR}/lib/libllama.a \
|
DEPS="$(ldd ${BUILD_DIR}/lib/libext_server.so )"
|
||||||
-Wl,--no-whole-archive \
|
for lib in libcudart.so libcublas.so libcublasLt.so ; do
|
||||||
${CUDA_LIB_DIR}/libcudart_static.a \
|
DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
|
||||||
${CUDA_LIB_DIR}/libcublas_static.a \
|
if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
|
||||||
${CUDA_LIB_DIR}/libcublasLt_static.a \
|
cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/lib/"
|
||||||
${CUDA_LIB_DIR}/libcudadevrt.a \
|
elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
|
||||||
${CUDA_LIB_DIR}/libculibos.a \
|
cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/lib/"
|
||||||
-lcuda \
|
else
|
||||||
-lrt -lpthread -ldl -lstdc++ -lm
|
cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/lib/"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
compress_libs
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "${ROCM_PATH}" ]; then
|
if [ -z "${ROCM_PATH}" ]; then
|
||||||
|
@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
|
||||||
fi
|
fi
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
|
||||||
|
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
|
||||||
build
|
build
|
||||||
install
|
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
# Note: the ROCM libs and runtime library files are too large to embed, so we depend on
|
||||||
-Wl,--whole-archive \
|
# them being present at runtime on the host
|
||||||
${BUILD_DIR}/lib/libext_server.a \
|
compress_libs
|
||||||
${BUILD_DIR}/lib/libcommon.a \
|
|
||||||
${BUILD_DIR}/lib/libllama.a \
|
|
||||||
-Wl,--no-whole-archive \
|
|
||||||
-lrt -lpthread -ldl -lstdc++ -lm \
|
|
||||||
-L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
|
|
||||||
-Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
|
|
||||||
-lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cleanup
|
cleanup
|
||||||
|
|
|
@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
|
||||||
function init_vars {
|
function init_vars {
|
||||||
$script:llamacppDir = "../llama.cpp"
|
$script:llamacppDir = "../llama.cpp"
|
||||||
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64")
|
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64")
|
||||||
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
|
$script:cmakeTargets = @("ext_server")
|
||||||
|
$script:ARCH = "amd64" # arm not yet supported.
|
||||||
if ($env:CGO_CFLAGS -contains "-g") {
|
if ($env:CGO_CFLAGS -contains "-g") {
|
||||||
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
|
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
|
||||||
$script:config = "RelWithDebInfo"
|
$script:config = "RelWithDebInfo"
|
||||||
|
@ -13,6 +14,17 @@ function init_vars {
|
||||||
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
|
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
|
||||||
$script:config = "Release"
|
$script:config = "Release"
|
||||||
}
|
}
|
||||||
|
# Try to find the CUDA dir
|
||||||
|
if ($env:CUDA_LIB_DIR -eq $null) {
|
||||||
|
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
||||||
|
if ($d -ne $null) {
|
||||||
|
$script:CUDA_LIB_DIR=($d| split-path -parent)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
|
||||||
|
}
|
||||||
|
$script:BZIP2=(get-command -ea 'silentlycontinue' bzip2).path
|
||||||
|
$script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
|
||||||
}
|
}
|
||||||
|
|
||||||
function git_module_setup {
|
function git_module_setup {
|
||||||
|
@ -47,11 +59,25 @@ function build {
|
||||||
function install {
|
function install {
|
||||||
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
||||||
md "${script:buildDir}/lib" -ea 0 > $null
|
md "${script:buildDir}/lib" -ea 0 > $null
|
||||||
cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib"
|
cp "${script:buildDir}/bin/${script:config}/ext_server.dll" "${script:buildDir}/lib"
|
||||||
cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
|
cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
|
||||||
|
|
||||||
# Display the dll dependencies in the build log
|
# Display the dll dependencies in the build log
|
||||||
dumpbin /dependents "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" | select-string ".dll"
|
if ($script:DUMPBIN -ne $null) {
|
||||||
|
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function compress_libs {
|
||||||
|
if ($script:BZIP2 -eq $null) {
|
||||||
|
write-host "bzip2 not installed, not compressing files"
|
||||||
|
return
|
||||||
|
}
|
||||||
|
write-host "Compressing dlls..."
|
||||||
|
$libs = dir "${script:buildDir}/lib/*.dll"
|
||||||
|
foreach ($file in $libs) {
|
||||||
|
& "$script:BZIP2" -v9 $file
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanup {
|
function cleanup {
|
||||||
|
@ -71,33 +97,47 @@ apply_patches
|
||||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
|
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
|
||||||
|
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||||
$script:buildDir="${script:llamacppDir}/build/windows/cpu"
|
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
|
||||||
write-host "Building LCD CPU"
|
write-host "Building LCD CPU"
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
compress_libs
|
||||||
|
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||||
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx"
|
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
|
||||||
write-host "Building AVX CPU"
|
write-host "Building AVX CPU"
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
compress_libs
|
||||||
|
|
||||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||||
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx2"
|
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
|
||||||
write-host "Building AVX2 CPU"
|
write-host "Building AVX2 CPU"
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
compress_libs
|
||||||
|
|
||||||
# Then build cuda as a dynamically loaded library
|
if ($null -ne $script:CUDA_LIB_DIR) {
|
||||||
# TODO figure out how to detect cuda version
|
# Then build cuda as a dynamically loaded library
|
||||||
init_vars
|
$nvcc = (get-command -ea 'silentlycontinue' nvcc)
|
||||||
$script:buildDir="${script:llamacppDir}/build/windows/cuda"
|
if ($null -ne $nvcc) {
|
||||||
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
|
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||||
build
|
}
|
||||||
install
|
if ($null -ne $script:CUDA_VERSION) {
|
||||||
|
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||||
|
}
|
||||||
|
init_vars
|
||||||
|
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||||
|
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
|
||||||
|
build
|
||||||
|
install
|
||||||
|
cp "${script:CUDA_LIB_DIR}/cudart64_*.dll" "${script:buildDir}/lib"
|
||||||
|
cp "${script:CUDA_LIB_DIR}/cublas64_*.dll" "${script:buildDir}/lib"
|
||||||
|
cp "${script:CUDA_LIB_DIR}/cublasLt64_*.dll" "${script:buildDir}/lib"
|
||||||
|
compress_libs
|
||||||
|
}
|
||||||
# TODO - actually implement ROCm support on windows
|
# TODO - actually implement ROCm support on windows
|
||||||
$script:buildDir="${script:llamacppDir}/build/windows/rocm"
|
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
|
||||||
|
|
||||||
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
||||||
md "${script:buildDir}/lib" -ea 0 > $null
|
md "${script:buildDir}/lib" -ea 0 > $null
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package llm
|
package llm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"compress/bzip2"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"golang.org/x/exp/slices"
|
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log"
|
"log"
|
||||||
|
@ -12,6 +12,9 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
|
|
||||||
"github.com/jmorganca/ollama/gpu"
|
"github.com/jmorganca/ollama/gpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,7 +23,7 @@ import (
|
||||||
// Any library without a variant is the lowest common denominator
|
// Any library without a variant is the lowest common denominator
|
||||||
var availableDynLibs = map[string]string{}
|
var availableDynLibs = map[string]string{}
|
||||||
|
|
||||||
const pathComponentCount = 6
|
const pathComponentCount = 7
|
||||||
|
|
||||||
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
|
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
|
||||||
func getDynLibs(gpuInfo gpu.GpuInfo) []string {
|
func getDynLibs(gpuInfo gpu.GpuInfo) []string {
|
||||||
|
@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func nativeInit(workdir string) error {
|
func nativeInit(workdir string) error {
|
||||||
|
log.Printf("Extracting dynamic libraries...")
|
||||||
if runtime.GOOS == "darwin" {
|
if runtime.GOOS == "darwin" {
|
||||||
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
|
||||||
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
|
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
|
||||||
}
|
}
|
||||||
|
|
||||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
|
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == payloadMissing {
|
if err == payloadMissing {
|
||||||
log.Printf("%s", payloadMissing)
|
log.Printf("%s", payloadMissing)
|
||||||
|
@ -151,45 +155,61 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||||
}
|
}
|
||||||
libs := []string{}
|
libs := []string{}
|
||||||
|
|
||||||
|
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
|
||||||
|
// and tracking by version so we don't reexpand the files every time
|
||||||
|
// Also maybe consider lazy loading only what is needed
|
||||||
|
|
||||||
|
g := new(errgroup.Group)
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
pathComps := strings.Split(file, "/")
|
pathComps := strings.Split(file, "/")
|
||||||
if len(pathComps) != pathComponentCount {
|
if len(pathComps) != pathComponentCount {
|
||||||
log.Printf("unexpected payload components: %v", pathComps)
|
log.Printf("unexpected payload components: %v", pathComps)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
|
|
||||||
// Include the variant in the path to avoid conflicts between multiple server libs
|
|
||||||
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
|
|
||||||
srcFile, err := libEmbed.Open(file)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("read payload %s: %v", file, err)
|
|
||||||
}
|
|
||||||
defer srcFile.Close()
|
|
||||||
if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
|
||||||
return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
destFile := filepath.Join(targetDir, filepath.Base(file))
|
file := file
|
||||||
if strings.Contains(destFile, "server") {
|
g.Go(func() error {
|
||||||
libs = append(libs, destFile)
|
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
|
||||||
}
|
// Include the variant in the path to avoid conflicts between multiple server libs
|
||||||
|
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
|
||||||
_, err = os.Stat(destFile)
|
srcFile, err := libEmbed.Open(file)
|
||||||
switch {
|
|
||||||
case errors.Is(err, os.ErrNotExist):
|
|
||||||
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("write payload %s: %v", file, err)
|
return fmt.Errorf("read payload %s: %v", file, err)
|
||||||
}
|
}
|
||||||
defer destFile.Close()
|
defer srcFile.Close()
|
||||||
if _, err := io.Copy(destFile, srcFile); err != nil {
|
if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
||||||
return nil, fmt.Errorf("copy payload %s: %v", file, err)
|
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
||||||
}
|
}
|
||||||
case err != nil:
|
src := io.Reader(srcFile)
|
||||||
return nil, fmt.Errorf("stat payload %s: %v", file, err)
|
filename := file
|
||||||
}
|
if strings.HasSuffix(file, ".bz2") {
|
||||||
|
src = bzip2.NewReader(src)
|
||||||
|
filename = strings.TrimSuffix(filename, ".bz2")
|
||||||
|
}
|
||||||
|
|
||||||
|
destFile := filepath.Join(targetDir, filepath.Base(filename))
|
||||||
|
if strings.Contains(destFile, "server") {
|
||||||
|
libs = append(libs, destFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = os.Stat(destFile)
|
||||||
|
switch {
|
||||||
|
case errors.Is(err, os.ErrNotExist):
|
||||||
|
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("write payload %s: %v", file, err)
|
||||||
|
}
|
||||||
|
defer destFile.Close()
|
||||||
|
if _, err := io.Copy(destFile, src); err != nil {
|
||||||
|
return fmt.Errorf("copy payload %s: %v", file, err)
|
||||||
|
}
|
||||||
|
case err != nil:
|
||||||
|
return fmt.Errorf("stat payload %s: %v", file, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
}
|
}
|
||||||
return libs, nil
|
return libs, g.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func extractPayloadFiles(workDir, glob string) error {
|
func extractPayloadFiles(workDir, glob string) error {
|
||||||
|
@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
|
||||||
if err := os.MkdirAll(workDir, 0o755); err != nil {
|
if err := os.MkdirAll(workDir, 0o755); err != nil {
|
||||||
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
||||||
}
|
}
|
||||||
|
src := io.Reader(srcFile)
|
||||||
|
filename := file
|
||||||
|
if strings.HasSuffix(file, ".bz2") {
|
||||||
|
src = bzip2.NewReader(src)
|
||||||
|
filename = strings.TrimSuffix(filename, ".bz2")
|
||||||
|
}
|
||||||
|
|
||||||
destFile := filepath.Join(workDir, filepath.Base(file))
|
destFile := filepath.Join(workDir, filepath.Base(filename))
|
||||||
_, err = os.Stat(destFile)
|
_, err = os.Stat(destFile)
|
||||||
switch {
|
switch {
|
||||||
case errors.Is(err, os.ErrNotExist):
|
case errors.Is(err, os.ErrNotExist):
|
||||||
|
@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
|
||||||
return fmt.Errorf("write payload %s: %v", file, err)
|
return fmt.Errorf("write payload %s: %v", file, err)
|
||||||
}
|
}
|
||||||
defer destFile.Close()
|
defer destFile.Close()
|
||||||
if _, err := io.Copy(destFile, srcFile); err != nil {
|
if _, err := io.Copy(destFile, src); err != nil {
|
||||||
return fmt.Errorf("copy payload %s: %v", file, err)
|
return fmt.Errorf("copy payload %s: %v", file, err)
|
||||||
}
|
}
|
||||||
case err != nil:
|
case err != nil:
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
package llm
|
|
||||||
|
|
||||||
import (
|
|
||||||
"embed"
|
|
||||||
)
|
|
||||||
|
|
||||||
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.so
|
|
||||||
var libEmbed embed.FS
|
|
8
llm/payload_darwin_amd64.go
Normal file
8
llm/payload_darwin_amd64.go
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
|
||||||
|
var libEmbed embed.FS
|
8
llm/payload_darwin_arm64.go
Normal file
8
llm/payload_darwin_arm64.go
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/arm64/*/lib/*.dylib*
|
||||||
|
var libEmbed embed.FS
|
|
@ -4,5 +4,5 @@ import (
|
||||||
"embed"
|
"embed"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed llama.cpp/build/linux/*/lib/*.so
|
//go:embed llama.cpp/build/linux/*/*/lib/*.so*
|
||||||
var libEmbed embed.FS
|
var libEmbed embed.FS
|
||||||
|
|
|
@ -4,5 +4,5 @@ import (
|
||||||
"embed"
|
"embed"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed llama.cpp/build/windows/*/lib/*.dll
|
//go:embed llama.cpp/build/windows/*/*/lib/*.dll*
|
||||||
var libEmbed embed.FS
|
var libEmbed embed.FS
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
set -eu
|
set -e
|
||||||
|
|
||||||
export VERSION=${VERSION:-0.0.0}
|
export VERSION=${VERSION:-0.0.0}
|
||||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||||
|
@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
|
||||||
rm -rf llm/llama.cpp/build
|
rm -rf llm/llama.cpp/build
|
||||||
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
|
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
|
||||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
|
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
|
||||||
|
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -cover -o dist/ollama-darwin-$TARGETARCH-cov
|
||||||
done
|
done
|
||||||
|
|
||||||
lipo -create -output dist/ollama dist/ollama-darwin-*
|
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
|
||||||
rm -f dist/ollama-darwin-*
|
rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
|
||||||
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
|
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
||||||
|
else
|
||||||
|
echo "Skipping code signing - set APPLE_IDENTITY"
|
||||||
|
fi
|
||||||
chmod +x dist/ollama
|
chmod +x dist/ollama
|
||||||
|
|
||||||
# build and sign the mac app
|
# build and optionally sign the mac app
|
||||||
npm install --prefix app
|
npm install --prefix app
|
||||||
npm run --prefix app make:sign
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
|
npm run --prefix app make:sign
|
||||||
|
else
|
||||||
|
npm run --prefix app make
|
||||||
|
fi
|
||||||
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
|
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
|
||||||
|
|
||||||
# sign the binary and rename it
|
# sign the binary and rename it
|
||||||
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
|
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
|
||||||
|
else
|
||||||
|
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
|
||||||
|
fi
|
||||||
ditto -c -k --keepParent dist/ollama dist/temp.zip
|
ditto -c -k --keepParent dist/ollama dist/temp.zip
|
||||||
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
|
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
||||||
|
fi
|
||||||
mv dist/ollama dist/ollama-darwin
|
mv dist/ollama dist/ollama-darwin
|
||||||
rm -f dist/temp.zip
|
rm -f dist/temp.zip
|
||||||
|
|
|
@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
|
||||||
print("Building")
|
print("Building")
|
||||||
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
|
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
|
||||||
|
|
||||||
|
print("Copying built result")
|
||||||
|
subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe", './dist/'])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ fi
|
||||||
|
|
||||||
if [ -n "${CMAKE_VERSION}" ]; then
|
if [ -n "${CMAKE_VERSION}" ]; then
|
||||||
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
||||||
|
dnf install -y bzip2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "${GOLANG_VERSION}" ]; then
|
if [ -n "${GOLANG_VERSION}" ]; then
|
||||||
|
|
Loading…
Add table
Reference in a new issue