ollama/llm/generate/gen_darwin.sh

#!/bin/bash
# This script is intended to be run by `go generate`;
# the working directory must be ./llm/generate/

# TODO - add hardening to detect missing tools (cmake, etc.)

# -e: abort on the first failing command; -x: trace every command as it runs.
set -ex
# Make a pipeline fail when any stage fails, not only the last one.
set -o pipefail
echo "Starting darwin generate script"
# Load the shared helpers that live next to this script. Both expansions are
# quoted so a checkout path containing spaces or glob characters still works.
source "$(dirname "$0")/gen_common.sh"
# NOTE(review): init_vars, git_module_setup and apply_patches are not defined
# in this file; presumably they come from gen_common.sh - confirm there.
init_vars
git_module_setup
apply_patches

#######################################
# Code-sign a build artifact when a signing identity is configured.
# Does nothing (and succeeds) when APPLE_IDENTITY is unset or empty, so
# unsigned local builds keep working.
# Globals:   APPLE_IDENTITY (read) - identity handed to `codesign --sign`
# Arguments: $1 - path of the file to sign
# Returns:   exit status of codesign, or 0 when signing is skipped
#######################################
sign() {
    local target="$1"
    if [ -n "${APPLE_IDENTITY:-}" ]; then
        # The path is quoted so that a build directory containing whitespace
        # reaches codesign as a single argument.
        codesign -f --timestamp --deep --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama "$target"
    fi
}

# CMake definitions shared by every macOS build variant below.
COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_METAL_EMBED_LIBRARY=on"

# CPU-only definitions: the common macOS flags plus the target architecture,
# with LLAMA_METAL and LLAMA_NATIVE switched off.
# This is assigned before the case statement because BOTH architecture arms
# expand it. It used to be assigned only in the amd64 arm, which left the
# arm64 static build expanding an unset variable and therefore configuring
# without the deployment-target / system-name / architecture flags.
# NOTE(review): assumes gen_common.sh does not provide its own
# COMMON_CPU_DEFS and that ARCH is already set by the earlier init_vars call
# - confirm against gen_common.sh.
COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"

# Each variant follows the same recipe: init_vars, set CMAKE_DEFS and
# BUILD_DIR, then build; the dynamic variants are additionally signed and
# compressed.
case "${GOARCH}" in
"amd64")
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="${COMMON_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}_static"
    echo "Building static library"
    build


    #
    # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
    #
    init_vars
    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}/cpu"
    echo "Building LCD CPU"
    build
    sign "${BUILD_DIR}/lib/libext_server.dylib"
    compress

    #
    # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
    # Approximately 400% faster than LCD on same CPU
    #
    init_vars
    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}/cpu_avx"
    echo "Building AVX CPU"
    build
    sign "${BUILD_DIR}/lib/libext_server.dylib"
    compress

    #
    # ~2013 CPU Dynamic library
    # Approximately 10% faster than AVX on same CPU
    #
    init_vars
    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=on -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2"
    echo "Building AVX2 CPU"
    # Accelerate is enabled for this variant only, so it needs the frameworks.
    EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
    build
    sign "${BUILD_DIR}/lib/libext_server.dylib"
    compress
    ;;
"arm64")

    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="${COMMON_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}_static"
    echo "Building static library"
    build

    # Metal-enabled dynamic library. LLAMA_METAL_EMBED_LIBRARY=on is already
    # part of COMMON_DARWIN_DEFS, so it is not repeated here.
    init_vars
    CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}"
    BUILD_DIR="../build/darwin/${ARCH}/metal"
    EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
    build
    sign "${BUILD_DIR}/lib/libext_server.dylib"
    compress
    ;;
*)
    # Reached when GOARCH is unset or names an architecture not handled above.
    # Diagnostics go to stderr so they are not mistaken for build output.
    echo "GOARCH must be set" >&2
    echo "this script is meant to be run from within go generate" >&2
    exit 1
    ;;
esac

cleanup
# BUILD_DIR still points at the last variant built, so its parent directory
# holds one sub-directory per runner. '&&' keeps the glob from listing the
# current directory if the cd fails.
echo "go generate completed.  LLM runners: $(cd "${BUILD_DIR}/.." && echo *)"
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`#!/bin/bash`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`# This script is intended to run inside the go generate`
Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`# working directory must be ./llm/generate/`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00
			`# TODO - add hardening to detect missing tools (cmake, etc.)`

			`set -ex`
			`set -o pipefail`
			`echo "Starting darwin generate script"`
			`source $(dirname $0)/gen_common.sh`
			`init_vars`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`git_module_setup`
			`apply_patches`

sign dylibs on macOS (#2101) 2024-01-20 00:24:11 +00:00			`sign() {`
			`if [ -n "$APPLE_IDENTITY" ]; then`
			`codesign -f --timestamp --deep --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama $1`
			`fi`
			`}`

Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_METAL_EMBED_LIBRARY=on"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`case "${GOARCH}" in`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`"amd64")`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"`

Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`# Static build for linking into the Go binary`
			`init_vars`
			`CMAKE_TARGETS="--target llama --target ggml"`
			`CMAKE_DEFS="${COMMON_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"`
			`BUILD_DIR="../build/darwin/${ARCH}_static"`
			`echo "Building static library"`
			`build`


Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`#`
			`# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)`
			`#`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`init_vars`
Refine Accelerate usage on mac For old macs, accelerate seems to cause crashes, but for AVX2 capable macs, it does not. 2024-01-23 00:25:56 +00:00			`CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`BUILD_DIR="../build/darwin/${ARCH}/cpu"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`echo "Building LCD CPU"`
			`build`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`sign ${BUILD_DIR}/lib/libext_server.dylib`
			`compress`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00
			`#`
			`# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance`
			`# Approximately 400% faster than LCD on same CPU`
			`#`
			`init_vars`
Refine Accelerate usage on mac For old macs, accelerate seems to cause crashes, but for AVX2 capable macs, it does not. 2024-01-23 00:25:56 +00:00			`CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`BUILD_DIR="../build/darwin/${ARCH}/cpu_avx"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`echo "Building AVX CPU"`
			`build`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`sign ${BUILD_DIR}/lib/libext_server.dylib`
			`compress`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00
			`#`
			`# ~2013 CPU Dynamic library`
			`# Approximately 10% faster than AVX on same CPU`
			`#`
			`init_vars`
Refine Accelerate usage on mac For old macs, accelerate seems to cause crashes, but for AVX2 capable macs, it does not. 2024-01-23 00:25:56 +00:00			`CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_ACCELERATE=on -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`echo "Building AVX2 CPU"`
Refine Accelerate usage on mac For old macs, accelerate seems to cause crashes, but for AVX2 capable macs, it does not. 2024-01-23 00:25:56 +00:00			`EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`build`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`sign ${BUILD_DIR}/lib/libext_server.dylib`
			`compress`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`;;`
			`"arm64")`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00
			`# Static build for linking into the Go binary`
			`init_vars`
			`CMAKE_TARGETS="--target llama --target ggml"`
			`CMAKE_DEFS="${COMMON_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"`
			`BUILD_DIR="../build/darwin/${ARCH}_static"`
			`echo "Building static library"`
			`build`

			`init_vars`
update llama.cpp submodule to `77d1ac7` (#3030) 2024-03-09 23:55:34 +00:00			`CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_METAL_EMBED_LIBRARY=on -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}"`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`BUILD_DIR="../build/darwin/${ARCH}/metal"`
Add multiple CPU variants for Intel Mac This also refines the build process for the ext_server build. 2024-01-13 00:28:00 +00:00			`EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"`
			`build`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`sign ${BUILD_DIR}/lib/libext_server.dylib`
			`compress`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`;;`
			`*)`
			`echo "GOARCH must be set"`
			`echo "this script is meant to be run from within go generate"`
			`exit 1`
			`;;`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`esac`

Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`cleanup`
Switch back to subprocessing for llama.cpp This should resolve a number of memory leak and stability defects by allowing us to isolate llama.cpp in a separate process and shutdown when idle, and gracefully restart if it has problems. This also serves as a first step to be able to run multiple copies to support multiple models concurrently. 2024-03-14 17:24:13 +00:00			`echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"`