ollama/llm/llama.cpp/gen_common.sh

# common logic across linux and darwin
init_vars() {
    LLAMACPP_DIR=gguf
    PATCHES="0001-Expose-callable-API-for-server.patch"
    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
    # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
    if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
    else
        # TODO - add additional optimization flags...
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
    fi
}
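# A debug-flavored build can be selected by the caller before init_vars runs,
# e.g. (illustrative example, not part of the original script):
#   export CGO_CFLAGS="-g"   # takes the RelWithDebInfo branch above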
git_module_setup() {
    if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
        echo "Skipping submodule initialization"
        return
    fi
    git submodule init
    git submodule update --force gguf
}
apply_patches() {
    # Wire up our CMakefile
    if ! grep ollama.txt gguf/examples/server/CMakeLists.txt; then
        echo 'include (../../../ollama.txt)' >>gguf/examples/server/CMakeLists.txt
    fi
    # Avoid duplicate main symbols when we link into the cgo binary
    sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
        mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
}
build() {
    cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
    cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
}
install() {
    rm -rf ${BUILD_DIR}/lib
    mkdir -p ${BUILD_DIR}/lib
    cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib
    cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib
    cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib
    cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
}
# Keep the local tree clean after we're done with the build
cleanup() {
    (cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
}
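
# The functions above are intended to be sourced by the platform-specific
# generate scripts referenced in the header comment (linux and darwin). A
# minimal caller might look like the sketch below; the BUILD_DIR value is an
# illustrative assumption set by the caller, not defined in this file.
#
#   . $(dirname $0)/gen_common.sh
#   init_vars
#   BUILD_DIR=gguf/build/cpu    # hypothetical per-platform output directory
#   git_module_setup
#   apply_patches
#   build
#   install
#   cleanup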