# ollama/llm/llama.cpp/gen_common.sh

# Common logic shared across Linux and Darwin

# Set the build configuration variables used by the other helpers in this file.
# Globals read:    CGO_CFLAGS (optional) - a flag beginning with -g selects
#                  the debug-friendly configuration
# Globals written: LLAMACPP_DIR, PATCHES, CMAKE_TARGETS, CMAKE_DEFS
init_vars() {
    LLAMACPP_DIR=gguf
    PATCHES="0001-Expose-callable-API-for-server.patch"
    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
    # Only treat -g as present when it starts a flag (-g, -g3, -ggdb, ...).
    # A plain substring search also fires on flags such as -fno-gnu-unique or
    # on include paths that merely contain "-g". The ":-" default keeps this
    # working when CGO_CFLAGS is unset and the caller runs with `set -u`.
    if printf '%s\n' "${CGO_CFLAGS:-}" | grep -E -- '(^|[[:space:]])-g' >/dev/null; then
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"
    else
        # TODO - add additional optimization flags...
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"
    fi
}

# Initialize the git submodules and force-update the gguf submodule.
# Globals read: OLLAMA_SKIP_PATCHING - when non-empty, no git command is run
#               and a notice is printed instead.
git_module_setup() {
    if [ -z "${OLLAMA_SKIP_PATCHING}" ]; then
        git submodule init
        git submodule update --force gguf
    else
        echo "Skipping submodule initialization"
    fi
}

# Adjust the llama.cpp server example in the gguf tree so it can be linked
# into the cgo binary. Paths are relative to the current directory.
#   - CMakeLists.txt: an include of our own CMakeLists.txt is appended unless
#     a line mentioning "ollama" already exists (grep prints that line).
#   - server.cpp: every "int main(" becomes "int __main(".
apply_patches() {
    local server_dir=gguf/examples/server

    # Wire up our CMakefile, but only once
    grep ollama "${server_dir}/CMakeLists.txt" ||
        echo 'include (../../../CMakeLists.txt) # ollama' >>"${server_dir}/CMakeLists.txt"

    # Avoid duplicate main symbols when we link into the cgo binary. The
    # rewrite goes through a temporary file which replaces the original only
    # when sed succeeded.
    sed -e 's/int main(/int __main(/g' <"./${server_dir}/server.cpp" >"./${server_dir}/server.cpp.tmp" &&
        mv "./${server_dir}/server.cpp.tmp" "./${server_dir}/server.cpp"
}

# Configure and build the llama.cpp targets with cmake.
# Globals read: LLAMACPP_DIR  - source directory (required)
#               BUILD_DIR     - cmake build directory (required)
#               CMAKE_DEFS    - space-separated extra configure arguments
#               CMAKE_TARGETS - space-separated "--target name" arguments
# Returns: 0 on success; non-zero when a required variable is empty or when
#          either cmake invocation fails (the build step is skipped if the
#          configure step failed).
build() {
    if [ -z "${LLAMACPP_DIR:-}" ] || [ -z "${BUILD_DIR:-}" ]; then
        echo "build: LLAMACPP_DIR and BUILD_DIR must be set" >&2
        return 1
    fi
    # CMAKE_DEFS and CMAKE_TARGETS each carry several arguments in one string,
    # so they are deliberately left unquoted to be word-split. The directory
    # paths are quoted so that spaces in them survive.
    # shellcheck disable=SC2086
    cmake -S "${LLAMACPP_DIR}" -B "${BUILD_DIR}" ${CMAKE_DEFS} || return
    # shellcheck disable=SC2086
    cmake --build "${BUILD_DIR}" ${CMAKE_TARGETS} -j8
}

# Gather the static libraries from the build tree into a fresh
# ${BUILD_DIR}/lib directory.
# Globals read: BUILD_DIR - cmake build directory (required)
# Returns: 0 on success; non-zero when BUILD_DIR is empty or when removing,
#          creating or populating the lib directory fails.
install() {
    # Refuse to continue without BUILD_DIR: with an empty value the paths
    # below would resolve to /lib, and the first step is an rm -rf.
    if [ -z "${BUILD_DIR:-}" ]; then
        echo "install: BUILD_DIR must be set" >&2
        return 1
    fi
    rm -rf -- "${BUILD_DIR}/lib" || return
    mkdir -p -- "${BUILD_DIR}/lib" || return
    # A single cp reports failure if any of the archives is missing, so an
    # incomplete build is not masked by the copies that did succeed.
    cp -- \
        "${BUILD_DIR}/examples/server/libext_server.a" \
        "${BUILD_DIR}/common/libcommon.a" \
        "${BUILD_DIR}/libllama.a" \
        "${BUILD_DIR}/libggml_static.a" \
        "${BUILD_DIR}/lib"
}

# Keep the local tree clean after we're done with the build: check out
# CMakeLists.txt and server.cpp again in the server example directory.
cleanup() {
    # Runs in a subshell so the caller's working directory is not changed;
    # git is not invoked when the directory cannot be entered.
    (
        cd gguf/examples/server/ || exit
        git checkout CMakeLists.txt server.cpp
    )
}
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`# common logic accross linux and darwin`

			`init_vars() {`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`LLAMACPP_DIR=gguf`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`PATCHES="0001-Expose-callable-API-for-server.patch"`
update cmake flags for `amd64` macOS (#1780) * update cmake flags for intel macOS * remove `LLAMA_K_QUANTS` * put back `CMAKE_OSX_DEPLOYMENT_TARGET` and disable `LLAMA_F16C` 2024-01-04 00:22:15 +00:00			`CMAKE_DEFS=""`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if echo "${CGO_CFLAGS}" \| grep -- '-g' >/dev/null; then`
update cmake flags for `amd64` macOS (#1780) * update cmake flags for intel macOS * remove `LLAMA_K_QUANTS` * put back `CMAKE_OSX_DEPLOYMENT_TARGET` and disable `LLAMA_F16C` 2024-01-04 00:22:15 +00:00			`CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`else`
			`# TODO - add additional optimization flags...`
update cmake flags for `amd64` macOS (#1780) * update cmake flags for intel macOS * remove `LLAMA_K_QUANTS` * put back `CMAKE_OSX_DEPLOYMENT_TARGET` and disable `LLAMA_F16C` 2024-01-04 00:22:15 +00:00			`CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`fi`
			`}`

			`git_module_setup() {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then`
Build linux using ubuntu 20.04 This changes the container-based linux build to use an older Ubuntu distro to improve our compatibility matrix for older user machines 2023-12-18 20:05:59 +00:00			`echo "Skipping submodule initialization"`
			`return`
			`fi`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`git submodule init`
			`git submodule update --force gguf`

			`}`

			`apply_patches() {`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`# Wire up our CMakefile`
Rename the ollama cmakefile 2023-12-24 22:12:21 +00:00			`if ! grep ollama gguf/examples/server/CMakeLists.txt; then`
			`echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt`
Build linux using ubuntu 20.04 This changes the container-based linux build to use an older Ubuntu distro to improve our compatibility matrix for older user machines 2023-12-18 20:05:59 +00:00			`fi`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`# Avoid duplicate main symbols when we link into the cgo binary`
			`sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&`
			`mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`

			`build() {`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}`
			`cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`}`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`install() {`
			`rm -rf ${BUILD_DIR}/lib`
			`mkdir -p ${BUILD_DIR}/lib`
			`cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib`
			`cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib`
			`cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib`
			`cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib`
			`}`

Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`# Keep the local tree clean after we're done with the build`
			`cleanup() {`
			`(cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)`
			`}`