ollama/llm/llama.cpp/gen_common.sh

# common logic across linux and darwin

# Set the default build variables used by the other helpers in this file.
# Globals read:    CGO_CFLAGS (optional; may be unset)
# Globals written: LLAMACPP_DIR, PATCHES, CMAKE_DEFS, CMAKE_TARGETS
init_vars() {
    LLAMACPP_DIR=gguf
    PATCHES="0001-Expose-callable-API-for-server.patch"
    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
    # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
    # Only treat "-g" as a debug request when it begins a flag (-g, -g3,
    # -ggdb, ...). A bare substring match also fires on values such as
    # -I/usr/include/x86_64-linux-gnu and would silently pick the debug build.
    if printf '%s\n' "${CGO_CFLAGS:-}" | grep -E -- '(^|[[:space:]])-g' >/dev/null; then
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
    else
        # TODO - add additional optimization flags...
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
    fi
}

# Initialise the git submodules and force-update the gguf submodule.
# Globals read: OLLAMA_SKIP_PATCHING - when non-empty, only a notice is
#               printed and no git command is run.
git_module_setup() {
    if [ -z "${OLLAMA_SKIP_PATCHING}" ]; then
        git submodule init
        git submodule update --force gguf
        return
    fi
    echo "Skipping submodule initialization"
}

# Apply every patch named in PATCHES to the gguf checkout.
# Globals read: OLLAMA_SKIP_PATCHING - when non-empty, only a notice is
#               printed and nothing is patched.
#               PATCHES - whitespace-separated patch file names, resolved
#               as ../patches/<name> relative to the gguf directory.
# Returns: 0 on success (or skip), 1 as soon as one patch fails to apply.
apply_patches() {
    local patch
    if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
        echo "Skipping submodule patching"
        return
    fi
    # Workaround git apply not handling creation well for iteration
    rm -f gguf/examples/server/server.h
    # PATCHES is intentionally unquoted: it is a whitespace-separated list.
    for patch in ${PATCHES}; do
        # Stop at the first failure so a later successful patch cannot mask it.
        git -C gguf apply "../patches/${patch}" || return 1
    done
}

# Configure and build the selected cmake targets.
# Globals read: LLAMACPP_DIR  - cmake source directory (-S)
#               BUILD_DIR     - cmake build directory (-B / --build)
#               CMAKE_DEFS    - whitespace-separated configure arguments
#               CMAKE_TARGETS - whitespace-separated "--target <name>" pairs
# Returns: 0 on success; non-zero if a directory variable is empty or if
#          either cmake invocation fails.
build() {
    # An empty directory would make -S/-B consume the following option as its
    # value, so refuse to run rather than configure into a bogus location.
    if [ -z "${LLAMACPP_DIR:-}" ] || [ -z "${BUILD_DIR:-}" ]; then
        echo "build: LLAMACPP_DIR and BUILD_DIR must be set" >&2
        return 1
    fi
    # CMAKE_DEFS and CMAKE_TARGETS are intentionally unquoted so each
    # whitespace-separated item becomes its own argument; the directories are
    # quoted so paths containing spaces stay intact.
    cmake -S "${LLAMACPP_DIR}" -B "${BUILD_DIR}" ${CMAKE_DEFS} || return
    cmake --build "${BUILD_DIR}" ${CMAKE_TARGETS} -j8
}
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`# common logic accross linux and darwin`

			`init_vars() {`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`LLAMACPP_DIR=gguf`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`PATCHES="0001-Expose-callable-API-for-server.patch"`
Quiet down llama.cpp logging by default By default builds will now produce non-debug and non-verbose binaries. To enable verbose logs in llama.cpp and debug symbols in the native code, set `CGO_CFLAGS=-g` 2023-12-22 16:47:18 +00:00			`CMAKE_DEFS="-DLLAMA_ACCELERATE=on"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`# TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if echo "${CGO_CFLAGS}" \| grep -- '-g' >/dev/null; then`
Quiet down llama.cpp logging by default By default builds will now produce non-debug and non-verbose binaries. To enable verbose logs in llama.cpp and debug symbols in the native code, set `CGO_CFLAGS=-g` 2023-12-22 16:47:18 +00:00			`CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`else`
			`# TODO - add additional optimization flags...`
Quiet down llama.cpp logging by default By default builds will now produce non-debug and non-verbose binaries. To enable verbose logs in llama.cpp and debug symbols in the native code, set `CGO_CFLAGS=-g` 2023-12-22 16:47:18 +00:00			`CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`fi`
			`}`

			`git_module_setup() {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then`
Build linux using ubuntu 20.04 This changes the container-based linux build to use an older Ubuntu distro to improve our compatibility matrix for older user machines 2023-12-18 20:05:59 +00:00			`echo "Skipping submodule initialization"`
			`return`
			`fi`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`git submodule init`
			`git submodule update --force gguf`

			`}`

			`apply_patches() {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then`
Build linux using ubuntu 20.04 This changes the container-based linux build to use an older Ubuntu distro to improve our compatibility matrix for older user machines 2023-12-18 20:05:59 +00:00			`echo "Skipping submodule patching"`
			`return`
			`fi`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`# Workaround git apply not handling creation well for iteration`
			`rm -f gguf/examples/server/server.h`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`for patch in ${PATCHES}; do`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`git -C gguf apply ../patches/${patch}`
			`done`
			`}`

			`build() {`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}`
			`cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`}`