ollama/llm/llama.cpp/gen_windows.ps1

#!powershell

$ErrorActionPreference = "Stop"

# Reset the script-scoped build settings to their baseline values.
#
# Writes:
#   $script:patches      - patch file names (not referenced elsewhere in the visible script)
#   $script:cmakeDefs    - arguments passed to the cmake configure step
#   $script:cmakeTargets - targets passed to the cmake build step
#   $script:config       - "RelWithDebInfo" when CGO_CFLAGS carries the -g flag,
#                          otherwise "Release"
function init_vars {
    $script:patches = @("0001-Expose-callable-API-for-server.patch")
    $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
    $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")

    # -contains tests collection membership, so applied to the raw string it
    # only matched when CGO_CFLAGS was exactly "-g". Split the value into
    # individual flags so that e.g. "-O2 -g" is recognised as well. The string
    # interpolation turns an unset variable into "" instead of $null.
    $cgoFlags = "$env:CGO_CFLAGS" -split '\s+'
    if ($cgoFlags -contains "-g") {
        # Debug request: verbose makefile/server logging plus debug symbols.
        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
        $script:config = "RelWithDebInfo"
    } else {
        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
        $script:config = "Release"
    }
}

# Initialise the git submodules and force-update the gguf submodule.
# If either git command fails, the script exits with git's exit code.
function git_module_setup {
    # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
    git submodule init
    if ($LASTEXITCODE -ne 0) {
        exit $LASTEXITCODE
    }

    git submodule update --force gguf
    if ($LASTEXITCODE -ne 0) {
        exit $LASTEXITCODE
    }
}

# Patch the checked-out gguf tree in place:
#   1. append an include of our CMakeLists.txt to the server example's
#      CMakeLists.txt, unless a line mentioning 'ollama' is already present;
#   2. rename main() in server.cpp to __main().
function apply_patches {
    $serverCMake = "gguf/examples/server/CMakeLists.txt"
    $serverSource = "./gguf/examples/server/server.cpp"

    # Wire up our CMakefile (only once; the trailing "# ollama" is the marker)
    $existingHook = Select-String -Path $serverCMake -Pattern 'ollama'
    if (-not $existingHook) {
        Add-Content -Path $serverCMake -Value 'include (../../../CMakeLists.txt) # ollama'
    }

    # Avoid duplicate main symbols when we link into the cgo binary
    $patchedLines = (Get-Content -Path $serverSource) -replace 'int main\(', 'int __main('
    Set-Content -Path $serverSource -Value $patchedLines
}

# Configure and build the gguf tree with cmake.
#
# Reads:
#   $script:buildDir     - cmake binary directory (-B)
#   $script:cmakeDefs    - extra arguments for the configure step
#   $script:config       - value passed to --config for the build step
#   $script:cmakeTargets - each entry is passed as "--target <name>"
#
# Exits the script with cmake's exit code if configure or build fails.
function build {
    write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
    & cmake --version
    & cmake -S gguf -B $script:buildDir $script:cmakeDefs
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}

    # Expand the target list once so the logged command line and the real
    # invocation cannot drift apart. Previously the log line nested double
    # quotes inside a double-quoted string and printed the literal pipeline
    # text instead of the resulting --target arguments.
    $targetArgs = @($script:cmakeTargets | ForEach-Object { "--target", $_ })
    write-host "building with: cmake --build $script:buildDir --config $script:config $targetArgs"
    & cmake --build $script:buildDir --config $script:config $targetArgs
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}

# Recreate "$script:buildDir/lib" from scratch and copy the built DLLs
# (ext_server_shared.dll and llama.dll) into it from
# "$script:buildDir/bin/$script:config".
function install {
    $libDir = "${script:buildDir}/lib"
    $binDir = "${script:buildDir}/bin/${script:config}"

    # Errors are silenced on both calls: the directory may not exist yet
    # when removing, or may still exist when creating.
    Remove-Item -ErrorAction SilentlyContinue -Recurse -Force -Path $libDir
    New-Item -ItemType Directory -Path $libDir -ErrorAction SilentlyContinue | Out-Null

    Copy-Item "$binDir/ext_server_shared.dll" $libDir
    Copy-Item "$binDir/llama.dll" $libDir
}

# Revert the in-place edits made to the server example (CMakeLists.txt and
# server.cpp) by checking the files out again inside gguf/examples/server.
#
# Uses `git -C` rather than Set-Location so the caller's working directory is
# left untouched, and exits with git's exit code on failure, matching how the
# other external commands in this script are handled.
function cleanup {
    & git -C "gguf/examples/server" checkout CMakeLists.txt server.cpp
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}

# Script entry point. The order below matters: the functions communicate
# through script-scoped variables ($script:buildDir, $script:cmakeDefs,
# $script:config, ...) that are set between the calls.
init_vars
git_module_setup
apply_patches

# First build: CPU-only variant
$script:buildDir="gguf/build/windows/cpu"

build
install

# Then build cuda as a dynamically loaded library
# init_vars is re-run so cmakeDefs starts from the baseline list again before
# the CUDA flag is appended.
init_vars
$script:buildDir="gguf/build/windows/cuda"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
build
install

# TODO - actually implement ROCm support on windows
# No build is run for ROCm here: only a fresh lib directory containing a
# .generated marker file is produced.
$script:buildDir="gguf/build/windows/rocm"

rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null
echo $null >> "${script:buildDir}/lib/.generated"

# Restore the files modified by apply_patches, then report completion.
cleanup
write-host "`ngo generate completed"
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`#!powershell`

			`$ErrorActionPreference = "Stop"`

			`function init_vars {`
			`$script:patches = @("0001-Expose-callable-API-for-server.patch")`
update cmake flags for `amd64` macOS (#1780) * update cmake flags for intel macOS * remove `LLAMA_K_QUANTS` * put back `CMAKE_OSX_DEPLOYMENT_TARGET` and disable `LLAMA_F16C` 2024-01-04 00:22:15 +00:00			`$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`if ($env:CGO_CFLAGS -contains "-g") {`
Quiet down llama.cpp logging by default By default builds will now produce non-debug and non-verbose binaries. To enable verbose logs in llama.cpp and debug symbols in the native code, set `CGO_CFLAGS=-g` 2023-12-22 16:47:18 +00:00			`$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`$script:config = "RelWithDebInfo"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`} else {`
Quiet down llama.cpp logging by default By default builds will now produce non-debug and non-verbose binaries. To enable verbose logs in llama.cpp and debug symbols in the native code, set `CGO_CFLAGS=-g` 2023-12-22 16:47:18 +00:00			`$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`$script:config = "Release"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`
			`}`

			`function git_module_setup {`
			`# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo`
			`& git submodule init`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`& git submodule update --force gguf`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`

			`function apply_patches {`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`# Wire up our CMakefile`
Rename the ollama cmakefile 2023-12-24 22:12:21 +00:00			`if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {`
			`Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`# Avoid duplicate main symbols when we link into the cgo binary`
			`$content = Get-Content -Path "./gguf/examples/server/server.cpp"`
			`$content = $content -replace 'int main\(', 'int __main('`
			`Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`

			`function build {`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`& cmake --version`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`& cmake -S gguf -B $script:buildDir $script:cmakeDefs`
			`if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets \| ForEach-Object { "--target", $_ })"`
			`& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets \| ForEach-Object { "--target", $_ })`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00			`if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`

Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`function install {`
			`rm -ea 0 -recurse -force -path "${script:buildDir}/lib"`
			`md "${script:buildDir}/lib" -ea 0 > $null`
			`cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib"`
			`cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"`
			`}`

Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00			`function cleanup {`
			`Set-Location "gguf/examples/server"`
			`git checkout CMakeLists.txt server.cpp`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`}`

			`init_vars`
			`git_module_setup`
			`apply_patches`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00
			`# first build CPU based`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`$script:buildDir="gguf/build/windows/cpu"`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00
			`build`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`install`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00
			`# Then build cuda as a dynamically loaded library`
			`init_vars`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`$script:buildDir="gguf/build/windows/cuda"`
			`$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")`
Add cgo implementation for llama.cpp Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions. 2023-11-14 01:20:34 +00:00			`build`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`install`
Revive windows build The windows native setup still needs some more work, but this gets it building again and if you set the PATH properly, you can run the resulting exe on a cuda system. 2023-12-20 22:46:15 +00:00
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`# TODO - actually implement ROCm support on windows`
			`$script:buildDir="gguf/build/windows/rocm"`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`rm -ea 0 -recurse -force -path "${script:buildDir}/lib"`
			`md "${script:buildDir}/lib" -ea 0 > $null`
			`echo $null >> "${script:buildDir}/lib/.generated"`
Refactor how we augment llama.cpp This changes the model for llama.cpp inclusion so we're not applying a patch, but instead have the C++ code directly in the ollama tree, which should make it easier to refine and update over time. 2023-12-22 17:51:53 +00:00
			`cleanup`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			write-host "`ngo generate completed"