diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ffb2cf9d..9694457e 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -95,7 +95,8 @@ jobs: cd $env:GITHUB_WORKSPACE $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:PATH="$gopath;$env:PATH" - go generate -x ./... + + $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }} name: go generate - uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e4242997..39b93227 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,5 +1,16 @@ name: test +concurrency: + # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR + # cancels running CI jobs and starts all new ones. + # + # For non-PR pushes, concurrency.group needs to be unique for every distinct + # CI run we want to have happen. Use run_id, which in practice means all + # non-PR CI runs will be allowed to run without preempting each other. + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + + on: pull_request: paths: @@ -62,10 +73,12 @@ jobs: $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:PATH="$gopath;$gccpath;$env:PATH" echo $env:PATH - go generate -x ./... + + $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }} if: ${{ startsWith(matrix.os, 'windows-') }} name: 'Windows Go Generate' - - run: go generate -x ./... + - run: | + GOARCH= go run build.go -f -d -target=${{ matrix.arch }} if: ${{ ! startsWith(matrix.os, 'windows-') }} name: 'Unix Go Generate' - uses: actions/upload-artifact@v4 @@ -98,7 +111,7 @@ jobs: - run: go get ./... - run: | git config --global --add safe.directory /__w/ollama/ollama - go generate -x ./... + GOARCH= go run build.go -f -d -target=${{ matrix.arch }} env: OLLAMA_SKIP_CPU_GENERATE: '1' - uses: actions/upload-artifact@v4 @@ -129,7 +142,7 @@ jobs: - run: go get ./... 
- run: | git config --global --add safe.directory /__w/ollama/ollama - go generate -x ./... + GOARCH= go run build.go -f -d -target=${{ matrix.arch }} env: OLLAMA_SKIP_CPU_GENERATE: '1' - uses: actions/upload-artifact@v4 @@ -168,8 +181,9 @@ jobs: $env:PATH="$gopath;$env:PATH" $env:OLLAMA_SKIP_CPU_GENERATE="1" $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - go generate -x ./... - name: go generate + + $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }} + name: go run build.go env: OLLAMA_SKIP_CPU_GENERATE: '1' # TODO - do we need any artifacts? @@ -202,7 +216,7 @@ jobs: - name: 'Verify CUDA' run: nvcc -V - run: go get ./... - - name: go generate + - name: go run build.go run: | $gopath=(get-command go).source | split-path -parent $cudabin=(get-command nvcc).source | split-path @@ -211,7 +225,8 @@ jobs: $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:PATH="$gopath;$cudabin;$env:PATH" $env:OLLAMA_SKIP_CPU_GENERATE="1" - go generate -x ./... + + $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }} env: OLLAMA_SKIP_CPU_GENERATE: '1' # TODO - do we need any artifacts? @@ -285,6 +300,12 @@ jobs: with: go-version-file: go.mod cache: true + - run: | + GOARCH= go run build.go -f -d -target=${{ matrix.arch }} + if: ${{ ! startsWith(matrix.os, 'windows-') }} + - run: | + $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }} + if: ${{ startsWith(matrix.os, 'windows-') }} - run: go get - run: | case ${{ matrix.arch }} in @@ -305,9 +326,8 @@ jobs: touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'windows-') }} shell: bash - - run: go generate ./... - - run: go build - - run: go test -v ./... + - run: | + go test -v ./... 
- uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }}-binaries
diff --git a/README.md b/README.md
index d5e265ff..9b4015ca 100644
--- a/README.md
+++ b/README.md
@@ -201,16 +201,10 @@ Install `cmake` and `go`:
 brew install cmake go
 ```
 
-Then generate dependencies:
-
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
diff --git a/build.go b/build.go
new file mode 100644
index 00000000..b7a55fb1
--- /dev/null
+++ b/build.go
@@ -0,0 +1,233 @@
+//go:build ignore
+
+package main
+
+import (
+	"cmp"
+	"errors"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+)
+
+// Flags
+var (
+	flagForce     = flag.Bool("f", false, "force re-generation of dependencies")
+	flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
+
+	// flagGOARCH sets GOARCH explicitly for cross-architecture builds,
+	// e.g., in CI to target a different architecture than the build
+	// matrix default. This allows us to run generate without a separate
+	// build step that builds the script binary for the host ARCH and
+	// then runs the generate script for the target ARCH. Instead, we can
+	// just run `go run build.go -target=$GOARCH` to generate the deps.
+	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
+)
+
+// targetGOARCH returns the GOARCH that child processes are run with: the
+// value of -target if it was provided, otherwise the GOARCH this script was
+// compiled for.
+func targetGOARCH() string {
+	return cmp.Or(*flagGOARCH, runtime.GOARCH)
+}
+
+// buildEnv returns the environment for the generate script and 'go build':
+// the current environment with GOARCH set to targetGOARCH. The entry is
+// appended last so that it overrides any GOARCH inherited from the caller
+// (os/exec uses the last value of a duplicated key).
+func buildEnv() []string {
+	return append(os.Environ(), "GOARCH="+targetGOARCH())
+}
+
+// main parses the flags, then checks the toolchain, generates the llama.cpp
+// dependencies and builds the ollama binary, exiting on the first failure.
+func main() {
+	log.SetFlags(0)
+	flag.Usage = func() {
+		log.Printf("Usage: go run build.go [flags]")
+		log.Println()
+		log.Println("Flags:")
+		flag.PrintDefaults()
+		log.Println()
+		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
+		log.Println("bindings for the current platform. It assumes that the current working")
+		log.Println("directory is the root directory of the Ollama project.")
+		log.Println()
+		log.Println("If the -d flag is provided, the script will only generate the dependencies")
+		log.Println("and skip building the Ollama server binary.")
+		log.Println()
+		log.Println("If the -f flag is provided, the script will force re-generation of the")
+		log.Println("dependencies.")
+		log.Println()
+		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
+		log.Println("of the flag. This is useful for cross-platform builds.")
+		log.Println()
+		log.Println("The script will check for the required dependencies (cmake, gcc) and")
+		log.Println("print their version.")
+		log.Println()
+		log.Println("The script will also check if it is being run from the root directory of")
+		log.Println("the Ollama project.")
+		log.Println()
+		os.Exit(1)
+	}
+	flag.Parse()
+
+	// Reject stray positional arguments before any work is started.
+	if flag.NArg() > 0 {
+		flag.Usage()
+	}
+
+	log.Printf("=== Building Ollama ===")
+
+	if !inRootDir() {
+		log.Fatalf("Please run this script from the root directory of the Ollama project.")
+	}
+
+	if err := checkDependencies(); err != nil {
+		log.Fatalf("Failed dependency check: %v", err)
+	}
+	if err := buildLlammaCPP(); err != nil {
+		log.Fatalf("Failed to build llama.cpp: %v", err)
+	}
+	if err := goBuildOllama(); err != nil {
+		log.Fatalf("Failed to build ollama Go binary: %v", err)
+	}
+
+	log.Printf("=== Done building Ollama ===")
+	if *flagSkipBuild {
+		// -d skipped 'go build', so there is no fresh binary to point at.
+		return
+	}
+	log.Println()
+	log.Println("To run the Ollama server, use:")
+	log.Println()
+	log.Println("    ./ollama serve")
+	log.Println()
+}
+
+// checkDependencies does a quick check to see if the required dependencies are
+// installed on the system and functioning enough to print their version.
+//
+// Every tool is checked even if an earlier one fails; the failures are joined
+// into the returned error, each prefixed with the name of the tool.
+//
+// TODO(bmizerany): Check the actual version of the dependencies? Seems a
+// little daunting given diff versions might print diff things. This should
+// be good enough for now.
+func checkDependencies() error {
+	var err error
+	check := func(name string, args ...string) {
+		log.Printf("=== Checking for %s ===", name)
+		defer log.Printf("=== Done checking for %s ===\n\n", name)
+		cmd := exec.Command(name, args...)
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		if runErr := cmd.Run(); runErr != nil {
+			// A bare "exit status 1" does not say which tool failed.
+			err = errors.Join(err, fmt.Errorf("%s: %w", name, runErr))
+		}
+	}
+
+	check("cmake", "--version")
+	check("gcc", "--version")
+	return err
+}
+
+// goBuildOllama builds the ollama binary in the current directory with
+// 'go build', using the environment from buildEnv. It does nothing when -d
+// was given.
+func goBuildOllama() error {
+	log.Println("=== Building Ollama binary ===")
+	defer log.Printf("=== Done building Ollama binary ===\n\n")
+	if *flagSkipBuild {
+		log.Println("Skipping 'go build -o ollama .'")
+		return nil
+	}
+
+	// 'go build -o' uses the name exactly as given, so add the extension
+	// Windows needs to treat the output as an executable.
+	out := "ollama"
+	if runtime.GOOS == "windows" {
+		out += ".exe"
+	}
+	cmd := exec.Command("go", "build", "-o", out, ".")
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = buildEnv()
+	return cmd.Run()
+}
+
+// buildLlammaCPP generates the llama.cpp bindings for the current platform by
+// running the OS-specific script in llm/generate.
+//
+// It assumes that the current working directory is the root directory of the
+// Ollama project. If llm/build already exists the script is not run, unless
+// -f was given, in which case llm/build is removed first.
+func buildLlammaCPP() error {
+	log.Println("=== Generating dependencies ===")
+	defer log.Printf("=== Done generating dependencies ===\n\n")
+
+	buildDir := filepath.Join("llm", "build")
+	if *flagForce {
+		if err := os.RemoveAll(buildDir); err != nil {
+			return err
+		}
+	}
+	if isDirectory(buildDir) {
+		log.Println("llm/build already exists; skipping. Use -f to force re-generate.")
+		return nil
+	}
+
+	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
+	if err != nil {
+		return err
+	}
+
+	var cmd *exec.Cmd
+	switch runtime.GOOS {
+	case "windows":
+		script := filepath.Join(scriptDir, "gen_windows.ps1")
+		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
+	case "linux":
+		script := filepath.Join(scriptDir, "gen_linux.sh")
+		cmd = exec.Command("bash", script)
+	case "darwin":
+		script := filepath.Join(scriptDir, "gen_darwin.sh")
+		cmd = exec.Command("bash", script)
+	default:
+		// Return instead of exiting so main reports every failure the same way.
+		return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
+	}
+	// Run from llm/generate; reuse the absolute path computed above.
+	cmd.Dir = scriptDir
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = buildEnv()
+
+	// Log the GOARCH the script really runs with; it differs from
+	// runtime.GOARCH when -target is set.
+	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, targetGOARCH(), cmd.Args)
+
+	return cmd.Run()
+}
+
+// isDirectory reports whether path exists and is a directory. Any error from
+// os.Stat is treated as "not a directory".
+func isDirectory(path string) bool {
+	info, err := os.Stat(path)
+	if err != nil {
+		return false
+	}
+	return info.IsDir()
+}
+
+// inRootDir returns true if the current working directory is the root
+// directory of the Ollama project. It looks for a file named "go.mod".
+func inRootDir() bool {
+	_, err := os.Stat("go.mod")
+	return err == nil
+}
diff --git a/docs/development.md b/docs/development.md
index 76936c35..178fb439 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -23,13 +23,7 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go generate ./...
-```
-
-Then build ollama:
-
-```bash
-go build .
+go run build.go ``` Now you can run `ollama`: @@ -38,6 +32,16 @@ Now you can run `ollama`: ./ollama ``` +### Rebuilding the native code + +If at any point you need to rebuild the native code, you can run the +build.go script again using the `-f` flag to force a rebuild, and, +optionally, the `-d` flag to skip building the Go binary: + +```bash +go run build.go -f -d +``` + ### Linux #### Linux CUDA (NVIDIA) @@ -53,16 +57,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70") -Then generate dependencies: - -``` -go generate ./... -``` - Then build the binary: ``` -go build . +go run build.go ``` #### Linux ROCm (AMD) @@ -78,21 +76,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`) -``` -go generate ./... -``` - Then build the binary: ``` -go build . +go run build.go ``` ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root. #### Advanced CPU Settings -By default, running `go generate ./...` will compile a few different variations +By default, running `go run build.go` will compile a few different variations of the LLM library based on common CPU families and vector math capabilities, including a lowest-common-denominator which should run on almost any 64 bit CPU somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to @@ -102,8 +96,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H, you might use: ``` -OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./... -go build . 
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go ``` #### Containerized Linux Build @@ -124,8 +117,7 @@ Install required tools: ```powershell $env:CGO_ENABLED="1" -go generate ./... -go build . +go run build.go ``` #### Windows CUDA (NVIDIA) @@ -142,4 +134,4 @@ In addition to the common Windows development tools described above, install AMD - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) - [Strawberry Perl](https://strawberryperl.com/) -Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`). \ No newline at end of file +Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`). diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index f79534cd..81e05d71 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -1,6 +1,6 @@ #!/bin/bash -# This script is intended to run inside the go generate -# working directory must be ./llm/generate/ +# This script is intended to run inside the `go run build.go` script, which +# sets the working directory to the correct location: ./llm/generate/. # TODO - add hardening to detect missing tools (cmake, etc.) @@ -89,10 +89,10 @@ case "${GOARCH}" in ;; *) echo "GOARCH must be set" - echo "this script is meant to be run from within go generate" + echo "this script is meant to be run from within 'go run build.go'" exit 1 ;; esac cleanup -echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" +echo "code generation completed. 
LLM runners: $(cd ${BUILD_DIR}/..; echo *)" diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index fd4a6bc0..ec542aca 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -1,6 +1,6 @@ #!/bin/bash -# This script is intended to run inside the go generate -# working directory must be llm/generate/ +# This script is intended to run with the `go run build.go` script, which +# sets the working directory to the correct location: ./llm/generate/. # First we build one or more CPU based LLM libraries # @@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then fi cleanup -echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" +echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 0d2ae57f..8880a269 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) { cleanup -write-host "`ngo generate completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})" +write-host "`ncode generation completed. 
LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})" diff --git a/llm/generate/generate_darwin.go b/llm/generate/generate_darwin.go deleted file mode 100644 index 77685234..00000000 --- a/llm/generate/generate_darwin.go +++ /dev/null @@ -1,3 +0,0 @@ -package generate - -//go:generate bash ./gen_darwin.sh diff --git a/llm/generate/generate_linux.go b/llm/generate/generate_linux.go deleted file mode 100644 index 2b7e116d..00000000 --- a/llm/generate/generate_linux.go +++ /dev/null @@ -1,3 +0,0 @@ -package generate - -//go:generate bash ./gen_linux.sh diff --git a/llm/generate/generate_windows.go b/llm/generate/generate_windows.go deleted file mode 100644 index d2ee5428..00000000 --- a/llm/generate/generate_windows.go +++ /dev/null @@ -1,3 +0,0 @@ -package generate - -//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1