diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index ffb2cf9d..9694457e 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -95,7 +95,8 @@ jobs:
           cd $env:GITHUB_WORKSPACE
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$env:PATH"
-          go generate -x ./...
+          
+          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
         name: go generate
       - uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index e4242997..39b93227 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,5 +1,16 @@
 name: test
 
+concurrency:
+  # For PRs, the group is keyed on the PR number, so later CI runs preempt
+  # previous ones. e.g. a force push cancels running jobs and starts new ones.
+  #
+  # For non-PR pushes, concurrency.group needs to be unique for every distinct
+  # CI run we want to have happen. Use run_id, which in practice means all
+  # non-PR CI runs will be allowed to run without preempting each other.
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+
 on:
   pull_request:
     paths:
@@ -62,10 +73,12 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$gccpath;$env:PATH"
           echo $env:PATH
-          go generate -x ./...
+
+          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
         if: ${{ startsWith(matrix.os, 'windows-') }}
         name: 'Windows Go Generate'
-      - run: go generate -x ./...
+      - run: |
+          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
         name: 'Unix Go Generate'
       - uses: actions/upload-artifact@v4
@@ -98,7 +111,7 @@ jobs:
       - run: go get ./...
       - run: |
           git config --global --add safe.directory /__w/ollama/ollama
-          go generate -x ./...
+          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       - uses: actions/upload-artifact@v4
@@ -129,7 +142,7 @@ jobs:
       - run: go get ./...
       - run: |
           git config --global --add safe.directory /__w/ollama/ollama
-          go generate -x ./...
+          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       - uses: actions/upload-artifact@v4
@@ -168,8 +181,9 @@ jobs:
           $env:PATH="$gopath;$env:PATH"
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-          go generate -x ./...
-        name: go generate
+
+          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
+        name: go run build.go
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -202,7 +216,7 @@ jobs:
       - name: 'Verify CUDA'
         run: nvcc -V
       - run: go get ./...
-      - name: go generate
+      - name: go run build.go
         run: |
           $gopath=(get-command go).source | split-path -parent
           $cudabin=(get-command nvcc).source | split-path
@@ -211,7 +225,8 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$cudabin;$env:PATH"
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          go generate -x ./...
+          
+          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -285,6 +300,12 @@ jobs:
         with:
           go-version-file: go.mod
           cache: true
+      - run: |
+          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
+        if: ${{ ! startsWith(matrix.os, 'windows-') }}
+      - run: |
+          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
+        if: ${{ startsWith(matrix.os, 'windows-') }}
       - run: go get
       - run: |
           case ${{ matrix.arch }} in
@@ -305,9 +326,8 @@ jobs:
           touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server
         if: ${{ startsWith(matrix.os, 'windows-') }}
         shell: bash
-      - run: go generate ./...
-      - run: go build
-      - run: go test -v ./...
+      - run: |
+          go test -v ./...
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-binaries
diff --git a/README.md b/README.md
index d5e265ff..9b4015ca 100644
--- a/README.md
+++ b/README.md
@@ -201,16 +201,10 @@ Install `cmake` and `go`:
 brew install cmake go
 ```
 
-Then generate dependencies:
-
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
diff --git a/build.go b/build.go
new file mode 100644
index 00000000..b7a55fb1
--- /dev/null
+++ b/build.go
@@ -0,0 +1,192 @@
+//go:build ignore
+
+package main
+
+import (
+	"cmp"
+	"errors"
+	"flag"
+	"log"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+)
+
+// Flags
+var (
+	flagForce     = flag.Bool("f", false, "force re-generation of dependencies")
+	flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
+
+	// Flag to set GOARCH explicitly for cross-platform builds, e.g., in CI
+	// to target a different architecture than the build matrix default.
+	// This allows us to run generate without a separate build step that
+	// builds the script binary for the host ARCH and then runs the
+	// generate script for the target ARCH. Instead, we can just run
+	// `go run build.go -target=$GOARCH` to generate the deps. There is
+	// currently no matching flag for GOOS.
+	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
+)
+
+// buildEnv returns os.Environ() plus GOARCH set to -target, else runtime.GOARCH.
+func buildEnv() []string {
+	goarch := cmp.Or(*flagGOARCH, runtime.GOARCH)
+	return append(os.Environ(), "GOARCH="+goarch)
+}
+
+// main validates the invocation, generates llama.cpp deps, then builds ollama.
+func main() {
+	log.SetFlags(0)
+	flag.Usage = func() {
+		log.Printf("Usage: go run build.go [flags]")
+		log.Println()
+		log.Println("Flags:")
+		flag.PrintDefaults()
+		log.Println()
+		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
+		log.Println("bindings for the current platform. It assumes that the current working")
+		log.Println("directory is the root directory of the Ollama project.")
+		log.Println()
+		log.Println("If the -d flag is provided, the script will only generate the dependencies")
+		log.Println("and skip building the Ollama server binary.")
+		log.Println()
+		log.Println("If the -f flag is provided, the script will force re-generation of the")
+		log.Println("dependencies.")
+		log.Println()
+		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
+		log.Println("of the flag. This is useful for cross-platform builds.")
+		log.Println()
+		log.Println("The script will check for the required dependencies (cmake, gcc) and")
+		log.Println("print their version.")
+		log.Println()
+		log.Println("The script will also check if it is being run from the root directory of")
+		log.Println("the Ollama project.")
+		log.Println()
+		os.Exit(1)
+	}
+	flag.Parse()
+	if flag.NArg() > 0 { // reject stray positional args before any build output
+		flag.Usage()
+	}
+	log.Printf("=== Building Ollama ===")
+	defer func() { // not reached on log.Fatalf, which exits the process
+		log.Printf("=== Done building Ollama ===")
+		if *flagSkipBuild {
+			return // -d: no binary was produced, so omit the run hint
+		}
+		log.Println()
+		log.Println("To run the Ollama server, use:")
+		log.Println()
+		log.Println("    ./ollama serve")
+		log.Println()
+	}()
+	if !inRootDir() {
+		log.Fatalf("Please run this script from the root directory of the Ollama project.")
+	}
+	if err := checkDependencies(); err != nil {
+		log.Fatalf("Failed dependency check: %v", err)
+	}
+	if err := buildLlammaCPP(); err != nil {
+		log.Fatalf("Failed to build llama.cpp: %v", err)
+	}
+	if err := goBuildOllama(); err != nil {
+		log.Fatalf("Failed to build ollama Go binary: %v", err)
+	}
+}
+
+// checkDependencies verifies that the external build tools are installed and
+// working well enough to report their version. Every tool is probed, even
+// after a failure, and the individual errors are joined into the result.
+//
+// TODO(bmizerany): Check the actual version of the dependencies? Seems a
+// little daunting given diff versions might print diff things. This should
+// be good enough for now.
+func checkDependencies() error {
+	var failures error
+	for _, tool := range []string{"cmake", "gcc"} {
+		failures = errors.Join(failures, func() error {
+			log.Printf("=== Checking for %s ===", tool)
+			defer log.Printf("=== Done checking for %s ===\n\n", tool)
+			probe := exec.Command(tool, "--version")
+			probe.Stdout = os.Stdout
+			probe.Stderr = os.Stderr
+			return probe.Run()
+		}())
+	}
+	return failures
+}
+
+// goBuildOllama runs `go build -o ollama .` with buildEnv, unless -d was given.
+func goBuildOllama() error {
+	log.Println("=== Building Ollama binary ===")
+	defer log.Printf("=== Done building Ollama binary ===\n\n")
+	if !*flagSkipBuild {
+		build := exec.Command("go", "build", "-o", "ollama", ".")
+		build.Stdout, build.Stderr = os.Stdout, os.Stderr
+		build.Env = buildEnv()
+		return build.Run()
+	}
+	log.Println("Skipping 'go build -o ollama .'")
+	return nil
+}
+
+// buildLlammaCPP runs the llama.cpp generate script for the host OS, passing
+// the effective GOARCH (-target, else the host arch) in its environment.
+// Unless -f is given, an existing llm/build directory is left as is. It
+// assumes the current working directory is the root of the Ollama project.
+func buildLlammaCPP() error {
+	log.Println("=== Generating dependencies ===")
+	defer log.Printf("=== Done generating dependencies ===\n\n")
+	if *flagForce {
+		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
+			return err
+		}
+	}
+	if isDirectory(filepath.Join("llm", "build")) {
+		log.Println("llm/build already exists; skipping.  Use -f to force re-generate.")
+		return nil
+	}
+
+	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
+	if err != nil {
+		return err
+	}
+
+	var cmd *exec.Cmd
+	switch runtime.GOOS {
+	case "windows":
+		script := filepath.Join(scriptDir, "gen_windows.ps1")
+		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
+	case "linux":
+		script := filepath.Join(scriptDir, "gen_linux.sh")
+		cmd = exec.Command("bash", script)
+	case "darwin":
+		script := filepath.Join(scriptDir, "gen_darwin.sh")
+		cmd = exec.Command("bash", script)
+	default:
+		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
+	}
+	cmd.Dir = scriptDir
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = buildEnv()
+
+	// Report the GOARCH the script actually receives, not just the host's.
+	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, cmp.Or(*flagGOARCH, runtime.GOARCH), cmd.Args)
+	return cmd.Run()
+}
+
+// isDirectory reports whether path can be stat'ed and refers to a directory.
+func isDirectory(path string) bool {
+	if fi, statErr := os.Stat(path); statErr == nil {
+		return fi.IsDir()
+	}
+	return false
+}
+
+// inRootDir reports whether the current working directory appears to be the
+// root of the Ollama project, judged solely by a "go.mod" entry existing.
+func inRootDir() bool {
+	_, statErr := os.Stat("go.mod")
+	return statErr == nil
+}
diff --git a/docs/development.md b/docs/development.md
index 76936c35..178fb439 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -23,13 +23,7 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go generate ./...
-```
-
-Then build ollama:
-
-```bash
-go build .
+go run build.go
 ```
 
 Now you can run `ollama`:
@@ -38,6 +32,16 @@ Now you can run `ollama`:
 ./ollama
 ```
 
+### Rebuilding the native code
+
+If at any point you need to rebuild the native code, you can run the
+build.go script again using the `-f` flag to force a rebuild, and,
+optionally, the `-d` flag to skip building the Go binary:
+
+```bash
+go run build.go -f -d
+```
+
 ### Linux
 
 #### Linux CUDA (NVIDIA)
@@ -53,16 +57,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
 
-Then generate dependencies:
-
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 #### Linux ROCm (AMD)
@@ -78,21 +76,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
 
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
 
 #### Advanced CPU Settings
 
-By default, running `go generate ./...` will compile a few different variations
+By default, running `go run build.go` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -102,8 +96,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
-go build .
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
 ```
 
 #### Containerized Linux Build
@@ -124,8 +117,7 @@ Install required tools:
 
 ```powershell
 $env:CGO_ENABLED="1"
-go generate ./...
-go build .
+go run build.go
 ```
 
 #### Windows CUDA (NVIDIA)
@@ -142,4 +134,4 @@ In addition to the common Windows development tools described above, install AMD
 - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
 - [Strawberry Perl](https://strawberryperl.com/)
 
-Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
\ No newline at end of file
+Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh
index f79534cd..81e05d71 100755
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be ./llm/generate/
+# This script is intended to run inside the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -89,10 +89,10 @@ case "${GOARCH}" in
     ;;
 *)
     echo "GOARCH must be set"
-    echo "this script is meant to be run from within go generate"
+    echo "this script is meant to be run from within 'go run build.go'"
     exit 1
     ;;
 esac
 
 cleanup
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index fd4a6bc0..ec542aca 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be llm/generate/
+# This script is intended to run with the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # First we build one or more CPU based LLM libraries
 #
@@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then
 fi
 
 cleanup
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index 0d2ae57f..8880a269 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) {
 
 
 cleanup
-write-host "`ngo generate completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
+write-host "`ncode generation completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
diff --git a/llm/generate/generate_darwin.go b/llm/generate/generate_darwin.go
deleted file mode 100644
index 77685234..00000000
--- a/llm/generate/generate_darwin.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_darwin.sh
diff --git a/llm/generate/generate_linux.go b/llm/generate/generate_linux.go
deleted file mode 100644
index 2b7e116d..00000000
--- a/llm/generate/generate_linux.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_linux.sh
diff --git a/llm/generate/generate_windows.go b/llm/generate/generate_windows.go
deleted file mode 100644
index d2ee5428..00000000
--- a/llm/generate/generate_windows.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1