This commit is contained in:
parent
fccf3eecaa
commit
1524f323a3
11 changed files with 61 additions and 251 deletions
3
.github/workflows/release.yaml
vendored
3
.github/workflows/release.yaml
vendored
|
@ -95,8 +95,7 @@ jobs:
|
|||
cd $env:GITHUB_WORKSPACE
|
||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$env:PATH"
|
||||
|
||||
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
|
||||
go generate -x ./...
|
||||
name: go generate
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
|
42
.github/workflows/test.yaml
vendored
42
.github/workflows/test.yaml
vendored
|
@ -1,16 +1,5 @@
|
|||
name: test
|
||||
|
||||
concurrency:
|
||||
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
|
||||
# cancels running CI jobs and starts all new ones.
|
||||
#
|
||||
# For non-PR pushes, concurrency.group needs to be unique for every distinct
|
||||
# CI run we want to have happen. Use run_id, which in practice means all
|
||||
# non-PR CI runs will be allowed to run without preempting each other.
|
||||
group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
|
@ -73,12 +62,10 @@ jobs:
|
|||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$gccpath;$env:PATH"
|
||||
echo $env:PATH
|
||||
|
||||
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
|
||||
go generate -x ./...
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
name: 'Windows Go Generate'
|
||||
- run: |
|
||||
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
|
||||
- run: go generate -x ./...
|
||||
if: ${{ ! startsWith(matrix.os, 'windows-') }}
|
||||
name: 'Unix Go Generate'
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
@ -111,7 +98,7 @@ jobs:
|
|||
- run: go get ./...
|
||||
- run: |
|
||||
git config --global --add safe.directory /__w/ollama/ollama
|
||||
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
|
||||
go generate -x ./...
|
||||
env:
|
||||
OLLAMA_SKIP_CPU_GENERATE: '1'
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
@ -142,7 +129,7 @@ jobs:
|
|||
- run: go get ./...
|
||||
- run: |
|
||||
git config --global --add safe.directory /__w/ollama/ollama
|
||||
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
|
||||
go generate -x ./...
|
||||
env:
|
||||
OLLAMA_SKIP_CPU_GENERATE: '1'
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
@ -181,9 +168,8 @@ jobs:
|
|||
$env:PATH="$gopath;$env:PATH"
|
||||
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
||||
|
||||
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
|
||||
name: go run build.go
|
||||
go generate -x ./...
|
||||
name: go generate
|
||||
env:
|
||||
OLLAMA_SKIP_CPU_GENERATE: '1'
|
||||
# TODO - do we need any artifacts?
|
||||
|
@ -216,7 +202,7 @@ jobs:
|
|||
- name: 'Verify CUDA'
|
||||
run: nvcc -V
|
||||
- run: go get ./...
|
||||
- name: go run build.go
|
||||
- name: go generate
|
||||
run: |
|
||||
$gopath=(get-command go).source | split-path -parent
|
||||
$cudabin=(get-command nvcc).source | split-path
|
||||
|
@ -225,8 +211,7 @@ jobs:
|
|||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$cudabin;$env:PATH"
|
||||
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||
|
||||
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
|
||||
go generate -x ./...
|
||||
env:
|
||||
OLLAMA_SKIP_CPU_GENERATE: '1'
|
||||
# TODO - do we need any artifacts?
|
||||
|
@ -300,12 +285,6 @@ jobs:
|
|||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: |
|
||||
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
|
||||
if: ${{ ! startsWith(matrix.os, 'windows-') }}
|
||||
- run: |
|
||||
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
- run: go get
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
|
@ -326,8 +305,9 @@ jobs:
|
|||
touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
shell: bash
|
||||
- run: |
|
||||
go test -v ./...
|
||||
- run: go generate ./...
|
||||
- run: go build
|
||||
- run: go test -v ./...
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.os }}-binaries
|
||||
|
|
|
@ -201,10 +201,16 @@ Install `cmake` and `go`:
|
|||
brew install cmake go
|
||||
```
|
||||
|
||||
Then generate dependencies:
|
||||
|
||||
```
|
||||
go generate ./...
|
||||
```
|
||||
|
||||
Then build the binary:
|
||||
|
||||
```
|
||||
go run build.go
|
||||
go build .
|
||||
```
|
||||
|
||||
More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
|
||||
|
|
192
build.go
192
build.go
|
@ -1,192 +0,0 @@
|
|||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"flag"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// Flags
|
||||
var (
|
||||
flagForce = flag.Bool("f", false, "force re-generation of dependencies")
|
||||
flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
|
||||
|
||||
// Flags to set GOARCH and GOOS explicitly for cross-platform builds,
|
||||
// e.g., in CI to target a different platform than the build matrix
|
||||
// default. These allow us to run generate without a separate build
|
||||
// step for building the script binary for the host ARCH and then
|
||||
// running the generate script for the target ARCH. Instead, we can
|
||||
// just run `go run build.go -target=$GOARCH` to generate the
|
||||
// deps.
|
||||
flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
|
||||
)
|
||||
|
||||
func buildEnv() []string {
|
||||
return append(os.Environ(),
|
||||
"GOARCH="+cmp.Or(*flagGOARCH, runtime.GOARCH),
|
||||
)
|
||||
}
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
flag.Usage = func() {
|
||||
log.Printf("Usage: go run build.go [flags]")
|
||||
log.Println()
|
||||
log.Println("Flags:")
|
||||
flag.PrintDefaults()
|
||||
log.Println()
|
||||
log.Println("This script builds the Ollama server binary and generates the llama.cpp")
|
||||
log.Println("bindings for the current platform. It assumes that the current working")
|
||||
log.Println("directory is the root directory of the Ollama project.")
|
||||
log.Println()
|
||||
log.Println("If the -d flag is provided, the script will only generate the dependencies")
|
||||
log.Println("and skip building the Ollama server binary.")
|
||||
log.Println()
|
||||
log.Println("If the -f flag is provided, the script will force re-generation of the")
|
||||
log.Println("dependencies.")
|
||||
log.Println()
|
||||
log.Println("If the -target flag is provided, the script will set GOARCH to the value")
|
||||
log.Println("of the flag. This is useful for cross-platform builds.")
|
||||
log.Println()
|
||||
log.Println("The script will check for the required dependencies (cmake, gcc) and")
|
||||
log.Println("print their version.")
|
||||
log.Println()
|
||||
log.Println("The script will also check if it is being run from the root directory of")
|
||||
log.Println("the Ollama project.")
|
||||
log.Println()
|
||||
os.Exit(1)
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
log.Printf("=== Building Ollama ===")
|
||||
defer func() {
|
||||
log.Printf("=== Done building Ollama ===")
|
||||
log.Println()
|
||||
log.Println("To run the Ollama server, use:")
|
||||
log.Println()
|
||||
log.Println(" ./ollama serve")
|
||||
log.Println()
|
||||
}()
|
||||
|
||||
if flag.NArg() > 0 {
|
||||
flag.Usage()
|
||||
}
|
||||
|
||||
if !inRootDir() {
|
||||
log.Fatalf("Please run this script from the root directory of the Ollama project.")
|
||||
}
|
||||
|
||||
if err := checkDependencies(); err != nil {
|
||||
log.Fatalf("Failed dependency check: %v", err)
|
||||
}
|
||||
if err := buildLlammaCPP(); err != nil {
|
||||
log.Fatalf("Failed to build llama.cpp: %v", err)
|
||||
}
|
||||
if err := goBuildOllama(); err != nil {
|
||||
log.Fatalf("Failed to build ollama Go binary: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// checkDependencies does a quick check to see if the required dependencies are
|
||||
// installed on the system and functioning enough to print their version.
|
||||
//
|
||||
// TODO(bmizerany): Check the actual version of the dependencies? Seems a
|
||||
// little daunting given diff versions might print diff things. This should
|
||||
// be good enough for now.
|
||||
func checkDependencies() error {
|
||||
var err error
|
||||
check := func(name string, args ...string) {
|
||||
log.Printf("=== Checking for %s ===", name)
|
||||
defer log.Printf("=== Done checking for %s ===\n\n", name)
|
||||
cmd := exec.Command(name, args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err = errors.Join(err, cmd.Run())
|
||||
}
|
||||
|
||||
check("cmake", "--version")
|
||||
check("gcc", "--version")
|
||||
return err
|
||||
}
|
||||
|
||||
func goBuildOllama() error {
|
||||
log.Println("=== Building Ollama binary ===")
|
||||
defer log.Printf("=== Done building Ollama binary ===\n\n")
|
||||
if *flagSkipBuild {
|
||||
log.Println("Skipping 'go build -o ollama .'")
|
||||
return nil
|
||||
}
|
||||
cmd := exec.Command("go", "build", "-o", "ollama", ".")
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
cmd.Env = buildEnv()
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// buildLlammaCPP generates the llama.cpp bindings for the current platform.
|
||||
//
|
||||
// It assumes that the current working directory is the root directory of the
|
||||
// Ollama project.
|
||||
func buildLlammaCPP() error {
|
||||
log.Println("=== Generating dependencies ===")
|
||||
defer log.Printf("=== Done generating dependencies ===\n\n")
|
||||
if *flagForce {
|
||||
if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if isDirectory(filepath.Join("llm", "build")) {
|
||||
log.Println("llm/build already exists; skipping. Use -f to force re-generate.")
|
||||
return nil
|
||||
}
|
||||
|
||||
scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var cmd *exec.Cmd
|
||||
switch runtime.GOOS {
|
||||
case "windows":
|
||||
script := filepath.Join(scriptDir, "gen_windows.ps1")
|
||||
cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
|
||||
case "linux":
|
||||
script := filepath.Join(scriptDir, "gen_linux.sh")
|
||||
cmd = exec.Command("bash", script)
|
||||
case "darwin":
|
||||
script := filepath.Join(scriptDir, "gen_darwin.sh")
|
||||
cmd = exec.Command("bash", script)
|
||||
default:
|
||||
log.Fatalf("Unsupported OS: %s", runtime.GOOS)
|
||||
}
|
||||
cmd.Dir = filepath.Join("llm", "generate")
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
cmd.Env = buildEnv()
|
||||
|
||||
log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)
|
||||
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
func isDirectory(path string) bool {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return info.IsDir()
|
||||
}
|
||||
|
||||
// inRootDir returns true if the current working directory is the root
|
||||
// directory of the Ollama project. It looks for a file named "go.mod".
|
||||
func inRootDir() bool {
|
||||
_, err := os.Stat("go.mod")
|
||||
return err == nil
|
||||
}
|
|
@ -23,7 +23,13 @@ export OLLAMA_DEBUG=1
|
|||
Get the required libraries and build the native LLM code:
|
||||
|
||||
```bash
|
||||
go run build.go
|
||||
go generate ./...
|
||||
```
|
||||
|
||||
Then build ollama:
|
||||
|
||||
```bash
|
||||
go build .
|
||||
```
|
||||
|
||||
Now you can run `ollama`:
|
||||
|
@ -32,16 +38,6 @@ Now you can run `ollama`:
|
|||
./ollama
|
||||
```
|
||||
|
||||
### Rebuilding the native code
|
||||
|
||||
If at any point you need to rebuild the native code, you can run the
|
||||
build.go script again using the `-f` flag to force a rebuild, and,
|
||||
optionally, the `-d` flag to skip building the Go binary:
|
||||
|
||||
```bash
|
||||
go run build.go -f -d
|
||||
```
|
||||
|
||||
### Linux
|
||||
|
||||
#### Linux CUDA (NVIDIA)
|
||||
|
@ -57,10 +53,16 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
|
|||
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
|
||||
the set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
|
||||
|
||||
Then generate dependencies:
|
||||
|
||||
```
|
||||
go generate ./...
|
||||
```
|
||||
|
||||
Then build the binary:
|
||||
|
||||
```
|
||||
go run build.go
|
||||
go build .
|
||||
```
|
||||
|
||||
#### Linux ROCm (AMD)
|
||||
|
@ -76,17 +78,21 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
|
|||
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
|
||||
the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
|
||||
|
||||
```
|
||||
go generate ./...
|
||||
```
|
||||
|
||||
Then build the binary:
|
||||
|
||||
```
|
||||
go run build.go
|
||||
go build .
|
||||
```
|
||||
|
||||
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
|
||||
|
||||
#### Advanced CPU Settings
|
||||
|
||||
By default, running `go run build.go` will compile a few different variations
|
||||
By default, running `go generate ./...` will compile a few different variations
|
||||
of the LLM library based on common CPU families and vector math capabilities,
|
||||
including a lowest-common-denominator which should run on almost any 64 bit CPU
|
||||
somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
|
||||
|
@ -96,7 +102,8 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
|
|||
you might use:
|
||||
|
||||
```
|
||||
OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
|
||||
OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
|
||||
go build .
|
||||
```
|
||||
|
||||
#### Containerized Linux Build
|
||||
|
@ -117,7 +124,8 @@ Install required tools:
|
|||
|
||||
```powershell
|
||||
$env:CGO_ENABLED="1"
|
||||
go run build.go
|
||||
go generate ./...
|
||||
go build .
|
||||
```
|
||||
|
||||
#### Windows CUDA (NVIDIA)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
# This script is intended to run inside the `go run build.go` script, which
|
||||
# sets the working directory to the correct location: ./llm/generate/.
|
||||
# This script is intended to run inside the go generate
|
||||
# working directory must be ./llm/generate/
|
||||
|
||||
# TODO - add hardening to detect missing tools (cmake, etc.)
|
||||
|
||||
|
@ -89,10 +89,10 @@ case "${GOARCH}" in
|
|||
;;
|
||||
*)
|
||||
echo "GOARCH must be set"
|
||||
echo "this script is meant to be run from within 'go run build.go'"
|
||||
echo "this script is meant to be run from within go generate"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
cleanup
|
||||
echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
|
||||
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
# This script is intended to run with the `go run build.go` script, which
|
||||
# sets the working directory to the correct location: ./llm/generate/.
|
||||
# This script is intended to run inside the go generate
|
||||
# working directory must be llm/generate/
|
||||
|
||||
# First we build one or more CPU based LLM libraries
|
||||
#
|
||||
|
@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then
|
|||
fi
|
||||
|
||||
cleanup
|
||||
echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
|
||||
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
|
||||
|
|
|
@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) {
|
|||
|
||||
|
||||
cleanup
|
||||
write-host "`ncode generation completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
|
||||
|
|
3
llm/generate/generate_darwin.go
Normal file
3
llm/generate/generate_darwin.go
Normal file
|
@ -0,0 +1,3 @@
|
|||
package generate
|
||||
|
||||
//go:generate bash ./gen_darwin.sh
|
3
llm/generate/generate_linux.go
Normal file
3
llm/generate/generate_linux.go
Normal file
|
@ -0,0 +1,3 @@
|
|||
package generate
|
||||
|
||||
//go:generate bash ./gen_linux.sh
|
3
llm/generate/generate_windows.go
Normal file
3
llm/generate/generate_windows.go
Normal file
|
@ -0,0 +1,3 @@
|
|||
package generate
|
||||
|
||||
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
|
Loading…
Reference in a new issue