1f50356e8e
This also adjusts our algorithm to favor our bundled ROCm. I've confirmed VRAM reporting still doesn't work properly so we can't yet enable concurrency by default.
323 lines
11 KiB
YAML
323 lines
11 KiB
YAML
name: test
|
|
|
|
concurrency:
|
|
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
|
|
# cancels running CI jobs and starts all new ones.
|
|
#
|
|
# For non-PR pushes, concurrency.group needs to be unique for every distinct
|
|
# CI run we want to have happen. Use run_id, which in practice means all
|
|
# non-PR CI runs will be allowed to run without preempting each other.
|
|
group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
|
|
cancel-in-progress: true
|
|
|
|
on:
|
|
pull_request:
|
|
paths:
|
|
- '**/*'
|
|
- '!docs/**'
|
|
- '!README.md'
|
|
|
|
jobs:
|
|
changes:
|
|
runs-on: ubuntu-latest
|
|
outputs:
|
|
GENERATE: ${{ steps.changes.outputs.GENERATE }}
|
|
GENERATE_CUDA: ${{ steps.changes.outputs.GENERATE_CUDA }}
|
|
GENERATE_ROCM: ${{ steps.changes.outputs.GENERATE_ROCM }}
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
with:
|
|
fetch-depth: 0
|
|
- id: changes
|
|
run: |
|
|
changed() {
|
|
git diff-tree -r --no-commit-id --name-only \
|
|
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
|
|
${{ github.event.pull_request.head.sha }} \
|
|
| xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
|
|
}
|
|
|
|
{
|
|
echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
|
|
echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
|
|
echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
|
|
} >>$GITHUB_OUTPUT
|
|
|
|
generate:
|
|
needs: [changes]
|
|
if: ${{ needs.changes.outputs.GENERATE == 'True' }}
|
|
strategy:
|
|
matrix:
|
|
os: [ubuntu-latest, macos-latest, windows-2019]
|
|
arch: [amd64, arm64]
|
|
exclude:
|
|
- os: ubuntu-latest
|
|
arch: arm64
|
|
- os: windows-2019
|
|
arch: arm64
|
|
runs-on: ${{ matrix.os }}
|
|
env:
|
|
GOARCH: ${{ matrix.arch }}
|
|
CGO_ENABLED: '1'
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v5
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- run: go get ./...
|
|
- run: |
|
|
$gopath=(get-command go).source | split-path -parent
|
|
$gccpath=(get-command gcc).source | split-path -parent
|
|
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
|
cd $env:GITHUB_WORKSPACE
|
|
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
|
$env:PATH="$gopath;$gccpath;$env:PATH"
|
|
echo $env:PATH
|
|
go generate -x ./...
|
|
if: ${{ startsWith(matrix.os, 'windows-') }}
|
|
name: 'Windows Go Generate'
|
|
- run: go generate -x ./...
|
|
if: ${{ ! startsWith(matrix.os, 'windows-') }}
|
|
name: 'Unix Go Generate'
|
|
- run: go build .
|
|
- uses: actions/upload-artifact@v4
|
|
with:
|
|
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
|
|
path: |
|
|
llm/build/**/bin/*
|
|
llm/build/**/*.a
|
|
generate-cuda:
|
|
needs: [changes]
|
|
if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
|
|
strategy:
|
|
matrix:
|
|
cuda-version:
|
|
- '11.8.0'
|
|
runs-on: linux
|
|
container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
|
|
steps:
|
|
- run: |
|
|
apt-get update && apt-get install -y git build-essential curl
|
|
curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
|
|
| tar -zx -C /usr --strip-components 1
|
|
env:
|
|
DEBIAN_FRONTEND: noninteractive
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v4
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- run: go get ./...
|
|
- run: |
|
|
git config --global --add safe.directory /__w/ollama/ollama
|
|
go generate -x ./...
|
|
env:
|
|
OLLAMA_SKIP_CPU_GENERATE: '1'
|
|
- uses: actions/upload-artifact@v4
|
|
with:
|
|
name: cuda-${{ matrix.cuda-version }}-libraries
|
|
path: |
|
|
llm/build/**/bin/*
|
|
dist/windows-amd64/**
|
|
generate-rocm:
|
|
needs: [changes]
|
|
if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
|
|
strategy:
|
|
matrix:
|
|
rocm-version:
|
|
- '6.1.1'
|
|
runs-on: linux
|
|
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
|
|
steps:
|
|
- run: |
|
|
apt-get update && apt-get install -y git build-essential curl rocm-libs
|
|
curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
|
|
| tar -zx -C /usr --strip-components 1
|
|
env:
|
|
DEBIAN_FRONTEND: noninteractive
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v4
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- run: go get ./...
|
|
- run: |
|
|
git config --global --add safe.directory /__w/ollama/ollama
|
|
go generate -x ./...
|
|
env:
|
|
OLLAMA_SKIP_CPU_GENERATE: '1'
|
|
- uses: actions/upload-artifact@v4
|
|
with:
|
|
name: rocm-${{ matrix.rocm-version }}-libraries
|
|
path: |
|
|
llm/build/**/bin/*
|
|
dist/windows-amd64/**
|
|
|
|
# ROCm generation step
|
|
generate-windows-rocm:
|
|
needs: [changes]
|
|
if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
|
|
runs-on: windows
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v5
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- name: 'Install ROCm'
|
|
run: |
|
|
$ErrorActionPreference = "Stop"
|
|
write-host "downloading AMD HIP Installer"
|
|
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
|
write-host "Installing AMD HIP"
|
|
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
|
write-host "Completed AMD HIP"
|
|
- name: 'Verify ROCm'
|
|
run: |
|
|
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
|
- run: go get ./...
|
|
- run: |
|
|
$gopath=(get-command go).source | split-path -parent
|
|
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
|
cd $env:GITHUB_WORKSPACE
|
|
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
|
$env:PATH="$gopath;$env:PATH"
|
|
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
|
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
|
go generate -x ./...
|
|
name: go generate
|
|
env:
|
|
OLLAMA_SKIP_CPU_GENERATE: '1'
|
|
# TODO - do we need any artifacts?
|
|
|
|
# CUDA generation step
|
|
generate-windows-cuda:
|
|
needs: [changes]
|
|
if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
|
|
runs-on: windows
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v5
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- name: 'Install CUDA'
|
|
run: |
|
|
$ErrorActionPreference = "Stop"
|
|
write-host "downloading CUDA Installer"
|
|
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
|
write-host "Installing CUDA"
|
|
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
|
write-host "Completed CUDA"
|
|
$cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
|
|
$cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
|
|
echo "$cudaPath\bin" >> $env:GITHUB_PATH
|
|
echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
|
|
echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
|
|
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
|
|
- name: 'Verify CUDA'
|
|
run: nvcc -V
|
|
- run: go get ./...
|
|
- name: go generate
|
|
run: |
|
|
$gopath=(get-command go).source | split-path -parent
|
|
$cudabin=(get-command nvcc).source | split-path
|
|
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
|
cd $env:GITHUB_WORKSPACE
|
|
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
|
$env:PATH="$gopath;$cudabin;$env:PATH"
|
|
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
|
go generate -x ./...
|
|
env:
|
|
OLLAMA_SKIP_CPU_GENERATE: '1'
|
|
# TODO - do we need any artifacts?
|
|
|
|
lint:
|
|
strategy:
|
|
matrix:
|
|
os: [ubuntu-latest, macos-latest, windows-2019]
|
|
arch: [amd64, arm64]
|
|
exclude:
|
|
- os: ubuntu-latest
|
|
arch: arm64
|
|
- os: windows-2019
|
|
arch: arm64
|
|
- os: macos-latest
|
|
arch: amd64
|
|
runs-on: ${{ matrix.os }}
|
|
env:
|
|
GOARCH: ${{ matrix.arch }}
|
|
CGO_ENABLED: '1'
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
with:
|
|
submodules: recursive
|
|
- uses: actions/setup-go@v5
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: false
|
|
- run: |
|
|
case ${{ matrix.arch }} in
|
|
amd64) echo ARCH=x86_64 ;;
|
|
arm64) echo ARCH=arm64 ;;
|
|
esac >>$GITHUB_ENV
|
|
shell: bash
|
|
- run: |
|
|
mkdir -p llm/build/linux/$ARCH/stub/bin
|
|
touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
|
|
if: ${{ startsWith(matrix.os, 'ubuntu-') }}
|
|
- run: |
|
|
mkdir -p llm/build/darwin/$ARCH/stub/bin
|
|
touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
|
|
if: ${{ startsWith(matrix.os, 'macos-') }}
|
|
- uses: golangci/golangci-lint-action@v6
|
|
with:
|
|
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
|
|
test:
|
|
strategy:
|
|
matrix:
|
|
os: [ubuntu-latest, macos-latest, windows-2019]
|
|
arch: [amd64]
|
|
exclude:
|
|
- os: ubuntu-latest
|
|
arch: arm64
|
|
- os: windows-2019
|
|
arch: arm64
|
|
runs-on: ${{ matrix.os }}
|
|
env:
|
|
GOARCH: ${{ matrix.arch }}
|
|
CGO_ENABLED: '1'
|
|
OLLAMA_CPU_TARGET: 'static'
|
|
OLLAMA_SKIP_CPU_GENERATE: '1'
|
|
OLLAMA_SKIP_METAL_GENERATE: '1'
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
with:
|
|
submodules: recursive
|
|
- uses: actions/setup-go@v5
|
|
with:
|
|
go-version-file: go.mod
|
|
cache: true
|
|
- run: |
|
|
case ${{ matrix.arch }} in
|
|
amd64) echo ARCH=x86_64 ;;
|
|
arm64) echo ARCH=arm64 ;;
|
|
esac >>$GITHUB_ENV
|
|
shell: bash
|
|
- run: |
|
|
mkdir -p llm/build/linux/$ARCH/stub/bin
|
|
touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
|
|
if: ${{ startsWith(matrix.os, 'ubuntu-') }}
|
|
- run: |
|
|
mkdir -p llm/build/darwin/$ARCH/stub/bin
|
|
touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
|
|
if: ${{ startsWith(matrix.os, 'macos-') }}
|
|
shell: bash
|
|
- run: go generate ./...
|
|
- run: go build
|
|
- run: go test -v ./...
|
|
- uses: actions/upload-artifact@v4
|
|
with:
|
|
name: ${{ matrix.os }}-binaries
|
|
path: ollama
|