Review comments

Daniel Hiltgen 2024-08-15 14:38:14 -07:00
parent 88bb9e3328
commit f9e31da946
5 changed files with 32 additions and 104 deletions

View file

@@ -183,10 +183,17 @@ jobs:
           name: windows-rocm-deps
           path: dist/deps/*
 
-  # CUDA v11 generation step
-  generate-windows-cuda-v11:
+  # CUDA generation step
+  generate-windows-cuda:
     environment: release
     runs-on: windows
+    strategy:
+      matrix:
+        cuda:
+          - version: "11"
+            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
+          - version: "12"
+            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
     env:
       KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
     steps:
@@ -220,11 +227,11 @@ jobs:
         with:
           go-version-file: go.mod
           cache: true
-      - name: 'Install CUDA'
+      - name: 'Install CUDA ${{ matrix.cuda.version }}'
         run: |
           $ErrorActionPreference = "Stop"
           write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
           write-host "Installing CUDA"
           Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
           write-host "Completed CUDA"
@@ -256,7 +263,7 @@ jobs:
           cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
       - uses: actions/upload-artifact@v4
         with:
-          name: generate-windows-cuda-v11
+          name: generate-windows-cuda-${{ matrix.cuda.version }}
           path: |
             llm/build/**/bin/*
             dist/windows-amd64/**
@@ -265,95 +272,13 @@ jobs:
           name: windows-cuda-deps
           path: dist/deps/*
 
-  # CUDA v12 generation step
-  generate-windows-cuda-v12:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: 'Install CUDA'
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
-          write-host "Installing CUDA"
-          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
-          write-host "Completed CUDA"
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
-          echo "$cudaPath\bin" >> $env:GITHUB_PATH
-          echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
-      - name: 'Verify CUDA'
-        run: nvcc -V
-      - run: go get ./...
-      - name: go generate
-        run: |
-          $gopath=(get-command go).source | split-path -parent
-          $cudabin=(get-command nvcc).source | split-path
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$cudabin;$env:PATH"
-          $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          go generate -x ./...
-      - name: 'gather cuda dependencies'
-        run: |
-          $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
-          md "dist\deps"
-          cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cuda-v12
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
-      - uses: actions/upload-artifact@v4
-        with:
-          name: windows-cuda-deps
-          path: dist/deps/*
 
   # Import the prior generation steps and build the final windows assets
   build-windows:
     environment: release
     runs-on: windows
     needs:
-      - generate-windows-cuda-v11
-      - generate-windows-cuda-v12
+      - generate-windows-cuda
       - generate-windows-rocm
       - generate-windows-cpu
     env:
@@ -397,7 +322,10 @@ jobs:
           name: generate-windows-cpu
       - uses: actions/download-artifact@v4
         with:
-          name: generate-windows-cuda-v11
+          name: generate-windows-cuda-11
+      - uses: actions/download-artifact@v4
+        with:
+          name: generate-windows-cuda-12
       - uses: actions/download-artifact@v4
         with:
           name: windows-cuda-deps

View file

@@ -20,12 +20,12 @@ GPU.
 
 ## Manual install
 
-### Download the `ollama` tar file
+### Download `ollama`
 
-Ollama is distributed as a tar file including GPU library dependencies.
+Download and extract the Linux package:
 
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```
 
 ### Adding Ollama as a startup service (recommended)
@@ -95,7 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by downloading the ollama binary:
 
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```
 
 ## Installing specific versions

View file

@@ -28,7 +28,7 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
 }
 
-func cudaGetVariant(gpuInfo CudaGPUInfo) string {
+func cudaVariant(gpuInfo CudaGPUInfo) string {
 	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
 		if CudaTegra != "" {
 			ver := strings.Split(CudaTegra, ".")
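The renamed `cudaVariant` helper is only partially visible in this hunk: it takes a `CudaGPUInfo` and returns a variant string keyed off arm64 Linux and the `CudaTegra` version string. Below is a minimal, self-contained Go sketch of that kind of selection logic; the Jetpack-style naming and the v11/v12 fallback values are assumptions for illustration, not the actual function body.

```go
package main

import (
	"fmt"
	"runtime"
	"strings"
)

// cudaVariantSketch is a sketch of variant selection for bundled CUDA
// libraries: a Jetpack-style name on Tegra boards, otherwise a v11/v12
// split. The returned values are assumptions, not the real implementation.
func cudaVariantSketch(tegraVersion string, driverMajor int) string {
	if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" && tegraVersion != "" {
		ver := strings.Split(tegraVersion, ".") // e.g. "6.0" -> ["6", "0"]
		return "jetpack" + ver[0]               // hypothetical naming
	}
	if driverMajor < 12 {
		return "v11" // hypothetical fallback for older drivers
	}
	return "v12"
}

func main() {
	fmt.Println(cudaVariantSketch("", 12)) // prints "v12" on non-Tegra hosts
}
```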

View file

@@ -225,7 +225,7 @@ func GetGPUInfo() GpuInfoList {
 		return GpuInfoList{cpus[0].GpuInfo}
 	}
 
-	depPath := GetDepDir()
+	depPath := LibraryDir()
 
 	// Load ALL libraries
 	cHandles = initCudaHandles()
@@ -264,20 +264,20 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.computeMajor = int(memInfo.major)
 			gpuInfo.computeMinor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
-			cudaVariant := cudaGetVariant(gpuInfo)
+			variant := cudaVariant(gpuInfo)
 			if depPath != "" {
 				gpuInfo.DependencyPath = depPath
 				// Check for variant specific directory
-				if cudaVariant != "" {
-					if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
-						gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
+				if variant != "" {
+					if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
+						gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
 					}
 				}
 			}
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 			gpuInfo.DriverMajor = driverMajor
 			gpuInfo.DriverMinor = driverMinor
-			gpuInfo.Variant = cudaGetVariant(gpuInfo)
+			gpuInfo.Variant = variant
 
 			// query the management library as well so we can record any skew between the two
 			// which represents overhead on the GPU we must set aside on subsequent updates
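The variant string feeds directly into the dependency-path selection shown above: the base library directory is used unless a variant-specific `cuda_<variant>` subdirectory exists on disk. Here is a standalone Go sketch of that lookup, mirroring the hunk; the function name and example input are illustrative, not names from the codebase.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolveDependencyPath mirrors the logic in the hunk above: start from the
// bundled library directory and prefer a variant-specific "cuda_<variant>"
// subdirectory when it exists on disk.
func resolveDependencyPath(depPath, variant string) string {
	if depPath == "" {
		return ""
	}
	if variant != "" {
		candidate := filepath.Join(depPath, "cuda_"+variant)
		if _, err := os.Stat(candidate); err == nil {
			return candidate
		}
	}
	return depPath
}

func main() {
	// With no "cuda_v12" subdirectory present, the base path is returned.
	fmt.Println(resolveDependencyPath(os.TempDir(), "v12"))
}
```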
@@ -468,7 +468,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	slog.Debug("Searching for GPU library", "name", baseLibName)
 
 	// Start with our bundled libraries
-	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}
+	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
 
 	switch runtime.GOOS {
 	case "windows":
@@ -642,7 +642,7 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 	}
 }
 
-func GetDepDir() string {
+func LibraryDir() string {
 	// On Windows/linux we bundle the dependencies at the same level as the executable
 	appExe, err := os.Executable()
 	if err != nil {
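The comment kept in this hunk states that dependencies are bundled at the same level as the executable, which is what the renamed `LibraryDir` resolves. A minimal Go sketch of that idea follows; real code would also handle symlinks and fallback search paths, which are omitted here.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// libraryDirSketch resolves a bundled-library directory relative to the
// running executable, per the comment in the hunk above. Symlink handling
// and fallback locations are intentionally left out of this sketch.
func libraryDirSketch() (string, error) {
	appExe, err := os.Executable()
	if err != nil {
		return "", err
	}
	return filepath.Dir(appExe), nil
}

func main() {
	if dir, err := libraryDirSketch(); err == nil {
		fmt.Println("bundled libraries expected near:", dir)
	}
}
```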

View file

@@ -117,7 +117,7 @@ function build {
     if ($cmakeDefs -contains "-G") {
         $extra=@("-j8")
     } else {
-        $extra= @("--", "/p:CL_MPcount=8")
+        $extra= @("--", "/maxCpuCount:8")
     }
     write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
@@ -273,7 +273,7 @@ function build_cuda() {
             "-DGGML_CUDA=ON",
             "-DGGML_AVX=on",
             "-DGGML_AVX2=off",
-            "-DCMAKE_CUDA_FLAGS=-t8",
+            "-DCMAKE_CUDA_FLAGS=-t6",
             "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
             "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
         )