Review comments
This commit is contained in:
parent 88bb9e3328
commit f9e31da946
5 changed files with 32 additions and 104 deletions
106
.github/workflows/release.yaml
vendored
106
.github/workflows/release.yaml
vendored
|
@ -183,10 +183,17 @@ jobs:
|
||||||
name: windows-rocm-deps
|
name: windows-rocm-deps
|
||||||
path: dist/deps/*
|
path: dist/deps/*
|
||||||
|
|
||||||
# CUDA v11 generation step
|
# CUDA generation step
|
||||||
generate-windows-cuda-v11:
|
generate-windows-cuda:
|
||||||
environment: release
|
environment: release
|
||||||
runs-on: windows
|
runs-on: windows
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
cuda:
|
||||||
|
- version: "11"
|
||||||
|
url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
|
||||||
|
- version: "12"
|
||||||
|
url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
|
||||||
env:
|
env:
|
||||||
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
||||||
steps:
|
steps:
|
||||||
|
@ -220,11 +227,11 @@ jobs:
|
||||||
with:
|
with:
|
||||||
go-version-file: go.mod
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: 'Install CUDA'
|
- name: 'Install CUDA ${{ matrix.cuda.version }}'
|
||||||
run: |
|
run: |
|
||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
write-host "downloading CUDA Installer"
|
write-host "downloading CUDA Installer"
|
||||||
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
||||||
write-host "Installing CUDA"
|
write-host "Installing CUDA"
|
||||||
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
||||||
write-host "Completed CUDA"
|
write-host "Completed CUDA"
|
||||||
|
@ -256,7 +263,7 @@ jobs:
|
||||||
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda-v11
|
name: generate-windows-cuda-${{ matrix.cuda.version }}
|
||||||
path: |
|
path: |
|
||||||
llm/build/**/bin/*
|
llm/build/**/bin/*
|
||||||
dist/windows-amd64/**
|
dist/windows-amd64/**
|
||||||
|
@ -265,95 +272,13 @@ jobs:
|
||||||
name: windows-cuda-deps
|
name: windows-cuda-deps
|
||||||
path: dist/deps/*
|
path: dist/deps/*
|
||||||
|
|
||||||
# CUDA v12 generation step
|
|
||||||
generate-windows-cuda-v12:
|
|
||||||
environment: release
|
|
||||||
runs-on: windows
|
|
||||||
env:
|
|
||||||
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- name: Set Version
|
|
||||||
shell: bash
|
|
||||||
run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
|
|
||||||
- uses: 'google-github-actions/auth@v2'
|
|
||||||
with:
|
|
||||||
project_id: 'ollama'
|
|
||||||
credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
|
|
||||||
- run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
|
|
||||||
- name: install Windows SDK 8.1 to get signtool
|
|
||||||
run: |
|
|
||||||
$ErrorActionPreference = "Stop"
|
|
||||||
write-host "downloading SDK"
|
|
||||||
Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
|
|
||||||
Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
|
|
||||||
write-host "Win SDK 8.1 installed"
|
|
||||||
gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
|
|
||||||
- name: install signing plugin
|
|
||||||
run: |
|
|
||||||
$ErrorActionPreference = "Stop"
|
|
||||||
write-host "downloading plugin"
|
|
||||||
Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
|
|
||||||
Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
|
|
||||||
write-host "Installing plugin"
|
|
||||||
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
|
|
||||||
write-host "plugin installed"
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version-file: go.mod
|
|
||||||
cache: true
|
|
||||||
- name: 'Install CUDA'
|
|
||||||
run: |
|
|
||||||
$ErrorActionPreference = "Stop"
|
|
||||||
write-host "downloading CUDA Installer"
|
|
||||||
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
|
||||||
write-host "Installing CUDA"
|
|
||||||
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
|
||||||
write-host "Completed CUDA"
|
|
||||||
$cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
|
|
||||||
$cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
|
|
||||||
echo "$cudaPath\bin" >> $env:GITHUB_PATH
|
|
||||||
echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
|
|
||||||
echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
|
|
||||||
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
|
|
||||||
- name: 'Verify CUDA'
|
|
||||||
run: nvcc -V
|
|
||||||
- run: go get ./...
|
|
||||||
- name: go generate
|
|
||||||
run: |
|
|
||||||
$gopath=(get-command go).source | split-path -parent
|
|
||||||
$cudabin=(get-command nvcc).source | split-path
|
|
||||||
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
|
||||||
cd $env:GITHUB_WORKSPACE
|
|
||||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
|
||||||
$env:PATH="$gopath;$cudabin;$env:PATH"
|
|
||||||
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
|
||||||
go generate -x ./...
|
|
||||||
- name: 'gather cuda dependencies'
|
|
||||||
run: |
|
|
||||||
$NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
|
|
||||||
md "dist\deps"
|
|
||||||
cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
|
|
||||||
cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
|
|
||||||
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: generate-windows-cuda-v12
|
|
||||||
path: |
|
|
||||||
llm/build/**/bin/*
|
|
||||||
dist/windows-amd64/**
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: windows-cuda-deps
|
|
||||||
path: dist/deps/*
|
|
||||||
|
|
||||||
# Import the prior generation steps and build the final windows assets
|
# Import the prior generation steps and build the final windows assets
|
||||||
build-windows:
|
build-windows:
|
||||||
environment: release
|
environment: release
|
||||||
runs-on: windows
|
runs-on: windows
|
||||||
needs:
|
needs:
|
||||||
- generate-windows-cuda-v11
|
- generate-windows-cuda
|
||||||
- generate-windows-cuda-v12
|
|
||||||
- generate-windows-rocm
|
- generate-windows-rocm
|
||||||
- generate-windows-cpu
|
- generate-windows-cpu
|
||||||
env:
|
env:
|
||||||
|
@ -397,7 +322,10 @@ jobs:
|
||||||
name: generate-windows-cpu
|
name: generate-windows-cpu
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda-v11
|
name: generate-windows-cuda-11
|
||||||
|
- uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: generate-windows-cuda-12
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: windows-cuda-deps
|
name: windows-cuda-deps
|
||||||
|
|
|
@ -20,12 +20,12 @@ GPU.
|
||||||
|
|
||||||
## Manual install
|
## Manual install
|
||||||
|
|
||||||
### Download the `ollama` tar file
|
### Download `ollama`
|
||||||
|
|
||||||
Ollama is distributed as a tar file including GPU library dependencies.
|
Download and extract the Linux package:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
### Adding Ollama as a startup service (recommended)
|
### Adding Ollama as a startup service (recommended)
|
||||||
|
@ -95,7 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
|
||||||
Or by downloading the ollama binary:
|
Or by downloading the ollama binary:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installing specific versions
|
## Installing specific versions
|
||||||
|
|
|
@ -28,7 +28,7 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
||||||
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
|
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cudaGetVariant(gpuInfo CudaGPUInfo) string {
|
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||||
if CudaTegra != "" {
|
if CudaTegra != "" {
|
||||||
ver := strings.Split(CudaTegra, ".")
|
ver := strings.Split(CudaTegra, ".")
|
||||||
|
|
16
gpu/gpu.go
16
gpu/gpu.go
|
@ -225,7 +225,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
return GpuInfoList{cpus[0].GpuInfo}
|
return GpuInfoList{cpus[0].GpuInfo}
|
||||||
}
|
}
|
||||||
|
|
||||||
depPath := GetDepDir()
|
depPath := LibraryDir()
|
||||||
|
|
||||||
// Load ALL libraries
|
// Load ALL libraries
|
||||||
cHandles = initCudaHandles()
|
cHandles = initCudaHandles()
|
||||||
|
@ -264,20 +264,20 @@ func GetGPUInfo() GpuInfoList {
|
||||||
gpuInfo.computeMajor = int(memInfo.major)
|
gpuInfo.computeMajor = int(memInfo.major)
|
||||||
gpuInfo.computeMinor = int(memInfo.minor)
|
gpuInfo.computeMinor = int(memInfo.minor)
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
cudaVariant := cudaGetVariant(gpuInfo)
|
variant := cudaVariant(gpuInfo)
|
||||||
if depPath != "" {
|
if depPath != "" {
|
||||||
gpuInfo.DependencyPath = depPath
|
gpuInfo.DependencyPath = depPath
|
||||||
// Check for variant specific directory
|
// Check for variant specific directory
|
||||||
if cudaVariant != "" {
|
if variant != "" {
|
||||||
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
|
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
|
||||||
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
|
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
gpuInfo.DriverMajor = driverMajor
|
gpuInfo.DriverMajor = driverMajor
|
||||||
gpuInfo.DriverMinor = driverMinor
|
gpuInfo.DriverMinor = driverMinor
|
||||||
gpuInfo.Variant = cudaGetVariant(gpuInfo)
|
gpuInfo.Variant = variant
|
||||||
|
|
||||||
// query the management library as well so we can record any skew between the two
|
// query the management library as well so we can record any skew between the two
|
||||||
// which represents overhead on the GPU we must set aside on subsequent updates
|
// which represents overhead on the GPU we must set aside on subsequent updates
|
||||||
|
@ -468,7 +468,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
|
||||||
slog.Debug("Searching for GPU library", "name", baseLibName)
|
slog.Debug("Searching for GPU library", "name", baseLibName)
|
||||||
|
|
||||||
// Start with our bundled libraries
|
// Start with our bundled libraries
|
||||||
patterns := []string{filepath.Join(GetDepDir(), baseLibName)}
|
patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
|
||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "windows":
|
case "windows":
|
||||||
|
@ -642,7 +642,7 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetDepDir() string {
|
func LibraryDir() string {
|
||||||
// On Windows/linux we bundle the dependencies at the same level as the executable
|
// On Windows/linux we bundle the dependencies at the same level as the executable
|
||||||
appExe, err := os.Executable()
|
appExe, err := os.Executable()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -117,7 +117,7 @@ function build {
|
||||||
if ($cmakeDefs -contains "-G") {
|
if ($cmakeDefs -contains "-G") {
|
||||||
$extra=@("-j8")
|
$extra=@("-j8")
|
||||||
} else {
|
} else {
|
||||||
$extra= @("--", "/p:CL_MPcount=8")
|
$extra= @("--", "/maxCpuCount:8")
|
||||||
}
|
}
|
||||||
write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
|
write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
|
||||||
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
|
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
|
||||||
|
@ -273,7 +273,7 @@ function build_cuda() {
|
||||||
"-DGGML_CUDA=ON",
|
"-DGGML_CUDA=ON",
|
||||||
"-DGGML_AVX=on",
|
"-DGGML_AVX=on",
|
||||||
"-DGGML_AVX2=off",
|
"-DGGML_AVX2=off",
|
||||||
"-DCMAKE_CUDA_FLAGS=-t8",
|
"-DCMAKE_CUDA_FLAGS=-t6",
|
||||||
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
|
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
|
||||||
"-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
|
"-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue