f8ef4439e9
The build tags rocm or cuda must be specified to both go generate and go build. ROCm builds should have ROCM_PATH set (and the ROCm SDK present), CLBlast installed (for GGML), and CLBlast_DIR set in the environment to the CLBlast cmake directory (likely /usr/lib/cmake/CLBlast). Build tags are also used to switch VRAM detection between the cuda and rocm implementations, using added "accelerator_foo.go" files which contain architecture-specific functions and variables. accelerator_none is used when no tags are set, and a helper function addRunner will ignore it if it is the chosen accelerator. Fix go generate commands, thanks @deadmeu for testing.
67 lines
1.6 KiB
Go
67 lines
1.6 KiB
Go
//go:build cuda
|
|
|
|
package llm
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os/exec"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/jmorganca/ollama/format"
|
|
)
|
|
|
|
// Sentinel errors for NVIDIA GPU detection.
var (
	// errAvailableVRAM is returned by CheckVRAM when the total free VRAM is
	// below the 2 GB minimum.
	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")

	// errNvidiaSMI describes a failed nvidia-smi invocation.
	errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
)
|
|
|
|
// acceleratedRunner returns the runner for this accelerator given the provided buildPath string.
|
|
func acceleratedRunner(buildPath string) []ModelRunner {
|
|
return []ModelRunner{
|
|
ModelRunner{
|
|
Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"),
|
|
Accelerated: true,
|
|
},
|
|
}
|
|
}
|
|
|
|
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
|
|
func CheckVRAM() (int64, error) {
|
|
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits")
|
|
var stdout bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
err := cmd.Run()
|
|
if err != nil {
|
|
return 0, errNoAccel
|
|
}
|
|
|
|
var freeMiB int64
|
|
scanner := bufio.NewScanner(&stdout)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if strings.Contains(line, "[Insufficient Permissions]") {
|
|
return 0, fmt.Errorf("GPU support may not enabled, check you have installed GPU drivers and have the necessary permissions to run nvidia-smi")
|
|
}
|
|
|
|
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
|
|
}
|
|
|
|
freeMiB += vram
|
|
}
|
|
|
|
freeBytes := freeMiB * 1024 * 1024
|
|
if freeBytes < 2*format.GigaByte {
|
|
log.Printf("less than 2 GB VRAM available")
|
|
return 0, errAvailableVRAM
|
|
}
|
|
|
|
return freeBytes, nil
|
|
}
|