//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"
*/
import "C"

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)

// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU

// Note: gpuMutex must already be held
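// initCudaHandles loads the NVIDIA management libraries, reusing any library
// path discovered on an earlier call, and otherwise probing NVML, the CUDA
// driver (nvcuda), and the CUDA runtime (cudart) in that order.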
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string

	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs

	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	libDir := LibraryDir()
	if libDir != "" {
		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
	}
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
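// initOneAPIHandles loads the oneAPI management library, reusing a previously
// discovered library path when available.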
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
	if len(oneapiLibPaths) > 0 {
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
	}

	return oHandles
}
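
// GetCPUInfo returns the system CPU and memory information as a single-entry
// GpuInfoList, running full GPU discovery first if it has not happened yet.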
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		gpuMutex.Unlock()
		GetGPUInfo()
	} else {
		gpuMutex.Unlock()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}
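
// GetGPUInfo discovers compatible GPUs on the first call (CUDA, ROCm and,
// when enabled, Intel oneAPI), caches the results, and on later calls only
// refreshes the free memory figures. When no supported GPU is found, the
// returned list falls back to a single CPU entry.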
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		needRefresh = false
		cpuCapability = GetCPUCapability()
		var memInfo C.mem_info_t

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					Variant: cpuCapability.String(),
					ID:      "0",
				},
			},
		}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		depPath := LibraryDir()

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
					if variant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
						}
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
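
// FindGPULibs returns candidate paths for baseLibName, searching the bundled
// library directory first, then PATH/LD_LIBRARY_PATH, then the supplied
// default glob patterns, resolving symlinks and dropping duplicates.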
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	// Start with our bundled libraries
	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}
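
// LoadCUDARTMgmt tries each candidate cudart library in turn and returns the
// device count, handle, and path of the first one that initializes.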
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}
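
// LoadNVCUDAMgmt tries each candidate CUDA driver library in turn and returns
// the device count, handle, and path of the first one that initializes,
// logging load failures at a level appropriate to the error.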
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			// Decide what log level to use based on the type of error message to help users understand why
			msg := C.GoString(resp.err)
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
			case C.CUDA_ERROR_NO_DEVICE:
				slog.Info("no nvidia devices detected", "library", libPath)
			case C.CUDA_ERROR_UNKNOWN:
				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
			default:
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
				}
			}
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}
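
// LoadNVMLMgmt tries each candidate NVML library in turn and returns the
// handle and path of the first one that initializes.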
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return &resp.ch, libPath
		}
	}
	return nil, ""
}
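
// LoadOneapiMgmt tries each candidate oneAPI library in turn and returns the
// total device count across all drivers, the handle, and the path of the
// first one that initializes.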
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			for i := range resp.oh.num_drivers {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
		}
	}
	return 0, nil, ""
}
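
// getVerboseState translates the debug setting into the verbosity flag
// expected by the C discovery libraries.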
func getVerboseState() C.uint16_t {
	if envconfig.Debug() {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
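
// LibraryDir locates the directory holding the bundled GPU dependency
// libraries, checking next to the executable, the configured location
// relative to it, and the current working directory (including local
// developer build layouts).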
func LibraryDir() string {
	// On Windows/Linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependencies, and pick the first match
	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
		libDep := filepath.Join("lib", "ollama")
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}