2023-11-29 11:00:37 -08:00
//go:build linux || windows
2024-10-16 17:45:00 -07:00
package discover
2023-11-29 11:00:37 -08:00
/ *
2023-12-13 17:26:47 -08:00
# cgo linux LDFLAGS : - lrt - lpthread - ldl - lstdc ++ - lm
# cgo windows LDFLAGS : - lpthread
2023-11-29 11:00:37 -08:00
# include "gpu_info.h"
* /
import "C"
2024-08-01 14:52:15 -07:00
2023-11-29 11:00:37 -08:00
import (
"fmt"
2024-01-18 10:52:01 -08:00
"log/slog"
2024-01-10 14:39:51 -08:00
"os"
"path/filepath"
2023-12-23 11:35:44 -08:00
"runtime"
2024-01-10 14:39:51 -08:00
"strings"
2023-11-29 11:00:37 -08:00
"sync"
"unsafe"
2024-03-18 10:45:22 +01:00
2024-06-03 08:31:48 -07:00
"github.com/ollama/ollama/envconfig"
2024-05-15 15:13:16 -07:00
"github.com/ollama/ollama/format"
2023-11-29 11:00:37 -08:00
)
2024-05-29 16:37:34 -07:00
type cudaHandles struct {
2024-03-30 09:50:05 -07:00
deviceCount int
cudart * C . cudart_handle_t
2024-04-30 16:42:48 -07:00
nvcuda * C . nvcuda_handle_t
2024-06-03 15:07:50 -07:00
nvml * C . nvml_handle_t
2024-05-29 16:37:34 -07:00
}
type oneapiHandles struct {
2024-05-24 11:18:27 +08:00
oneapi * C . oneapi_handle_t
2024-05-29 16:37:34 -07:00
deviceCount int
2023-11-29 11:00:37 -08:00
}
2024-03-18 10:45:22 +01:00
const (
2024-05-10 09:15:28 -07:00
cudaMinimumMemory = 457 * format . MebiByte
rocmMinimumMemory = 457 * format . MebiByte
2024-05-29 16:37:34 -07:00
// TODO OneAPI minimum memory
2024-03-18 10:45:22 +01:00
)
2024-05-15 15:13:16 -07:00
var (
gpuMutex sync . Mutex
bootstrapped bool
cpuCapability CPUCapability
cpus [ ] CPUInfo
cudaGPUs [ ] CudaGPUInfo
nvcudaLibPath string
cudartLibPath string
oneapiLibPath string
2024-06-03 15:07:50 -07:00
nvmlLibPath string
2024-05-15 15:13:16 -07:00
rocmGPUs [ ] RocmGPUInfo
oneapiGPUs [ ] OneapiGPUInfo
2024-10-14 16:26:45 -07:00
// If any discovered GPUs are incompatible, report why
unsupportedGPUs [ ] UnsupportedGPUInfo
// Keep track of errors during bootstrapping so that if GPUs are missing
// they expected to be present this may explain why
bootstrapErrors [ ] error
2024-05-15 15:13:16 -07:00
)
2023-11-29 11:00:37 -08:00
2024-01-20 12:15:50 -08:00
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [ 2 ] C . int { 5 , 0 }
2024-01-06 21:40:04 -08:00
2024-03-30 09:50:05 -07:00
var RocmComputeMin = 9
2024-01-10 14:39:51 -08:00
2024-03-30 09:50:05 -07:00
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format . GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
2024-01-10 14:39:51 -08:00
2023-11-29 11:00:37 -08:00
// Note: gpuMutex must already be held
2024-05-29 16:37:34 -07:00
func initCudaHandles ( ) * cudaHandles {
2023-12-13 17:26:47 -08:00
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
2024-01-10 14:39:51 -08:00
2024-05-29 16:37:34 -07:00
cHandles := & cudaHandles { }
2024-05-15 15:13:16 -07:00
// Short Circuit if we already know which library to use
2024-10-14 16:26:45 -07:00
// ignore bootstrap errors in this case since we already recorded them
2024-06-03 15:07:50 -07:00
if nvmlLibPath != "" {
2024-10-14 16:26:45 -07:00
cHandles . nvml , _ , _ = loadNVMLMgmt ( [ ] string { nvmlLibPath } )
2024-06-03 15:07:50 -07:00
return cHandles
}
2024-05-15 15:13:16 -07:00
if nvcudaLibPath != "" {
2024-10-14 16:26:45 -07:00
cHandles . deviceCount , cHandles . nvcuda , _ , _ = loadNVCUDAMgmt ( [ ] string { nvcudaLibPath } )
2024-05-29 16:37:34 -07:00
return cHandles
2024-05-15 15:13:16 -07:00
}
if cudartLibPath != "" {
2024-10-14 16:26:45 -07:00
cHandles . deviceCount , cHandles . cudart , _ , _ = loadCUDARTMgmt ( [ ] string { cudartLibPath } )
2024-05-29 16:37:34 -07:00
return cHandles
2024-05-15 15:13:16 -07:00
}
slog . Debug ( "searching for GPU discovery libraries for NVIDIA" )
2024-03-25 11:07:44 -04:00
var cudartMgmtPatterns [ ] string
2024-06-03 15:07:50 -07:00
2024-06-05 12:07:20 -07:00
// Aligned with driver, we can't carry as payloads
nvcudaMgmtPatterns := NvcudaGlobs
2024-06-03 15:07:50 -07:00
2024-06-05 12:07:20 -07:00
if runtime . GOOS == "windows" {
localAppData := os . Getenv ( "LOCALAPPDATA" )
cudartMgmtPatterns = [ ] string { filepath . Join ( localAppData , "Programs" , "Ollama" , CudartMgmtName ) }
}
2024-09-12 12:10:30 -07:00
libDir := LibraryDir ( )
if libDir != "" {
cudartMgmtPatterns = [ ] string { filepath . Join ( libDir , CudartMgmtName ) }
2024-01-10 14:39:51 -08:00
}
2024-06-05 12:07:20 -07:00
cudartMgmtPatterns = append ( cudartMgmtPatterns , CudartGlobs ... )
2024-01-10 14:39:51 -08:00
2024-06-05 12:07:20 -07:00
if len ( NvmlGlobs ) > 0 {
nvmlLibPaths := FindGPULibs ( NvmlMgmtName , NvmlGlobs )
2024-06-03 15:07:50 -07:00
if len ( nvmlLibPaths ) > 0 {
2024-10-14 16:26:45 -07:00
nvml , libPath , err := loadNVMLMgmt ( nvmlLibPaths )
2024-06-03 15:07:50 -07:00
if nvml != nil {
slog . Debug ( "nvidia-ml loaded" , "library" , libPath )
cHandles . nvml = nvml
nvmlLibPath = libPath
}
2024-10-14 16:26:45 -07:00
if err != nil {
bootstrapErrors = append ( bootstrapErrors , err )
}
2024-06-03 15:07:50 -07:00
}
}
2024-06-05 12:07:20 -07:00
nvcudaLibPaths := FindGPULibs ( NvcudaMgmtName , nvcudaMgmtPatterns )
2024-04-30 16:42:48 -07:00
if len ( nvcudaLibPaths ) > 0 {
2024-10-14 16:26:45 -07:00
deviceCount , nvcuda , libPath , err := loadNVCUDAMgmt ( nvcudaLibPaths )
2024-04-30 16:42:48 -07:00
if nvcuda != nil {
2024-05-07 14:54:26 -07:00
slog . Debug ( "detected GPUs" , "count" , deviceCount , "library" , libPath )
2024-05-29 16:37:34 -07:00
cHandles . nvcuda = nvcuda
cHandles . deviceCount = deviceCount
2024-05-15 15:13:16 -07:00
nvcudaLibPath = libPath
2024-05-29 16:37:34 -07:00
return cHandles
2024-04-30 16:42:48 -07:00
}
2024-10-14 16:26:45 -07:00
if err != nil {
bootstrapErrors = append ( bootstrapErrors , err )
}
2024-04-30 16:42:48 -07:00
}
2024-06-05 12:07:20 -07:00
cudartLibPaths := FindGPULibs ( CudartMgmtName , cudartMgmtPatterns )
2024-03-25 11:07:44 -04:00
if len ( cudartLibPaths ) > 0 {
2024-10-14 16:26:45 -07:00
deviceCount , cudart , libPath , err := loadCUDARTMgmt ( cudartLibPaths )
2024-03-25 11:07:44 -04:00
if cudart != nil {
2024-05-07 14:54:26 -07:00
slog . Debug ( "detected GPUs" , "library" , libPath , "count" , deviceCount )
2024-05-29 16:37:34 -07:00
cHandles . cudart = cudart
cHandles . deviceCount = deviceCount
2024-05-15 15:13:16 -07:00
cudartLibPath = libPath
2024-05-29 16:37:34 -07:00
return cHandles
2024-03-25 11:07:44 -04:00
}
2024-10-14 16:26:45 -07:00
if err != nil {
bootstrapErrors = append ( bootstrapErrors , err )
}
2024-03-25 11:07:44 -04:00
}
2024-05-24 11:18:27 +08:00
2024-05-29 16:37:34 -07:00
return cHandles
}
// Note: gpuMutex must already be held
func initOneAPIHandles ( ) * oneapiHandles {
oHandles := & oneapiHandles { }
// Short Circuit if we already know which library to use
2024-10-14 16:26:45 -07:00
// ignore bootstrap errors in this case since we already recorded them
2024-05-29 16:37:34 -07:00
if oneapiLibPath != "" {
2024-10-14 16:26:45 -07:00
oHandles . deviceCount , oHandles . oneapi , _ , _ = loadOneapiMgmt ( [ ] string { oneapiLibPath } )
2024-05-29 16:37:34 -07:00
return oHandles
}
2024-06-05 12:07:20 -07:00
oneapiLibPaths := FindGPULibs ( OneapiMgmtName , OneapiGlobs )
2024-06-03 08:31:48 -07:00
if len ( oneapiLibPaths ) > 0 {
2024-10-14 16:26:45 -07:00
var err error
oHandles . deviceCount , oHandles . oneapi , oneapiLibPath , err = loadOneapiMgmt ( oneapiLibPaths )
if err != nil {
bootstrapErrors = append ( bootstrapErrors , err )
}
2024-06-03 08:31:48 -07:00
}
2024-05-29 16:37:34 -07:00
return oHandles
2023-11-29 11:00:37 -08:00
}
2024-06-03 19:09:23 -07:00
// GetCPUInfo returns a single-element list holding the CPU entry, running the
// GPU bootstrap first if it has not happened yet.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	ready := bootstrapped
	gpuMutex.Unlock()

	if !ready {
		// GetGPUInfo acquires gpuMutex itself, so it must be called unlocked.
		GetGPUInfo()
	}

	// Read cpus under the lock: GetGPUInfo updates cpus[0] in place while
	// refreshing memory figures, so an unlocked read here would race with it.
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}
2024-03-30 09:50:05 -07:00
func GetGPUInfo ( ) GpuInfoList {
2023-11-29 11:00:37 -08:00
// TODO - consider exploring lspci (and equivalent on windows) to check for
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex . Lock ( )
defer gpuMutex . Unlock ( )
2024-05-15 15:13:16 -07:00
needRefresh := true
2024-05-29 16:37:34 -07:00
var cHandles * cudaHandles
var oHandles * oneapiHandles
2024-03-30 15:34:21 -07:00
defer func ( ) {
2024-05-29 16:37:34 -07:00
if cHandles != nil {
if cHandles . cudart != nil {
C . cudart_release ( * cHandles . cudart )
}
if cHandles . nvcuda != nil {
C . nvcuda_release ( * cHandles . nvcuda )
}
2024-06-03 15:07:50 -07:00
if cHandles . nvml != nil {
C . nvml_release ( * cHandles . nvml )
}
2024-03-30 15:34:21 -07:00
}
2024-05-29 16:37:34 -07:00
if oHandles != nil {
if oHandles . oneapi != nil {
// TODO - is this needed?
C . oneapi_release ( * oHandles . oneapi )
}
2024-04-30 16:42:48 -07:00
}
2024-03-30 15:34:21 -07:00
} ( )
2023-11-29 11:00:37 -08:00
2024-05-15 15:13:16 -07:00
if ! bootstrapped {
2024-07-03 10:30:07 -07:00
slog . Info ( "looking for compatible GPUs" )
2024-10-14 16:26:45 -07:00
bootstrapErrors = [ ] error { }
2024-05-15 15:13:16 -07:00
needRefresh = false
2024-06-05 12:07:20 -07:00
cpuCapability = GetCPUCapability ( )
2024-05-15 15:13:16 -07:00
var memInfo C . mem_info_t
2024-06-03 19:09:23 -07:00
mem , err := GetCPUMem ( )
if err != nil {
slog . Warn ( "error looking up system memory" , "error" , err )
2024-05-15 15:13:16 -07:00
}
2024-09-21 16:28:29 -07:00
depPath := LibraryDir ( )
2024-10-15 11:36:08 -07:00
details , err := GetCPUDetails ( )
if err != nil {
slog . Warn ( "failed to lookup CPU details" , "error" , err )
}
2024-08-01 14:52:15 -07:00
cpus = [ ] CPUInfo {
{
GpuInfo : GpuInfo {
2024-09-21 16:28:29 -07:00
memInfo : mem ,
Library : "cpu" ,
Variant : cpuCapability . String ( ) ,
ID : "0" ,
DependencyPath : depPath ,
2024-08-01 14:52:15 -07:00
} ,
2024-10-15 11:36:08 -07:00
CPUs : details ,
2024-05-15 15:13:16 -07:00
} ,
2024-08-01 14:52:15 -07:00
}
2024-05-15 15:13:16 -07:00
// Fallback to CPU mode if we're lacking required vector extensions on x86
if cpuCapability < GPURunnerCPUCapability && runtime . GOARCH == "amd64" {
2024-10-14 16:26:45 -07:00
err := fmt . Errorf ( "CPU does not have minimum vector extensions, GPU inference disabled. Required:%s Detected:%s" , GPURunnerCPUCapability , cpuCapability )
slog . Warn ( err . Error ( ) )
bootstrapErrors = append ( bootstrapErrors , err )
2024-05-15 15:13:16 -07:00
bootstrapped = true
// No need to do any GPU discovery, since we can't run on them
return GpuInfoList { cpus [ 0 ] . GpuInfo }
}
2024-01-26 11:11:09 -08:00
2024-05-15 15:13:16 -07:00
// Load ALL libraries
2024-05-29 16:37:34 -07:00
cHandles = initCudaHandles ( )
2024-05-15 15:13:16 -07:00
// NVIDIA
2024-05-29 16:37:34 -07:00
for i := range cHandles . deviceCount {
if cHandles . cudart != nil || cHandles . nvcuda != nil {
2024-05-15 15:13:16 -07:00
gpuInfo := CudaGPUInfo {
GpuInfo : GpuInfo {
Library : "cuda" ,
} ,
index : i ,
}
var driverMajor int
var driverMinor int
2024-05-29 16:37:34 -07:00
if cHandles . cudart != nil {
C . cudart_bootstrap ( * cHandles . cudart , C . int ( i ) , & memInfo )
2024-05-15 15:13:16 -07:00
} else {
2024-05-29 16:37:34 -07:00
C . nvcuda_bootstrap ( * cHandles . nvcuda , C . int ( i ) , & memInfo )
driverMajor = int ( cHandles . nvcuda . driver_major )
driverMinor = int ( cHandles . nvcuda . driver_minor )
2024-05-15 15:13:16 -07:00
}
if memInfo . err != nil {
slog . Info ( "error looking up nvidia GPU memory" , "error" , C . GoString ( memInfo . err ) )
C . free ( unsafe . Pointer ( memInfo . err ) )
continue
}
gpuInfo . TotalMemory = uint64 ( memInfo . total )
gpuInfo . FreeMemory = uint64 ( memInfo . free )
gpuInfo . ID = C . GoString ( & memInfo . gpu_id [ 0 ] )
gpuInfo . Compute = fmt . Sprintf ( "%d.%d" , memInfo . major , memInfo . minor )
2024-06-13 20:46:14 -07:00
gpuInfo . computeMajor = int ( memInfo . major )
gpuInfo . computeMinor = int ( memInfo . minor )
2024-05-15 15:13:16 -07:00
gpuInfo . MinimumMemory = cudaMinimumMemory
2024-08-23 11:21:12 -07:00
gpuInfo . DriverMajor = driverMajor
gpuInfo . DriverMinor = driverMinor
2024-08-15 14:38:14 -07:00
variant := cudaVariant ( gpuInfo )
2024-05-30 21:54:07 -07:00
if depPath != "" {
gpuInfo . DependencyPath = depPath
// Check for variant specific directory
2024-08-15 14:38:14 -07:00
if variant != "" {
if _ , err := os . Stat ( filepath . Join ( depPath , "cuda_" + variant ) ) ; err == nil {
gpuInfo . DependencyPath = filepath . Join ( depPath , "cuda_" + variant )
2024-05-30 21:54:07 -07:00
}
}
}
2024-05-15 15:13:16 -07:00
gpuInfo . Name = C . GoString ( & memInfo . gpu_name [ 0 ] )
2024-08-15 14:38:14 -07:00
gpuInfo . Variant = variant
2024-05-15 15:13:16 -07:00
2024-10-14 16:26:45 -07:00
if memInfo . major < CudaComputeMin [ 0 ] || ( memInfo . major == CudaComputeMin [ 0 ] && memInfo . minor < CudaComputeMin [ 1 ] ) {
unsupportedGPUs = append ( unsupportedGPUs ,
UnsupportedGPUInfo {
GpuInfo : gpuInfo . GpuInfo ,
} )
slog . Info ( fmt . Sprintf ( "[%d] CUDA GPU is too old. Compute Capability detected: %d.%d" , i , memInfo . major , memInfo . minor ) )
continue
}
2024-07-09 10:27:53 -07:00
// query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates
if cHandles . nvml != nil {
2024-11-02 16:35:41 -07:00
uuid := C . CString ( gpuInfo . ID )
defer C . free ( unsafe . Pointer ( uuid ) )
C . nvml_get_free ( * cHandles . nvml , uuid , & memInfo . free , & memInfo . total , & memInfo . used )
2024-07-09 10:27:53 -07:00
if memInfo . err != nil {
slog . Warn ( "error looking up nvidia GPU memory" , "error" , C . GoString ( memInfo . err ) )
C . free ( unsafe . Pointer ( memInfo . err ) )
} else {
if memInfo . free != 0 && uint64 ( memInfo . free ) > gpuInfo . FreeMemory {
gpuInfo . OSOverhead = uint64 ( memInfo . free ) - gpuInfo . FreeMemory
slog . Info ( "detected OS VRAM overhead" ,
"id" , gpuInfo . ID ,
"library" , gpuInfo . Library ,
"compute" , gpuInfo . Compute ,
"driver" , fmt . Sprintf ( "%d.%d" , gpuInfo . DriverMajor , gpuInfo . DriverMinor ) ,
"name" , gpuInfo . Name ,
"overhead" , format . HumanBytes2 ( gpuInfo . OSOverhead ) ,
)
}
}
}
2024-05-15 15:13:16 -07:00
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
cudaGPUs = append ( cudaGPUs , gpuInfo )
2024-05-24 11:18:27 +08:00
}
2024-05-29 16:37:34 -07:00
}
// Intel
2024-07-03 17:22:13 -07:00
if envconfig . IntelGPU ( ) {
2024-06-16 20:09:05 -04:00
oHandles = initOneAPIHandles ( )
2024-08-09 11:31:38 -07:00
if oHandles != nil && oHandles . oneapi != nil {
for d := range oHandles . oneapi . num_drivers {
if oHandles . oneapi == nil {
// shouldn't happen
slog . Warn ( "nil oneapi handle with driver count" , "count" , int ( oHandles . oneapi . num_drivers ) )
continue
}
devCount := C . oneapi_get_device_count ( * oHandles . oneapi , C . int ( d ) )
for i := range devCount {
gpuInfo := OneapiGPUInfo {
GpuInfo : GpuInfo {
Library : "oneapi" ,
} ,
driverIndex : int ( d ) ,
gpuIndex : int ( i ) ,
}
// TODO - split bootstrapping from updating free memory
C . oneapi_check_vram ( * oHandles . oneapi , C . int ( d ) , i , & memInfo )
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64 ( memInfo . free ) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo . free = C . uint64_t ( totalFreeMem )
gpuInfo . TotalMemory = uint64 ( memInfo . total )
gpuInfo . FreeMemory = uint64 ( memInfo . free )
gpuInfo . ID = C . GoString ( & memInfo . gpu_id [ 0 ] )
gpuInfo . Name = C . GoString ( & memInfo . gpu_name [ 0 ] )
gpuInfo . DependencyPath = depPath
oneapiGPUs = append ( oneapiGPUs , gpuInfo )
2024-06-16 20:09:05 -04:00
}
2024-05-15 15:13:16 -07:00
}
}
}
2024-10-14 16:26:45 -07:00
rocmGPUs , err = AMDGetGPUInfo ( )
if err != nil {
bootstrapErrors = append ( bootstrapErrors , err )
}
2024-05-15 15:13:16 -07:00
bootstrapped = true
2024-07-03 10:30:07 -07:00
if len ( cudaGPUs ) == 0 && len ( rocmGPUs ) == 0 && len ( oneapiGPUs ) == 0 {
slog . Info ( "no compatible GPUs were discovered" )
}
2024-05-15 15:13:16 -07:00
}
// For detected GPUs, load library if not loaded
// Refresh free memory usage
if needRefresh {
2024-06-03 19:09:23 -07:00
mem , err := GetCPUMem ( )
if err != nil {
slog . Warn ( "error looking up system memory" , "error" , err )
} else {
slog . Debug ( "updating system memory data" ,
slog . Group (
"before" ,
"total" , format . HumanBytes2 ( cpus [ 0 ] . TotalMemory ) ,
"free" , format . HumanBytes2 ( cpus [ 0 ] . FreeMemory ) ,
2024-07-11 16:42:57 -07:00
"free_swap" , format . HumanBytes2 ( cpus [ 0 ] . FreeSwap ) ,
2024-06-03 19:09:23 -07:00
) ,
slog . Group (
"now" ,
"total" , format . HumanBytes2 ( mem . TotalMemory ) ,
"free" , format . HumanBytes2 ( mem . FreeMemory ) ,
2024-07-11 16:42:57 -07:00
"free_swap" , format . HumanBytes2 ( mem . FreeSwap ) ,
2024-06-03 19:09:23 -07:00
) ,
)
cpus [ 0 ] . FreeMemory = mem . FreeMemory
2024-07-11 16:42:57 -07:00
cpus [ 0 ] . FreeSwap = mem . FreeSwap
2024-06-03 19:09:23 -07:00
}
2024-05-15 15:13:16 -07:00
var memInfo C . mem_info_t
2024-05-29 16:37:34 -07:00
if cHandles == nil && len ( cudaGPUs ) > 0 {
cHandles = initCudaHandles ( )
2024-05-15 15:13:16 -07:00
}
for i , gpu := range cudaGPUs {
2024-06-03 15:07:50 -07:00
if cHandles . nvml != nil {
2024-11-02 16:35:41 -07:00
uuid := C . CString ( gpu . ID )
defer C . free ( unsafe . Pointer ( uuid ) )
C . nvml_get_free ( * cHandles . nvml , uuid , & memInfo . free , & memInfo . total , & memInfo . used )
2024-06-03 15:07:50 -07:00
} else if cHandles . cudart != nil {
2024-05-29 16:37:34 -07:00
C . cudart_bootstrap ( * cHandles . cudart , C . int ( gpu . index ) , & memInfo )
2024-06-03 15:07:50 -07:00
} else if cHandles . nvcuda != nil {
C . nvcuda_get_free ( * cHandles . nvcuda , C . int ( gpu . index ) , & memInfo . free , & memInfo . total )
memInfo . used = memInfo . total - memInfo . free
2024-05-24 11:18:27 +08:00
} else {
2024-06-03 15:07:50 -07:00
// shouldn't happen
slog . Warn ( "no valid cuda library loaded to refresh vram usage" )
break
2024-05-24 11:18:27 +08:00
}
if memInfo . err != nil {
2024-05-15 15:13:16 -07:00
slog . Warn ( "error looking up nvidia GPU memory" , "error" , C . GoString ( memInfo . err ) )
2024-05-24 11:18:27 +08:00
C . free ( unsafe . Pointer ( memInfo . err ) )
continue
}
2024-05-15 15:13:16 -07:00
if memInfo . free == 0 {
slog . Warn ( "error looking up nvidia GPU memory" )
2024-05-24 11:18:27 +08:00
continue
}
2024-07-09 10:27:53 -07:00
if cHandles . nvml != nil && gpu . OSOverhead > 0 {
// When using the management library update based on recorded overhead
memInfo . free -= C . uint64_t ( gpu . OSOverhead )
}
2024-06-03 15:07:50 -07:00
slog . Debug ( "updating cuda memory data" ,
"gpu" , gpu . ID ,
"name" , gpu . Name ,
2024-07-09 10:27:53 -07:00
"overhead" , format . HumanBytes2 ( gpu . OSOverhead ) ,
2024-06-03 15:07:50 -07:00
slog . Group (
"before" ,
"total" , format . HumanBytes2 ( gpu . TotalMemory ) ,
"free" , format . HumanBytes2 ( gpu . FreeMemory ) ,
) ,
slog . Group (
"now" ,
"total" , format . HumanBytes2 ( uint64 ( memInfo . total ) ) ,
"free" , format . HumanBytes2 ( uint64 ( memInfo . free ) ) ,
"used" , format . HumanBytes2 ( uint64 ( memInfo . used ) ) ,
) ,
)
2024-05-15 15:13:16 -07:00
cudaGPUs [ i ] . FreeMemory = uint64 ( memInfo . free )
2023-12-13 17:26:47 -08:00
}
2024-05-29 16:37:34 -07:00
if oHandles == nil && len ( oneapiGPUs ) > 0 {
oHandles = initOneAPIHandles ( )
}
for i , gpu := range oneapiGPUs {
if oHandles . oneapi == nil {
// shouldn't happen
slog . Warn ( "nil oneapi handle with device count" , "count" , oHandles . deviceCount )
continue
}
C . oneapi_check_vram ( * oHandles . oneapi , C . int ( gpu . driverIndex ) , C . int ( gpu . gpuIndex ) , & memInfo )
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64 ( memInfo . free ) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo . free = C . uint64_t ( totalFreeMem )
oneapiGPUs [ i ] . FreeMemory = uint64 ( memInfo . free )
}
2024-06-03 19:09:23 -07:00
err = RocmGPUInfoList ( rocmGPUs ) . RefreshFreeMemory ( )
2024-05-15 15:13:16 -07:00
if err != nil {
slog . Debug ( "problem refreshing ROCm free memory" , "error" , err )
2024-06-03 08:31:48 -07:00
}
2023-12-13 17:26:47 -08:00
}
2024-03-30 09:50:05 -07:00
2024-05-15 15:13:16 -07:00
resp := [ ] GpuInfo { }
for _ , gpu := range cudaGPUs {
resp = append ( resp , gpu . GpuInfo )
}
for _ , gpu := range rocmGPUs {
resp = append ( resp , gpu . GpuInfo )
}
2024-05-29 16:37:34 -07:00
for _ , gpu := range oneapiGPUs {
resp = append ( resp , gpu . GpuInfo )
}
2024-03-30 09:50:05 -07:00
if len ( resp ) == 0 {
2024-05-15 15:13:16 -07:00
resp = append ( resp , cpus [ 0 ] . GpuInfo )
2023-11-29 11:00:37 -08:00
}
return resp
}
2024-04-30 16:42:48 -07:00
func FindGPULibs ( baseLibName string , defaultPatterns [ ] string ) [ ] string {
2024-01-10 14:39:51 -08:00
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var ldPaths [ ] string
gpuLibPaths := [ ] string { }
2024-03-30 09:50:05 -07:00
slog . Debug ( "Searching for GPU library" , "name" , baseLibName )
2024-01-10 14:39:51 -08:00
2024-07-08 12:50:11 -07:00
// Start with our bundled libraries
2024-08-15 14:38:14 -07:00
patterns := [ ] string { filepath . Join ( LibraryDir ( ) , baseLibName ) }
2024-07-08 12:50:11 -07:00
2024-01-10 14:39:51 -08:00
switch runtime . GOOS {
case "windows" :
ldPaths = strings . Split ( os . Getenv ( "PATH" ) , ";" )
case "linux" :
ldPaths = strings . Split ( os . Getenv ( "LD_LIBRARY_PATH" ) , ":" )
default :
return gpuLibPaths
}
2024-07-08 12:50:11 -07:00
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
2024-01-10 14:39:51 -08:00
for _ , ldPath := range ldPaths {
d , err := filepath . Abs ( ldPath )
if err != nil {
continue
}
2024-07-08 12:50:11 -07:00
patterns = append ( patterns , filepath . Join ( d , baseLibName ) )
2024-01-10 14:39:51 -08:00
}
2024-04-30 16:42:48 -07:00
patterns = append ( patterns , defaultPatterns ... )
2024-03-30 09:50:05 -07:00
slog . Debug ( "gpu library search" , "globs" , patterns )
2024-01-10 14:39:51 -08:00
for _ , pattern := range patterns {
2024-05-03 11:55:32 -07:00
// Nvidia PhysX known to return bogus results
if strings . Contains ( pattern , "PhysX" ) {
slog . Debug ( "skipping PhysX cuda library path" , "path" , pattern )
2024-06-13 13:17:19 -07:00
continue
2024-05-03 11:55:32 -07:00
}
2024-01-10 14:39:51 -08:00
// Ignore glob discovery errors
matches , _ := filepath . Glob ( pattern )
for _ , match := range matches {
// Resolve any links so we don't try the same lib multiple times
// and weed out any dups across globs
libPath := match
tmp := match
var err error
for ; err == nil ; tmp , err = os . Readlink ( libPath ) {
if ! filepath . IsAbs ( tmp ) {
tmp = filepath . Join ( filepath . Dir ( libPath ) , tmp )
}
libPath = tmp
}
new := true
for _ , cmp := range gpuLibPaths {
if cmp == libPath {
new = false
break
}
}
if new {
gpuLibPaths = append ( gpuLibPaths , libPath )
}
}
}
2024-03-30 09:50:05 -07:00
slog . Debug ( "discovered GPU libraries" , "paths" , gpuLibPaths )
2024-01-10 14:39:51 -08:00
return gpuLibPaths
}
2024-10-14 16:26:45 -07:00
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
func loadCUDARTMgmt ( cudartLibPaths [ ] string ) ( int , * C . cudart_handle_t , string , error ) {
2024-03-25 11:07:44 -04:00
var resp C . cudart_init_resp_t
2024-01-22 16:03:32 -08:00
resp . ch . verbose = getVerboseState ( )
2024-10-14 16:26:45 -07:00
var err error
2024-03-25 11:07:44 -04:00
for _ , libPath := range cudartLibPaths {
2024-01-10 14:39:51 -08:00
lib := C . CString ( libPath )
defer C . free ( unsafe . Pointer ( lib ) )
2024-03-25 11:07:44 -04:00
C . cudart_init ( lib , & resp )
2024-01-10 14:39:51 -08:00
if resp . err != nil {
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "Unable to load cudart library %s: %s" , libPath , C . GoString ( resp . err ) )
slog . Debug ( err . Error ( ) )
2024-01-10 14:39:51 -08:00
C . free ( unsafe . Pointer ( resp . err ) )
} else {
2024-10-14 16:26:45 -07:00
err = nil
return int ( resp . num_devices ) , & resp . ch , libPath , err
2024-01-10 14:39:51 -08:00
}
}
2024-10-14 16:26:45 -07:00
return 0 , nil , "" , err
2024-01-10 14:39:51 -08:00
}
2024-10-14 16:26:45 -07:00
// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
func loadNVCUDAMgmt ( nvcudaLibPaths [ ] string ) ( int , * C . nvcuda_handle_t , string , error ) {
2024-04-30 16:42:48 -07:00
var resp C . nvcuda_init_resp_t
resp . ch . verbose = getVerboseState ( )
2024-10-14 16:26:45 -07:00
var err error
2024-04-30 16:42:48 -07:00
for _ , libPath := range nvcudaLibPaths {
lib := C . CString ( libPath )
defer C . free ( unsafe . Pointer ( lib ) )
C . nvcuda_init ( lib , & resp )
if resp . err != nil {
2024-07-03 10:30:07 -07:00
// Decide what log level based on the type of error message to help users understand why
switch resp . cudaErr {
case C . CUDA_ERROR_INSUFFICIENT_DRIVER , C . CUDA_ERROR_SYSTEM_DRIVER_MISMATCH :
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s" , libPath )
slog . Warn ( err . Error ( ) )
2024-07-03 10:30:07 -07:00
case C . CUDA_ERROR_NO_DEVICE :
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "no nvidia devices detected by library %s" , libPath )
slog . Info ( err . Error ( ) )
2024-07-03 10:30:07 -07:00
case C . CUDA_ERROR_UNKNOWN :
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information" , libPath , C . GoString ( resp . err ) )
slog . Warn ( err . Error ( ) )
2024-07-03 10:30:07 -07:00
default :
2024-10-14 16:26:45 -07:00
msg := C . GoString ( resp . err )
2024-07-03 10:30:07 -07:00
if strings . Contains ( msg , "wrong ELF class" ) {
slog . Debug ( "skipping 32bit library" , "library" , libPath )
} else {
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "Unable to load cudart library %s: %s" , libPath , C . GoString ( resp . err ) )
slog . Info ( err . Error ( ) )
2024-07-03 10:30:07 -07:00
}
}
2024-04-30 16:42:48 -07:00
C . free ( unsafe . Pointer ( resp . err ) )
} else {
2024-10-14 16:26:45 -07:00
err = nil
return int ( resp . num_devices ) , & resp . ch , libPath , err
2024-04-30 16:42:48 -07:00
}
}
2024-10-14 16:26:45 -07:00
return 0 , nil , "" , err
2024-04-30 16:42:48 -07:00
}
2024-10-14 16:26:45 -07:00
// Bootstrap the management library
// Returns: handle, libPath, error
func loadNVMLMgmt ( nvmlLibPaths [ ] string ) ( * C . nvml_handle_t , string , error ) {
2024-06-03 15:07:50 -07:00
var resp C . nvml_init_resp_t
resp . ch . verbose = getVerboseState ( )
2024-10-14 16:26:45 -07:00
var err error
2024-06-03 15:07:50 -07:00
for _ , libPath := range nvmlLibPaths {
lib := C . CString ( libPath )
defer C . free ( unsafe . Pointer ( lib ) )
C . nvml_init ( lib , & resp )
if resp . err != nil {
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "Unable to load NVML management library %s: %s" , libPath , C . GoString ( resp . err ) )
slog . Info ( err . Error ( ) )
2024-06-03 15:07:50 -07:00
C . free ( unsafe . Pointer ( resp . err ) )
} else {
2024-10-14 16:26:45 -07:00
err = nil
return & resp . ch , libPath , err
2024-06-03 15:07:50 -07:00
}
}
2024-10-14 16:26:45 -07:00
return nil , "" , err
2024-06-03 15:07:50 -07:00
}
2024-10-14 16:26:45 -07:00
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt ( oneapiLibPaths [ ] string ) ( int , * C . oneapi_handle_t , string , error ) {
2024-05-24 11:18:27 +08:00
var resp C . oneapi_init_resp_t
2024-05-29 16:37:34 -07:00
num_devices := 0
2024-05-24 11:18:27 +08:00
resp . oh . verbose = getVerboseState ( )
2024-10-14 16:26:45 -07:00
var err error
2024-05-24 11:18:27 +08:00
for _ , libPath := range oneapiLibPaths {
lib := C . CString ( libPath )
defer C . free ( unsafe . Pointer ( lib ) )
C . oneapi_init ( lib , & resp )
if resp . err != nil {
2024-10-14 16:26:45 -07:00
err = fmt . Errorf ( "Unable to load oneAPI management library %s: %s" , libPath , C . GoString ( resp . err ) )
slog . Debug ( err . Error ( ) )
2024-05-24 11:18:27 +08:00
C . free ( unsafe . Pointer ( resp . err ) )
} else {
2024-10-14 16:26:45 -07:00
err = nil
2024-06-05 12:07:20 -07:00
for i := range resp . oh . num_drivers {
2024-05-29 16:37:34 -07:00
num_devices += int ( C . oneapi_get_device_count ( resp . oh , C . int ( i ) ) )
}
2024-10-14 16:26:45 -07:00
return num_devices , & resp . oh , libPath , err
2024-05-24 11:18:27 +08:00
}
}
2024-10-14 16:26:45 -07:00
return 0 , nil , "" , err
2024-05-24 11:18:27 +08:00
}
2024-01-22 16:03:32 -08:00
func getVerboseState ( ) C . uint16_t {
2024-07-03 16:00:54 -07:00
if envconfig . Debug ( ) {
2024-01-22 16:03:32 -08:00
return C . uint16_t ( 1 )
}
return C . uint16_t ( 0 )
}
2024-03-30 09:50:05 -07:00
// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func ( l GpuInfoList ) GetVisibleDevicesEnv ( ) ( string , string ) {
if len ( l ) == 0 {
return "" , ""
}
switch l [ 0 ] . Library {
case "cuda" :
return cudaGetVisibleDevicesEnv ( l )
case "rocm" :
return rocmGetVisibleDevicesEnv ( l )
2024-05-24 11:18:27 +08:00
case "oneapi" :
return oneapiGetVisibleDevicesEnv ( l )
2024-03-30 09:50:05 -07:00
default :
slog . Debug ( "no filter required for library " + l [ 0 ] . Library )
return "" , ""
}
}
2024-07-08 12:50:11 -07:00
2024-08-15 14:38:14 -07:00
func LibraryDir ( ) string {
2024-07-08 12:50:11 -07:00
// On Windows/linux we bundle the dependencies at the same level as the executable
appExe , err := os . Executable ( )
if err != nil {
slog . Warn ( "failed to lookup executable path" , "error" , err )
}
cwd , err := os . Getwd ( )
if err != nil {
slog . Warn ( "failed to lookup working directory" , "error" , err )
}
// Scan for any of our dependeices, and pick first match
2024-08-27 16:19:00 -07:00
for _ , root := range [ ] string { filepath . Dir ( appExe ) , filepath . Join ( filepath . Dir ( appExe ) , envconfig . LibRelativeToExe ( ) ) , cwd } {
2024-08-14 16:32:57 -07:00
libDep := filepath . Join ( "lib" , "ollama" )
2024-07-08 12:50:11 -07:00
if _ , err := os . Stat ( filepath . Join ( root , libDep ) ) ; err == nil {
return filepath . Join ( root , libDep )
}
// Developer mode, local build
if _ , err := os . Stat ( filepath . Join ( root , runtime . GOOS + "-" + runtime . GOARCH , libDep ) ) ; err == nil {
return filepath . Join ( root , runtime . GOOS + "-" + runtime . GOARCH , libDep )
}
if _ , err := os . Stat ( filepath . Join ( root , "dist" , runtime . GOOS + "-" + runtime . GOARCH , libDep ) ) ; err == nil {
return filepath . Join ( root , "dist" , runtime . GOOS + "-" + runtime . GOARCH , libDep )
}
}
slog . Warn ( "unable to locate gpu dependency libraries" )
return ""
}
2024-10-14 16:26:45 -07:00
// GetSystemInfo runs GetGPUInfo and then reports the CPU entry, the usable
// GPUs, the unsupported GPUs, and the bootstrap errors as strings. When the
// only entry returned by GetGPUInfo is the CPU fallback, the GPU list is
// reported as empty.
func GetSystemInfo() SystemInfo {
	// GetGPUInfo takes gpuMutex itself, so call it before locking here.
	gpus := GetGPUInfo()

	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	discoveryErrors := make([]string, len(bootstrapErrors))
	for i, bootErr := range bootstrapErrors {
		discoveryErrors[i] = bootErr.Error()
	}

	cpuOnly := len(gpus) == 1 && gpus[0].Library == "cpu"
	if cpuOnly {
		gpus = []GpuInfo{}
	}

	info := SystemInfo{
		System:          cpus[0],
		GPUs:            gpus,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: discoveryErrors,
	}
	return info
}