Refine GPU discovery to bootstrap once

Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating.
2024-05-15 15:13:16 -07:00 · 2024-05-15 15:13:16 -07:00 · 43ed358f9a
commit 43ed358f9a
parent b32ebb4f29
9 changed files with 383 additions and 149 deletions
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@ -44,8 +44,8 @@ var (
 )
 // Gather GPU information from the amdgpu driver if any supported GPUs are detected
-func AMDGetGPUInfo() []GpuInfo {
+func AMDGetGPUInfo() []RocmGPUInfo {
-	resp := []GpuInfo{}
+	resp := []RocmGPUInfo{}
 	if !AMDDetected() {
 		return resp
 	}
@ -178,7 +178,7 @@ func AMDGetGPUInfo() []GpuInfo {
 		// Shouldn't happen, but just in case...
 		if gpuID < 0 {
 			slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
-			return []GpuInfo{}
+			return []RocmGPUInfo{}
 		}
 		if int(major) < RocmComputeMin {
@ -189,6 +189,7 @@ func AMDGetGPUInfo() []GpuInfo {
 		// Look up the memory for the current node
 		totalMemory := uint64(0)
 		usedMemory := uint64(0)
 		var usedFile string
 		mapping := []struct {
 			id       uint64
 			filename string
@ -255,22 +256,10 @@ func AMDGetGPUInfo() []GpuInfo {
 				break
 			}
-			usedFile := filepath.Join(devDir, DRMUsedMemoryFile)
+			usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
-			usedFp, err := os.Open(usedFile)
+			usedMemory, err = getFreeMemory(usedFile)
 			if err != nil {
-				slog.Debug("failed to open sysfs node", "file", usedFile, "error", err)
+				slog.Debug("failed to update used memory", "error", err)
 				break
 			}
 			defer totalFp.Close()
 			buf, err = io.ReadAll(usedFp)
 			if err != nil {
 				slog.Debug("failed to read sysfs node", "file", usedFile, "error", err)
 				break
 			}
 			usedMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
 			if err != nil {
 				slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
 				break
 			}
 			break
 		}
@ -288,18 +277,21 @@ func AMDGetGPUInfo() []GpuInfo {
 		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
 		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
-		gpuInfo := GpuInfo{
+		gpuInfo := RocmGPUInfo{
-			Library: "rocm",
+			GpuInfo: GpuInfo{
-			memInfo: memInfo{
+				Library: "rocm",
-				TotalMemory: totalMemory,
+				memInfo: memInfo{
-				FreeMemory:  (totalMemory - usedMemory),
+					TotalMemory: totalMemory,
 					FreeMemory:  (totalMemory - usedMemory),
 				},
 				ID:            fmt.Sprintf("%d", gpuID),
 				Name:          name,
 				Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
 				MinimumMemory: rocmMinimumMemory,
 				DriverMajor:   driverMajor,
 				DriverMinor:   driverMinor,
 			},
-			ID:            fmt.Sprintf("%d", gpuID),
+			usedFilepath: usedFile,
 			Name:          name,
 			Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
 			MinimumMemory: rocmMinimumMemory,
 			DriverMajor:   driverMajor,
 			DriverMinor:   driverMinor,
 		}
 		// If the user wants to filter to a subset of devices, filter out if we aren't a match
@ -323,7 +315,7 @@ func AMDGetGPUInfo() []GpuInfo {
 			libDir, err = AMDValidateLibDir()
 			if err != nil {
 				slog.Warn("unable to verify rocm library, will use cpu", "error", err)
-				return []GpuInfo{}
+				return []RocmGPUInfo{}
 			}
 		}
 		gpuInfo.DependencyPath = libDir
@ -334,7 +326,7 @@ func AMDGetGPUInfo() []GpuInfo {
 				supported, err = GetSupportedGFX(libDir)
 				if err != nil {
 					slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
-					return []GpuInfo{}
+					return []RocmGPUInfo{}
 				}
 				slog.Debug("rocm supported GPUs", "types", supported)
 			}
@ -425,3 +417,36 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	}
 	return driverMajor, driverMinor, nil
 }
 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
 	if len(gpus) == 0 {
 		return nil
 	}
 	for i := range gpus {
 		usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
 		if err != nil {
 			return err
 		}
 		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(gpus[i].TotalMemory-usedMemory))
 		gpus[i].FreeMemory = gpus[i].TotalMemory - usedMemory
 	}
 	return nil
 }
 func getFreeMemory(usedFile string) (uint64, error) {
 	usedFp, err := os.Open(usedFile)
 	if err != nil {
 		return 0, fmt.Errorf("failed to open sysfs node %s %w", usedFile, err)
 	}
 	defer usedFp.Close()
 	buf, err := io.ReadAll(usedFp)
 	if err != nil {
 		return 0, fmt.Errorf("failed to read sysfs node %s %w", usedFile, err)
 	}
 	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
 	if err != nil {
 		slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
 		return 0, fmt.Errorf("failed to parse sysfs node %s %w", usedFile, err)
 	}
 	return usedMemory, nil
 }
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@ -24,8 +24,8 @@ var (
 	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
 )
-func AMDGetGPUInfo() []GpuInfo {
+func AMDGetGPUInfo() []RocmGPUInfo {
-	resp := []GpuInfo{}
+	resp := []RocmGPUInfo{}
 	hl, err := NewHipLib()
 	if err != nil {
 		slog.Debug(err.Error())
@ -117,21 +117,24 @@ func AMDGetGPUInfo() []GpuInfo {
 		// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
 		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
 		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
-		gpuInfo := GpuInfo{
+		gpuInfo := RocmGPUInfo{
-			Library: "rocm",
+			GpuInfo: GpuInfo{
-			memInfo: memInfo{
+				Library: "rocm",
-				TotalMemory: totalMemory,
+				memInfo: memInfo{
-				FreeMemory:  freeMemory,
+					TotalMemory: totalMemory,
-			},
+					FreeMemory:  freeMemory,
-			ID:             fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
+				},
-			DependencyPath: libDir,
+				ID:             fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
-			MinimumMemory:  rocmMinimumMemory,
+				DependencyPath: libDir,
-			Name:           name,
+				MinimumMemory:  rocmMinimumMemory,
-			Compute:        gfx,
+				Name:           name,
 				Compute:        gfx,
-			// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
+				// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
-			// DriverMajor:    driverMajor,
+				// DriverMajor:    driverMajor,
-			// DriverMinor:    driverMinor,
+				// DriverMinor:    driverMinor,
 			},
 			index: i,
 		}
 		resp = append(resp, gpuInfo)
@ -159,3 +162,30 @@ func AMDValidateLibDir() (string, error) {
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm")
 	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
 	if len(gpus) == 0 {
 		return nil
 	}
 	hl, err := NewHipLib()
 	if err != nil {
 		slog.Debug(err.Error())
 		return nil
 	}
 	defer hl.Release()
 	for i := range gpus {
 		err := hl.HipSetDevice(gpus[i].index)
 		if err != nil {
 			return err
 		}
 		freeMemory, _, err := hl.HipMemGetInfo()
 		if err != nil {
 			slog.Warn("get mem info", "id", i, "error", err)
 			continue
 		}
 		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
 		gpus[i].FreeMemory = freeMemory
 	}
 	return nil
 }
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@ -1,21 +1,20 @@
 package gpu
 import (
 	"log/slog"
 	"golang.org/x/sys/cpu"
 )
 func GetCPUVariant() string {
 	return getCPUCapability().ToVariant()
 }
 func getCPUCapability() CPUCapability {
 	if cpu.X86.HasAVX2 {
-		slog.Debug("CPU has AVX2")
+		return CPUCapabilityAVX2
 		return "avx2"
 	}
 	if cpu.X86.HasAVX {
-		slog.Debug("CPU has AVX")
+		return CPUCapabilityAVX
 		return "avx"
 	}
 	slog.Debug("CPU does not have vector extensions")
 	// else LCD
-	return ""
+	return CPUCapabilityBase
 }
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@ -21,8 +21,8 @@ import (
 	"sync"
 	"unsafe"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )
 type handles struct {
@ -37,7 +37,18 @@ const (
 	rocmMinimumMemory = 457 * format.MebiByte
 )
-var gpuMutex sync.Mutex
+var (
 	gpuMutex      sync.Mutex
 	bootstrapped  bool
 	cpuCapability CPUCapability
 	cpus          []CPUInfo
 	cudaGPUs      []CudaGPUInfo
 	nvcudaLibPath string
 	cudartLibPath string
 	oneapiLibPath string
 	rocmGPUs      []RocmGPUInfo
 	oneapiGPUs    []OneapiGPUInfo
 )
 // With our current CUDA compile flags, older than 5.0 will not work properly
 var CudaComputeMin = [2]C.int{5, 0}
@ -96,11 +107,22 @@ var OneapiLinuxGlobs = []string{
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
 // Note: gpuMutex must already be held
-func initGPUHandles() *handles {
+func initCudaHandles() *handles {
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 	gpuHandles := &handles{}
 	// Short Circuit if we already know which library to use
 	if nvcudaLibPath != "" {
 		gpuHandles.deviceCount, gpuHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
 		return gpuHandles
 	}
 	if cudartLibPath != "" {
 		gpuHandles.deviceCount, gpuHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
 		return gpuHandles
 	}
 	slog.Debug("searching for GPU discovery libraries for NVIDIA")
 	var cudartMgmtName string
 	var cudartMgmtPatterns []string
 	var nvcudaMgmtName string
@ -136,7 +158,6 @@ func initGPUHandles() *handles {
 		return gpuHandles
 	}
 	slog.Debug("Detecting GPUs")
 	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
 	if len(nvcudaLibPaths) > 0 {
 		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
@ -144,6 +165,7 @@ func initGPUHandles() *handles {
 			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
 			gpuHandles.nvcuda = nvcuda
 			gpuHandles.deviceCount = deviceCount
 			nvcudaLibPath = libPath
 			return gpuHandles
 		}
 	}
@ -155,6 +177,7 @@ func initGPUHandles() *handles {
 			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
 			gpuHandles.cudart = cudart
 			gpuHandles.deviceCount = deviceCount
 			cudartLibPath = libPath
 			return gpuHandles
 		}
 	}
@ -166,6 +189,7 @@ func initGPUHandles() *handles {
 			slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
 			gpuHandles.oneapi = oneapi
 			gpuHandles.deviceCount = deviceCount
 			oneapiLibPath = libPath
 			return gpuHandles
 		}
 	}
@ -178,9 +202,12 @@ func GetGPUInfo() GpuInfoList {
 	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
 	gpuMutex.Lock()
 	defer gpuMutex.Unlock()
-
+	needRefresh := true
-	gpuHandles := initGPUHandles()
+	var gpuHandles *handles
 	defer func() {
 		if gpuHandles == nil {
 			return
 		}
 		if gpuHandles.cudart != nil {
 			C.cudart_release(*gpuHandles.cudart)
 		}
@ -189,97 +216,156 @@ func GetGPUInfo() GpuInfoList {
 		}
 	}()
-	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
+	if !bootstrapped {
-	cpuVariant := GetCPUVariant()
+		slog.Debug("Detecting GPUs")
-	if cpuVariant == "" && runtime.GOARCH == "amd64" {
+		needRefresh = false
-		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
+		cpuCapability = getCPUCapability()
-	}
+		var memInfo C.mem_info_t
 	// On windows we bundle the nvidia library one level above the runner dir
 	depPath := ""
 	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
 		depPath = filepath.Dir(envconfig.RunnersDir)
 	}
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 	// NVIDIA first
 	for i := range gpuHandles.deviceCount {
 		// TODO once we support CPU compilation variants of GPU libraries refine this...
 		if cpuVariant == "" && runtime.GOARCH == "amd64" {
 			continue
 		}
 		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
 			gpuInfo := GpuInfo{
 				Library: "cuda",
 			}
 			var driverMajor int
 			var driverMinor int
 			if gpuHandles.cudart != nil {
 				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
 			} else {
 				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
 				driverMajor = int(gpuHandles.nvcuda.driver_major)
 				driverMinor = int(gpuHandles.nvcuda.driver_minor)
 			}
 			if memInfo.err != nil {
 				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 				C.free(unsafe.Pointer(memInfo.err))
 				continue
 			}
 			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
 				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
 				continue
 			}
 			gpuInfo.TotalMemory = uint64(memInfo.total)
 			gpuInfo.FreeMemory = uint64(memInfo.free)
 			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
 			gpuInfo.DependencyPath = depPath
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 			gpuInfo.DriverMajor = driverMajor
 			gpuInfo.DriverMinor = driverMinor
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
 		}
 		if gpuHandles.oneapi != nil {
 			gpuInfo := GpuInfo{
 				Library: "oneapi",
 			}
 			C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
 			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
 			memInfo.free = C.uint64_t(totalFreeMem)
 			gpuInfo.TotalMemory = uint64(memInfo.total)
 			gpuInfo.FreeMemory = uint64(memInfo.free)
 			gpuInfo.ID = strconv.Itoa(i)
 			resp = append(resp, gpuInfo)
 		}
 	}
 	// Then AMD
 	resp = append(resp, AMDGetGPUInfo()...)
 	if len(resp) == 0 {
 		C.cpu_check_ram(&memInfo)
 		if memInfo.err != nil {
 			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
-			return resp
+			return []GpuInfo{}
 		}
-		gpuInfo := GpuInfo{
+		cpuInfo := CPUInfo{
-			Library: "cpu",
+			GpuInfo: GpuInfo{
-			Variant: cpuVariant,
+				Library: "cpu",
 				Variant: cpuCapability.ToVariant(),
 			},
 		}
-		gpuInfo.TotalMemory = uint64(memInfo.total)
+		cpuInfo.TotalMemory = uint64(memInfo.total)
-		gpuInfo.FreeMemory = uint64(memInfo.free)
+		cpuInfo.FreeMemory = uint64(memInfo.free)
-		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+		cpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 		cpus = []CPUInfo{cpuInfo}
-		resp = append(resp, gpuInfo)
+		// Fallback to CPU mode if we're lacking required vector extensions on x86
 		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
 			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
 			bootstrapped = true
 			// No need to do any GPU discovery, since we can't run on them
 			return GpuInfoList{cpus[0].GpuInfo}
 		}
 		// TODO - implement
 		// TODO refine the discovery to only gather total memory
 		// On windows we bundle the nvidia library one level above the runner dir
 		depPath := ""
 		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
 			depPath = filepath.Dir(envconfig.RunnersDir)
 		}
 		// Load ALL libraries
 		gpuHandles = initCudaHandles()
 		// TODO needs a refactoring pass to init oneapi handles
 		// NVIDIA
 		for i := range gpuHandles.deviceCount {
 			if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
 				gpuInfo := CudaGPUInfo{
 					GpuInfo: GpuInfo{
 						Library: "cuda",
 					},
 					index: i,
 				}
 				var driverMajor int
 				var driverMinor int
 				if gpuHandles.cudart != nil {
 					C.cudart_bootstrap(*gpuHandles.cudart, C.int(i), &memInfo)
 				} else {
 					C.nvcuda_bootstrap(*gpuHandles.nvcuda, C.int(i), &memInfo)
 					driverMajor = int(gpuHandles.nvcuda.driver_major)
 					driverMinor = int(gpuHandles.nvcuda.driver_minor)
 				}
 				if memInfo.err != nil {
 					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 					C.free(unsafe.Pointer(memInfo.err))
 					continue
 				}
 				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
 					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
 					continue
 				}
 				gpuInfo.TotalMemory = uint64(memInfo.total)
 				gpuInfo.FreeMemory = uint64(memInfo.free)
 				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
 				gpuInfo.MinimumMemory = cudaMinimumMemory
 				gpuInfo.DependencyPath = depPath
 				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 				gpuInfo.DriverMajor = int(driverMajor)
 				gpuInfo.DriverMinor = int(driverMinor)
 				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 				cudaGPUs = append(cudaGPUs, gpuInfo)
 			}
 			if gpuHandles.oneapi != nil {
 				gpuInfo := OneapiGPUInfo{
 					GpuInfo: GpuInfo{
 						Library: "oneapi",
 					},
 					index: i,
 				}
 				// TODO - split bootstrapping from updating free memory
 				C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
 				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
 				memInfo.free = C.uint64_t(totalFreeMem)
 				gpuInfo.TotalMemory = uint64(memInfo.total)
 				gpuInfo.FreeMemory = uint64(memInfo.free)
 				gpuInfo.ID = strconv.Itoa(i)
 				oneapiGPUs = append(oneapiGPUs, gpuInfo)
 			}
 		}
 		rocmGPUs = AMDGetGPUInfo()
 		bootstrapped = true
 	}
 	// For detected GPUs, load library if not loaded
 	// Refresh free memory usage
 	if needRefresh {
 		// TODO - CPU system memory tracking/refresh
 		var memInfo C.mem_info_t
 		if gpuHandles == nil && len(cudaGPUs) > 0 {
 			gpuHandles = initCudaHandles()
 		}
 		for i, gpu := range cudaGPUs {
 			if gpuHandles.cudart != nil {
 				C.cudart_bootstrap(*gpuHandles.cudart, C.int(gpu.index), &memInfo)
 			} else {
 				C.nvcuda_get_free(*gpuHandles.nvcuda, C.int(gpu.index), &memInfo.free)
 			}
 			if memInfo.err != nil {
 				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 				C.free(unsafe.Pointer(memInfo.err))
 				continue
 			}
 			if memInfo.free == 0 {
 				slog.Warn("error looking up nvidia GPU memory")
 				continue
 			}
 			slog.Debug("updating cuda free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(uint64(memInfo.free)))
 			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
 		}
 		err := RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
 		if err != nil {
 			slog.Debug("problem refreshing ROCm free memory", "error", err)
 		}
 	}
 	resp := []GpuInfo{}
 	for _, gpu := range cudaGPUs {
 		resp = append(resp, gpu.GpuInfo)
 	}
 	for _, gpu := range rocmGPUs {
 		resp = append(resp, gpu.GpuInfo)
 	}
 	if len(resp) == 0 {
 		resp = append(resp, cpus[0].GpuInfo)
 	}
 	return resp
 }
--- a/gpu/gpu_info_cudart.c
+++ b/gpu/gpu_info_cudart.c
@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
 }
-void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
+void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
  resp->err = NULL;
  cudartMemory_t memInfo = {0,0,0};
  cudartReturn_t ret;
--- a/gpu/gpu_info_cudart.h
+++ b/gpu/gpu_info_cudart.h
@ -140,7 +140,8 @@ typedef struct cudart_init_resp {
 } cudart_init_resp_t;
 void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
-void cudart_check_vram(cudart_handle_t ch, int device_id, mem_info_t *resp);
+void cudart_bootstrap(cudart_handle_t ch, int device_id, mem_info_t *resp);
 // TODO - if we keep this library longer term, add cudart_get_free
 void cudart_release(cudart_handle_t ch);
 #endif  // __GPU_INFO_CUDART_H__
--- a/gpu/gpu_info_nvcuda.c
+++ b/gpu/gpu_info_nvcuda.c
@ -96,7 +96,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
 }
 const int buflen = 256;
-void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
+void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
  resp->err = NULL;
  nvcudaMemory_t memInfo = {0,0};
  CUresult ret;
@ -168,7 +168,7 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
  // To get memory we have to set (and release) a context
  ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
  if (ret != CUDA_SUCCESS) {
-    snprintf(buf, buflen, "nvcuda failed to get primary device context %d", ret);
+    snprintf(buf, buflen, "nvcuda failed to get device context %d", ret);
    resp->err = strdup(buf);
    return;
  }
@ -193,7 +193,42 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
  ret = (*h.cuCtxDestroy)(ctx);
  if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda failed to release primary device context %d", ret);
+    LOG(1, "nvcuda failed to release device context %d", ret);
  }
 }
 void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free) {
  CUresult ret;
  CUcontext ctx = NULL;
  CUdevice device = -1;
  *free = 0;
  uint64_t total = 0;
  ret = (*h.cuDeviceGet)(&device, i);
  if (ret != CUDA_SUCCESS) {
    LOG(1, "nvcuda device failed to initialize");
    return;
  }
  // To get memory we have to set (and release) a context
  ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
  if (ret != CUDA_SUCCESS) {
    LOG(1, "nvcuda failed to get device context %d", ret);
    return;
  }
  ret = (*h.cuMemGetInfo_v2)(free, &total);
  if (ret != CUDA_SUCCESS) {
    LOG(1, "nvcuda device memory info lookup failure %d", ret);
    // Best effort on failure...
    (*h.cuCtxDestroy)(ctx);
    return;
  }
  ret = (*h.cuCtxDestroy)(ctx);
  if (ret != CUDA_SUCCESS) {
    LOG(1, "nvcuda failed to release device context %d", ret);
  }
 }
--- a/gpu/gpu_info_nvcuda.h
+++ b/gpu/gpu_info_nvcuda.h
@ -67,7 +67,8 @@ typedef struct nvcuda_init_resp {
 } nvcuda_init_resp_t;
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
-void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
+void nvcuda_bootstrap(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
 void nvcuda_get_free(nvcuda_handle_t ch,  int device_id, uint64_t *free);
 void nvcuda_release(nvcuda_handle_t ch);
 #endif  // __GPU_INFO_NVCUDA_H__
--- a/gpu/types.go
+++ b/gpu/types.go
@ -38,6 +38,29 @@ type GpuInfo struct {
 	// TODO other performance capability info to help in scheduling decisions
 }
 type CPUInfo struct {
 	GpuInfo
 }
 type CudaGPUInfo struct {
 	GpuInfo
 	index int // device index
 }
 type CudaGPUInfoList []CudaGPUInfo
 type RocmGPUInfo struct {
 	GpuInfo
 	usedFilepath string // linux
 	index        int    // device index on windows
 }
 type RocmGPUInfoList []RocmGPUInfo
 type OneapiGPUInfo struct {
 	GpuInfo
 	index int // device index
 }
 type OneapiGPUInfoList []OneapiGPUInfo
 type GpuInfoList []GpuInfo
 // Split up the set of gpu info's by Library and variant
@ -86,3 +109,37 @@ type ByFreeMemory []GpuInfo
 func (a ByFreeMemory) Len() int           { return len(a) }
 func (a ByFreeMemory) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
 func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
 type CPUCapability uint32
 // Override at build time when building base GPU runners
 var GPURunnerCPUCapability = CPUCapabilityAVX
 const (
 	CPUCapabilityBase CPUCapability = iota
 	CPUCapabilityAVX
 	CPUCapabilityAVX2
 	// TODO AVX512
 )
 func (c CPUCapability) ToString() string {
 	switch c {
 	case CPUCapabilityAVX:
 		return "AVX"
 	case CPUCapabilityAVX2:
 		return "AVX2"
 	default:
 		return "no vector extensions"
 	}
 }
 func (c CPUCapability) ToVariant() string {
 	switch c {
 	case CPUCapabilityAVX:
 		return "avx"
 	case CPUCapabilityAVX2:
 		return "avx2"
 	default:
 		return ""
 	}
 }