diff --git a/gpu/gpu.go b/gpu/gpu.go index dec3f95e..cf2f3b7f 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -35,7 +35,6 @@ const ( ) var gpuMutex sync.Mutex -var gpuHandles *handles = nil // With our current CUDA compile flags, older than 5.0 will not work properly var CudaComputeMin = [2]C.int{5, 0} @@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{ var CudaTegra string = os.Getenv("JETSON_JETPACK") // Note: gpuMutex must already be held -func initGPUHandles() { +func initGPUHandles() *handles { // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing - gpuHandles = &handles{nil, nil} + gpuHandles := &handles{nil, nil} var nvmlMgmtName string var nvmlMgmtPatterns []string var cudartMgmtName string @@ -116,7 +115,7 @@ func initGPUHandles() { } cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...) default: - return + return gpuHandles } slog.Info("Detecting GPU type") @@ -126,7 +125,7 @@ func initGPUHandles() { if cudart != nil { slog.Info("Nvidia GPU detected via cudart") gpuHandles.cudart = cudart - return + return gpuHandles } } @@ -137,10 +136,10 @@ func initGPUHandles() { if nvml != nil { slog.Info("Nvidia GPU detected via nvidia-ml") gpuHandles.nvml = nvml - return + return gpuHandles } } - + return gpuHandles } func GetGPUInfo() GpuInfo { @@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo { // GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries gpuMutex.Lock() defer gpuMutex.Unlock() - if gpuHandles == nil { - initGPUHandles() - } + + gpuHandles := initGPUHandles() + defer func() { + if gpuHandles.nvml != nil { + C.nvml_release(*gpuHandles.nvml) + } + if gpuHandles.cudart != nil { + C.cudart_release(*gpuHandles.cudart) + } + }() // All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX cpuVariant := GetCPUVariant() diff --git a/gpu/gpu_info_cudart.c b/gpu/gpu_info_cudart.c index ef13f5c0..27cd2342 100644 --- a/gpu/gpu_info_cudart.c 
+++ b/gpu/gpu_info_cudart.c
@@ -191,4 +191,15 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
   }
 }
 
+// Releases the cudart library handle held in h via UNLOAD_LIBRARY.
+// h is passed by value, so the caller's copy is not modified here and must
+// not be used for further cudart calls once this returns.
+void cudart_release(cudart_handle_t h) {
+  if (h.handle == NULL) {
+    return;  // nothing loaded, nothing to unload
+  }
+  LOG(h.verbose, "releasing cudart library\n");
+  UNLOAD_LIBRARY(h.handle);
+}
+
 #endif // __APPLE__
\ No newline at end of file
diff --git a/gpu/gpu_info_cudart.h b/gpu/gpu_info_cudart.h
index 492704a8..eb9336ec 100644
--- a/gpu/gpu_info_cudart.h
+++ b/gpu/gpu_info_cudart.h
@@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
 void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
 void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
 void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
+void cudart_release(cudart_handle_t ch);
 
 #endif // __GPU_INFO_CUDART_H__
 #endif // __APPLE__
diff --git a/gpu/gpu_info_nvml.c b/gpu/gpu_info_nvml.c
index aacf0410..67c80b0f 100644
--- a/gpu/gpu_info_nvml.c
+++ b/gpu/gpu_info_nvml.c
@@ -211,4 +211,16 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
     }
   }
 }
+
+// Releases the nvml library handle held in h via UNLOAD_LIBRARY.
+// h is passed by value, so the caller's copy is not modified here and must
+// not be used for further nvml calls once this returns.
+void nvml_release(nvml_handle_t h) {
+  if (h.handle == NULL) {
+    return;  // nothing loaded, nothing to unload
+  }
+  LOG(h.verbose, "releasing nvml library\n");
+  UNLOAD_LIBRARY(h.handle);
+}
+
 #endif // __APPLE__
\ No newline at end of file
diff --git a/gpu/gpu_info_nvml.h b/gpu/gpu_info_nvml.h
index 819e41fd..bd1d6001 100644
--- a/gpu/gpu_info_nvml.h
+++ b/gpu/gpu_info_nvml.h
@@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
 void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
 void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
 void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
+void nvml_release(nvml_handle_t ch);
 
 #endif // __GPU_INFO_NVML_H__
 #endif // __APPLE__
\ No newline at end of file