Release gpu discovery library after use

Leaving the cudart library loaded kept ~30MB of GPU memory
pinned by the main process. This change ensures
we don't hold GPU resources when idle.
This commit is contained in:
Daniel Hiltgen 2024-03-30 15:34:21 -07:00
parent 0a74cb31d5
commit 526d4eb204
5 changed files with 31 additions and 10 deletions

View file

@ -35,7 +35,6 @@ const (
) )
var gpuMutex sync.Mutex var gpuMutex sync.Mutex
var gpuHandles *handles = nil
// With our current CUDA compile flags, older than 5.0 will not work properly // With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0} var CudaComputeMin = [2]C.int{5, 0}
@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{
var CudaTegra string = os.Getenv("JETSON_JETPACK") var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held // Note: gpuMutex must already be held
func initGPUHandles() { func initGPUHandles() *handles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
gpuHandles = &handles{nil, nil} gpuHandles := &handles{nil, nil}
var nvmlMgmtName string var nvmlMgmtName string
var nvmlMgmtPatterns []string var nvmlMgmtPatterns []string
var cudartMgmtName string var cudartMgmtName string
@ -116,7 +115,7 @@ func initGPUHandles() {
} }
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...) cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
default: default:
return return gpuHandles
} }
slog.Info("Detecting GPU type") slog.Info("Detecting GPU type")
@ -126,7 +125,7 @@ func initGPUHandles() {
if cudart != nil { if cudart != nil {
slog.Info("Nvidia GPU detected via cudart") slog.Info("Nvidia GPU detected via cudart")
gpuHandles.cudart = cudart gpuHandles.cudart = cudart
return return gpuHandles
} }
} }
@ -137,10 +136,10 @@ func initGPUHandles() {
if nvml != nil { if nvml != nil {
slog.Info("Nvidia GPU detected via nvidia-ml") slog.Info("Nvidia GPU detected via nvidia-ml")
gpuHandles.nvml = nvml gpuHandles.nvml = nvml
return return gpuHandles
} }
} }
return gpuHandles
} }
func GetGPUInfo() GpuInfo { func GetGPUInfo() GpuInfo {
@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo {
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries // GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock() gpuMutex.Lock()
defer gpuMutex.Unlock() defer gpuMutex.Unlock()
if gpuHandles == nil {
initGPUHandles() gpuHandles := initGPUHandles()
defer func() {
if gpuHandles.nvml != nil {
C.nvml_release(*gpuHandles.nvml)
} }
if gpuHandles.cudart != nil {
C.cudart_release(*gpuHandles.cudart)
}
}()
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX // All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
cpuVariant := GetCPUVariant() cpuVariant := GetCPUVariant()

View file

@ -191,4 +191,10 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
} }
} }
// Unload the cudart library referenced by h.
//
// h is passed by value, so this function cannot reset the caller's copy of
// the handle. After this call the caller must treat its handle as invalid
// and must not pass it to any other cudart_* function.
void cudart_release(cudart_handle_t h) {
  if (h.handle == NULL) {
    // No library handle to release; do not hand NULL to the unload call.
    return;
  }
  LOG(h.verbose, "releasing cudart library\n");
  UNLOAD_LIBRARY(h.handle);
  // h is a by-value copy, so clearing h.handle here would not be visible to
  // the caller; the handle is simply discarded on return.
}
#endif // __APPLE__ #endif // __APPLE__

View file

@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp); void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp); void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc); void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
// Releases the cudart library associated with ch. The handle is passed by
// value, so the caller's copy is not modified and must not be reused afterwards.
void cudart_release(cudart_handle_t ch);
#endif // __GPU_INFO_CUDART_H__ #endif // __GPU_INFO_CUDART_H__
#endif // __APPLE__ #endif // __APPLE__

View file

@ -211,4 +211,11 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
} }
} }
} }
// Unload the nvml library referenced by h.
//
// h is passed by value, so this function cannot reset the caller's copy of
// the handle. After this call the caller must treat its handle as invalid
// and must not pass it to any other nvml_* function.
void nvml_release(nvml_handle_t h) {
  if (h.handle == NULL) {
    // No library handle to release; do not hand NULL to the unload call.
    return;
  }
  LOG(h.verbose, "releasing nvml library\n");
  UNLOAD_LIBRARY(h.handle);
  // h is a by-value copy, so clearing h.handle here would not be visible to
  // the caller; the handle is simply discarded on return.
}
#endif // __APPLE__ #endif // __APPLE__

View file

@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp); void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp); void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc); void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
// Releases the nvml library associated with ch. The handle is passed by
// value, so the caller's copy is not modified and must not be reused afterwards.
void nvml_release(nvml_handle_t ch);
#endif // __GPU_INFO_NVML_H__ #endif // __GPU_INFO_NVML_H__
#endif // __APPLE__ #endif // __APPLE__