Release gpu discovery library after use

Leaving the cudart library loaded kept ~30 MB of memory
pinned on the GPU in the main process. This change ensures
we don't hold GPU resources when idle.
This commit is contained in:
Daniel Hiltgen 2024-03-30 15:34:21 -07:00
parent 0a74cb31d5
commit 526d4eb204
5 changed files with 31 additions and 10 deletions

View file

@ -35,7 +35,6 @@ const (
)
var gpuMutex sync.Mutex
var gpuHandles *handles = nil
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held
func initGPUHandles() {
func initGPUHandles() *handles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
gpuHandles = &handles{nil, nil}
gpuHandles := &handles{nil, nil}
var nvmlMgmtName string
var nvmlMgmtPatterns []string
var cudartMgmtName string
@ -116,7 +115,7 @@ func initGPUHandles() {
}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
default:
return
return gpuHandles
}
slog.Info("Detecting GPU type")
@ -126,7 +125,7 @@ func initGPUHandles() {
if cudart != nil {
slog.Info("Nvidia GPU detected via cudart")
gpuHandles.cudart = cudart
return
return gpuHandles
}
}
@ -137,10 +136,10 @@ func initGPUHandles() {
if nvml != nil {
slog.Info("Nvidia GPU detected via nvidia-ml")
gpuHandles.nvml = nvml
return
return gpuHandles
}
}
return gpuHandles
}
func GetGPUInfo() GpuInfo {
@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo {
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock()
defer gpuMutex.Unlock()
if gpuHandles == nil {
initGPUHandles()
}
gpuHandles := initGPUHandles()
defer func() {
if gpuHandles.nvml != nil {
C.nvml_release(*gpuHandles.nvml)
}
if gpuHandles.cudart != nil {
C.cudart_release(*gpuHandles.cudart)
}
}()
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
cpuVariant := GetCPUVariant()

View file

@ -191,4 +191,10 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
}
}
// cudart_release unloads the cudart shared library from the process so we
// stop pinning GPU memory while idle. The caller must not use h afterwards.
void cudart_release(cudart_handle_t h) {
  LOG(h.verbose, "releasing cudart library\n");
  UNLOAD_LIBRARY(h.handle);
  // NOTE: h is passed by value, so assigning h.handle = NULL here would only
  // clear the local copy — it cannot invalidate the caller's handle. The
  // caller is responsible for discarding its handle after this returns.
}
#endif // __APPLE__

View file

@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
void cudart_release(cudart_handle_t ch);
#endif // __GPU_INFO_CUDART_H__
#endif // __APPLE__

View file

@ -211,4 +211,11 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
}
}
}
// nvml_release unloads the nvidia-ml shared library from the process so we
// stop pinning GPU memory while idle. The caller must not use h afterwards.
void nvml_release(nvml_handle_t h) {
  LOG(h.verbose, "releasing nvml library\n");
  UNLOAD_LIBRARY(h.handle);
  // NOTE: h is passed by value, so assigning h.handle = NULL here would only
  // clear the local copy — it cannot invalidate the caller's handle. The
  // caller is responsible for discarding its handle after this returns.
}
#endif // __APPLE__

View file

@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
void nvml_release(nvml_handle_t ch);
#endif // __GPU_INFO_NVML_H__
#endif // __APPLE__