Release gpu discovery library after use
Leaving the cudart library loaded kept ~30m of memory pinned in the GPU in the main process. This change ensures we don't hold GPU resources when idle.
This commit is contained in:
parent
0a74cb31d5
commit
526d4eb204
5 changed files with 31 additions and 10 deletions
26
gpu/gpu.go
26
gpu/gpu.go
|
@ -35,7 +35,6 @@ const (
|
|||
)
|
||||
|
||||
var gpuMutex sync.Mutex
|
||||
var gpuHandles *handles = nil
|
||||
|
||||
// With our current CUDA compile flags, older than 5.0 will not work properly
|
||||
var CudaComputeMin = [2]C.int{5, 0}
|
||||
|
@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{
|
|||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initGPUHandles() {
|
||||
func initGPUHandles() *handles {
|
||||
|
||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||
|
||||
gpuHandles = &handles{nil, nil}
|
||||
gpuHandles := &handles{nil, nil}
|
||||
var nvmlMgmtName string
|
||||
var nvmlMgmtPatterns []string
|
||||
var cudartMgmtName string
|
||||
|
@ -116,7 +115,7 @@ func initGPUHandles() {
|
|||
}
|
||||
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
|
||||
default:
|
||||
return
|
||||
return gpuHandles
|
||||
}
|
||||
|
||||
slog.Info("Detecting GPU type")
|
||||
|
@ -126,7 +125,7 @@ func initGPUHandles() {
|
|||
if cudart != nil {
|
||||
slog.Info("Nvidia GPU detected via cudart")
|
||||
gpuHandles.cudart = cudart
|
||||
return
|
||||
return gpuHandles
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -137,10 +136,10 @@ func initGPUHandles() {
|
|||
if nvml != nil {
|
||||
slog.Info("Nvidia GPU detected via nvidia-ml")
|
||||
gpuHandles.nvml = nvml
|
||||
return
|
||||
return gpuHandles
|
||||
}
|
||||
}
|
||||
|
||||
return gpuHandles
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
|
@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo {
|
|||
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
||||
gpuMutex.Lock()
|
||||
defer gpuMutex.Unlock()
|
||||
if gpuHandles == nil {
|
||||
initGPUHandles()
|
||||
}
|
||||
|
||||
gpuHandles := initGPUHandles()
|
||||
defer func() {
|
||||
if gpuHandles.nvml != nil {
|
||||
C.nvml_release(*gpuHandles.nvml)
|
||||
}
|
||||
if gpuHandles.cudart != nil {
|
||||
C.cudart_release(*gpuHandles.cudart)
|
||||
}
|
||||
}()
|
||||
|
||||
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
|
||||
cpuVariant := GetCPUVariant()
|
||||
|
|
|
@ -191,4 +191,10 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
|
|||
}
|
||||
}
|
||||
|
||||
void cudart_release(cudart_handle_t h) {
|
||||
LOG(h.verbose, "releasing cudart library\n");
|
||||
UNLOAD_LIBRARY(h.handle);
|
||||
h.handle = NULL;
|
||||
}
|
||||
|
||||
#endif // __APPLE__
|
|
@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
|
|||
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
|
||||
void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
|
||||
void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
|
||||
void cudart_release(cudart_handle_t ch);
|
||||
|
||||
#endif // __GPU_INFO_CUDART_H__
|
||||
#endif // __APPLE__
|
||||
|
|
|
@ -211,4 +211,11 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void nvml_release(nvml_handle_t h) {
|
||||
LOG(h.verbose, "releasing nvml library\n");
|
||||
UNLOAD_LIBRARY(h.handle);
|
||||
h.handle = NULL;
|
||||
}
|
||||
|
||||
#endif // __APPLE__
|
|
@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
|
|||
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
|
||||
void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
|
||||
void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
|
||||
void nvml_release(nvml_handle_t ch);
|
||||
|
||||
#endif // __GPU_INFO_NVML_H__
|
||||
#endif // __APPLE__
|
Loading…
Reference in a new issue