Release GPU discovery library after use
Leaving the cudart library loaded kept ~30 MB of GPU memory pinned by the main process. This change ensures we don't hold GPU resources when idle.
This commit is contained in:
parent
0a74cb31d5
commit
526d4eb204
5 changed files with 31 additions and 10 deletions
24
gpu/gpu.go
24
gpu/gpu.go
|
@ -35,7 +35,6 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var gpuMutex sync.Mutex
|
var gpuMutex sync.Mutex
|
||||||
var gpuHandles *handles = nil
|
|
||||||
|
|
||||||
// With our current CUDA compile flags, older than 5.0 will not work properly
|
// With our current CUDA compile flags, older than 5.0 will not work properly
|
||||||
var CudaComputeMin = [2]C.int{5, 0}
|
var CudaComputeMin = [2]C.int{5, 0}
|
||||||
|
@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
|
||||||
// Note: gpuMutex must already be held
|
// Note: gpuMutex must already be held
|
||||||
func initGPUHandles() {
|
func initGPUHandles() *handles {
|
||||||
|
|
||||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||||
|
|
||||||
gpuHandles = &handles{nil, nil}
|
gpuHandles := &handles{nil, nil}
|
||||||
var nvmlMgmtName string
|
var nvmlMgmtName string
|
||||||
var nvmlMgmtPatterns []string
|
var nvmlMgmtPatterns []string
|
||||||
var cudartMgmtName string
|
var cudartMgmtName string
|
||||||
|
@ -116,7 +115,7 @@ func initGPUHandles() {
|
||||||
}
|
}
|
||||||
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
|
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
|
||||||
default:
|
default:
|
||||||
return
|
return gpuHandles
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("Detecting GPU type")
|
slog.Info("Detecting GPU type")
|
||||||
|
@ -126,7 +125,7 @@ func initGPUHandles() {
|
||||||
if cudart != nil {
|
if cudart != nil {
|
||||||
slog.Info("Nvidia GPU detected via cudart")
|
slog.Info("Nvidia GPU detected via cudart")
|
||||||
gpuHandles.cudart = cudart
|
gpuHandles.cudart = cudart
|
||||||
return
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,10 +136,10 @@ func initGPUHandles() {
|
||||||
if nvml != nil {
|
if nvml != nil {
|
||||||
slog.Info("Nvidia GPU detected via nvidia-ml")
|
slog.Info("Nvidia GPU detected via nvidia-ml")
|
||||||
gpuHandles.nvml = nvml
|
gpuHandles.nvml = nvml
|
||||||
return
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return gpuHandles
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetGPUInfo() GpuInfo {
|
func GetGPUInfo() GpuInfo {
|
||||||
|
@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo {
|
||||||
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
||||||
gpuMutex.Lock()
|
gpuMutex.Lock()
|
||||||
defer gpuMutex.Unlock()
|
defer gpuMutex.Unlock()
|
||||||
if gpuHandles == nil {
|
|
||||||
initGPUHandles()
|
gpuHandles := initGPUHandles()
|
||||||
|
defer func() {
|
||||||
|
if gpuHandles.nvml != nil {
|
||||||
|
C.nvml_release(*gpuHandles.nvml)
|
||||||
}
|
}
|
||||||
|
if gpuHandles.cudart != nil {
|
||||||
|
C.cudart_release(*gpuHandles.cudart)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
|
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
|
||||||
cpuVariant := GetCPUVariant()
|
cpuVariant := GetCPUVariant()
|
||||||
|
|
|
@ -191,4 +191,10 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cudart_release unloads the cudart shared library referenced by h.
//
// h: handle whose `handle` member is passed to UNLOAD_LIBRARY and whose
//    `verbose` member is forwarded to LOG.
//
// The handle struct is received by value, so nothing done here can clear the
// caller's copy; the caller must stop using its handle after this returns.
void cudart_release(cudart_handle_t h) {
  LOG(h.verbose, "releasing cudart library\n");
  // Guard against a handle that never had a library stored in it.
  // NOTE(review): UNLOAD_LIBRARY is defined outside this view; presumably it
  // wraps the platform's library-unload call, which may not tolerate NULL.
  if (h.handle != NULL) {
    UNLOAD_LIBRARY(h.handle);
  }
  // The former `h.handle = NULL;` was dropped: it only modified this
  // function's local copy of the struct and had no observable effect.
}
|
||||||
|
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
|
@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
|
||||||
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
|
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
|
||||||
void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
|
void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
|
||||||
void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
|
void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
|
||||||
|
// Releases (unloads) the cudart library referenced by ch. The handle is taken
// by value, so the caller's copy is not modified and must not be used again.
void cudart_release(cudart_handle_t ch);
|
||||||
|
|
||||||
#endif // __GPU_INFO_CUDART_H__
|
#endif // __GPU_INFO_CUDART_H__
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
||||||
|
|
|
@ -211,4 +211,11 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nvml_release unloads the nvml shared library referenced by h.
//
// h: handle whose `handle` member is passed to UNLOAD_LIBRARY and whose
//    `verbose` member is forwarded to LOG.
//
// The handle struct is received by value, so nothing done here can clear the
// caller's copy; the caller must stop using its handle after this returns.
void nvml_release(nvml_handle_t h) {
  LOG(h.verbose, "releasing nvml library\n");
  // Guard against a handle that never had a library stored in it.
  // NOTE(review): UNLOAD_LIBRARY is defined outside this view; presumably it
  // wraps the platform's library-unload call, which may not tolerate NULL.
  if (h.handle != NULL) {
    UNLOAD_LIBRARY(h.handle);
  }
  // The former `h.handle = NULL;` was dropped: it only modified this
  // function's local copy of the struct and had no observable effect.
}
|
||||||
|
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
|
@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
|
||||||
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
|
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
|
||||||
void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
|
void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
|
||||||
void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
|
void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
|
||||||
|
// Releases (unloads) the nvml library referenced by ch. The handle is taken
// by value, so the caller's copy is not modified and must not be used again.
void nvml_release(nvml_handle_t ch);
|
||||||
|
|
||||||
#endif // __GPU_INFO_NVML_H__
|
#endif // __GPU_INFO_NVML_H__
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
Loading…
Reference in a new issue