Merge pull request #1914 from dhiltgen/smarter_cuda_detection

Smarter GPU Management library detection
2024-01-10 15:21:56 -08:00 · 2024-01-10 15:21:56 -08:00 · ac70ab6761
commit ac70ab6761
parent 224fbf2795 3c49c3ab0d
6 changed files with 171 additions and 67 deletions
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@ -13,7 +13,10 @@ import "C"
 import (
 	"fmt"
 	"log"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"sync"
 	"unsafe"
 )
@ -29,31 +32,79 @@ var gpuHandles *handles = nil
 // With our current CUDA compile flags, 5.2 and older will not work properly
 const CudaComputeMajorMin = 6
 // CudaLinuxGlobs holds glob patterns for the possible locations of the
 // nvidia-ml library. It is the pattern list used for CUDA when
 // runtime.GOOS is "linux".
 var CudaLinuxGlobs = []string{
 	"/usr/local/cuda/lib64/libnvidia-ml.so*",
 	"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
 	"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*",
 	"/usr/lib/wsl/lib/libnvidia-ml.so*",
 	"/opt/cuda/lib64/libnvidia-ml.so*",
 	"/usr/lib*/libnvidia-ml.so*",
 	"/usr/local/lib*/libnvidia-ml.so*",
 	"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
 	"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
 }
 // CudaWindowsGlobs holds the pattern used to look for nvml.dll when
 // runtime.GOOS is "windows".
 var CudaWindowsGlobs = []string{
 	"c:\\Windows\\System32\\nvml.dll",
 }
 // RocmLinuxGlobs holds glob patterns for the possible locations of the
 // librocm_smi64 library when runtime.GOOS is "linux".
 var RocmLinuxGlobs = []string{
 	"/opt/rocm*/lib*/librocm_smi64.so*",
 }
 // RocmWindowsGlobs holds the pattern used to look for rocm_smi64.dll when
 // runtime.GOOS is "windows".
 var RocmWindowsGlobs = []string{
 	"c:\\Windows\\System32\\rocm_smi64.dll",
 }
 // initGPUHandles populates the package-level gpuHandles. It picks the
 // management library name and search patterns for the current runtime.GOOS
 // and, in the added ('+') lines of this change, first tries the CUDA
 // libraries found by FindGPULibs via LoadCUDAMgmt and, if none of them
 // yields a handle, does the same for ROCm via LoadROCMMgmt.
 //
 // Note: gpuMutex must already be held
 func initGPUHandles() {
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 	var cudaMgmtName string
 	var cudaMgmtPatterns []string
 	var rocmMgmtName string
 	var rocmMgmtPatterns []string
 	// The globals are copied so the package-level pattern lists are not
 	// handed to FindGPULibs directly.
 	switch runtime.GOOS {
 	case "windows":
 		cudaMgmtName = "nvml.dll"
 		cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
 		copy(cudaMgmtPatterns, CudaWindowsGlobs)
 		rocmMgmtName = "rocm_smi64.dll"
 		rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
 		copy(rocmMgmtPatterns, RocmWindowsGlobs)
 	case "linux":
 		cudaMgmtName = "libnvidia-ml.so"
 		cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
 		copy(cudaMgmtPatterns, CudaLinuxGlobs)
 		rocmMgmtName = "librocm_smi64.so"
 		rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
 		copy(rocmMgmtPatterns, RocmLinuxGlobs)
 	default:
 		// Any other OS: return before gpuHandles is assigned.
 		return
 	}
 	log.Printf("Detecting GPU type")
 	gpuHandles = &handles{nil, nil}
-	var resp C.cuda_init_resp_t
+	cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns)
-	C.cuda_init(&resp)
+	if len(cudaLibPaths) > 0 {
-	if resp.err != nil {
+		cuda := LoadCUDAMgmt(cudaLibPaths)
-		log.Printf("CUDA not detected: %s", C.GoString(resp.err))
+		if cuda != nil {
 		C.free(unsafe.Pointer(resp.err))
 		var resp C.rocm_init_resp_t
 		C.rocm_init(&resp)
 		if resp.err != nil {
 			log.Printf("ROCm not detected: %s", C.GoString(resp.err))
 			C.free(unsafe.Pointer(resp.err))
 		} else {
 			log.Printf("Radeon GPU detected")
 			rocm := resp.rh
 			gpuHandles.rocm = &rocm
 		}
 	} else {
 			log.Printf("Nvidia GPU detected")
-		cuda := resp.ch
+			gpuHandles.cuda = cuda
-		gpuHandles.cuda = &cuda
+			return
 		}
 	}
 	// Probe for the ROCm management library.
 	rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
 	if len(rocmLibPaths) > 0 {
 		rocm := LoadROCMMgmt(rocmLibPaths)
 		if rocm != nil {
 			log.Printf("Radeon GPU detected")
 			gpuHandles.rocm = rocm
 			return
 		}
 	}
 }
@ -143,3 +194,88 @@ func CheckVRAM() (int64, error) {
 	return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
 }
 // FindGPULibs returns the de-duplicated, symlink-resolved paths of every file
 // matching the supplied glob patterns or matching baseLibName+"*" in one of
 // the directories listed in PATH (windows) or LD_LIBRARY_PATH (linux).
 //
 // baseLibName is the library file name without a version suffix, for example
 // "libnvidia-ml.so". patterns are filepath.Glob patterns; they are searched
 // in order, before the environment-derived directories, and the slice is
 // never modified. On any other GOOS an empty slice is returned.
 //
 // Multiple GPU libraries may exist, and some may not work, so every distinct
 // candidate is returned and callers keep trying until they exhaust them.
 func FindGPULibs(baseLibName string, patterns []string) []string {
 	var ldPaths []string
 	gpuLibPaths := []string{}
 	log.Printf("Searching for GPU management library %s", baseLibName)
 	switch runtime.GOOS {
 	case "windows":
 		ldPaths = strings.Split(os.Getenv("PATH"), ";")
 	case "linux":
 		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
 	default:
 		return gpuLibPaths
 	}
 	// Build a private pattern list so appending can never write into spare
 	// capacity of the caller's slice.
 	globs := make([]string, 0, len(patterns)+len(ldPaths))
 	globs = append(globs, patterns...)
 	// Then add whatever we find in the PATH/LD_LIBRARY_PATH
 	for _, ldPath := range ldPaths {
 		if ldPath == "" {
 			// An unset variable or a stray separator yields an empty entry.
 			// filepath.Abs("") is the working directory, which must not be
 			// searched for libraries just because the variable is unset.
 			continue
 		}
 		d, err := filepath.Abs(ldPath)
 		if err != nil {
 			continue
 		}
 		globs = append(globs, filepath.Join(d, baseLibName+"*"))
 	}
 	seen := make(map[string]struct{})
 	for _, pattern := range globs {
 		// Ignore glob discovery errors
 		matches, _ := filepath.Glob(pattern)
 		for _, match := range matches {
 			// Resolve any links so we don't try the same lib multiple times
 			// and weed out any dups across globs
 			libPath, ok := resolveLibLinks(match)
 			if !ok {
 				continue
 			}
 			if _, dup := seen[libPath]; dup {
 				continue
 			}
 			seen[libPath] = struct{}{}
 			gpuLibPaths = append(gpuLibPaths, libPath)
 		}
 	}
 	log.Printf("Discovered GPU libraries: %v", gpuLibPaths)
 	return gpuLibPaths
 }
 // resolveLibLinks follows the chain of symbolic links that starts at path and
 // returns the first entry that os.Readlink cannot read as a link. Relative
 // link targets are interpreted relative to the directory holding the link.
 // It reports false when the chain is longer than maxLinks, which is how a
 // symlink cycle shows up.
 func resolveLibLinks(path string) (string, bool) {
 	const maxLinks = 255
 	for i := 0; i < maxLinks; i++ {
 		target, err := os.Readlink(path)
 		if err != nil {
 			// Not a symlink (or not readable as one): nothing left to follow.
 			return path, true
 		}
 		if !filepath.IsAbs(target) {
 			target = filepath.Join(filepath.Dir(path), target)
 		}
 		path = target
 	}
 	return "", false
 }
 // LoadCUDAMgmt tries each path in cudaLibPaths, in order, with C.cuda_init
 // and returns a pointer to the handle from the first call that reports no
 // error. Each failure is logged and its C error string freed before the next
 // path is tried. It returns nil when no path could be loaded.
 func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
 	var resp C.cuda_init_resp_t
 	for _, libPath := range cudaLibPaths {
 		lib := C.CString(libPath)
 		C.cuda_init(lib, &resp)
 		// Free the C copy of the path as soon as the call returns; a defer
 		// inside the loop would keep every iteration's string allocated
 		// until the function exits.
 		C.free(unsafe.Pointer(lib))
 		if resp.err == nil {
 			return &resp.ch
 		}
 		log.Printf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err))
 		C.free(unsafe.Pointer(resp.err))
 	}
 	return nil
 }
 // LoadROCMMgmt tries each path in rocmLibPaths, in order, with C.rocm_init
 // and returns a pointer to the handle from the first call that reports no
 // error. Each failure is logged and its C error string freed before the next
 // path is tried. It returns nil when no path could be loaded.
 func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
 	var resp C.rocm_init_resp_t
 	for _, libPath := range rocmLibPaths {
 		lib := C.CString(libPath)
 		C.rocm_init(lib, &resp)
 		// Free the C copy of the path as soon as the call returns; a defer
 		// inside the loop would keep every iteration's string allocated
 		// until the function exits.
 		C.free(unsafe.Pointer(lib))
 		if resp.err == nil {
 			return &resp.rh
 		}
 		log.Printf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err))
 		C.free(unsafe.Pointer(resp.err))
 	}
 	return nil
 }
--- a/gpu/gpu_info_cuda.c
+++ b/gpu/gpu_info_cuda.c
@ -4,26 +4,9 @@
 #include <string.h>
 #ifndef _WIN32
 const char *cuda_lib_paths[] = {
    "libnvidia-ml.so",
    "/usr/local/cuda/lib64/libnvidia-ml.so",
    "/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so",
    "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
    "/usr/lib/wsl/lib/libnvidia-ml.so.1",  // TODO Maybe glob?
    NULL,
 };
 #else
 const char *cuda_lib_paths[] = {
    "nvml.dll",
    "",
    NULL,
 };
 #endif
 #define CUDA_LOOKUP_SIZE 6
-void cuda_init(cuda_init_resp_t *resp) {
+void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp) {
  nvmlReturn_t ret;
  resp->err = NULL;
  const int buflen = 256;
@ -42,16 +25,12 @@ void cuda_init(cuda_init_resp_t *resp) {
      {"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
  };
-  for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
+  resp->ch.handle = LOAD_LIBRARY(cuda_lib_path, RTLD_LAZY);
    resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
  }
  if (!resp->ch.handle) {
    // TODO improve error message, as the LOAD_ERR will typically have the
    // final path that was checked, which might be confusing.
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Nvidia GPUs: %s",
-             cuda_lib_paths[0], msg);
+             cuda_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
@ -73,6 +52,8 @@ void cuda_init(cuda_init_resp_t *resp) {
  ret = (*resp->ch.initFn)();
  if (ret != NVML_SUCCESS) {
    UNLOAD_LIBRARY(resp->ch.handle);
    resp->ch.handle = NULL;
    snprintf(buf, buflen, "nvml vram init failure: %d", ret);
    resp->err = strdup(buf);
  }
--- a/gpu/gpu_info_cuda.h
+++ b/gpu/gpu_info_cuda.h
@ -36,7 +36,7 @@ typedef struct cuda_compute_capability {
  int minor;
 } cuda_compute_capability_t;
-void cuda_init(cuda_init_resp_t *resp);
+void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp);
 void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
 void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
--- a/gpu/gpu_info_rocm.c
+++ b/gpu/gpu_info_rocm.c
@ -4,22 +4,7 @@
 #include <string.h>
-#ifndef _WIN32
+void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
 const char *rocm_lib_paths[] = {
    "librocm_smi64.so",
    "/opt/rocm/lib/librocm_smi64.so",
    NULL,
 };
 #else
 // TODO untested
 const char *rocm_lib_paths[] = {
    "rocm_smi64.dll",
    "/opt/rocm/lib/rocm_smi64.dll",
    NULL,
 };
 #endif
 void rocm_init(rocm_init_resp_t *resp) {
  rsmi_status_t ret;
  resp->err = NULL;
  const int buflen = 256;
@ -36,14 +21,12 @@ void rocm_init(rocm_init_resp_t *resp) {
      // { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
  };
-  for (i = 0; rocm_lib_paths[i] != NULL && resp->rh.handle == NULL; i++) {
+  resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
    resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY);
  }
  if (!resp->rh.handle) {
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Radeon GPUs: %s\n",
-             rocm_lib_paths[0], msg);
+             rocm_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
@ -53,6 +36,7 @@ void rocm_init(rocm_init_resp_t *resp) {
    *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
    if (!l[i].p) {
      UNLOAD_LIBRARY(resp->rh.handle);
      resp->rh.handle = NULL;
      char *msg = LOAD_ERR();
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
               msg);
@ -64,6 +48,8 @@ void rocm_init(rocm_init_resp_t *resp) {
  ret = (*resp->rh.initFn)(0);
  if (ret != RSMI_STATUS_SUCCESS) {
    UNLOAD_LIBRARY(resp->rh.handle);
    resp->rh.handle = NULL;
    snprintf(buf, buflen, "rocm vram init failure: %d", ret);
    resp->err = strdup(buf);
  }
@ -83,7 +69,7 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
  int i;
  if (h.handle == NULL) {
-    resp->err = strdup("nvml handle sn't initialized");
+    resp->err = strdup("rocm handle not initialized");
    return;
  }
--- a/gpu/gpu_info_rocm.h
+++ b/gpu/gpu_info_rocm.h
@ -29,7 +29,7 @@ typedef struct rocm_init_resp {
  rocm_handle_t rh;
 } rocm_init_resp_t;
-void rocm_init(rocm_init_resp_t *resp);
+void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp);
 void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
 #endif  // __GPU_INFO_ROCM_H__
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@ -5,9 +5,10 @@ set -eu
 export VERSION=${VERSION:-0.0.0}
 export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
 BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
 mkdir -p dist
-for TARGETARCH in amd64 arm64; do
+for TARGETARCH in ${BUILD_ARCH}; do
    docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
    docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH