Merge pull request #1914 from dhiltgen/smarter_cuda_detection
Smarter GPU Management library detection
This commit is contained in:
commit
ac70ab6761
6 changed files with 171 additions and 67 deletions
174
gpu/gpu.go
174
gpu/gpu.go
|
@ -13,7 +13,10 @@ import "C"
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
@ -29,31 +32,79 @@ var gpuHandles *handles = nil
|
||||||
// With our current CUDA compile flags, 5.2 and older will not work properly
|
// With our current CUDA compile flags, 5.2 and older will not work properly
|
||||||
const CudaComputeMajorMin = 6
|
const CudaComputeMajorMin = 6
|
||||||
|
|
||||||
|
// Possible locations for the nvidia-ml library
|
||||||
|
var CudaLinuxGlobs = []string{
|
||||||
|
"/usr/local/cuda/lib64/libnvidia-ml.so*",
|
||||||
|
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
|
||||||
|
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*",
|
||||||
|
"/usr/lib/wsl/lib/libnvidia-ml.so*",
|
||||||
|
"/opt/cuda/lib64/libnvidia-ml.so*",
|
||||||
|
"/usr/lib*/libnvidia-ml.so*",
|
||||||
|
"/usr/local/lib*/libnvidia-ml.so*",
|
||||||
|
"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
|
||||||
|
"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
|
||||||
|
}
|
||||||
|
|
||||||
|
var CudaWindowsGlobs = []string{
|
||||||
|
"c:\\Windows\\System32\\nvml.dll",
|
||||||
|
}
|
||||||
|
|
||||||
|
var RocmLinuxGlobs = []string{
|
||||||
|
"/opt/rocm*/lib*/librocm_smi64.so*",
|
||||||
|
}
|
||||||
|
|
||||||
|
var RocmWindowsGlobs = []string{
|
||||||
|
"c:\\Windows\\System32\\rocm_smi64.dll",
|
||||||
|
}
|
||||||
|
|
||||||
// Note: gpuMutex must already be held
|
// Note: gpuMutex must already be held
|
||||||
func initGPUHandles() {
|
func initGPUHandles() {
|
||||||
|
|
||||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||||
|
|
||||||
|
var cudaMgmtName string
|
||||||
|
var cudaMgmtPatterns []string
|
||||||
|
var rocmMgmtName string
|
||||||
|
var rocmMgmtPatterns []string
|
||||||
|
switch runtime.GOOS {
|
||||||
|
case "windows":
|
||||||
|
cudaMgmtName = "nvml.dll"
|
||||||
|
cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
|
||||||
|
copy(cudaMgmtPatterns, CudaWindowsGlobs)
|
||||||
|
rocmMgmtName = "rocm_smi64.dll"
|
||||||
|
rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
|
||||||
|
copy(rocmMgmtPatterns, RocmWindowsGlobs)
|
||||||
|
case "linux":
|
||||||
|
cudaMgmtName = "libnvidia-ml.so"
|
||||||
|
cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
|
||||||
|
copy(cudaMgmtPatterns, CudaLinuxGlobs)
|
||||||
|
rocmMgmtName = "librocm_smi64.so"
|
||||||
|
rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
|
||||||
|
copy(rocmMgmtPatterns, RocmLinuxGlobs)
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
log.Printf("Detecting GPU type")
|
log.Printf("Detecting GPU type")
|
||||||
gpuHandles = &handles{nil, nil}
|
gpuHandles = &handles{nil, nil}
|
||||||
var resp C.cuda_init_resp_t
|
cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns)
|
||||||
C.cuda_init(&resp)
|
if len(cudaLibPaths) > 0 {
|
||||||
if resp.err != nil {
|
cuda := LoadCUDAMgmt(cudaLibPaths)
|
||||||
log.Printf("CUDA not detected: %s", C.GoString(resp.err))
|
if cuda != nil {
|
||||||
C.free(unsafe.Pointer(resp.err))
|
|
||||||
|
|
||||||
var resp C.rocm_init_resp_t
|
|
||||||
C.rocm_init(&resp)
|
|
||||||
if resp.err != nil {
|
|
||||||
log.Printf("ROCm not detected: %s", C.GoString(resp.err))
|
|
||||||
C.free(unsafe.Pointer(resp.err))
|
|
||||||
} else {
|
|
||||||
log.Printf("Radeon GPU detected")
|
|
||||||
rocm := resp.rh
|
|
||||||
gpuHandles.rocm = &rocm
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.Printf("Nvidia GPU detected")
|
log.Printf("Nvidia GPU detected")
|
||||||
cuda := resp.ch
|
gpuHandles.cuda = cuda
|
||||||
gpuHandles.cuda = &cuda
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
|
||||||
|
if len(rocmLibPaths) > 0 {
|
||||||
|
rocm := LoadROCMMgmt(rocmLibPaths)
|
||||||
|
if rocm != nil {
|
||||||
|
log.Printf("Radeon GPU detected")
|
||||||
|
gpuHandles.rocm = rocm
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,3 +194,88 @@ func CheckVRAM() (int64, error) {
|
||||||
|
|
||||||
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
|
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func FindGPULibs(baseLibName string, patterns []string) []string {
|
||||||
|
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
|
||||||
|
var ldPaths []string
|
||||||
|
gpuLibPaths := []string{}
|
||||||
|
log.Printf("Searching for GPU management library %s", baseLibName)
|
||||||
|
|
||||||
|
switch runtime.GOOS {
|
||||||
|
case "windows":
|
||||||
|
ldPaths = strings.Split(os.Getenv("PATH"), ";")
|
||||||
|
case "linux":
|
||||||
|
ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
|
||||||
|
default:
|
||||||
|
return gpuLibPaths
|
||||||
|
}
|
||||||
|
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
|
||||||
|
for _, ldPath := range ldPaths {
|
||||||
|
d, err := filepath.Abs(ldPath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
|
||||||
|
}
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
// Ignore glob discovery errors
|
||||||
|
matches, _ := filepath.Glob(pattern)
|
||||||
|
for _, match := range matches {
|
||||||
|
// Resolve any links so we don't try the same lib multiple times
|
||||||
|
// and weed out any dups across globs
|
||||||
|
libPath := match
|
||||||
|
tmp := match
|
||||||
|
var err error
|
||||||
|
for ; err == nil; tmp, err = os.Readlink(libPath) {
|
||||||
|
if !filepath.IsAbs(tmp) {
|
||||||
|
tmp = filepath.Join(filepath.Dir(libPath), tmp)
|
||||||
|
}
|
||||||
|
libPath = tmp
|
||||||
|
}
|
||||||
|
new := true
|
||||||
|
for _, cmp := range gpuLibPaths {
|
||||||
|
if cmp == libPath {
|
||||||
|
new = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if new {
|
||||||
|
gpuLibPaths = append(gpuLibPaths, libPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Printf("Discovered GPU libraries: %v", gpuLibPaths)
|
||||||
|
return gpuLibPaths
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
|
||||||
|
var resp C.cuda_init_resp_t
|
||||||
|
for _, libPath := range cudaLibPaths {
|
||||||
|
lib := C.CString(libPath)
|
||||||
|
defer C.free(unsafe.Pointer(lib))
|
||||||
|
C.cuda_init(lib, &resp)
|
||||||
|
if resp.err != nil {
|
||||||
|
log.Printf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err))
|
||||||
|
C.free(unsafe.Pointer(resp.err))
|
||||||
|
} else {
|
||||||
|
return &resp.ch
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
|
||||||
|
var resp C.rocm_init_resp_t
|
||||||
|
for _, libPath := range rocmLibPaths {
|
||||||
|
lib := C.CString(libPath)
|
||||||
|
defer C.free(unsafe.Pointer(lib))
|
||||||
|
C.rocm_init(lib, &resp)
|
||||||
|
if resp.err != nil {
|
||||||
|
log.Printf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err))
|
||||||
|
C.free(unsafe.Pointer(resp.err))
|
||||||
|
} else {
|
||||||
|
return &resp.rh
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -4,26 +4,9 @@
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
const char *cuda_lib_paths[] = {
|
|
||||||
"libnvidia-ml.so",
|
|
||||||
"/usr/local/cuda/lib64/libnvidia-ml.so",
|
|
||||||
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so",
|
|
||||||
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
|
|
||||||
"/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob?
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
#else
|
|
||||||
const char *cuda_lib_paths[] = {
|
|
||||||
"nvml.dll",
|
|
||||||
"",
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define CUDA_LOOKUP_SIZE 6
|
#define CUDA_LOOKUP_SIZE 6
|
||||||
|
|
||||||
void cuda_init(cuda_init_resp_t *resp) {
|
void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp) {
|
||||||
nvmlReturn_t ret;
|
nvmlReturn_t ret;
|
||||||
resp->err = NULL;
|
resp->err = NULL;
|
||||||
const int buflen = 256;
|
const int buflen = 256;
|
||||||
|
@ -42,16 +25,12 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||||
{"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
|
{"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
resp->ch.handle = LOAD_LIBRARY(cuda_lib_path, RTLD_LAZY);
|
||||||
resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
|
|
||||||
}
|
|
||||||
if (!resp->ch.handle) {
|
if (!resp->ch.handle) {
|
||||||
// TODO improve error message, as the LOAD_ERR will have typically have the
|
|
||||||
// final path that was checked which might be confusing.
|
|
||||||
char *msg = LOAD_ERR();
|
char *msg = LOAD_ERR();
|
||||||
snprintf(buf, buflen,
|
snprintf(buf, buflen,
|
||||||
"Unable to load %s library to query for Nvidia GPUs: %s",
|
"Unable to load %s library to query for Nvidia GPUs: %s",
|
||||||
cuda_lib_paths[0], msg);
|
cuda_lib_path, msg);
|
||||||
free(msg);
|
free(msg);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
return;
|
return;
|
||||||
|
@ -73,6 +52,8 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||||
|
|
||||||
ret = (*resp->ch.initFn)();
|
ret = (*resp->ch.initFn)();
|
||||||
if (ret != NVML_SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
|
UNLOAD_LIBRARY(resp->ch.handle);
|
||||||
|
resp->ch.handle = NULL;
|
||||||
snprintf(buf, buflen, "nvml vram init failure: %d", ret);
|
snprintf(buf, buflen, "nvml vram init failure: %d", ret);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ typedef struct cuda_compute_capability {
|
||||||
int minor;
|
int minor;
|
||||||
} cuda_compute_capability_t;
|
} cuda_compute_capability_t;
|
||||||
|
|
||||||
void cuda_init(cuda_init_resp_t *resp);
|
void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp);
|
||||||
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
||||||
void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
|
void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
|
||||||
|
|
||||||
|
|
|
@ -4,22 +4,7 @@
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifndef _WIN32
|
void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
|
||||||
const char *rocm_lib_paths[] = {
|
|
||||||
"librocm_smi64.so",
|
|
||||||
"/opt/rocm/lib/librocm_smi64.so",
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
#else
|
|
||||||
// TODO untested
|
|
||||||
const char *rocm_lib_paths[] = {
|
|
||||||
"rocm_smi64.dll",
|
|
||||||
"/opt/rocm/lib/rocm_smi64.dll",
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void rocm_init(rocm_init_resp_t *resp) {
|
|
||||||
rsmi_status_t ret;
|
rsmi_status_t ret;
|
||||||
resp->err = NULL;
|
resp->err = NULL;
|
||||||
const int buflen = 256;
|
const int buflen = 256;
|
||||||
|
@ -36,14 +21,12 @@ void rocm_init(rocm_init_resp_t *resp) {
|
||||||
// { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
|
// { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; rocm_lib_paths[i] != NULL && resp->rh.handle == NULL; i++) {
|
resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
|
||||||
resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY);
|
|
||||||
}
|
|
||||||
if (!resp->rh.handle) {
|
if (!resp->rh.handle) {
|
||||||
char *msg = LOAD_ERR();
|
char *msg = LOAD_ERR();
|
||||||
snprintf(buf, buflen,
|
snprintf(buf, buflen,
|
||||||
"Unable to load %s library to query for Radeon GPUs: %s\n",
|
"Unable to load %s library to query for Radeon GPUs: %s\n",
|
||||||
rocm_lib_paths[0], msg);
|
rocm_lib_path, msg);
|
||||||
free(msg);
|
free(msg);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
return;
|
return;
|
||||||
|
@ -53,6 +36,7 @@ void rocm_init(rocm_init_resp_t *resp) {
|
||||||
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
|
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
|
||||||
if (!l[i].p) {
|
if (!l[i].p) {
|
||||||
UNLOAD_LIBRARY(resp->rh.handle);
|
UNLOAD_LIBRARY(resp->rh.handle);
|
||||||
|
resp->rh.handle = NULL;
|
||||||
char *msg = LOAD_ERR();
|
char *msg = LOAD_ERR();
|
||||||
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
||||||
msg);
|
msg);
|
||||||
|
@ -64,6 +48,8 @@ void rocm_init(rocm_init_resp_t *resp) {
|
||||||
|
|
||||||
ret = (*resp->rh.initFn)(0);
|
ret = (*resp->rh.initFn)(0);
|
||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
|
UNLOAD_LIBRARY(resp->rh.handle);
|
||||||
|
resp->rh.handle = NULL;
|
||||||
snprintf(buf, buflen, "rocm vram init failure: %d", ret);
|
snprintf(buf, buflen, "rocm vram init failure: %d", ret);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
}
|
}
|
||||||
|
@ -83,7 +69,7 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (h.handle == NULL) {
|
if (h.handle == NULL) {
|
||||||
resp->err = strdup("nvml handle sn't initialized");
|
resp->err = strdup("rocm handle not initialized");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ typedef struct rocm_init_resp {
|
||||||
rocm_handle_t rh;
|
rocm_handle_t rh;
|
||||||
} rocm_init_resp_t;
|
} rocm_init_resp_t;
|
||||||
|
|
||||||
void rocm_init(rocm_init_resp_t *resp);
|
void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp);
|
||||||
void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
|
void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
|
||||||
|
|
||||||
#endif // __GPU_INFO_ROCM_H__
|
#endif // __GPU_INFO_ROCM_H__
|
||||||
|
|
|
@ -5,9 +5,10 @@ set -eu
|
||||||
export VERSION=${VERSION:-0.0.0}
|
export VERSION=${VERSION:-0.0.0}
|
||||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||||
|
|
||||||
|
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
|
||||||
mkdir -p dist
|
mkdir -p dist
|
||||||
|
|
||||||
for TARGETARCH in amd64 arm64; do
|
for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
||||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||||
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
|
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
|
||||||
|
|
Loading…
Reference in a new issue