diff --git a/Dockerfile.build b/Dockerfile.build index ca6b1a29..9ba44398 100644 --- a/Dockerfile.build +++ b/Dockerfile.build @@ -1,7 +1,6 @@ ARG GOLANG_VERSION=1.21.3 ARG CMAKE_VERSION=3.22.1 ARG CUDA_VERSION=11.3.1 -ARG ROCM_VERSION=5.7.1 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 @@ -16,9 +15,11 @@ ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CM RUN tar -zx -C /usr --strip-components 1 +#define ROCM_LOOKUP_SIZE 5 + void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { rsmi_status_t ret; resp->err = NULL; @@ -13,11 +15,12 @@ void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { struct lookup { char *s; void **p; - } l[4] = { + } l[ROCM_LOOKUP_SIZE] = { {"rsmi_init", (void *)&resp->rh.initFn}, {"rsmi_shut_down", (void *)&resp->rh.shutdownFn}, {"rsmi_dev_memory_total_get", (void *)&resp->rh.totalMemFn}, {"rsmi_dev_memory_usage_get", (void *)&resp->rh.usageMemFn}, + {"rsmi_version_get", (void *)&resp->rh.versionGetFn}, // { "rsmi_dev_id_get", (void*)&resp->rh.getHandle }, }; @@ -32,7 +35,7 @@ void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { return; } - for (i = 0; i < 4; i++) { + for (i = 0; i < ROCM_LOOKUP_SIZE; i++) { *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); if (!l[i].p) { UNLOAD_LIBRARY(resp->rh.handle); @@ -103,4 +106,25 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) { return; } +void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) { + const int buflen = 256; + char buf[buflen + 1]; + if (h.handle == NULL) { + resp->str = strdup("rocm handle not initialized"); + resp->status = 1; + return; + } + rsmi_version_t ver; + rsmi_status_t ret; + ret = h.versionGetFn(&ver); + if (ret != RSMI_STATUS_SUCCESS) { + snprintf(buf, buflen, "unexpected response on version lookup %d", ret); + resp->status = 1; + } else { + snprintf(buf, buflen, "%d", ver.major); + resp->status = 0; + } + resp->str = strdup(buf); +} + #endif // __APPLE__ \ No 
newline at end of file diff --git a/gpu/gpu_info_rocm.h b/gpu/gpu_info_rocm.h index 1f74713b..90d9a09f 100644 --- a/gpu/gpu_info_rocm.h +++ b/gpu/gpu_info_rocm.h @@ -15,12 +15,20 @@ typedef enum rsmi_memory_type { RSMI_MEM_TYPE_GTT, } rsmi_memory_type_t; + typedef struct { + uint32_t major; + uint32_t minor; + uint32_t patch; + const char *build; + } rsmi_version_t; + typedef struct rocm_handle { void *handle; rsmi_status_t (*initFn)(uint64_t); rsmi_status_t (*shutdownFn)(void); rsmi_status_t (*totalMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *); rsmi_status_t (*usageMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *); + rsmi_status_t (*versionGetFn) (rsmi_version_t *version); // rsmi_status_t (*getHandle)(uint32_t, uint16_t *); } rocm_handle_t; @@ -29,8 +37,14 @@ typedef struct rocm_init_resp { rocm_handle_t rh; } rocm_init_resp_t; +typedef struct rocm_version_resp { + rsmi_status_t status; + char *str; // Contains version or error string if status != 0 +} rocm_version_resp_t; + void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp); void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp); +void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp); #endif // __GPU_INFO_ROCM_H__ #endif // __APPLE__ \ No newline at end of file diff --git a/gpu/types.go b/gpu/types.go index abc16dbc..24fa4a24 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -11,5 +11,8 @@ type GpuInfo struct { memInfo Library string `json:"library,omitempty"` + // Optional variant to select (e.g. 
versions, cpu feature flags) + Variant string `json:"variant,omitempty"` + // TODO add other useful attributes about the card here for discovery information } diff --git a/llm/dynamic_shim.c b/llm/dynamic_shim.c index c3e74d4a..ca7c372a 100644 --- a/llm/dynamic_shim.c +++ b/llm/dynamic_shim.c @@ -58,7 +58,7 @@ void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s, {"", NULL}, }; - printf("Lazy loading %s library\n", libPath); + printf("loading %s library\n", libPath); s->handle = LOAD_LIBRARY(libPath, RTLD_NOW); if (!s->handle) { err->id = -1; diff --git a/llm/ext_server/README.md b/llm/ext_server/README.md index ac58d9c8..bfb0d4a6 100644 --- a/llm/ext_server/README.md +++ b/llm/ext_server/README.md @@ -1,4 +1,18 @@ # Extern C Server -This directory contains a thin facade we layer on top of the Llama.cpp server -to expose `extern C` interfaces to access the functionality through direct API calls in-process +This directory contains a thin facade we layer on top of the Llama.cpp server to +expose `extern C` interfaces to access the functionality through direct API +calls in-process. The llama.cpp code uses compile time macros to configure GPU +type along with other settings. During the `go generate ./...` execution, the +build will generate one or more copies of the llama.cpp `extern C` server based +on what GPU libraries are detected to support multiple GPU types as well as CPU +only support. The Ollama go build then embeds these different servers to support +different GPUs and settings at runtime. + +If you are making changes to the code in this directory, make sure to disable +caching during your go build to ensure you pick up your changes. A typical +iteration cycle from the top of the source tree looks like: + +``` +go generate ./... && go build -a . 
+``` \ No newline at end of file diff --git a/llm/ext_server_windows.go b/llm/ext_server_windows.go index 39b5f096..9d361cf8 100644 --- a/llm/ext_server_windows.go +++ b/llm/ext_server_windows.go @@ -1,6 +1,8 @@ package llm import ( + "fmt" + "github.com/jmorganca/ollama/api" ) @@ -8,5 +10,6 @@ func newDefaultExtServer(model string, adapters, projectors []string, opts api.O // On windows we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies // This ensures we can update the PATH at runtime to get everything loaded - return newDynamicShimExtServer(AvailableShims["cpu"], model, adapters, projectors, opts) + // This should never happen as we'll always try to load one or more cpu dynamic libraries before hitting default + return nil, fmt.Errorf("no available default llm library on windows") } diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 52081156..99f5b0ac 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -48,23 +48,31 @@ init_vars git_module_setup apply_patches -# -# CPU first for the default library -# -CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" -BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" +if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then + # + # CPU first for the default library + # + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" -build -install + build + install -# Placeholder to keep go embed happy until we start building dynamic CPU lib variants -touch ${BUILD_DIR}/lib/dummy.so + # Placeholder to keep go embed happy until we start building dynamic CPU lib variants + touch ${BUILD_DIR}/lib/dummy.so +else + echo "Skipping CPU generation step as requested" +fi if [ -d /usr/local/cuda/lib64/ ]; then echo "CUDA libraries detected - building dynamic CUDA library" init_vars + CUDA_MAJOR=$(ls /usr/local/cuda/lib64/libcudart.so.* | head -1 | cut -f3 -d. 
|| true) + if [ -n "${CUDA_MAJOR}" ]; then + CUDA_VARIANT=_v${CUDA_MAJOR} + fi CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" - BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" CUDA_LIB_DIR=/usr/local/cuda/lib64 build install @@ -96,9 +104,12 @@ fi if [ -d "${ROCM_PATH}" ]; then echo "ROCm libraries detected - building dynamic ROCm library" + if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then + ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true) + fi init_vars CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" - BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}" build install gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ diff --git a/llm/llm.go b/llm/llm.go index 940c0d93..4031cc28 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -19,8 +19,6 @@ type LLM interface { Close() } -var AvailableShims = map[string]string{} - func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { if _, err := os.Stat(model); err != nil { return nil, err @@ -131,7 +129,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) opts.NumGQA = 0 opts.RopeFrequencyBase = 0.0 opts.RopeFrequencyScale = 0.0 - return newLlmServer(library, model, adapters, projectors, opts) + gpuInfo := gpu.GetGPUInfo() + return newLlmServer(gpuInfo, model, adapters, projectors, opts) } // Give any native cgo implementations an opportunity to initialize @@ -139,15 +138,18 @@ func Init(workdir string) error { return nativeInit(workdir) } -func newLlmServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) { - if _, libPresent := AvailableShims[library]; libPresent && library != 
"default" { - srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, opts) +func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (extServer, error) { + for _, shim := range getShims(gpuInfo) { + if shim == "default" { + break + } + srv, err := newDynamicShimExtServer(shim, model, adapters, projectors, opts) if err == nil { return srv, nil } - log.Printf("Failed to load dynamic library %s - falling back to CPU mode %s", library, err) - // TODO - update some state to indicate we were unable to load the GPU library for future "info" ux + log.Printf("Failed to load dynamic library %s %s", shim, err) } return newDefaultExtServer(model, adapters, projectors, opts) + } diff --git a/llm/shim.go b/llm/shim.go new file mode 100644 index 00000000..bbf995f9 --- /dev/null +++ b/llm/shim.go @@ -0,0 +1,228 @@ +package llm + +import ( + "errors" + "fmt" + "io" + "io/fs" + "log" + "os" + "path/filepath" + "runtime" + "slices" + "strings" + + "github.com/jmorganca/ollama/gpu" +) + +// Shims names may contain an optional variant separated by '_' +// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2" +var availableShims = map[string]string{} + +const pathComponentCount = 6 + +// getShims returns an ordered list of shims to try, starting with the best +func getShims(gpuInfo gpu.GpuInfo) []string { + exactMatch := "" + shims := []string{} + altShims := []string{} + requested := gpuInfo.Library + if gpuInfo.Variant != "" { + requested += "_" + gpuInfo.Variant + } + // First try to find an exact match + for cmp := range availableShims { + if requested == cmp { + exactMatch = cmp + shims = append(shims, availableShims[cmp]) + break + } + } + // Then load alternates and sort the list for consistent load ordering + for cmp := range availableShims { + if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch { + altShims = append(altShims, cmp) + } + } + slices.Sort(altShims) + for _, 
altShim := range altShims { + shims = append(shims, availableShims[altShim]) + } + + // Load up the CPU alternates if not primary requested + if gpuInfo.Library != "cpu" { + altShims = []string{} + for cmp := range availableShims { + if strings.Split(cmp, "_")[0] == "cpu" { + altShims = append(altShims, cmp) + } + } + slices.Sort(altShims) + for _, altShim := range altShims { + shims = append(shims, availableShims[altShim]) + } + } + // default is always last as the lowest common denominator + shims = append(shims, "default") + return shims +} + +func rocmShimPresent() bool { + for shimName := range availableShims { + if strings.HasPrefix(shimName, "rocm") { + return true + } + } + return false +} + +func nativeInit(workdir string) error { + if runtime.GOOS == "darwin" { + err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") + if err != nil { + if err == payloadMissing { + // TODO perhaps consider this a hard failure on arm macs? + log.Printf("ggml-meta.metal payload missing") + return nil + } + return err + } + os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) + return nil + } + + libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") + if err != nil { + if err == payloadMissing { + log.Printf("%s", payloadMissing) + return nil + } + return err + } + for _, lib := range libs { + // The last dir component is the variant name + variant := filepath.Base(filepath.Dir(lib)) + availableShims[variant] = lib + } + + if err := verifyDriverAccess(); err != nil { + return err + } + + // Report which dynamic libraries we have loaded to assist troubleshooting + variants := make([]string, len(availableShims)) + i := 0 + for variant := range availableShims { + variants[i] = variant + i++ + } + log.Printf("Dynamic LLM variants %v", variants) + + return nil +} + +func extractDynamicLibs(workDir, glob string) ([]string, error) { + files, err := fs.Glob(libEmbed, glob) + if err != nil || len(files) == 0 { + return nil, payloadMissing + } + libs := 
[]string{} + + for _, file := range files { + pathComps := strings.Split(file, "/") + if len(pathComps) != pathComponentCount { + log.Printf("unexpected payload components: %v", pathComps) + continue + } + // llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY + // Include the variant in the path to avoid conflicts between multiple server libs + targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) + srcFile, err := libEmbed.Open(file) + if err != nil { + return nil, fmt.Errorf("read payload %s: %v", file, err) + } + defer srcFile.Close() + if err := os.MkdirAll(targetDir, 0o755); err != nil { + return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err) + } + + destFile := filepath.Join(targetDir, filepath.Base(file)) + if strings.Contains(destFile, "server") { + libs = append(libs, destFile) + } + + _, err = os.Stat(destFile) + switch { + case errors.Is(err, os.ErrNotExist): + destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + if err != nil { + return nil, fmt.Errorf("write payload %s: %v", file, err) + } + defer destFile.Close() + if _, err := io.Copy(destFile, srcFile); err != nil { + return nil, fmt.Errorf("copy payload %s: %v", file, err) + } + case err != nil: + return nil, fmt.Errorf("stat payload %s: %v", file, err) + } + } + return libs, nil +} + +func extractPayloadFiles(workDir, glob string) error { + files, err := fs.Glob(libEmbed, glob) + if err != nil || len(files) == 0 { + return payloadMissing + } + + for _, file := range files { + srcFile, err := libEmbed.Open(file) + if err != nil { + return fmt.Errorf("read payload %s: %v", file, err) + } + defer srcFile.Close() + if err := os.MkdirAll(workDir, 0o755); err != nil { + return fmt.Errorf("create payload temp dir %s: %v", workDir, err) + } + + destFile := filepath.Join(workDir, filepath.Base(file)) + _, err = os.Stat(destFile) + switch { + case errors.Is(err, os.ErrNotExist): + destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 
0o755) + if err != nil { + return fmt.Errorf("write payload %s: %v", file, err) + } + defer destFile.Close() + if _, err := io.Copy(destFile, srcFile); err != nil { + return fmt.Errorf("copy payload %s: %v", file, err) + } + case err != nil: + return fmt.Errorf("stat payload %s: %v", file, err) + } + } + return nil +} + +func verifyDriverAccess() error { + if runtime.GOOS != "linux" { + return nil + } + // Only check ROCm access if we have the dynamic lib loaded + if rocmShimPresent() { + // Verify we have permissions - either running as root, or we have group access to the driver + fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666) + if err != nil { + if errors.Is(err, fs.ErrPermission) { + return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add your user account to the render group.") + } else if errors.Is(err, fs.ErrNotExist) { + // expected behavior without a radeon card + return nil + } + + return fmt.Errorf("failed to check permission on /dev/kfd: %w", err) + } + fd.Close() + } + return nil +} diff --git a/llm/shim_darwin.go b/llm/shim_darwin.go index 3baafd1e..9ef8ef96 100644 --- a/llm/shim_darwin.go +++ b/llm/shim_darwin.go @@ -2,13 +2,7 @@ package llm import ( "embed" - "errors" "fmt" - "io" - "io/fs" - "log" - "os" - "path/filepath" "github.com/jmorganca/ollama/api" ) @@ -20,52 +14,3 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin // should never happen... return nil, fmt.Errorf("Dynamic library loading not supported on Mac") } - -func nativeInit(workdir string) error { - err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") - if err != nil { - if err == payloadMissing { - // TODO perhaps consider this a hard failure on arm macs? 
- log.Printf("ggml-meta.metal payload missing") - return nil - } - return err - } - os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) - return nil -} - -func extractPayloadFiles(workDir, glob string) error { - files, err := fs.Glob(libEmbed, glob) - if err != nil || len(files) == 0 { - return payloadMissing - } - - for _, file := range files { - srcFile, err := libEmbed.Open(file) - if err != nil { - return fmt.Errorf("read payload %s: %v", file, err) - } - defer srcFile.Close() - if err := os.MkdirAll(workDir, 0o755); err != nil { - return fmt.Errorf("create payload temp dir %s: %v", workDir, err) - } - - destFile := filepath.Join(workDir, filepath.Base(file)) - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return fmt.Errorf("write payload %s: %v", file, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, srcFile); err != nil { - return fmt.Errorf("copy payload %s: %v", file, err) - } - case err != nil: - return fmt.Errorf("stat payload %s: %v", file, err) - } - } - return nil -} diff --git a/llm/shim_ext_server.go b/llm/shim_ext_server.go index dca7b38d..102f059c 100644 --- a/llm/shim_ext_server.go +++ b/llm/shim_ext_server.go @@ -11,14 +11,9 @@ package llm import "C" import ( "context" - "errors" "fmt" - "io" - "io/fs" "log" - "os" "path/filepath" - "strings" "sync" "unsafe" @@ -34,8 +29,6 @@ type shimExtServer struct { var shimMutex sync.Mutex var llm *shimExtServer -const pathComponentCount = 6 - func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) { C.dynamic_shim_llama_server_init(llm.s, sparams, err) } @@ -112,82 +105,3 @@ func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float6 func (llm *shimExtServer) Close() { close(llm) } - -func nativeInit(workdir string) error { - libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") 
- if err != nil { - if err == payloadMissing { - log.Printf("%s", payloadMissing) - return nil - } - return err - } - for _, lib := range libs { - // The last dir component is the variant name - variant := filepath.Base(filepath.Dir(lib)) - AvailableShims[variant] = lib - } - - if err := verifyDriverAccess(); err != nil { - return err - } - - // Report which dynamic libraries we have loaded to assist troubleshooting - variants := make([]string, len(AvailableShims)) - i := 0 - for variant := range AvailableShims { - variants[i] = variant - i++ - } - log.Printf("Dynamic LLM variants %v", variants) - - return nil -} - -func extractDynamicLibs(workDir, glob string) ([]string, error) { - files, err := fs.Glob(libEmbed, glob) - if err != nil || len(files) == 0 { - return nil, payloadMissing - } - libs := []string{} - - for _, file := range files { - pathComps := strings.Split(file, "/") - if len(pathComps) != pathComponentCount { - log.Printf("unexpected payload components: %v", pathComps) - continue - } - // llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY - // Include the variant in the path to avoid conflicts between multiple server libs - targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) - srcFile, err := libEmbed.Open(file) - if err != nil { - return nil, fmt.Errorf("read payload %s: %v", file, err) - } - defer srcFile.Close() - if err := os.MkdirAll(targetDir, 0o755); err != nil { - return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err) - } - - destFile := filepath.Join(targetDir, filepath.Base(file)) - if strings.Contains(destFile, "server") { - libs = append(libs, destFile) - } - - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return nil, fmt.Errorf("write payload %s: %v", file, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, srcFile); err != nil { - return nil, fmt.Errorf("copy 
payload %s: %v", file, err) - } - case err != nil: - return nil, fmt.Errorf("stat payload %s: %v", file, err) - } - } - return libs, nil -} diff --git a/llm/shim_ext_server_linux.go b/llm/shim_ext_server_linux.go index e0ad5da4..a9a8aca2 100644 --- a/llm/shim_ext_server_linux.go +++ b/llm/shim_ext_server_linux.go @@ -2,9 +2,6 @@ package llm import ( "embed" - "errors" - "fmt" - "io/fs" "log" "os" "strings" @@ -24,23 +21,3 @@ func updatePath(dir string) { log.Printf("Updating PATH to %s", newPath) os.Setenv("PATH", newPath) } - -func verifyDriverAccess() error { - // Only check ROCm access if we have the dynamic lib loaded - if _, rocmPresent := AvailableShims["rocm"]; rocmPresent { - // Verify we have permissions - either running as root, or we have group access to the driver - fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666) - if err != nil { - if errors.Is(err, fs.ErrPermission) { - return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.") - } else if errors.Is(err, fs.ErrNotExist) { - // expected behavior without a radeon card - return nil - } - - return fmt.Errorf("failed to check permission on /dev/kfd: %w", err) - } - fd.Close() - } - return nil -} diff --git a/llm/shim_ext_server_windows.go b/llm/shim_ext_server_windows.go index e95c8afa..c218c6f3 100644 --- a/llm/shim_ext_server_windows.go +++ b/llm/shim_ext_server_windows.go @@ -29,8 +29,3 @@ func updatePath(dir string) { log.Printf("Updating PATH to %s", newPath) os.Setenv("PATH", newPath) } - -func verifyDriverAccess() error { - // TODO if applicable - return nil -} diff --git a/llm/shim_test.go b/llm/shim_test.go new file mode 100644 index 00000000..7a1c5acc --- /dev/null +++ b/llm/shim_test.go @@ -0,0 +1,61 @@ +package llm + +import ( + "testing" + + "github.com/jmorganca/ollama/gpu" + "github.com/stretchr/testify/assert" +) + +func TestGetShims(t *testing.T) { + availableShims = map[string]string{ + "cpu": 
"X_cpu", + } + assert.Equal(t, false, rocmShimPresent()) + res := getShims(gpu.GpuInfo{Library: "cpu"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + availableShims = map[string]string{ + "rocm_v5": "X_rocm_v5", + "rocm_v6": "X_rocm_v6", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmShimPresent()) + res = getShims(gpu.GpuInfo{Library: "rocm"}) + assert.Len(t, res, 4) + assert.Equal(t, availableShims["rocm_v5"], res[0]) + assert.Equal(t, availableShims["rocm_v6"], res[1]) + assert.Equal(t, availableShims["cpu"], res[2]) + assert.Equal(t, "default", res[3]) + + res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 4) + assert.Equal(t, availableShims["rocm_v6"], res[0]) + assert.Equal(t, availableShims["rocm_v5"], res[1]) + assert.Equal(t, availableShims["cpu"], res[2]) + assert.Equal(t, "default", res[3]) + + res = getShims(gpu.GpuInfo{Library: "cuda"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + res = getShims(gpu.GpuInfo{Library: "default"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + availableShims = map[string]string{ + "rocm": "X_rocm_v5", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmShimPresent()) + res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 3) + assert.Equal(t, availableShims["rocm"], res[0]) + assert.Equal(t, availableShims["cpu"], res[1]) + assert.Equal(t, "default", res[2]) + +}