diff --git a/Dockerfile.build b/Dockerfile.build index ca6b1a29..9ba44398 100644 --- a/Dockerfile.build +++ b/Dockerfile.build @@ -1,7 +1,6 @@ ARG GOLANG_VERSION=1.21.3 ARG CMAKE_VERSION=3.22.1 ARG CUDA_VERSION=11.3.1 -ARG ROCM_VERSION=5.7.1 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 @@ -16,9 +15,11 @@ ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CM RUN tar -zx -C /usr --strip-components 1 +#define ROCM_LOOKUP_SIZE 5 + void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { rsmi_status_t ret; resp->err = NULL; @@ -13,11 +15,12 @@ void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { struct lookup { char *s; void **p; - } l[4] = { + } l[ROCM_LOOKUP_SIZE] = { {"rsmi_init", (void *)&resp->rh.initFn}, {"rsmi_shut_down", (void *)&resp->rh.shutdownFn}, {"rsmi_dev_memory_total_get", (void *)&resp->rh.totalMemFn}, {"rsmi_dev_memory_usage_get", (void *)&resp->rh.usageMemFn}, + {"rsmi_version_get", (void *)&resp->rh.versionGetFn}, // { "rsmi_dev_id_get", (void*)&resp->rh.getHandle }, }; @@ -32,7 +35,7 @@ void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { return; } - for (i = 0; i < 4; i++) { + for (i = 0; i < ROCM_LOOKUP_SIZE; i++) { *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); if (!l[i].p) { UNLOAD_LIBRARY(resp->rh.handle); @@ -103,4 +106,25 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) { return; } +void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) { + const int buflen = 256; + char buf[buflen + 1]; + if (h.handle == NULL) { + resp->str = strdup("rocm handle not initialized"); + resp->status = 1; + return; + } + rsmi_version_t ver; + rsmi_status_t ret; + ret = h.versionGetFn(&ver); + if (ret != RSMI_STATUS_SUCCESS) { + snprintf(buf, buflen, "unexpected response on version lookup %d", ret); + resp->status = 1; + } else { + snprintf(buf, buflen, "%d", ver.major); + resp->status = 0; + } + resp->str = strdup(buf); +} + #endif // __APPLE__ \ No 
newline at end of file diff --git a/gpu/gpu_info_rocm.h b/gpu/gpu_info_rocm.h index 1f74713b..90d9a09f 100644 --- a/gpu/gpu_info_rocm.h +++ b/gpu/gpu_info_rocm.h @@ -15,12 +15,20 @@ typedef enum rsmi_memory_type { RSMI_MEM_TYPE_GTT, } rsmi_memory_type_t; + typedef struct { + uint32_t major; + uint32_t minor; + uint32_t patch; + const char *build; + } rsmi_version_t; + typedef struct rocm_handle { void *handle; rsmi_status_t (*initFn)(uint64_t); rsmi_status_t (*shutdownFn)(void); rsmi_status_t (*totalMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *); rsmi_status_t (*usageMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *); + rsmi_status_t (*versionGetFn) (rsmi_version_t *version); // rsmi_status_t (*getHandle)(uint32_t, uint16_t *); } rocm_handle_t; @@ -29,8 +37,14 @@ typedef struct rocm_init_resp { rocm_handle_t rh; } rocm_init_resp_t; +typedef struct rocm_version_resp { + rsmi_status_t status; + char *str; // Contains version or error string if status != 0 +} rocm_version_resp_t; + void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp); void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp); +void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp); #endif // __GPU_INFO_ROCM_H__ #endif // __APPLE__ \ No newline at end of file diff --git a/gpu/types.go b/gpu/types.go index abc16dbc..24fa4a24 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -11,5 +11,8 @@ type GpuInfo struct { memInfo Library string `json:"library,omitempty"` + // Optional variant to select (e.g. 
versions, cpu feature flags) + Variant string `json:"variant,omitempty"` + // TODO add other useful attributes about the card here for discovery information } diff --git a/llm/dynamic_shim.c b/llm/dynamic_shim.c index c3e74d4a..ca7c372a 100644 --- a/llm/dynamic_shim.c +++ b/llm/dynamic_shim.c @@ -58,7 +58,7 @@ void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s, {"", NULL}, }; - printf("Lazy loading %s library\n", libPath); + printf("loading %s library\n", libPath); s->handle = LOAD_LIBRARY(libPath, RTLD_NOW); if (!s->handle) { err->id = -1; diff --git a/llm/ext_server/README.md b/llm/ext_server/README.md index ac58d9c8..bfb0d4a6 100644 --- a/llm/ext_server/README.md +++ b/llm/ext_server/README.md @@ -1,4 +1,18 @@ # Extern C Server -This directory contains a thin facade we layer on top of the Llama.cpp server -to expose `extern C` interfaces to access the functionality through direct API calls in-process +This directory contains a thin facade we layer on top of the Llama.cpp server to +expose `extern C` interfaces to access the functionality through direct API +calls in-process. The llama.cpp code uses compile time macros to configure GPU +type along with other settings. During the `go generate ./...` execution, the +build will generate one or more copies of the llama.cpp `extern C` server based +on what GPU libraries are detected to support multiple GPU types as well as CPU +only support. The Ollama go build then embeds these different servers to support +different GPUs and settings at runtime. + +If you are making changes to the code in this directory, make sure to disable +caching during your go build to ensure you pick up your changes. A typical +iteration cycle from the top of the source tree looks like: + +``` +go generate ./... && go build -a . 
+``` \ No newline at end of file diff --git a/llm/ext_server_windows.go b/llm/ext_server_windows.go index 39b5f096..9d361cf8 100644 --- a/llm/ext_server_windows.go +++ b/llm/ext_server_windows.go @@ -1,6 +1,8 @@ package llm import ( + "fmt" + "github.com/jmorganca/ollama/api" ) @@ -8,5 +10,6 @@ func newDefaultExtServer(model string, adapters, projectors []string, opts api.O // On windows we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies // This ensures we can update the PATH at runtime to get everything loaded - return newDynamicShimExtServer(AvailableShims["cpu"], model, adapters, projectors, opts) + // This should never happen as we'll always try to load one or more cpu dynamic libraries before hitting default + return nil, fmt.Errorf("no available default llm library on windows") } diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 52081156..99f5b0ac 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -48,23 +48,31 @@ init_vars git_module_setup apply_patches -# -# CPU first for the default library -# -CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" -BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" +if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then + # + # CPU first for the default library + # + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" -build -install + build + install -# Placeholder to keep go embed happy until we start building dynamic CPU lib variants -touch ${BUILD_DIR}/lib/dummy.so + # Placeholder to keep go embed happy until we start building dynamic CPU lib variants + touch ${BUILD_DIR}/lib/dummy.so +else + echo "Skipping CPU generation step as requested" +fi if [ -d /usr/local/cuda/lib64/ ]; then echo "CUDA libraries detected - building dynamic CUDA library" init_vars + CUDA_MAJOR=$(ls /usr/local/cuda/lib64/libcudart.so.* | head -1 | cut -f3 -d. 
|| true) + if [ -n "${CUDA_MAJOR}" ]; then + CUDA_VARIANT=_v${CUDA_MAJOR} + fi CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" - BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" CUDA_LIB_DIR=/usr/local/cuda/lib64 build install @@ -96,9 +104,12 @@ fi if [ -d "${ROCM_PATH}" ]; then echo "ROCm libraries detected - building dynamic ROCm library" + if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then + ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true) + fi init_vars CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" - BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}" build install gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ diff --git a/llm/llm.go b/llm/llm.go index 940c0d93..4031cc28 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -19,8 +19,6 @@ type LLM interface { Close() } -var AvailableShims = map[string]string{} - func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { if _, err := os.Stat(model); err != nil { return nil, err @@ -131,7 +129,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) opts.NumGQA = 0 opts.RopeFrequencyBase = 0.0 opts.RopeFrequencyScale = 0.0 - return newLlmServer(library, model, adapters, projectors, opts) + gpuInfo := gpu.GetGPUInfo() + return newLlmServer(gpuInfo, model, adapters, projectors, opts) } // Give any native cgo implementations an opportunity to initialize @@ -139,15 +138,18 @@ func Init(workdir string) error { return nativeInit(workdir) } -func newLlmServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) { - if _, libPresent := AvailableShims[library]; libPresent && library != 
"default" { - srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, opts) +func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (extServer, error) { + for _, shim := range getShims(gpuInfo) { + if shim == "default" { + break + } + srv, err := newDynamicShimExtServer(shim, model, adapters, projectors, opts) if err == nil { return srv, nil } - log.Printf("Failed to load dynamic library %s - falling back to CPU mode %s", library, err) - // TODO - update some state to indicate we were unable to load the GPU library for future "info" ux + log.Printf("Failed to load dynamic library %s %s", shim, err) } return newDefaultExtServer(model, adapters, projectors, opts) + } diff --git a/llm/shim.go b/llm/shim.go new file mode 100644 index 00000000..bbf995f9 --- /dev/null +++ b/llm/shim.go @@ -0,0 +1,228 @@ +package llm + +import ( + "errors" + "fmt" + "io" + "io/fs" + "log" + "os" + "path/filepath" + "runtime" + "slices" + "strings" + + "github.com/jmorganca/ollama/gpu" +) + +// Shims names may contain an optional variant separated by '_' +// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2" +var availableShims = map[string]string{} + +const pathComponentCount = 6 + +// getShims returns an ordered list of shims to try, starting with the best +func getShims(gpuInfo gpu.GpuInfo) []string { + exactMatch := "" + shims := []string{} + altShims := []string{} + requested := gpuInfo.Library + if gpuInfo.Variant != "" { + requested += "_" + gpuInfo.Variant + } + // First try to find an exact match + for cmp := range availableShims { + if requested == cmp { + exactMatch = cmp + shims = append(shims, availableShims[cmp]) + break + } + } + // Then load alternates and sort the list for consistent load ordering + for cmp := range availableShims { + if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch { + altShims = append(altShims, cmp) + } + } + slices.Sort(altShims) + for _, 
altShim := range altShims { + shims = append(shims, availableShims[altShim]) + } + + // Load up the CPU alternates if not primary requested + if gpuInfo.Library != "cpu" { + altShims = []string{} + for cmp := range availableShims { + if strings.Split(cmp, "_")[0] == "cpu" { + altShims = append(altShims, cmp) + } + } + slices.Sort(altShims) + for _, altShim := range altShims { + shims = append(shims, availableShims[altShim]) + } + } + // default is always last as the lowest common denominator + shims = append(shims, "default") + return shims +} + +func rocmShimPresent() bool { + for shimName := range availableShims { + if strings.HasPrefix(shimName, "rocm") { + return true + } + } + return false +} + +func nativeInit(workdir string) error { + if runtime.GOOS == "darwin" { + err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") + if err != nil { + if err == payloadMissing { + // TODO perhaps consider this a hard failure on arm macs? + log.Printf("ggml-meta.metal payload missing") + return nil + } + return err + } + os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) + return nil + } + + libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") + if err != nil { + if err == payloadMissing { + log.Printf("%s", payloadMissing) + return nil + } + return err + } + for _, lib := range libs { + // The last dir component is the variant name + variant := filepath.Base(filepath.Dir(lib)) + availableShims[variant] = lib + } + + if err := verifyDriverAccess(); err != nil { + return err + } + + // Report which dynamic libraries we have loaded to assist troubleshooting + variants := make([]string, len(availableShims)) + i := 0 + for variant := range availableShims { + variants[i] = variant + i++ + } + log.Printf("Dynamic LLM variants %v", variants) + + return nil +} + +func extractDynamicLibs(workDir, glob string) ([]string, error) { + files, err := fs.Glob(libEmbed, glob) + if err != nil || len(files) == 0 { + return nil, payloadMissing + } + libs := 
[]string{} + + for _, file := range files { + pathComps := strings.Split(file, "/") + if len(pathComps) != pathComponentCount { + log.Printf("unexpected payload components: %v", pathComps) + continue + } + // llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY + // Include the variant in the path to avoid conflicts between multiple server libs + targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) + srcFile, err := libEmbed.Open(file) + if err != nil { + return nil, fmt.Errorf("read payload %s: %v", file, err) + } + defer srcFile.Close() + if err := os.MkdirAll(targetDir, 0o755); err != nil { + return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err) + } + + destFile := filepath.Join(targetDir, filepath.Base(file)) + if strings.Contains(destFile, "server") { + libs = append(libs, destFile) + } + + _, err = os.Stat(destFile) + switch { + case errors.Is(err, os.ErrNotExist): + destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + if err != nil { + return nil, fmt.Errorf("write payload %s: %v", file, err) + } + defer destFile.Close() + if _, err := io.Copy(destFile, srcFile); err != nil { + return nil, fmt.Errorf("copy payload %s: %v", file, err) + } + case err != nil: + return nil, fmt.Errorf("stat payload %s: %v", file, err) + } + } + return libs, nil +} + +func extractPayloadFiles(workDir, glob string) error { + files, err := fs.Glob(libEmbed, glob) + if err != nil || len(files) == 0 { + return payloadMissing + } + + for _, file := range files { + srcFile, err := libEmbed.Open(file) + if err != nil { + return fmt.Errorf("read payload %s: %v", file, err) + } + defer srcFile.Close() + if err := os.MkdirAll(workDir, 0o755); err != nil { + return fmt.Errorf("create payload temp dir %s: %v", workDir, err) + } + + destFile := filepath.Join(workDir, filepath.Base(file)) + _, err = os.Stat(destFile) + switch { + case errors.Is(err, os.ErrNotExist): + destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 
0o755) + if err != nil { + return fmt.Errorf("write payload %s: %v", file, err) + } + defer destFile.Close() + if _, err := io.Copy(destFile, srcFile); err != nil { + return fmt.Errorf("copy payload %s: %v", file, err) + } + case err != nil: + return fmt.Errorf("stat payload %s: %v", file, err) + } + } + return nil +} + +func verifyDriverAccess() error { + if runtime.GOOS != "linux" { + return nil + } + // Only check ROCm access if we have the dynamic lib loaded + if rocmShimPresent() { + // Verify we have permissions - either running as root, or we have group access to the driver + fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666) + if err != nil { + if errors.Is(err, fs.ErrPermission) { + return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add your user account to the render group.") + } else if errors.Is(err, fs.ErrNotExist) { + // expected behavior without a radeon card + return nil + } + + return fmt.Errorf("failed to check permission on /dev/kfd: %w", err) + } + fd.Close() + } + return nil +} diff --git a/llm/shim_darwin.go b/llm/shim_darwin.go index 3baafd1e..9ef8ef96 100644 --- a/llm/shim_darwin.go +++ b/llm/shim_darwin.go @@ -2,13 +2,7 @@ package llm import ( "embed" - "errors" "fmt" - "io" - "io/fs" - "log" - "os" - "path/filepath" "github.com/jmorganca/ollama/api" ) @@ -20,52 +14,3 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin // should never happen... return nil, fmt.Errorf("Dynamic library loading not supported on Mac") } - -func nativeInit(workdir string) error { - err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") - if err != nil { - if err == payloadMissing { - // TODO perhaps consider this a hard failure on arm macs? 
- log.Printf("ggml-meta.metal payload missing") - return nil - } - return err - } - os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) - return nil -} - -func extractPayloadFiles(workDir, glob string) error { - files, err := fs.Glob(libEmbed, glob) - if err != nil || len(files) == 0 { - return payloadMissing - } - - for _, file := range files { - srcFile, err := libEmbed.Open(file) - if err != nil { - return fmt.Errorf("read payload %s: %v", file, err) - } - defer srcFile.Close() - if err := os.MkdirAll(workDir, 0o755); err != nil { - return fmt.Errorf("create payload temp dir %s: %v", workDir, err) - } - - destFile := filepath.Join(workDir, filepath.Base(file)) - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return fmt.Errorf("write payload %s: %v", file, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, srcFile); err != nil { - return fmt.Errorf("copy payload %s: %v", file, err) - } - case err != nil: - return fmt.Errorf("stat payload %s: %v", file, err) - } - } - return nil -} diff --git a/llm/shim_ext_server.go b/llm/shim_ext_server.go index dca7b38d..102f059c 100644 --- a/llm/shim_ext_server.go +++ b/llm/shim_ext_server.go @@ -11,14 +11,9 @@ package llm import "C" import ( "context" - "errors" "fmt" - "io" - "io/fs" "log" - "os" "path/filepath" - "strings" "sync" "unsafe" @@ -34,8 +29,6 @@ type shimExtServer struct { var shimMutex sync.Mutex var llm *shimExtServer -const pathComponentCount = 6 - func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) { C.dynamic_shim_llama_server_init(llm.s, sparams, err) } @@ -112,82 +105,3 @@ func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float6 func (llm *shimExtServer) Close() { close(llm) } - -func nativeInit(workdir string) error { - libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") 
- if err != nil { - if err == payloadMissing { - log.Printf("%s", payloadMissing) - return nil - } - return err - } - for _, lib := range libs { - // The last dir component is the variant name - variant := filepath.Base(filepath.Dir(lib)) - AvailableShims[variant] = lib - } - - if err := verifyDriverAccess(); err != nil { - return err - } - - // Report which dynamic libraries we have loaded to assist troubleshooting - variants := make([]string, len(AvailableShims)) - i := 0 - for variant := range AvailableShims { - variants[i] = variant - i++ - } - log.Printf("Dynamic LLM variants %v", variants) - - return nil -} - -func extractDynamicLibs(workDir, glob string) ([]string, error) { - files, err := fs.Glob(libEmbed, glob) - if err != nil || len(files) == 0 { - return nil, payloadMissing - } - libs := []string{} - - for _, file := range files { - pathComps := strings.Split(file, "/") - if len(pathComps) != pathComponentCount { - log.Printf("unexpected payload components: %v", pathComps) - continue - } - // llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY - // Include the variant in the path to avoid conflicts between multiple server libs - targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) - srcFile, err := libEmbed.Open(file) - if err != nil { - return nil, fmt.Errorf("read payload %s: %v", file, err) - } - defer srcFile.Close() - if err := os.MkdirAll(targetDir, 0o755); err != nil { - return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err) - } - - destFile := filepath.Join(targetDir, filepath.Base(file)) - if strings.Contains(destFile, "server") { - libs = append(libs, destFile) - } - - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return nil, fmt.Errorf("write payload %s: %v", file, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, srcFile); err != nil { - return nil, fmt.Errorf("copy 
payload %s: %v", file, err) - } - case err != nil: - return nil, fmt.Errorf("stat payload %s: %v", file, err) - } - } - return libs, nil -} diff --git a/llm/shim_ext_server_linux.go b/llm/shim_ext_server_linux.go index e0ad5da4..a9a8aca2 100644 --- a/llm/shim_ext_server_linux.go +++ b/llm/shim_ext_server_linux.go @@ -2,9 +2,6 @@ package llm import ( "embed" - "errors" - "fmt" - "io/fs" "log" "os" "strings" @@ -24,23 +21,3 @@ func updatePath(dir string) { log.Printf("Updating PATH to %s", newPath) os.Setenv("PATH", newPath) } - -func verifyDriverAccess() error { - // Only check ROCm access if we have the dynamic lib loaded - if _, rocmPresent := AvailableShims["rocm"]; rocmPresent { - // Verify we have permissions - either running as root, or we have group access to the driver - fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666) - if err != nil { - if errors.Is(err, fs.ErrPermission) { - return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.") - } else if errors.Is(err, fs.ErrNotExist) { - // expected behavior without a radeon card - return nil - } - - return fmt.Errorf("failed to check permission on /dev/kfd: %w", err) - } - fd.Close() - } - return nil -} diff --git a/llm/shim_ext_server_windows.go b/llm/shim_ext_server_windows.go index e95c8afa..c218c6f3 100644 --- a/llm/shim_ext_server_windows.go +++ b/llm/shim_ext_server_windows.go @@ -29,8 +29,3 @@ func updatePath(dir string) { log.Printf("Updating PATH to %s", newPath) os.Setenv("PATH", newPath) } - -func verifyDriverAccess() error { - // TODO if applicable - return nil -} diff --git a/llm/shim_test.go b/llm/shim_test.go new file mode 100644 index 00000000..7a1c5acc --- /dev/null +++ b/llm/shim_test.go @@ -0,0 +1,61 @@ +package llm + +import ( + "testing" + + "github.com/jmorganca/ollama/gpu" + "github.com/stretchr/testify/assert" +) + +func TestGetShims(t *testing.T) { + availableShims = map[string]string{ + "cpu": 
"X_cpu", + } + assert.Equal(t, false, rocmShimPresent()) + res := getShims(gpu.GpuInfo{Library: "cpu"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + availableShims = map[string]string{ + "rocm_v5": "X_rocm_v5", + "rocm_v6": "X_rocm_v6", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmShimPresent()) + res = getShims(gpu.GpuInfo{Library: "rocm"}) + assert.Len(t, res, 4) + assert.Equal(t, availableShims["rocm_v5"], res[0]) + assert.Equal(t, availableShims["rocm_v6"], res[1]) + assert.Equal(t, availableShims["cpu"], res[2]) + assert.Equal(t, "default", res[3]) + + res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 4) + assert.Equal(t, availableShims["rocm_v6"], res[0]) + assert.Equal(t, availableShims["rocm_v5"], res[1]) + assert.Equal(t, availableShims["cpu"], res[2]) + assert.Equal(t, "default", res[3]) + + res = getShims(gpu.GpuInfo{Library: "cuda"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + res = getShims(gpu.GpuInfo{Library: "default"}) + assert.Len(t, res, 2) + assert.Equal(t, availableShims["cpu"], res[0]) + assert.Equal(t, "default", res[1]) + + availableShims = map[string]string{ + "rocm": "X_rocm_v5", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmShimPresent()) + res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 3) + assert.Equal(t, availableShims["rocm"], res[0]) + assert.Equal(t, availableShims["cpu"], res[1]) + assert.Equal(t, "default", res[2]) + +}