diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go
index e4a9456a..14bd2655 100644
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -4,6 +4,8 @@ package gpu
 
 import "C"
 import (
+	"runtime"
+
 	"github.com/jmorganca/ollama/api"
 )
 
@@ -25,8 +27,12 @@ func GetGPUInfo() GpuInfo {
 }
 
 func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
-	// default to enable metal on macOS
-	return 1
+	if runtime.GOARCH == "arm64" {
+		return 1
+	}
+
+	// metal only supported on arm64
+	return 0
 }
 
 func nativeInit() error {
diff --git a/llm/ext_server.go b/llm/ext_server.go
index 5fcd8e92..ab74eb00 100644
--- a/llm/ext_server.go
+++ b/llm/ext_server.go
@@ -7,17 +7,13 @@ package llm
 #cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
 #cgo darwin CFLAGS: -D_DARWIN_C_SOURCE
 #cgo darwin CPPFLAGS: -DGGML_USE_ACCELERATE
-#cgo darwin,arm64 CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
+#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 #cgo darwin LDFLAGS: -lc++ -framework Accelerate
-#cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/common/libcommon.a
-#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/examples/server/libext_server.a
-#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libllama.a
-#cgo darwin,arm64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libggml_static.a
-#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/common/libcommon.a
-#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/examples/server/libext_server.a
-#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/libllama.a
-#cgo darwin,amd64 LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/libggml_static.a
+#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/common/libcommon.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/examples/server/libext_server.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libllama.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libggml_static.a
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
 #cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
diff --git a/llm/llama.cpp/gen_darwin.sh b/llm/llama.cpp/gen_darwin.sh
index f159ceff..1364e9d1 100755
--- a/llm/llama.cpp/gen_darwin.sh
+++ b/llm/llama.cpp/gen_darwin.sh
@@ -9,15 +9,14 @@ set -o pipefail
 echo "Starting darwin generate script"
 source $(dirname $0)/gen_common.sh
 init_vars
-CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 ${CMAKE_DEFS}"
+CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on ${CMAKE_DEFS}"
+BUILD_DIR="gguf/build/metal"
 case "${GOARCH}" in
 "amd64")
-    CMAKE_DEFS="-DLLAMA_METAL=off -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 ${CMAKE_DEFS}"
-    BUILD_DIR="gguf/build/cpu"
+    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 ${CMAKE_DEFS}"
     ;;
 "arm64")
-    CMAKE_DEFS="-DLLAMA_METAL=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
-    BUILD_DIR="gguf/build/metal"
+    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
     ;;
 *)
     echo "GOARCH must be set"
@@ -28,10 +27,4 @@ esac
 
 git_module_setup
 apply_patches
-build
-
-# TODO - improve this to handle test cases that need it to be in "." around the tree
-# Enable local debug/run usecase
-if [ -e "gguf/ggml-metal.metal" ]; then
-    cp gguf/ggml-metal.metal ../../
-fi
+build
\ No newline at end of file
diff --git a/llm/shim_darwin.go b/llm/shim_darwin.go
index 3cefe4c5..f63ce8c8 100644
--- a/llm/shim_darwin.go
+++ b/llm/shim_darwin.go
@@ -9,7 +9,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-//go:embed llama.cpp/gguf/build/*/bin/ggml-metal.metal
+//go:embed llama.cpp/gguf/ggml-metal.metal
 var libEmbed embed.FS
 
 func newRocmShimExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
@@ -18,7 +18,7 @@ func newRocmShimExtServer(model string, adapters, projectors []string, numLayers
 }
 
 func nativeInit(workdir string) error {
-	err := extractLib(workdir, "llama.cpp/gguf/build/*/bin/ggml-metal.metal")
+	err := extractLib(workdir, "llama.cpp/gguf/ggml-metal.metal")
 	if err != nil {
 		if err == payloadMissing {
 			// TODO perhaps consider this a hard failure on arm macs?