enable metal gpu acceleration

ggml-metal.metal must be in the same directory as the ollama binary;
otherwise llama.cpp will not be able to find and load it. (A sketch of
this lookup follows the build steps below.)

1. go generate llama/llama_metal.go
2. go build .
3. ./ollama serve
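
As context for the requirement above, here is a minimal Go sketch (hypothetical, not part of this commit or the repository) of what "same directory as the binary" means in practice: resolve the running executable's directory and check that ggml-metal.metal sits beside it, which is where llama.cpp expects to find the shader at runtime.

// metalcheck.go: hypothetical standalone check, for illustration only.
package main

import (
    "fmt"
    "os"
    "path/filepath"
)

func main() {
    // Resolve the path of the running binary.
    exe, err := os.Executable()
    if err != nil {
        fmt.Fprintln(os.Stderr, "cannot locate executable:", err)
        os.Exit(1)
    }

    // The Metal shader must live next to the executable.
    shader := filepath.Join(filepath.Dir(exe), "ggml-metal.metal")
    if _, err := os.Stat(shader); err != nil {
        fmt.Fprintf(os.Stderr, "Metal shader not found at %s\n", shader)
        os.Exit(1)
    }
    fmt.Println("found Metal shader:", shader)
}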
Michael Yang 2023-07-05 17:45:11 -07:00 committed by Jeffrey Morgan
parent b9fb988112
commit 1b7183c5a1
3 changed files with 15 additions and 15 deletions

llama/CMakeLists.txt

@@ -1,4 +1,6 @@
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.12)
+project(binding)
 include(FetchContent)
 FetchContent_Declare(
@@ -9,20 +11,13 @@ FetchContent_Declare(
 FetchContent_MakeAvailable(llama_cpp)
-if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-set(LLAMA_METAL ON)
-add_compile_definitions(GGML_USE_METAL)
-endif()
-project(binding)
 add_library(binding ${CMAKE_CURRENT_SOURCE_DIR}/binding/binding.cpp ${llama_cpp_SOURCE_DIR}/examples/common.cpp)
 target_compile_features(binding PRIVATE cxx_std_11)
 target_include_directories(binding PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR}/examples)
 target_link_libraries(binding llama ggml_static)
-configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.metal COPYONLY)
-add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libllama.a ${CMAKE_CURRENT_BINARY_DIR})
-add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libggml_static.a ${CMAKE_CURRENT_BINARY_DIR})
+if (LLAMA_METAL)
+configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/../../ggml-metal.metal COPYONLY)
+endif()
+add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:llama> ${CMAKE_CURRENT_BINARY_DIR})
+add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:ggml_static> ${CMAKE_CURRENT_BINARY_DIR})

llama/llama_metal.go Normal file

@@ -0,0 +1,5 @@
+//go:build metal
+
+package llama
+//go:generate cmake -S . -B build --fresh -DLLAMA_METAL=on
+//go:generate cmake --build build

server/routes.go

@@ -22,7 +22,7 @@ func pull(c *gin.Context) {
 func generate(c *gin.Context) {
 // TODO: these should be request parameters
-gpulayers := 0
+gpulayers := 1
 tokens := 512
 threads := runtime.NumCPU()
 // TODO: set prompt from template
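
The TODO above notes that these values should eventually come from the request. Below is a hedged sketch of that direction; the GenerateRequest shape, its field names, and the route path are assumptions for illustration, not the repository's actual API.

// Hypothetical sketch of making gpulayers a request parameter.
package main

import (
    "net/http"
    "runtime"

    "github.com/gin-gonic/gin"
)

// GenerateRequest is an assumed request shape; a nil GPULayers
// means "use the server default".
type GenerateRequest struct {
    Prompt    string `json:"prompt"`
    GPULayers *int   `json:"gpulayers,omitempty"`
}

func generate(c *gin.Context) {
    var req GenerateRequest
    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
    }

    gpulayers := 1 // default from this commit: offload one layer to Metal
    if req.GPULayers != nil {
        gpulayers = *req.GPULayers
    }
    threads := runtime.NumCPU()

    // ... run the model with these settings; echoed back here for brevity.
    c.JSON(http.StatusOK, gin.H{"gpulayers": gpulayers, "threads": threads})
}

func main() {
    r := gin.Default()
    r.POST("/api/generate", generate) // route path is an assumption
    r.Run()                           // listens on :8080 by default
}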