enable metal gpu acceleration

ggml-metal.metal must be in the same directory as the ollama binary;
otherwise llama.cpp will not be able to find and load it. (A sketch of
this lookup follows the build steps below.)

1. go generate llama/llama_metal.go
2. go build .
3. ./ollama serve
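
As context for the requirement above, here is a minimal Go sketch (hypothetical, not part of this commit or the repository) of what "same directory as the binary" means in practice: resolve the running executable's directory and check that ggml-metal.metal sits beside it, which is where llama.cpp expects to find the shader at runtime.

// metalcheck.go: hypothetical standalone check, for illustration only.
package main

import (
    "fmt"
    "os"
    "path/filepath"
)

func main() {
    // Resolve the path of the running binary.
    exe, err := os.Executable()
    if err != nil {
        fmt.Fprintln(os.Stderr, "cannot locate executable:", err)
        os.Exit(1)
    }

    // The Metal shader must live next to the executable.
    shader := filepath.Join(filepath.Dir(exe), "ggml-metal.metal")
    if _, err := os.Stat(shader); err != nil {
        fmt.Fprintf(os.Stderr, "Metal shader not found at %s\n", shader)
        os.Exit(1)
    }
    fmt.Println("found Metal shader:", shader)
}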
Michael Yang 2023-07-05 17:45:11 -07:00 committed by Jeffrey Morgan
parent b9fb988112
commit 1b7183c5a1
3 changed files with 15 additions and 15 deletions

llama/CMakeLists.txt

@@ -1,4 +1,6 @@
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.12)
+project(binding)
 include(FetchContent)
 FetchContent_Declare(
@@ -9,20 +11,13 @@ FetchContent_Declare(
 FetchContent_MakeAvailable(llama_cpp)
-if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-set(LLAMA_METAL ON)
-add_compile_definitions(GGML_USE_METAL)
-endif()
-project(binding)
 add_library(binding ${CMAKE_CURRENT_SOURCE_DIR}/binding/binding.cpp ${llama_cpp_SOURCE_DIR}/examples/common.cpp)
 target_compile_features(binding PRIVATE cxx_std_11)
 target_include_directories(binding PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR}/examples)
 target_link_libraries(binding llama ggml_static)
-configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.metal COPYONLY)
-add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libllama.a ${CMAKE_CURRENT_BINARY_DIR})
-add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libggml_static.a ${CMAKE_CURRENT_BINARY_DIR})
+if (LLAMA_METAL)
+configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/../../ggml-metal.metal COPYONLY)
+endif()
+add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:llama> ${CMAKE_CURRENT_BINARY_DIR})
+add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:ggml_static> ${CMAKE_CURRENT_BINARY_DIR})

llama/llama_metal.go Normal file

@@ -0,0 +1,5 @@
+//go:build metal
+
+package llama
+//go:generate cmake -S . -B build --fresh -DLLAMA_METAL=on
+//go:generate cmake --build build

server/routes.go

@@ -22,7 +22,7 @@ func pull(c *gin.Context) {
 func generate(c *gin.Context) {
 // TODO: these should be request parameters
-gpulayers := 0
+gpulayers := 1
 tokens := 512
 threads := runtime.NumCPU()
 // TODO: set prompt from template
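
The TODO above notes that these values should eventually come from the request. Below is a hedged sketch of that direction; the GenerateRequest shape, its field names, and the route path are assumptions for illustration, not the repository's actual API.

// Hypothetical sketch of making gpulayers a request parameter.
package main

import (
    "net/http"
    "runtime"

    "github.com/gin-gonic/gin"
)

// GenerateRequest is an assumed request shape; a nil GPULayers
// means "use the server default".
type GenerateRequest struct {
    Prompt    string `json:"prompt"`
    GPULayers *int   `json:"gpulayers,omitempty"`
}

func generate(c *gin.Context) {
    var req GenerateRequest
    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
    }

    gpulayers := 1 // default from this commit: offload one layer to Metal
    if req.GPULayers != nil {
        gpulayers = *req.GPULayers
    }
    threads := runtime.NumCPU()

    // ... run the model with these settings; echoed back here for brevity.
    c.JSON(http.StatusOK, gin.H{"gpulayers": gpulayers, "threads": threads})
}

func main() {
    r := gin.Default()
    r.POST("/api/generate", generate) // route path is an assumption
    r.Run()                           // listens on :8080 by default
}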