Code shuffle to clean up the llm dir
This commit is contained in:
parent
b5939008a1
commit
77d96da94b
19 changed files with 54 additions and 47 deletions
|
@ -2,7 +2,7 @@
|
||||||
ollama
|
ollama
|
||||||
app
|
app
|
||||||
dist
|
dist
|
||||||
llm/llama.cpp/gguf
|
llm/llama.cpp
|
||||||
.env
|
.env
|
||||||
.cache
|
.cache
|
||||||
test_data
|
test_data
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
set(TARGET ext_server)
|
set(TARGET ext_server)
|
||||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||||
add_library(${TARGET} STATIC ../../../ext_server.cpp)
|
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
|
||||||
target_include_directories(${TARGET} PRIVATE ../../common)
|
target_include_directories(${TARGET} PRIVATE ../../common)
|
||||||
target_include_directories(${TARGET} PRIVATE ../..)
|
target_include_directories(${TARGET} PRIVATE ../..)
|
||||||
target_include_directories(${TARGET} PRIVATE ../../..)
|
target_include_directories(${TARGET} PRIVATE ../../..)
|
4
llm/ext_server/README.md
Normal file
4
llm/ext_server/README.md
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Extern C Server
|
||||||
|
|
||||||
|
This directory contains a thin facade we layer on top of the Llama.cpp server
|
||||||
|
to expose `extern "C"` interfaces to access the functionality through direct API calls in-process.
|
|
@ -1,7 +1,7 @@
|
||||||
package llm
|
package llm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#cgo CFLAGS: -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
|
#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
|
||||||
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
|
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
|
||||||
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
|
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
|
||||||
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
|
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
|
||||||
|
@ -10,17 +10,17 @@ package llm
|
||||||
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
|
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
|
||||||
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
|
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
|
||||||
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libcommon.a
|
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
|
||||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libext_server.a
|
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
|
||||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libllama.a
|
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
|
||||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libggml_static.a
|
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
|
||||||
#cgo linux CFLAGS: -D_GNU_SOURCE
|
#cgo linux CFLAGS: -D_GNU_SOURCE
|
||||||
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
|
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
|
||||||
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
|
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
|
||||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libext_server.a
|
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
|
||||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libcommon.a
|
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
|
||||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libllama.a
|
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
|
||||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libggml_static.a
|
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
|
||||||
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
|
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
|
||||||
#cgo linux windows LDFLAGS: -lpthread
|
#cgo linux windows LDFLAGS: -lpthread
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# common logic across linux and darwin
|
# common logic across linux and darwin
|
||||||
|
|
||||||
init_vars() {
|
init_vars() {
|
||||||
LLAMACPP_DIR=gguf
|
LLAMACPP_DIR=../llama.cpp
|
||||||
PATCHES="0001-Expose-callable-API-for-server.patch"
|
PATCHES="0001-Expose-callable-API-for-server.patch"
|
||||||
CMAKE_DEFS=""
|
CMAKE_DEFS=""
|
||||||
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
|
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
|
||||||
|
@ -19,18 +19,18 @@ git_module_setup() {
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
git submodule init
|
git submodule init
|
||||||
git submodule update --force gguf
|
git submodule update --force ${LLAMACPP_DIR}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
apply_patches() {
|
apply_patches() {
|
||||||
# Wire up our CMakefile
|
# Wire up our CMakefile
|
||||||
if ! grep ollama gguf/examples/server/CMakeLists.txt; then
|
if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
|
||||||
echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt
|
echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
|
||||||
fi
|
fi
|
||||||
# Avoid duplicate main symbols when we link into the cgo binary
|
# Avoid duplicate main symbols when we link into the cgo binary
|
||||||
sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
|
sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
|
||||||
mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
|
mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
|
||||||
}
|
}
|
||||||
|
|
||||||
build() {
|
build() {
|
||||||
|
@ -49,5 +49,5 @@ install() {
|
||||||
|
|
||||||
# Keep the local tree clean after we're done with the build
|
# Keep the local tree clean after we're done with the build
|
||||||
cleanup() {
|
cleanup() {
|
||||||
(cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
|
(cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
|
||||||
}
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# This script is intended to run inside the go generate
|
# This script is intended to run inside the go generate
|
||||||
# working directory must be ../llm/llama.cpp
|
# working directory must be ./llm/generate/
|
||||||
|
|
||||||
# TODO - add hardening to detect missing tools (cmake, etc.)
|
# TODO - add hardening to detect missing tools (cmake, etc.)
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ echo "Starting darwin generate script"
|
||||||
source $(dirname $0)/gen_common.sh
|
source $(dirname $0)/gen_common.sh
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="gguf/build/darwin/metal"
|
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
|
||||||
case "${GOARCH}" in
|
case "${GOARCH}" in
|
||||||
"amd64")
|
"amd64")
|
||||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# This script is intended to run inside the go generate
|
# This script is intended to run inside the go generate
|
||||||
# working directory must be llm/llama.cpp
|
# working directory must be llm/generate/
|
||||||
|
|
||||||
# First we build our default built-in library which will be linked into the CGO
|
# First we build our default built-in library which will be linked into the CGO
|
||||||
# binary as a normal dependency. This default build is CPU based.
|
# binary as a normal dependency. This default build is CPU based.
|
||||||
|
@ -52,7 +52,7 @@ apply_patches
|
||||||
# CPU first for the default library
|
# CPU first for the default library
|
||||||
#
|
#
|
||||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="gguf/build/linux/cpu"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
|
||||||
|
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
|
||||||
echo "CUDA libraries detected - building dynamic CUDA library"
|
echo "CUDA libraries detected - building dynamic CUDA library"
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="gguf/build/linux/cuda"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
|
||||||
CUDA_LIB_DIR=/usr/local/cuda/lib64
|
CUDA_LIB_DIR=/usr/local/cuda/lib64
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
|
||||||
echo "ROCm libraries detected - building dynamic ROCm library"
|
echo "ROCm libraries detected - building dynamic ROCm library"
|
||||||
init_vars
|
init_vars
|
||||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||||
BUILD_DIR="gguf/build/linux/rocm"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
|
@ -3,6 +3,7 @@
|
||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
function init_vars {
|
function init_vars {
|
||||||
|
$script:llamacppDir = "../llama.cpp"
|
||||||
$script:patches = @("0001-Expose-callable-API-for-server.patch")
|
$script:patches = @("0001-Expose-callable-API-for-server.patch")
|
||||||
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
|
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
|
||||||
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
|
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
|
||||||
|
@ -19,25 +20,25 @@ function git_module_setup {
|
||||||
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
|
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
|
||||||
& git submodule init
|
& git submodule init
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
& git submodule update --force gguf
|
& git submodule update --force "${script:llamacppDir}"
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
}
|
}
|
||||||
|
|
||||||
function apply_patches {
|
function apply_patches {
|
||||||
# Wire up our CMakefile
|
# Wire up our CMakefile
|
||||||
if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
|
if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
|
||||||
Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'
|
Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
|
||||||
}
|
}
|
||||||
# Avoid duplicate main symbols when we link into the cgo binary
|
# Avoid duplicate main symbols when we link into the cgo binary
|
||||||
$content = Get-Content -Path "./gguf/examples/server/server.cpp"
|
$content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
|
||||||
$content = $content -replace 'int main\(', 'int __main('
|
$content = $content -replace 'int main\(', 'int __main('
|
||||||
Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content
|
Set-Content -Path "${script:llamacppDir}/examples/server/server.cpp" -Value $content
|
||||||
}
|
}
|
||||||
|
|
||||||
function build {
|
function build {
|
||||||
write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
|
write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
|
||||||
& cmake --version
|
& cmake --version
|
||||||
& cmake -S gguf -B $script:buildDir $script:cmakeDefs
|
& cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
|
write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
|
||||||
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
|
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
|
||||||
|
@ -55,7 +56,7 @@ function install {
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanup {
|
function cleanup {
|
||||||
Set-Location "gguf/examples/server"
|
Set-Location "${script:llamacppDir}/examples/server"
|
||||||
git checkout CMakeLists.txt server.cpp
|
git checkout CMakeLists.txt server.cpp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,20 +65,20 @@ git_module_setup
|
||||||
apply_patches
|
apply_patches
|
||||||
|
|
||||||
# first build CPU based
|
# first build CPU based
|
||||||
$script:buildDir="gguf/build/windows/cpu"
|
$script:buildDir="${script:llamacppDir}/build/windows/cpu"
|
||||||
|
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
|
||||||
# Then build cuda as a dynamically loaded library
|
# Then build cuda as a dynamically loaded library
|
||||||
init_vars
|
init_vars
|
||||||
$script:buildDir="gguf/build/windows/cuda"
|
$script:buildDir="${script:llamacppDir}/build/windows/cuda"
|
||||||
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
|
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
|
||||||
# TODO - actually implement ROCm support on windows
|
# TODO - actually implement ROCm support on windows
|
||||||
$script:buildDir="gguf/build/windows/rocm"
|
$script:buildDir="${script:llamacppDir}/build/windows/rocm"
|
||||||
|
|
||||||
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
||||||
md "${script:buildDir}/lib" -ea 0 > $null
|
md "${script:buildDir}/lib" -ea 0 > $null
|
|
@ -1,3 +1,3 @@
|
||||||
package llm
|
package generate
|
||||||
|
|
||||||
//go:generate sh ./gen_darwin.sh
|
//go:generate sh ./gen_darwin.sh
|
|
@ -1,3 +1,3 @@
|
||||||
package llm
|
package generate
|
||||||
|
|
||||||
//go:generate bash ./gen_linux.sh
|
//go:generate bash ./gen_linux.sh
|
|
@ -1,3 +1,3 @@
|
||||||
package llm
|
package generate
|
||||||
|
|
||||||
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
|
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
|
|
@ -13,7 +13,7 @@ import (
|
||||||
"github.com/jmorganca/ollama/api"
|
"github.com/jmorganca/ollama/api"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed llama.cpp/gguf/ggml-metal.metal
|
//go:embed llama.cpp/ggml-metal.metal
|
||||||
var libEmbed embed.FS
|
var libEmbed embed.FS
|
||||||
|
|
||||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||||
|
@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
|
||||||
}
|
}
|
||||||
|
|
||||||
func nativeInit(workdir string) error {
|
func nativeInit(workdir string) error {
|
||||||
err := extractPayloadFiles(workdir, "llama.cpp/gguf/ggml-metal.metal")
|
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == payloadMissing {
|
if err == payloadMissing {
|
||||||
// TODO perhaps consider this a hard failure on arm macs?
|
// TODO perhaps consider this a hard failure on arm macs?
|
||||||
|
|
|
@ -34,6 +34,8 @@ type shimExtServer struct {
|
||||||
var shimMutex sync.Mutex
|
var shimMutex sync.Mutex
|
||||||
var llm *shimExtServer
|
var llm *shimExtServer
|
||||||
|
|
||||||
|
const pathComponentCount = 6
|
||||||
|
|
||||||
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
|
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
|
||||||
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
|
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
|
||||||
}
|
}
|
||||||
|
@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func nativeInit(workdir string) error {
|
func nativeInit(workdir string) error {
|
||||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/*/*/lib/*")
|
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == payloadMissing {
|
if err == payloadMissing {
|
||||||
log.Printf("%s", payloadMissing)
|
log.Printf("%s", payloadMissing)
|
||||||
|
@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||||
|
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
pathComps := strings.Split(file, "/")
|
pathComps := strings.Split(file, "/")
|
||||||
if len(pathComps) != 7 {
|
if len(pathComps) != pathComponentCount {
|
||||||
log.Printf("unexpected payload components: %v", pathComps)
|
log.Printf("unexpected payload components: %v", pathComps)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// llama.cpp/gguf/build/$OS/$VARIANT/lib/$LIBRARY
|
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
|
||||||
// Include the variant in the path to avoid conflicts between multiple server libs
|
// Include the variant in the path to avoid conflicts between multiple server libs
|
||||||
targetDir := filepath.Join(workDir, pathComps[4])
|
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
|
||||||
srcFile, err := libEmbed.Open(file)
|
srcFile, err := libEmbed.Open(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("read payload %s: %v", file, err)
|
return nil, fmt.Errorf("read payload %s: %v", file, err)
|
||||||
|
|
|
@ -10,7 +10,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed llama.cpp/gguf/build/*/*/lib/*.so
|
//go:embed llama.cpp/build/*/*/lib/*.so
|
||||||
var libEmbed embed.FS
|
var libEmbed embed.FS
|
||||||
|
|
||||||
func updatePath(dir string) {
|
func updatePath(dir string) {
|
||||||
|
|
|
@ -8,7 +8,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed llama.cpp/gguf/build/windows/*/lib/*.dll
|
//go:embed llama.cpp/build/windows/*/lib/*.dll
|
||||||
var libEmbed embed.FS
|
var libEmbed embed.FS
|
||||||
|
|
||||||
func updatePath(dir string) {
|
func updatePath(dir string) {
|
||||||
|
|
Loading…
Reference in a new issue