ollama/llm/shim_ext_server.go

//go:build !darwin

package llm

/*

#include <stdlib.h>
#include "dynamic_shim.h"

*/
import "C"
import (
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"unsafe"

	"github.com/jmorganca/ollama/api"
)

// shimExtServer pairs a dynamically loaded llama server with the options it
// was created with. Instances are built by newDynamicShimExtServer, which
// fills s from C.dynamic_shim_init and hands the value to newExtServer.
//
// s is the C-side server handle passed as the first argument to every
// C.dynamic_shim_llama_server_* call; options holds the api.Options supplied
// by the caller of newDynamicShimExtServer.
type shimExtServer struct {
	s       C.struct_dynamic_llama_server
	options api.Options
}

// pathComponentCount is the number of "/"-separated components an embedded
// payload path must have: llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY.
const pathComponentCount = 6

var (
	// shimMutex is held for the duration of newDynamicShimExtServer.
	// Note: current implementation does not support concurrent instantiations
	shimMutex sync.Mutex

	// llm is the most recently created shim server; it is assigned by
	// newDynamicShimExtServer while shimMutex is held.
	llm *shimExtServer
)

// llama_server_init forwards sparams and err to the C entry point
// dynamic_shim_llama_server_init, supplying this instance's server handle
// (llm.s). Nothing is returned to Go; err is handed to the C side unchanged
// (presumably the C code reports failures through it — confirm in
// dynamic_shim.h). The receiver name shadows the package-level llm variable.
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_init(llm.s, sparams, err)
}
// llama_server_start calls the C entry point dynamic_shim_llama_server_start
// with this instance's server handle (llm.s). It takes no arguments and
// returns nothing.
func (llm *shimExtServer) llama_server_start() {
	C.dynamic_shim_llama_server_start(llm.s)
}
// llama_server_stop calls the C entry point dynamic_shim_llama_server_stop
// with this instance's server handle (llm.s). It takes no arguments and
// returns nothing.
func (llm *shimExtServer) llama_server_stop() {
	C.dynamic_shim_llama_server_stop(llm.s)
}

// llama_server_completion forwards the C string json_req and the response
// struct resp to dynamic_shim_llama_server_completion, together with this
// instance's server handle (llm.s). Any outcome is communicated through resp
// by the C side; nothing is returned to Go.
func (llm *shimExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_completion(llm.s, json_req, resp)
}
// llama_server_completion_next_result forwards task_id and the result struct
// resp to dynamic_shim_llama_server_completion_next_result, together with
// this instance's server handle (llm.s). The result is delivered through
// resp; nothing is returned to Go.
func (llm *shimExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
	C.dynamic_shim_llama_server_completion_next_result(llm.s, task_id, resp)
}
// llama_server_completion_cancel forwards task_id and err to
// dynamic_shim_llama_server_completion_cancel, together with this instance's
// server handle (llm.s). Nothing is returned to Go; err is handed to the C
// side unchanged.
func (llm *shimExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_completion_cancel(llm.s, task_id, err)
}
// llama_server_release_task_result passes result to
// dynamic_shim_llama_server_release_task_result, together with this
// instance's server handle (llm.s).
// NOTE(review): by its name this releases C-owned memory in result, so result
// presumably must not be used afterwards — confirm against the C shim.
func (llm *shimExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
	C.dynamic_shim_llama_server_release_task_result(llm.s, result)
}

// llama_server_tokenize forwards json_req, the output pointer json_resp and
// err to dynamic_shim_llama_server_tokenize, together with this instance's
// server handle (llm.s). Results come back through json_resp and err; nothing
// is returned to Go.
func (llm *shimExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)
}
// llama_server_detokenize forwards json_req, the output pointer json_resp and
// err to dynamic_shim_llama_server_detokenize, together with this instance's
// server handle (llm.s). Results come back through json_resp and err; nothing
// is returned to Go.
func (llm *shimExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)
}
// llama_server_embedding forwards json_req, the output pointer json_resp and
// err to dynamic_shim_llama_server_embedding, together with this instance's
// server handle (llm.s). Results come back through json_resp and err; nothing
// is returned to Go.
func (llm *shimExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
	C.dynamic_shim_llama_server_embedding(llm.s, json_req, json_resp, err)
}
// llama_server_release_json_resp passes json_resp to
// dynamic_shim_llama_server_release_json_resp, together with this instance's
// server handle (llm.s).
// NOTE(review): presumably frees the buffer that tokenize/detokenize/embedding
// stored in *json_resp — confirm against the C shim.
func (llm *shimExtServer) llama_server_release_json_resp(json_resp **C.char) {
	C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)
}

// newDynamicShimExtServer loads the dynamic library at path library through
// the C shim and returns the extServer produced by newExtServer for it.
//
// The whole call runs under shimMutex. Before loading, the library's
// directory is handed to updatePath. If C.dynamic_shim_init reports a negative
// response id, the message it supplied is returned as an error and the
// package-level llm variable is left untouched; otherwise llm is replaced by
// the new server. model, adapters, projectors, numLayers and opts are passed
// on to newExtServer unchanged.
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
	shimMutex.Lock()
	defer shimMutex.Unlock()

	updatePath(filepath.Dir(library))

	// C copy of the library path; released when this function returns.
	cLibrary := C.CString(library)
	defer C.free(unsafe.Pointer(cLibrary))

	// Response buffer the shim fills in with the load status and message.
	loadResp := newExtServerResp(128)
	defer freeExtServerResp(loadResp)

	var handle C.struct_dynamic_llama_server
	C.dynamic_shim_init(cLibrary, &handle, &loadResp)
	if failed := loadResp.id < 0; failed {
		reason := C.GoString(loadResp.msg)
		return nil, fmt.Errorf("Unable to load dynamic library: %s", reason)
	}

	server := &shimExtServer{
		options: opts,
		s:       handle,
	}
	llm = server

	log.Printf("Loading Dynamic Shim llm server: %s", library)
	return newExtServer(server, model, adapters, projectors, numLayers, opts)
}

// Predict delegates to predict (defined elsewhere in this package), passing
// this server along with ctx, pred and fn, and returns its error unchanged.
func (llm *shimExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
	return predict(ctx, llm, pred, fn)
}

// Encode delegates to encode (defined elsewhere in this package), passing
// this server, ctx and prompt, and returns its results unchanged. Note that
// encode takes the server before the context, unlike predict.
func (llm *shimExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
	return encode(llm, ctx, prompt)
}

// Decode delegates to decode (defined elsewhere in this package), passing
// this server, ctx and tokens, and returns its results unchanged.
func (llm *shimExtServer) Decode(ctx context.Context, tokens []int) (string, error) {
	return decode(llm, ctx, tokens)
}

// Embedding delegates to embedding (defined elsewhere in this package),
// passing this server, ctx and input, and returns its results unchanged.
func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float64, error) {
	return embedding(llm, ctx, input)
}

// Close hands this server to close. The argument is a *shimExtServer, not a
// channel, so close here must resolve to a package-level function that
// shadows the builtin rather than to the builtin itself.
func (llm *shimExtServer) Close() {
	close(llm)
}

// nativeInit extracts the embedded dynamic libraries into workdir and records
// every extracted server library in AvailableShims, keyed by variant name
// (the name of the directory that contains the library).
//
// A missing payload is not an error: it is logged and nil is returned without
// touching AvailableShims or calling verifyDriverAccess. Any other extraction
// failure, or an error from verifyDriverAccess, is returned to the caller.
// On success the set of known variants is logged to assist troubleshooting.
func nativeInit(workdir string) error {
	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
	if err != nil {
		// errors.Is (rather than ==) still recognises the sentinel if a
		// lower layer ever wraps it with additional context.
		if errors.Is(err, payloadMissing) {
			log.Printf("%s", payloadMissing)
			return nil
		}
		return err
	}
	for _, lib := range libs {
		// The last dir component is the variant name
		variant := filepath.Base(filepath.Dir(lib))
		AvailableShims[variant] = lib
	}

	if err := verifyDriverAccess(); err != nil {
		return err
	}

	// Report which dynamic libraries we have loaded to assist troubleshooting
	variants := make([]string, 0, len(AvailableShims))
	for variant := range AvailableShims {
		variants = append(variants, variant)
	}
	log.Printf("Dynamic LLM variants %v", variants)

	return nil
}

// extractDynamicLibs copies every file in libEmbed that matches glob into
// workDir and returns the destination paths of the server libraries, i.e. the
// files whose base name contains "server".
//
// Embedded paths are expected to have the form
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY. Each file is written to
// workDir/$VARIANT/$LIBRARY so that server libraries from different variants
// do not overwrite each other. Entries with a different number of path
// components are logged and skipped. A file that already exists at its
// destination is left untouched.
//
// payloadMissing is returned when the glob fails or matches nothing. Any
// failure to read, create or copy a file aborts the extraction and is
// returned wrapped with the embedded path it relates to.
func extractDynamicLibs(workDir, glob string) ([]string, error) {
	files, err := fs.Glob(libEmbed, glob)
	if err != nil || len(files) == 0 {
		return nil, payloadMissing
	}
	libs := make([]string, 0, len(files))

	for _, file := range files {
		pathComps := strings.Split(file, "/")
		if len(pathComps) != pathComponentCount {
			log.Printf("unexpected payload components: %v", pathComps)
			continue
		}
		// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
		// Include the variant in the path to avoid conflicts between multiple server libs
		targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])

		// Per-file work lives in a helper so its file handles are closed at
		// the end of each iteration rather than piling up until return.
		destFile, err := copyEmbeddedPayload(file, targetDir)
		if err != nil {
			return nil, err
		}

		// Match on the library's own name only; matching the full path would
		// misclassify every file whenever workDir itself contains "server".
		if strings.Contains(filepath.Base(destFile), "server") {
			libs = append(libs, destFile)
		}
	}
	return libs, nil
}

// copyEmbeddedPayload writes the libEmbed entry named file into targetDir
// (creating the directory if needed) and returns the destination path. An
// existing destination file is kept as-is; a partially written one is removed
// so a later run does not mistake it for a complete extraction.
func copyEmbeddedPayload(file, targetDir string) (string, error) {
	srcFile, err := libEmbed.Open(file)
	if err != nil {
		return "", fmt.Errorf("read payload %s: %w", file, err)
	}
	defer srcFile.Close()

	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return "", fmt.Errorf("create payload temp dir %s: %w", targetDir, err)
	}

	destFile := filepath.Join(targetDir, filepath.Base(file))
	if _, err := os.Stat(destFile); err == nil {
		// Already extracted by an earlier run.
		return destFile, nil
	} else if !errors.Is(err, os.ErrNotExist) {
		return "", fmt.Errorf("stat payload %s: %w", file, err)
	}

	out, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
	if err != nil {
		return "", fmt.Errorf("write payload %s: %w", file, err)
	}
	if _, err := io.Copy(out, srcFile); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		_ = out.Close()
		_ = os.Remove(destFile)
		return "", fmt.Errorf("copy payload %s: %w", file, err)
	}
	// Close explicitly: a failed flush means the library on disk is incomplete.
	if err := out.Close(); err != nil {
		_ = os.Remove(destFile) // best-effort cleanup of the incomplete file
		return "", fmt.Errorf("write payload %s: %w", file, err)
	}
	return destFile, nil
}
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`//go:build !darwin`

			`package llm`

			`/*`

			`#include <stdlib.h>`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`#include "dynamic_shim.h"`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00
			`*/`
			`import "C"`
			`import (`
			`"context"`
			`"errors"`
			`"fmt"`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`"io"`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`"io/fs"`
			`"log"`
			`"os"`
			`"path/filepath"`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`"strings"`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`"sync"`
			`"unsafe"`

			`"github.com/jmorganca/ollama/api"`
			`)`

			`type shimExtServer struct {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`s C.struct_dynamic_llama_server`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`options api.Options`
			`}`

			`// Note: current implementation does not support concurrent instantiations`
			`var shimMutex sync.Mutex`
			`var llm *shimExtServer`

Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`const pathComponentCount = 6`

Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`func (llm shimExtServer) llama_server_init(sparams C.ext_server_params_t, err *C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_init(llm.s, sparams, err)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm *shimExtServer) llama_server_start() {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_start(llm.s)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm *shimExtServer) llama_server_stop() {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_stop(llm.s)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

			`func (llm shimExtServer) llama_server_completion(json_req C.char, resp *C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_completion(llm.s, json_req, resp)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_completion_next_result(task_id C.int, resp C.ext_server_task_result_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_completion_next_result(llm.s, task_id, resp)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_completion_cancel(task_id C.int, err C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_completion_cancel(llm.s, task_id, err)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_release_task_result(result C.ext_server_task_result_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_release_task_result(llm.s, result)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

			`func (llm shimExtServer) llama_server_tokenize(json_req C.char, json_resp *C.char, err C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_detokenize(json_req C.char, json_resp *C.char, err C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_embedding(json_req C.char, json_resp *C.char, err C.ext_server_resp_t) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_embedding(llm.s, json_req, json_resp, err)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
			`func (llm shimExtServer) llama_server_release_json_resp(json_resp *C.char) {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {`
			`shimMutex.Lock()`
			`defer shimMutex.Unlock()`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`updatePath(filepath.Dir(library))`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`libPath := C.CString(library)`
			`defer C.free(unsafe.Pointer(libPath))`
			`resp := newExtServerResp(128)`
			`defer freeExtServerResp(resp)`
			`var srv C.struct_dynamic_llama_server`
			`C.dynamic_shim_init(libPath, &srv, &resp)`
			`if resp.id < 0 {`
			`return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg))`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`llm = &shimExtServer{`
			`s: srv,`
			`options: opts,`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`log.Printf("Loading Dynamic Shim llm server: %s", library)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`return newExtServer(llm, model, adapters, projectors, numLayers, opts)`
			`}`

			`func (llm *shimExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {`
fix: relay request opts to loaded llm prediction (#1761) 2024-01-03 17:01:42 +00:00			`return predict(ctx, llm, pred, fn)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

			`func (llm *shimExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {`
			`return encode(llm, ctx, prompt)`
			`}`

			`func (llm *shimExtServer) Decode(ctx context.Context, tokens []int) (string, error) {`
			`return decode(llm, ctx, tokens)`
			`}`

			`func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float64, error) {`
			`return embedding(llm, ctx, input)`
			`}`

			`func (llm *shimExtServer) Close() {`
			`close(llm)`
			`}`

			`func nativeInit(workdir string) error {`
Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`libs, err := extractDynamicLibs(workdir, "llama.cpp/build///lib/*")`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`if err != nil {`
Carry ggml-metal.metal as payload 2023-12-19 02:32:04 +00:00			`if err == payloadMissing {`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`log.Printf("%s", payloadMissing)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`return nil`
			`}`
			`return err`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`}`
			`for _, lib := range libs {`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`// The last dir component is the variant name`
			`variant := filepath.Base(filepath.Dir(lib))`
			`AvailableShims[variant] = lib`
			`}`

			`if err := verifyDriverAccess(); err != nil {`
			`return err`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`// Report which dynamic libraries we have loaded to assist troubleshooting`
			`variants := make([]string, len(AvailableShims))`
			`i := 0`
			`for variant := range AvailableShims {`
			`variants[i] = variant`
			`i++`
			`}`
			`log.Printf("Dynamic LLM variants %v", variants)`

			`return nil`
			`}`

			`func extractDynamicLibs(workDir, glob string) ([]string, error) {`
			`files, err := fs.Glob(libEmbed, glob)`
			`if err != nil \|\| len(files) == 0 {`
			`return nil, payloadMissing`
			`}`
Fix CPU only builds Go embed doesn't like when there's no matching files, so put a dummy placeholder in to allow building without any GPU support If no "server" library is found, it's safely ignored at runtime. 2024-01-04 00:08:34 +00:00			`libs := []string{}`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00
Fix CPU only builds Go embed doesn't like when there's no matching files, so put a dummy placeholder in to allow building without any GPU support If no "server" library is found, it's safely ignored at runtime. 2024-01-04 00:08:34 +00:00			`for _, file := range files {`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`pathComps := strings.Split(file, "/")`
Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`if len(pathComps) != pathComponentCount {`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`log.Printf("unexpected payload components: %v", pathComps)`
			`continue`
			`}`
Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`// Include the variant in the path to avoid conflicts between multiple server libs`
Code shuffle to clean up the llm dir 2024-01-04 17:40:15 +00:00			`targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`srcFile, err := libEmbed.Open(file)`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00			`if err != nil {`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`return nil, fmt.Errorf("read payload %s: %v", file, err)`
			`}`
			`defer srcFile.Close()`
			`if err := os.MkdirAll(targetDir, 0o755); err != nil {`
			`return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)`
			`}`
Revamp the dynamic library shim This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped. 2023-12-20 18:36:01 +00:00
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`destFile := filepath.Join(targetDir, filepath.Base(file))`
			`if strings.Contains(destFile, "server") {`
Fix CPU only builds Go embed doesn't like when there's no matching files, so put a dummy placeholder in to allow building without any GPU support If no "server" library is found, it's safely ignored at runtime. 2024-01-04 00:08:34 +00:00			`libs = append(libs, destFile)`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`

Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`_, err = os.Stat(destFile)`
			`switch {`
			`case errors.Is(err, os.ErrNotExist):`
			`destFile, err := os.OpenFile(destFile, os.O_WRONLY\|os.O_CREATE\|os.O_TRUNC, 0o755)`
			`if err != nil {`
			`return nil, fmt.Errorf("write payload %s: %v", file, err)`
			`}`
			`defer destFile.Close()`
			`if _, err := io.Copy(destFile, srcFile); err != nil {`
			`return nil, fmt.Errorf("copy payload %s: %v", file, err)`
			`}`
			`case err != nil:`
			`return nil, fmt.Errorf("stat payload %s: %v", file, err)`
			`}`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`
Switch windows build to fully dynamic Refactor where we store build outputs, and support a fully dynamic loading model on windows so the base executable has no special dependencies thus doesn't require a special PATH. 2023-12-23 19:35:44 +00:00			`return libs, nil`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`}`