2023-07-21 13:33:56 -07:00
|
|
|
package llm
|
|
|
|
|
2024-07-05 19:34:21 -04:00
|
|
|
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
|
2024-07-06 12:54:02 -04:00
|
|
|
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
2024-07-05 19:34:21 -04:00
|
|
|
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
|
|
|
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
2024-07-09 11:17:44 -07:00
|
|
|
// #cgo windows,amd64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
|
|
|
|
// #cgo windows,arm64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src
|
2024-07-05 19:34:21 -04:00
|
|
|
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
|
|
|
|
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
|
2024-04-05 08:49:04 -07:00
|
|
|
// #include <stdlib.h>
|
2024-03-14 10:24:13 -07:00
|
|
|
// #include "llama.h"
|
|
|
|
import "C"
|
2024-08-01 14:52:15 -07:00
|
|
|
|
2024-04-05 08:49:04 -07:00
|
|
|
import (
|
2024-08-01 14:52:15 -07:00
|
|
|
"errors"
|
2024-04-05 08:49:04 -07:00
|
|
|
"unsafe"
|
|
|
|
)
|
2024-03-14 10:24:13 -07:00
|
|
|
|
|
|
|
// SystemInfo is an unused example of calling llama.cpp functions using CGo
|
|
|
|
func SystemInfo() string {
|
|
|
|
return C.GoString(C.llama_print_system_info())
|
2023-12-20 10:36:01 -08:00
|
|
|
}
|
2024-04-05 08:49:04 -07:00
|
|
|
|
2024-04-23 15:18:45 -07:00
|
|
|
func Quantize(infile, outfile string, ftype fileType) error {
|
2024-04-05 08:49:04 -07:00
|
|
|
cinfile := C.CString(infile)
|
|
|
|
defer C.free(unsafe.Pointer(cinfile))
|
|
|
|
|
|
|
|
coutfile := C.CString(outfile)
|
|
|
|
defer C.free(unsafe.Pointer(coutfile))
|
|
|
|
|
|
|
|
params := C.llama_model_quantize_default_params()
|
|
|
|
params.nthread = -1
|
2024-04-12 13:55:12 -07:00
|
|
|
params.ftype = ftype.Value()
|
2024-04-05 08:49:04 -07:00
|
|
|
|
2024-04-12 13:55:12 -07:00
|
|
|
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
2024-08-01 14:52:15 -07:00
|
|
|
return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
2024-04-05 08:49:04 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|