2023-07-21 13:33:56 -07:00
|
|
|
package llm
|
|
|
|
|
2024-03-14 10:24:13 -07:00
|
|
|
// #cgo CFLAGS: -Illama.cpp
|
|
|
|
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
|
|
|
|
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
|
|
|
|
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
|
|
|
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
|
|
|
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
2024-04-05 08:49:04 -07:00
|
|
|
// #include <stdlib.h>
|
2024-03-14 10:24:13 -07:00
|
|
|
// #include "llama.h"
|
|
|
|
import "C"
|
2024-04-05 08:49:04 -07:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"unsafe"
|
|
|
|
)
|
2024-03-14 10:24:13 -07:00
|
|
|
|
|
|
|
// SystemInfo is an unused example of calling llama.cpp functions using CGo
|
|
|
|
func SystemInfo() string {
|
|
|
|
return C.GoString(C.llama_print_system_info())
|
2023-12-20 10:36:01 -08:00
|
|
|
}
|
2024-04-05 08:49:04 -07:00
|
|
|
|
|
|
|
func Quantize(infile, outfile, filetype string) error {
|
|
|
|
cinfile := C.CString(infile)
|
|
|
|
defer C.free(unsafe.Pointer(cinfile))
|
|
|
|
|
|
|
|
coutfile := C.CString(outfile)
|
|
|
|
defer C.free(unsafe.Pointer(coutfile))
|
|
|
|
|
|
|
|
params := C.llama_model_quantize_default_params()
|
|
|
|
params.nthread = -1
|
|
|
|
|
|
|
|
switch filetype {
|
|
|
|
case "F32":
|
|
|
|
params.ftype = fileTypeF32
|
|
|
|
case "F16":
|
|
|
|
params.ftype = fileTypeF16
|
|
|
|
case "Q4_0":
|
|
|
|
params.ftype = fileTypeQ4_0
|
|
|
|
case "Q4_1":
|
|
|
|
params.ftype = fileTypeQ4_1
|
|
|
|
case "Q4_1_F16":
|
|
|
|
params.ftype = fileTypeQ4_1_F16
|
|
|
|
case "Q8_0":
|
|
|
|
params.ftype = fileTypeQ8_0
|
|
|
|
case "Q5_0":
|
|
|
|
params.ftype = fileTypeQ5_0
|
|
|
|
case "Q5_1":
|
|
|
|
params.ftype = fileTypeQ5_1
|
|
|
|
case "Q2_K":
|
|
|
|
params.ftype = fileTypeQ2_K
|
|
|
|
case "Q3_K_S":
|
|
|
|
params.ftype = fileTypeQ3_K_S
|
|
|
|
case "Q3_K_M":
|
|
|
|
params.ftype = fileTypeQ3_K_M
|
|
|
|
case "Q3_K_L":
|
|
|
|
params.ftype = fileTypeQ3_K_L
|
|
|
|
case "Q4_K_S":
|
|
|
|
params.ftype = fileTypeQ4_K_S
|
|
|
|
case "Q4_K_M":
|
|
|
|
params.ftype = fileTypeQ4_K_M
|
|
|
|
case "Q5_K_S":
|
|
|
|
params.ftype = fileTypeQ5_K_S
|
|
|
|
case "Q5_K_M":
|
|
|
|
params.ftype = fileTypeQ5_K_M
|
|
|
|
case "Q6_K":
|
|
|
|
params.ftype = fileTypeQ6_K
|
|
|
|
case "IQ2_XXS":
|
|
|
|
params.ftype = fileTypeIQ2_XXS
|
|
|
|
case "IQ2_XS":
|
|
|
|
params.ftype = fileTypeIQ2_XS
|
|
|
|
case "Q2_K_S":
|
|
|
|
params.ftype = fileTypeQ2_K_S
|
|
|
|
case "Q3_K_XS":
|
|
|
|
params.ftype = fileTypeQ3_K_XS
|
|
|
|
case "IQ3_XXS":
|
|
|
|
params.ftype = fileTypeIQ3_XXS
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("unknown filetype: %s", filetype)
|
|
|
|
}
|
|
|
|
|
|
|
|
if retval := C.llama_model_quantize(cinfile, coutfile, ¶ms); retval != 0 {
|
|
|
|
return fmt.Errorf("llama_model_quantize: %d", retval)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|