commit
c77d45d836
7 changed files with 145 additions and 45 deletions
|
@ -5,7 +5,6 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
@ -301,18 +300,7 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
|
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
|
||||||
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
|
return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil)
|
||||||
var statusError StatusError
|
|
||||||
if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) Version(ctx context.Context) (string, error) {
|
func (c *Client) Version(ctx context.Context) (string, error) {
|
||||||
|
|
|
@ -141,6 +141,7 @@ type CreateRequest struct {
|
||||||
Path string `json:"path"`
|
Path string `json:"path"`
|
||||||
Modelfile string `json:"modelfile"`
|
Modelfile string `json:"modelfile"`
|
||||||
Stream *bool `json:"stream,omitempty"`
|
Stream *bool `json:"stream,omitempty"`
|
||||||
|
Quantization string `json:"quantization,omitempty"`
|
||||||
|
|
||||||
// Name is deprecated, see Model
|
// Name is deprecated, see Model
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
|
|
@ -194,7 +194,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
|
quantization, _ := cmd.Flags().GetString("quantization")
|
||||||
|
|
||||||
|
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
|
||||||
if err := client.Create(cmd.Context(), &request, fn); err != nil {
|
if err := client.Create(cmd.Context(), &request, fn); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -943,6 +945,7 @@ func NewCLI() *cobra.Command {
|
||||||
}
|
}
|
||||||
|
|
||||||
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
|
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
|
||||||
|
createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
|
||||||
|
|
||||||
showCmd := &cobra.Command{
|
showCmd := &cobra.Command{
|
||||||
Use: "show MODEL",
|
Use: "show MODEL",
|
||||||
|
|
71
llm/llm.go
71
llm/llm.go
|
@ -6,10 +6,81 @@ package llm
|
||||||
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
||||||
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
||||||
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
||||||
|
// #include <stdlib.h>
|
||||||
// #include "llama.h"
|
// #include "llama.h"
|
||||||
import "C"
|
import "C"
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
// SystemInfo is an unused example of calling llama.cpp functions using CGo
|
// SystemInfo is an unused example of calling llama.cpp functions using CGo
|
||||||
func SystemInfo() string {
|
func SystemInfo() string {
|
||||||
return C.GoString(C.llama_print_system_info())
|
return C.GoString(C.llama_print_system_info())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Quantize(infile, outfile, filetype string) error {
|
||||||
|
cinfile := C.CString(infile)
|
||||||
|
defer C.free(unsafe.Pointer(cinfile))
|
||||||
|
|
||||||
|
coutfile := C.CString(outfile)
|
||||||
|
defer C.free(unsafe.Pointer(coutfile))
|
||||||
|
|
||||||
|
params := C.llama_model_quantize_default_params()
|
||||||
|
params.nthread = -1
|
||||||
|
|
||||||
|
switch filetype {
|
||||||
|
case "F32":
|
||||||
|
params.ftype = fileTypeF32
|
||||||
|
case "F16":
|
||||||
|
params.ftype = fileTypeF16
|
||||||
|
case "Q4_0":
|
||||||
|
params.ftype = fileTypeQ4_0
|
||||||
|
case "Q4_1":
|
||||||
|
params.ftype = fileTypeQ4_1
|
||||||
|
case "Q4_1_F16":
|
||||||
|
params.ftype = fileTypeQ4_1_F16
|
||||||
|
case "Q8_0":
|
||||||
|
params.ftype = fileTypeQ8_0
|
||||||
|
case "Q5_0":
|
||||||
|
params.ftype = fileTypeQ5_0
|
||||||
|
case "Q5_1":
|
||||||
|
params.ftype = fileTypeQ5_1
|
||||||
|
case "Q2_K":
|
||||||
|
params.ftype = fileTypeQ2_K
|
||||||
|
case "Q3_K_S":
|
||||||
|
params.ftype = fileTypeQ3_K_S
|
||||||
|
case "Q3_K_M":
|
||||||
|
params.ftype = fileTypeQ3_K_M
|
||||||
|
case "Q3_K_L":
|
||||||
|
params.ftype = fileTypeQ3_K_L
|
||||||
|
case "Q4_K_S":
|
||||||
|
params.ftype = fileTypeQ4_K_S
|
||||||
|
case "Q4_K_M":
|
||||||
|
params.ftype = fileTypeQ4_K_M
|
||||||
|
case "Q5_K_S":
|
||||||
|
params.ftype = fileTypeQ5_K_S
|
||||||
|
case "Q5_K_M":
|
||||||
|
params.ftype = fileTypeQ5_K_M
|
||||||
|
case "Q6_K":
|
||||||
|
params.ftype = fileTypeQ6_K
|
||||||
|
case "IQ2_XXS":
|
||||||
|
params.ftype = fileTypeIQ2_XXS
|
||||||
|
case "IQ2_XS":
|
||||||
|
params.ftype = fileTypeIQ2_XS
|
||||||
|
case "Q2_K_S":
|
||||||
|
params.ftype = fileTypeQ2_K_S
|
||||||
|
case "Q3_K_XS":
|
||||||
|
params.ftype = fileTypeQ3_K_XS
|
||||||
|
case "IQ3_XXS":
|
||||||
|
params.ftype = fileTypeIQ3_XXS
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unknown filetype: %s", filetype)
|
||||||
|
}
|
||||||
|
|
||||||
|
if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
|
||||||
|
return fmt.Errorf("llama_model_quantize: %d", retval)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -284,7 +284,7 @@ func realpath(mfDir, from string) string {
|
||||||
return abspath
|
return abspath
|
||||||
}
|
}
|
||||||
|
|
||||||
func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
|
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
|
||||||
deleteMap := make(map[string]struct{})
|
deleteMap := make(map[string]struct{})
|
||||||
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
|
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
|
||||||
for _, layer := range append(manifest.Layers, manifest.Config) {
|
for _, layer := range append(manifest.Layers, manifest.Config) {
|
||||||
|
@ -337,8 +337,27 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||||
|
|
||||||
if ggufName != "" {
|
if ggufName != "" {
|
||||||
pathName = ggufName
|
pathName = ggufName
|
||||||
slog.Debug(fmt.Sprintf("new image layer path: %s", pathName))
|
|
||||||
defer os.RemoveAll(ggufName)
|
defer os.RemoveAll(ggufName)
|
||||||
|
|
||||||
|
if quantization != "" {
|
||||||
|
quantization = strings.ToUpper(quantization)
|
||||||
|
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)})
|
||||||
|
tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tempfile.Name())
|
||||||
|
|
||||||
|
if err := llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := tempfile.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
pathName = tempfile.Name()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bin, err := os.Open(pathName)
|
bin, err := os.Open(pathName)
|
||||||
|
|
|
@ -647,7 +647,7 @@ func CreateModelHandler(c *gin.Context) {
|
||||||
ctx, cancel := context.WithCancel(c.Request.Context())
|
ctx, cancel := context.WithCancel(c.Request.Context())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
if err := CreateModel(ctx, model, filepath.Dir(req.Path), commands, fn); err != nil {
|
if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
|
||||||
ch <- gin.H{"error": err.Error()}
|
ch <- gin.H{"error": err.Error()}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
@ -913,6 +913,24 @@ func HeadBlobHandler(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func CreateBlobHandler(c *gin.Context) {
|
func CreateBlobHandler(c *gin.Context) {
|
||||||
|
path, err := GetBlobsPath(c.Param("digest"))
|
||||||
|
if err != nil {
|
||||||
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = os.Stat(path)
|
||||||
|
switch {
|
||||||
|
case errors.Is(err, os.ErrNotExist):
|
||||||
|
// noop
|
||||||
|
case err != nil:
|
||||||
|
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
c.Status(http.StatusOK)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
layer, err := NewLayer(c.Request.Body, "")
|
layer, err := NewLayer(c.Request.Body, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
|
|
@ -61,7 +61,7 @@ func Test_Routes(t *testing.T) {
|
||||||
fn := func(resp api.ProgressResponse) {
|
fn := func(resp api.ProgressResponse) {
|
||||||
t.Logf("Status: %s", resp.Status)
|
t.Logf("Status: %s", resp.Status)
|
||||||
}
|
}
|
||||||
err = CreateModel(context.TODO(), name, "", commands, fn)
|
err = CreateModel(context.TODO(), name, "", "", commands, fn)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue