Use --quantize flag and quantize api parameter (#4321)

* rename `--quantization` to `--quantize`

* backwards

* Update api/types.go

Co-authored-by: Michael Yang <mxyng@pm.me>

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
Jeffrey Morgan 2024-05-10 13:06:13 -07:00 committed by GitHub
parent ea0fdaed28
commit 6602e793c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 9 deletions

View file

@ -197,14 +197,17 @@ type EmbeddingResponse struct {
// CreateRequest is the request passed to [Client.Create].
type CreateRequest struct {
Model string `json:"model"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
Quantization string `json:"quantization,omitempty"`
Model string `json:"model"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
Quantize string `json:"quantize,omitempty"`
// Name is deprecated, see Model
Name string `json:"name"`
// Quantization is deprecated, see Quantize
Quantization string `json:"quantization,omitempty"`
}
// DeleteRequest is the request passed to [Client.Delete].

View file

@ -142,9 +142,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}
quantization, _ := cmd.Flags().GetString("quantization")
quantize, _ := cmd.Flags().GetString("quantize")
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize}
if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err
}
@ -1051,7 +1051,7 @@ func NewCLI() *cobra.Command {
}
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
showCmd := &cobra.Command{
Use: "show MODEL",

View file

@ -554,7 +554,12 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
quantization := req.Quantization
if req.Quantize != "" {
quantization = req.Quantize
}
if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(quantization), modelfile, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()