Merge pull request #4244 from ollama/mxyng/skip-if-same

skip if same quantization
This commit is contained in:
Michael Yang 2024-05-07 19:03:37 -07:00 committed by GitHub
commit 88cf154483
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 26 additions and 26 deletions

View file

@@ -45,12 +45,12 @@ func (kv KV) ParameterCount() uint64 {
return kv.u64("general.parameter_count") return kv.u64("general.parameter_count")
} }
func (kv KV) FileType() string { func (kv KV) FileType() fileType {
if u64 := kv.u64("general.file_type"); u64 > 0 { if u64 := kv.u64("general.file_type"); u64 > 0 {
return fileType(uint32(u64)).String() return fileType(uint32(u64))
} }
return "unknown" return fileTypeUnknown
} }
func (kv KV) BlockCount() uint64 { func (kv KV) BlockCount() uint64 {

View file

@@ -370,37 +370,37 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
baseLayer.MediaType == "application/vnd.ollama.image.model" && baseLayer.MediaType == "application/vnd.ollama.image.model" &&
baseLayer.GGML != nil && baseLayer.GGML != nil &&
baseLayer.GGML.Name() == "gguf" { baseLayer.GGML.Name() == "gguf" {
ftype, err := llm.ParseFileType(quantization) want, err := llm.ParseFileType(quantization)
if err != nil { if err != nil {
return err return err
} }
filetype := baseLayer.GGML.KV().FileType() ft := baseLayer.GGML.KV().FileType()
if !slices.Contains([]string{"F16", "F32"}, filetype) { if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
return errors.New("quantization is only supported for F16 and F32 models") return errors.New("quantization is only supported for F16 and F32 models")
} } else if want != ft {
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", filetype, quantization)}) blob, err := GetBlobsPath(baseLayer.Digest)
if err != nil {
return err
}
blob, err := GetBlobsPath(baseLayer.Digest) temp, err := os.CreateTemp(filepath.Dir(blob), quantization)
if err != nil { if err != nil {
return err return err
} }
defer temp.Close()
defer os.Remove(temp.Name())
temp, err := os.CreateTemp(filepath.Dir(blob), quantization) if err := llm.Quantize(blob, temp.Name(), want); err != nil {
if err != nil { return err
return err }
}
defer temp.Close()
defer os.Remove(temp.Name())
if err := llm.Quantize(blob, temp.Name(), ftype); err != nil { baseLayer.Layer, err = NewLayer(temp, baseLayer.Layer.MediaType)
return err if err != nil {
} return err
}
baseLayer.Layer, err = NewLayer(temp, baseLayer.Layer.MediaType)
if err != nil {
return err
} }
} }
@@ -408,7 +408,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name()) config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name())
config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture()) config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture())
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount())) config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType()) config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture()) config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
} }