add error for falcon and starcoder vocab compatibility (#844)

add error for falcon and starcoder vocab compatibility
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
This commit is contained in:
Jeffrey Morgan 2023-10-19 12:18:31 -04:00 committed by GitHub
parent 6b213216d5
commit a7dad24d92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 4 deletions

View file

@@ -18,6 +18,7 @@ import (
"path" "path"
"path/filepath" "path/filepath"
"runtime" "runtime"
"slices"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -285,7 +286,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
return os.Stderr.Write(b) return os.Stderr.Write(b)
} }
func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) { func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML, opts api.Options) (*llama, error) {
fileInfo, err := os.Stat(model) fileInfo, err := os.Stat(model)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -295,7 +296,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided") return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
} }
numGPU := NumGPU(numLayers, fileInfo.Size(), opts) numGPU := NumGPU(ggml.NumLayers(), fileInfo.Size(), opts)
params := []string{ params := []string{
"--model", model, "--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
@@ -406,6 +407,13 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
if runnerErr != nil { if runnerErr != nil {
// this is the error returned from the llama runner process that failed most recently // this is the error returned from the llama runner process that failed most recently
// falcon and starcoder model families are not compatible with older versions of llama.cpp
families := []string{"falcon", "starcoder"}
if strings.Contains(runnerErr.Error(), "failed to load model") && slices.Contains(families, ggml.ModelFamily()) {
return nil, fmt.Errorf("%v: %s", runnerErr, "this model may be incompatible with your version of Ollama. Please run `ollama pull` to get the latest version of this model.")
}
return nil, runnerErr return nil, runnerErr
} }

View file

@@ -86,9 +86,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
switch ggml.Name() { switch ggml.Name() {
case "gguf": case "gguf":
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts) return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml, opts)
case "ggml", "ggmf", "ggjt", "ggla": case "ggml", "ggmf", "ggjt", "ggla":
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts) return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml, opts)
default: default:
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily()) return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
} }