simpler check for model loading compatibility errors
This commit is contained in:
parent
cc1d03f4ec
commit
7ed5a39bc7
3 changed files with 11 additions and 12 deletions
12
llm/llama.go
12
llm/llama.go
|
@ -18,7 +18,6 @@ import (
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"slices"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -286,7 +285,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
|
||||||
return os.Stderr.Write(b)
|
return os.Stderr.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML, opts api.Options) (*llama, error) {
|
func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
|
||||||
fileInfo, err := os.Stat(model)
|
fileInfo, err := os.Stat(model)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -296,7 +295,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML
|
||||||
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
|
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
|
||||||
}
|
}
|
||||||
|
|
||||||
numGPU := NumGPU(ggml.NumLayers(), fileInfo.Size(), opts)
|
numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
|
||||||
params := []string{
|
params := []string{
|
||||||
"--model", model,
|
"--model", model,
|
||||||
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
||||||
|
@ -407,13 +406,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML
|
||||||
|
|
||||||
if runnerErr != nil {
|
if runnerErr != nil {
|
||||||
// this is the error returned from the llama runner process that failed most recently
|
// this is the error returned from the llama runner process that failed most recently
|
||||||
|
|
||||||
// falcon and starcoder model families are not compatible with older versions of llama.cpp
|
|
||||||
families := []string{"falcon", "starcoder"}
|
|
||||||
if strings.Contains(runnerErr.Error(), "failed to load model") && slices.Contains(families, ggml.ModelFamily()) {
|
|
||||||
return nil, fmt.Errorf("%v: %s", runnerErr, "this model may be incompatible with your version of Ollama. Please run `ollama pull` to get the latest version of this model.")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, runnerErr
|
return nil, runnerErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -86,9 +86,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
|
||||||
switch ggml.Name() {
|
switch ggml.Name() {
|
||||||
case "gguf":
|
case "gguf":
|
||||||
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
|
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
|
||||||
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml, opts)
|
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
|
||||||
case "ggml", "ggmf", "ggjt", "ggla":
|
case "ggml", "ggmf", "ggjt", "ggla":
|
||||||
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml, opts)
|
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
|
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,6 +97,13 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
|
||||||
|
|
||||||
llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
|
llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// some older models are not compatible with newer versions of llama.cpp
|
||||||
|
// show a generalized compatibility error until there is a better way to
|
||||||
|
// check for model compatibility
|
||||||
|
if strings.Contains(err.Error(), "failed to load model") {
|
||||||
|
err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue