diff --git a/llm/llama.go b/llm/llama.go index 52e54554..61288b52 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -18,7 +18,6 @@ import ( "path" "path/filepath" "runtime" - "slices" "strconv" "strings" "sync" @@ -286,7 +285,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) { return os.Stderr.Write(b) } -func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML, opts api.Options) (*llama, error) { +func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) { fileInfo, err := os.Stat(model) if err != nil { return nil, err @@ -296,7 +295,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML return nil, errors.New("ollama supports only one lora adapter, but multiple were provided") } - numGPU := NumGPU(ggml.NumLayers(), fileInfo.Size(), opts) + numGPU := NumGPU(numLayers, fileInfo.Size(), opts) params := []string{ "--model", model, "--ctx-size", fmt.Sprintf("%d", opts.NumCtx), @@ -407,13 +406,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML if runnerErr != nil { // this is the error returned from the llama runner process that failed most recently - - // falcon and starcoder model families are not compatible with older versions of llama.cpp - families := []string{"falcon", "starcoder"} - if strings.Contains(runnerErr.Error(), "failed to load model") && slices.Contains(families, ggml.ModelFamily()) { - return nil, fmt.Errorf("%v: %s", runnerErr, "this model may be incompatible with your version of Ollama. Please run `ollama pull` to get the latest version of this model.") - } - return nil, runnerErr } diff --git a/llm/llm.go b/llm/llm.go index 061c2065..e25558f0 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -86,9 +86,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error switch ggml.Name() { case "gguf": opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions - return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml, opts) + return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts) case "ggml", "ggmf", "ggjt", "ggla": - return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml, opts) + return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts) default: return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily()) } diff --git a/server/routes.go b/server/routes.go index 3754e9a8..cdd3caf5 100644 --- a/server/routes.go +++ b/server/routes.go @@ -97,6 +97,13 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string] llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts) if err != nil { + // some older models are not compatible with newer versions of llama.cpp + // show a generalized compatibility error until there is a better way to + // check for model compatibility + if strings.Contains(err.Error(), "failed to load model") { + err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName) + } + return err }