diff --git a/llm/llama.go b/llm/llama.go
index 52e54554..61288b52 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -18,7 +18,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
-	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -286,7 +285,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
 	return os.Stderr.Write(b)
 }
 
-func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML, opts api.Options) (*llama, error) {
+func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
 	fileInfo, err := os.Stat(model)
 	if err != nil {
 		return nil, err
@@ -296,7 +295,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 	}
 
-	numGPU := NumGPU(ggml.NumLayers(), fileInfo.Size(), opts)
+	numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
@@ -407,13 +406,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, ggml *GGML
 
 	if runnerErr != nil {
 		// this is the error returned from the llama runner process that failed most recently
-
-		// falcon and starcoder model families are not compatible with older versions of llama.cpp
-		families := []string{"falcon", "starcoder"}
-		if strings.Contains(runnerErr.Error(), "failed to load model") && slices.Contains(families, ggml.ModelFamily()) {
-			return nil, fmt.Errorf("%v: %s", runnerErr, "this model may be incompatible with your version of Ollama. Please run `ollama pull` to get the latest version of this model.")
-		}
-
 		return nil, runnerErr
 	}
 
diff --git a/llm/llm.go b/llm/llm.go
index 061c2065..e25558f0 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -86,9 +86,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 	switch ggml.Name() {
 	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
-		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml, opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml, opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
diff --git a/server/routes.go b/server/routes.go
index 3754e9a8..cdd3caf5 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -97,6 +97,13 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
 
 		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
 		if err != nil {
+			// some older models are not compatible with newer versions of llama.cpp
+			// show a generalized compatibility error until there is a better way to
+			// check for model compatibility
+			if strings.Contains(err.Error(), "failed to load model") {
+				err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
+			}
+
 			return err
 		}