From a4c70fe157477fc25940a0ff1f544632464f2e77 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 5 Nov 2024 14:21:45 -0800 Subject: [PATCH] One corrupt manifest should not wedge model operations (#7515) One potential failure mode is an empty file which bubbles up as an EOF error, leading to all pulls and listing operations failing. Instead, continue and warn about the corrupt manifest. This also allows re-pulling the corrupt manifest to repair the system. --- server/images.go | 7 ++++--- server/layer.go | 3 ++- server/manifest.go | 15 +++++++++++---- server/manifest_test.go | 2 +- server/routes.go | 28 ++++++++++++++++------------ 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/server/images.go b/server/images.go index 9d2e1959..584b7b13 100644 --- a/server/images.go +++ b/server/images.go @@ -690,7 +690,8 @@ func CopyModel(src, dst model.Name) error { } func deleteUnusedLayers(deleteMap map[string]struct{}) error { - manifests, err := Manifests() + // Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned + manifests, err := Manifests(true) if err != nil { return err } @@ -853,8 +854,8 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu manifest, _, err := GetManifest(mp) if errors.Is(err, os.ErrNotExist) { // noop - } else if err != nil && !errors.Is(err, os.ErrNotExist) { - return err + } else if err != nil { + slog.Warn("pulling model with bad existing manifest", "name", name, "error", err) } else { for _, l := range manifest.Layers { deleteMap[l.Digest] = struct{}{} diff --git a/server/layer.go b/server/layer.go index 0bdee72b..f1fbabea 100644 --- a/server/layer.go +++ b/server/layer.go @@ -106,7 +106,8 @@ func (l *Layer) Remove() error { return nil } - ms, err := Manifests() + // Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned + ms, err := Manifests(true) if err != nil { return err } diff --git a/server/manifest.go b/server/manifest.go index 6b04753f..0d348dd0 100644 --- a/server/manifest.go +++ b/server/manifest.go @@ -123,7 +123,7 @@ func WriteManifest(name model.Name, config Layer, layers []Layer) error { return json.NewEncoder(f).Encode(m) } -func Manifests() (map[model.Name]*Manifest, error) { +func Manifests(continueOnError bool) (map[model.Name]*Manifest, error) { manifests, err := GetManifestPath() if err != nil { return nil, err @@ -145,22 +145,29 @@ func Manifests() (map[model.Name]*Manifest, error) { if !fi.IsDir() { rel, err := filepath.Rel(manifests, match) if err != nil { + if !continueOnError { + return nil, fmt.Errorf("%s %w", match, err) + } slog.Warn("bad filepath", "path", match, "error", err) continue } n := model.ParseNameFromFilepath(rel) if !n.IsValid() { + if !continueOnError { + return nil, fmt.Errorf("%s %w", rel, err) + } slog.Warn("bad manifest name", "path", rel) continue } m, err := ParseNamedManifest(n) - if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) { + if err != nil { + if !continueOnError { + return nil, fmt.Errorf("%s %w", n, err) + } slog.Warn("bad manifest", "name", n, "error", err) continue - } else if err != nil { - return nil, fmt.Errorf("%s: %w", n, err) } ms[n] = m diff --git a/server/manifest_test.go b/server/manifest_test.go index 70ab7fa2..d94deefb 100644 --- a/server/manifest_test.go +++ b/server/manifest_test.go @@ -112,7 +112,7 @@ func TestManifests(t *testing.T) { createManifest(t, d, p) } - ms, err := Manifests() + ms, err := Manifests(true) if err != nil { t.Fatal(err) } diff --git a/server/routes.go b/server/routes.go index 1d7a9559..c5fd3293 100644 --- a/server/routes.go +++ b/server/routes.go @@ -622,7 +622,7 @@ func (s *Server) PushHandler(c *gin.Context) { } func checkNameExists(name model.Name) error { - names, err := Manifests() + names, err := Manifests(true) if err != nil { return err } @@ -894,7 +894,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) { } func (s *Server) ListHandler(c *gin.Context) { - ms, err := Manifests() + ms, err := Manifests(true) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -1211,18 +1211,22 @@ func Serve(ln net.Listener) error { } if !envconfig.NoPrune() { - // clean up unused layers and manifests - if err := PruneLayers(); err != nil { - return err - } + if _, err := Manifests(false); err != nil { + slog.Warn("corrupt manifests detected, skipping prune operation. Re-pull or delete to clear", "error", err) + } else { + // clean up unused layers and manifests + if err := PruneLayers(); err != nil { + return err + } - manifestsPath, err := GetManifestPath() - if err != nil { - return err - } + manifestsPath, err := GetManifestPath() + if err != nil { + return err + } - if err := PruneDirectory(manifestsPath); err != nil { - return err + if err := PruneDirectory(manifestsPath); err != nil { + return err + } } }