One corrupt manifest should not wedge model operations (#7515)

One potential failure mode is an empty file which bubbles up as an EOF error,
leading to all pulls and listing operations failing.  Instead, continue and
warn about the corrupt manifest.  This also allows re-pulling the corrupt
manifest to repair the system.
This commit is contained in:
Daniel Hiltgen 2024-11-05 14:21:45 -08:00 committed by GitHub
parent 34a75102f7
commit a4c70fe157
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 34 additions and 21 deletions

View file

@ -690,7 +690,8 @@ func CopyModel(src, dst model.Name) error {
}
func deleteUnusedLayers(deleteMap map[string]struct{}) error {
manifests, err := Manifests()
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
manifests, err := Manifests(true)
if err != nil {
return err
}
@ -853,8 +854,8 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
manifest, _, err := GetManifest(mp)
if errors.Is(err, os.ErrNotExist) {
// noop
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
} else if err != nil {
slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
} else {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}

View file

@ -106,7 +106,8 @@ func (l *Layer) Remove() error {
return nil
}
ms, err := Manifests()
// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
ms, err := Manifests(true)
if err != nil {
return err
}

View file

@ -123,7 +123,7 @@ func WriteManifest(name model.Name, config Layer, layers []Layer) error {
return json.NewEncoder(f).Encode(m)
}
func Manifests() (map[model.Name]*Manifest, error) {
func Manifests(continueOnError bool) (map[model.Name]*Manifest, error) {
manifests, err := GetManifestPath()
if err != nil {
return nil, err
@ -145,22 +145,29 @@ func Manifests() (map[model.Name]*Manifest, error) {
if !fi.IsDir() {
rel, err := filepath.Rel(manifests, match)
if err != nil {
if !continueOnError {
return nil, fmt.Errorf("%s %w", match, err)
}
slog.Warn("bad filepath", "path", match, "error", err)
continue
}
n := model.ParseNameFromFilepath(rel)
if !n.IsValid() {
if !continueOnError {
return nil, fmt.Errorf("%s %w", rel, err)
}
slog.Warn("bad manifest name", "path", rel)
continue
}
m, err := ParseNamedManifest(n)
if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
if err != nil {
if !continueOnError {
return nil, fmt.Errorf("%s %w", n, err)
}
slog.Warn("bad manifest", "name", n, "error", err)
continue
} else if err != nil {
return nil, fmt.Errorf("%s: %w", n, err)
}
ms[n] = m

View file

@ -112,7 +112,7 @@ func TestManifests(t *testing.T) {
createManifest(t, d, p)
}
ms, err := Manifests()
ms, err := Manifests(true)
if err != nil {
t.Fatal(err)
}

View file

@ -622,7 +622,7 @@ func (s *Server) PushHandler(c *gin.Context) {
}
func checkNameExists(name model.Name) error {
names, err := Manifests()
names, err := Manifests(true)
if err != nil {
return err
}
@ -894,7 +894,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) {
}
func (s *Server) ListHandler(c *gin.Context) {
ms, err := Manifests()
ms, err := Manifests(true)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
@ -1211,6 +1211,9 @@ func Serve(ln net.Listener) error {
}
if !envconfig.NoPrune() {
if _, err := Manifests(false); err != nil {
slog.Warn("corrupt manifests detected, skipping prune operation. Re-pull or delete to clear", "error", err)
} else {
// clean up unused layers and manifests
if err := PruneLayers(); err != nil {
return err
@ -1225,6 +1228,7 @@ func Serve(ln net.Listener) error {
return err
}
}
}
ctx, done := context.WithCancel(context.Background())
schedCtx, schedDone := context.WithCancel(ctx)