From 37f9c8ad994efa83b9cb12afb43f18579f669af1 Mon Sep 17 00:00:00 2001 From: Blake Mizerany Date: Fri, 26 Apr 2024 13:08:32 -0700 Subject: [PATCH] types/model: overhaul Name and Digest types (#3924) --- server/images.go | 11 +- types/model/digest.go | 87 -- types/model/digest_test.go | 46 - types/model/name.go | 986 ++++++------------ types/model/name_test.go | 912 +++++----------- .../d37463aa416f6bab} | 2 +- .../fuzz/FuzzParseRef/1d43ee52085cb4aa | 2 - .../fuzz/FuzzParseRef/27fd759314f0e6d6 | 2 - .../fuzz/FuzzParseRef/3e3b70dba384074d | 2 - .../fuzz/FuzzParseRef/71f1fdff711b6dab | 2 - .../fuzz/FuzzParseRef/b51b1c875e61a948 | 2 - 11 files changed, 585 insertions(+), 1469 deletions(-) delete mode 100644 types/model/digest.go delete mode 100644 types/model/digest_test.go rename types/model/testdata/fuzz/{FuzzParseRef/82c2975c430ac608 => FuzzName/d37463aa416f6bab} (53%) delete mode 100644 types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa delete mode 100644 types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6 delete mode 100644 types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d delete mode 100644 types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab delete mode 100644 types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948 diff --git a/server/images.go b/server/images.go index 7ba5134c..3d0eba19 100644 --- a/server/images.go +++ b/server/images.go @@ -703,17 +703,24 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string } func CopyModel(src, dst model.Name) error { + if !dst.IsFullyQualified() { + return model.Unqualified(dst) + } + if !src.IsFullyQualified() { + return model.Unqualified(src) + } + manifests, err := GetManifestPath() if err != nil { return err } - dstpath := filepath.Join(manifests, dst.FilepathNoBuild()) + dstpath := filepath.Join(manifests, dst.Filepath()) if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil { return err } - srcpath := filepath.Join(manifests, src.FilepathNoBuild()) + srcpath 
:= filepath.Join(manifests, src.Filepath()) srcfile, err := os.Open(srcpath) if err != nil { return err diff --git a/types/model/digest.go b/types/model/digest.go deleted file mode 100644 index a122d63a..00000000 --- a/types/model/digest.go +++ /dev/null @@ -1,87 +0,0 @@ -package model - -import ( - "fmt" - "log/slog" - "strings" - "unicode" -) - -// Digest represents a digest of a model Manifest. It is a comparable value -// type and is immutable. -// -// The zero Digest is not a valid digest. -type Digest struct { - s string -} - -// Split returns the digest type and the digest value. -func (d Digest) Split() (typ, digest string) { - typ, digest, _ = strings.Cut(d.s, "-") - return -} - -// String returns the digest in the form of "-", or the -// empty string if the digest is invalid. -func (d Digest) String() string { return d.s } - -// IsValid returns true if the digest is valid (not zero). -// -// A valid digest may be created only by ParseDigest, or -// ParseName(name).Digest(). -func (d Digest) IsValid() bool { return d.s != "" } - -// LogValue implements slog.Value. -func (d Digest) LogValue() slog.Value { - return slog.StringValue(d.String()) -} - -var ( - _ slog.LogValuer = Digest{} -) - -// ParseDigest parses a string in the form of "-" into a -// Digest. 
-func ParseDigest(s string) Digest { - typ, digest, ok := strings.Cut(s, "-") - if !ok { - typ, digest, ok = strings.Cut(s, ":") - } - if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 { - return Digest{s: fmt.Sprintf("%s-%s", typ, digest)} - } - return Digest{} -} - -func MustParseDigest(s string) Digest { - d := ParseDigest(s) - if !d.IsValid() { - panic(fmt.Sprintf("invalid digest: %q", s)) - } - return d -} - -func isValidDigestType(s string) bool { - if len(s) == 0 { - return false - } - for _, r := range s { - if !unicode.IsLower(r) && !unicode.IsDigit(r) { - return false - } - } - return true -} - -func isValidHex(s string) bool { - if len(s) == 0 { - return false - } - for i := range s { - c := s[i] - if c < '0' || c > '9' && c < 'a' || c > 'f' { - return false - } - } - return true -} diff --git a/types/model/digest_test.go b/types/model/digest_test.go deleted file mode 100644 index 5096a28a..00000000 --- a/types/model/digest_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package model - -import "testing" - -var testDigests = map[string]Digest{ - "": {}, - "sha256-1234": {s: "sha256-1234"}, - "sha256-5678": {s: "sha256-5678"}, - "blake2-9abc": {s: "blake2-9abc"}, - "-1234": {}, - "sha256-": {}, - "sha256-1234-5678": {}, - "sha256-P": {}, // invalid hex - "sha256-1234P": {}, - "---": {}, -} - -func TestDigestParse(t *testing.T) { - // Test cases. - for s, want := range testDigests { - got := ParseDigest(s) - t.Logf("ParseDigest(%q) = %#v", s, got) - if got != want { - t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want) - } - } -} - -func TestDigestString(t *testing.T) { - // Test cases. 
- for s, d := range testDigests { - want := s - if !d.IsValid() { - want = "" - } - got := d.String() - if got != want { - t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want) - } - - got = ParseDigest(s).String() - if got != want { - t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want) - } - } -} diff --git a/types/model/name.go b/types/model/name.go index 73d2c306..09895b1f 100644 --- a/types/model/name.go +++ b/types/model/name.go @@ -1,718 +1,390 @@ +// Package model contains types and utilities for parsing, validating, and +// working with model names and digests. package model import ( "cmp" + "encoding/hex" "errors" "fmt" - "hash/maphash" - "io" "log/slog" - "path" "path/filepath" - "slices" "strings" - "sync" - - "github.com/ollama/ollama/types/structs" ) // Errors var ( - // ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not - // used by this package, but are exported so that other packages can - // use them, instead of defining their own errors for them. - ErrInvalidName = errors.New("invalid model name") - ErrIncompleteName = errors.New("incomplete model name") - ErrInvalidDigest = errors.New("invalid digest") + // ErrUnqualifiedName represents an error where a name is not fully + // qualified. It is not used directly in this package, but is here + // to avoid other packages inventing their own error type. + // Additionally, it can be conveniently used via [Unqualified]. + ErrUnqualifiedName = errors.New("unqualified name") ) -// Defaults -const ( - // MaskDefault is the default mask used by [Name.DisplayShortest]. - MaskDefault = "registry.ollama.ai/library/?:latest" - - // MaskNothing is a mask that masks nothing. - MaskNothing = "?/?/?:?" - - // DefaultFill is the default fill used by [ParseName]. - FillDefault = "registry.ollama.ai/library/?:latest+Q4_0" - - // FillNothing is a fill that fills nothing. - FillNothing = "?/?/?:?+?" 
-) - -const MaxNamePartLen = 128 - -type PartKind int - -// Levels of concreteness -const ( - // Each value aligns with its index in the Name.parts array. - - PartHost PartKind = iota - PartNamespace - PartModel - PartTag - PartBuild - PartDigest - - // NumParts is the number of parts in a Name. In this list, it must - // follow the final part. - NumParts - - PartExtraneous = -1 -) - -var kindNames = map[PartKind]string{ - PartHost: "Host", - PartNamespace: "Namespace", - PartModel: "Name", - PartTag: "Tag", - PartBuild: "Build", - PartDigest: "Digest", +// Unqualified is a helper function that returns an error with +// ErrUnqualifiedName as the cause and the name as the message. +func Unqualified(n Name) error { + return fmt.Errorf("%w: %s", ErrUnqualifiedName, n) } -func (k PartKind) String() string { - return cmp.Or(kindNames[k], "Unknown") +// MissingPart is used to indicate any part of a name that was "promised" by +// the presence of a separator, but is missing. +// +// The value was chosen because it is deemed unlikely to be set by a user, +// not a valid part name when checked by [Name.IsValid], and easy to +// spot in logs. +const MissingPart = "!MISSING!" + +// DefaultName returns a name with the default values for the host, namespace, +// and tag parts. The model and digest parts are empty. +// +// - The default host is ("registry.ollama.ai") +// - The default namespace is ("library") +// - The default tag is ("latest") +func DefaultName() Name { + return Name{ + Host: "registry.ollama.ai", + Namespace: "library", + Tag: "latest", + } } -// Name is an opaque reference to a model. It holds the parts of a model -// with the case preserved, but is not directly comparable with other Names -// since model names can be represented with different casing depending on -// the use case. For instance, "Mistral" and "mistral" are the same model -// but each version may have come from different sources (e.g. copied from a -// Web page, or from a file path).
+type partKind int + +const ( + kindHost partKind = iota + kindNamespace + kindModel + kindTag + kindDigest +) + +func (k partKind) String() string { + switch k { + case kindHost: + return "host" + case kindNamespace: + return "namespace" + case kindModel: + return "model" + case kindTag: + return "tag" + case kindDigest: + return "digest" + default: + return "unknown" + } +} + +// Name is a structured representation of a model name string, as defined by +// [ParseNameNoDefaults]. // -// Valid Names can ONLY be constructed by calling [ParseName]. +// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name +// is valid. // -// A Name is valid if and only if is have a valid Model part. The other parts -// are optional. -// -// A Name is considered "complete" if it has all parts present. To check if a -// Name is complete, use [Name.IsComplete]. -// -// To compare two names in a case-insensitive manner, use [Name.EqualFold]. -// -// The parts of a Name are: -// -// - Host: the domain of the model (optional) -// - Namespace: the namespace of the model (optional) -// - Model: the name of the model (required) -// - Tag: the tag of the model (optional) -// - Build: the build of the model; usually the quantization or "file type" (optional) -// -// The parts can be obtained in their original form by calling [Name.Parts]. -// -// To check if a Name has at minimum a valid model part, use [Name.IsValid]. +// It is not directly comparable with other Names. Use [Name.Equal] and +// [Name.MapHash] for determining equality and using as a map key. type Name struct { - _ structs.Incomparable - parts [NumParts]string // host, namespace, model, tag, build, digest - - // TODO(bmizerany): track offsets and hold s (raw string) here? We - // could pack the offsets all into a single uint64 since the first - // parts take less bits since their max offset is less than the max - // offset of the next part. 
This would save a ton of bytes per Name - // and mean zero allocations for String. + Host string + Namespace string + Model string + Tag string + RawDigest string } -// ParseName parses s into a Name, and returns the result of filling it with -// defaults. The input string must be a valid string -// representation of a model name in the form: +// ParseName parses and assembles a Name from a name string. The +// format of a valid name string is: // -// [host/][namespace/][:tag][+build][@-] +// s: +// { host } "/" { namespace } "/" { model } ":" { tag } "@" { digest } +// { host } "/" { namespace } "/" { model } ":" { tag } +// { host } "/" { namespace } "/" { model } "@" { digest } +// { host } "/" { namespace } "/" { model } +// { namespace } "/" { model } ":" { tag } "@" { digest } +// { namespace } "/" { model } ":" { tag } +// { namespace } "/" { model } "@" { digest } +// { namespace } "/" { model } +// { model } ":" { tag } "@" { digest } +// { model } ":" { tag } +// { model } "@" { digest } +// { model } +// "@" { digest } +// host: +// pattern: alphanum { alphanum | "-" | "_" | "." | ":" }* +// length: [1, 350] +// namespace: +// pattern: alphanum { alphanum | "-" | "_" }* +// length: [2, 80] +// model: +// pattern: alphanum { alphanum | "-" | "_" | "." }* +// length: [2, 80] +// tag: +// pattern: alphanum { alphanum | "-" | "_" | "." }* +// length: [1, 80] +// digest: +// pattern: alphanum { alphanum | "-" | ":" }* +// length: [2, 80] // -// The name part is required, all others are optional. If a part is missing, -// it is left empty in the returned Name. If a part is invalid, the zero Ref -// value is returned. +// Most users should use [ParseName] instead, unless they need to support +// different defaults than DefaultName. // -// The build part is normalized to uppercase.
-// -// Examples of valid paths: -// -// "example.com/library/mistral:7b+x" -// "example.com/eva/mistral:7b+Q4_0" -// "mistral:7b+x" -// "example.com/mike/mistral:latest+Q4_0" -// "example.com/bruce/mistral:latest" -// "example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef" -// -// Examples of invalid paths: -// -// "example.com/mistral:7b+" -// "example.com/mistral:7b+Q4_0+" -// "x/y/z/z:8n+I" -// "" -// -// It returns the zero value if any part is invalid. -// -// # Fills -// -// For any valid s, the fill string is used to fill in missing parts of the -// Name. The fill string must be a valid Name with the exception that any part -// may be the string ("?"), which will not be considered for filling. -func ParseNameFill(s, fill string) Name { - var r Name - parts(s)(func(kind PartKind, part string) bool { - if kind == PartDigest && !ParseDigest(part).IsValid() { - r = Name{} - return false - } - if kind == PartExtraneous || !IsValidNamePart(kind, part) { - r = Name{} - return false - } - r.parts[kind] = part - return true - }) - if r.IsValid() || r.IsResolved() { - return fillName(r, fill) - } - return Name{} -} - -// ParseName parses s into a Name, and returns the result of filling it -// with FillDefault. The input string must be a valid string representation -// of a model +// The name returned is not guaranteed to be valid. If it is not valid, the +// field values are left in an undefined state. Use [Name.IsValid] to check +// if the name is valid. func ParseName(s string) Name { - return ParseNameFill(s, "") + return merge(parseName(s), DefaultName()) } -func parseMask(s string) Name { - var r Name - parts(s)(func(kind PartKind, part string) bool { - if part == "?" { - // mask part; treat as empty but valid - return true - } - if !IsValidNamePart(kind, part) { - panic(fmt.Errorf("invalid mask part %s: %q", kind, part)) - } - r.parts[kind] = part - return true - }) - return r -} +// parseName is the same as [ParseName] without a merge. 
+func parseName(s string) Name { + var n Name + var promised bool -func MustParseName(s, fill string) Name { - r := ParseNameFill(s, fill) - if !r.IsValid() { - panic("invalid Name: " + s) + s, n.RawDigest, promised = cutLast(s, "@") + if promised && n.RawDigest == "" { + n.RawDigest = MissingPart } - return r -} -// fillName fills in the missing parts of dst with the parts of src. -// -// The returned Name will only be valid if dst is valid. -// -// It skipps fill parts that are "?". -func fillName(r Name, fill string) Name { - fill = cmp.Or(fill, FillDefault) - f := parseMask(fill) - if fill != FillNothing && f.IsZero() { - panic("invalid fill") + s, n.Tag, _ = cutPromised(s, ":") + s, n.Model, promised = cutPromised(s, "/") + if !promised { + n.Model = s + return n } - for i := range r.parts { - if f.parts[i] == "?" { - continue - } - r.parts[i] = cmp.Or(r.parts[i], f.parts[i]) + s, n.Namespace, promised = cutPromised(s, "/") + if !promised { + n.Namespace = s + return n } - return r + n.Host = s + + return n } -// WithBuild returns a copy of r with the build set to the given string. -func (r Name) WithBuild(build string) Name { - r.parts[PartBuild] = build - return r +// merge merges the host, namespace, and tag parts of the two names, +// preferring the non-empty parts of a. +func merge(a, b Name) Name { + a.Host = cmp.Or(a.Host, b.Host) + a.Namespace = cmp.Or(a.Namespace, b.Namespace) + a.Tag = cmp.Or(a.Tag, b.Tag) + return a } -func (r Name) WithDigest(digest Digest) Name { - r.parts[PartDigest] = digest.String() - return r +// Digest returns the result of [ParseDigest] with the RawDigest field. +func (n Name) Digest() Digest { + return ParseDigest(n.RawDigest) } -var mapHashSeed = maphash.MakeSeed() - -// MapHash returns a case insensitive hash for use in maps and equality -// checks. For a convenient way to compare names, use [Name.EqualFold]. 
-// -//nolint:errcheck -func (r Name) MapHash() uint64 { - // correctly hash the parts with case insensitive comparison - var h maphash.Hash - h.SetSeed(mapHashSeed) - for _, part := range r.parts { - // downcase the part for hashing - for i := range part { - c := part[i] - if c >= 'A' && c <= 'Z' { - c = c - 'A' + 'a' - } - h.WriteByte(c) - } +// String returns the name string, in the format that [ParseNameNoDefaults] +// accepts as valid, built from whichever parts are non-empty. It does not +// check [Name.IsValid]; an invalid name is rendered as-is. +func (n Name) String() string { + var b strings.Builder + if n.Host != "" { + b.WriteString(n.Host) + b.WriteByte('/') } - return h.Sum64() -} - -func (r Name) slice(from, to PartKind) Name { - var v Name - copy(v.parts[from:to+1], r.parts[from:to+1]) - return v -} - -// DisplayShortest returns the shortest possible, masked display string in form: -// -// [host/][/][:] -// -// # Masks -// -// The mask is a string that specifies which parts of the name to omit based -// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the name -// that are the same as the mask, moving from left to right until the first -// unequal part is found. It then moves right to left until the first unequal -// part is found. The result is the shortest possible display string. -// -// Unlike a [Name] the mask can contain "?" characters which are treated as -// wildcards. A "?" will never match a part of the name, since a valid name -// can never contain a "?" character. -// -// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked -// with ("registry.ollama.ai/library/?:latest") will produce the display string -// ("mistral"). -// -// If mask is the empty string, then [MaskDefault] is used. -// -// DisplayShortest panics if the mask is not the empty string, MaskNothing, and -// invalid. -// -// # Builds -// -// For now, DisplayShortest does consider the build or return one in the -// result. We can lift this restriction when needed.
-func (r Name) DisplayShortest(mask string) string { - mask = cmp.Or(mask, MaskDefault) - d := parseMask(mask) - if mask != MaskNothing && r.IsZero() { - panic("invalid Name") + if n.Namespace != "" { + b.WriteString(n.Namespace) + b.WriteByte('/') } - for i := range PartTag { - if !strings.EqualFold(r.parts[i], d.parts[i]) { - break - } - r.parts[i] = "" + b.WriteString(n.Model) + if n.Tag != "" { + b.WriteByte(':') + b.WriteString(n.Tag) } - for i := PartTag; i >= 0; i-- { - if !strings.EqualFold(r.parts[i], d.parts[i]) { - break - } - r.parts[i] = "" + if n.RawDigest != "" { + b.WriteByte('@') + b.WriteString(n.RawDigest) } - return r.slice(PartHost, PartTag).DisplayLong() -} - -// DisplayLongest returns the result of r.DisplayShortest(MaskNothing). -func (r Name) DisplayLongest() string { - return r.DisplayShortest(MaskNothing) -} - -var seps = [...]string{ - PartHost: "/", - PartNamespace: "/", - PartModel: ":", - PartTag: "+", - PartBuild: "@", - PartDigest: "", -} - -// WriteTo implements io.WriterTo. It writes the fullest possible display -// string in form: -// -// //:+@- -// -// Missing parts and their separators are not written. -// -// The full digest is always prefixed with "@". That is if [Name.IsValid] -// reports false and [Name.IsResolved] reports true, then the string is -// returned as "@-". -func (r Name) writeTo(w io.StringWriter) error { - var partsWritten int - for i := range r.parts { - if r.parts[i] == "" { - continue - } - if partsWritten > 0 || i == int(PartDigest) { - if _, err := w.WriteString(seps[i-1]); err != nil { - return err - } - } - if _, err := w.WriteString(r.parts[i]); err != nil { - return err - } - partsWritten++ - } - return nil -} - -var builderPool = sync.Pool{ - New: func() interface{} { - return &strings.Builder{} - }, -} - -// DisplayLong returns the fullest possible display string in form: -// -// //:+ -// -// If any part is missing, it is omitted from the display string. 
-func (r Name) DisplayLong() string { - b := builderPool.Get().(*strings.Builder) - defer builderPool.Put(b) - b.Reset() - b.Grow(50) // arbitrarily long enough for most names - _ = r.writeTo(b) return b.String() } -// GoString implements fmt.GoStringer. It returns a string suitable for -// debugging and logging. It is similar to [Name.DisplayLong] but it always -// returns a string that includes all parts of the Name, with missing parts -// replaced with a ("?"). -func (r Name) GoString() string { - for i := range r.parts { - r.parts[i] = cmp.Or(r.parts[i], "?") - } - return r.DisplayLong() -} - -// LogValue implements slog.Valuer. -func (r Name) LogValue() slog.Value { - return slog.StringValue(r.GoString()) -} - -// IsComplete reports whether the Name is fully qualified. That is it has a -// domain, namespace, name, tag, and build. -func (r Name) IsComplete() bool { - return !slices.Contains(r.parts[:PartDigest], "") -} - -// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the -// build part to be present. -func (r Name) IsCompleteNoBuild() bool { - return !slices.Contains(r.parts[:PartBuild], "") -} - -// IsResolved reports true if the Name has a valid digest. -// -// It is possible to have a valid Name, or a complete Name that is not -// resolved. -func (r Name) IsResolved() bool { - return r.Digest().IsValid() -} - -// Digest returns the digest part of the Name, if any. -// -// If Digest returns a non-empty string, then [Name.IsResolved] will return -// true, and digest is considered valid. -func (r Name) Digest() Digest { - // This was already validated by ParseName, so we can just return it. - return Digest{r.parts[PartDigest]} -} - -// EqualFold reports whether r and o are equivalent model names, ignoring -// case. -func (r Name) EqualFold(o Name) bool { - return r.CompareFold(o) == 0 -} - -// CompareFold performs a case-insensitive cmp.Compare on r and o. -// -// This can be used with [slices.SortFunc]. 
-// -// For simple equality checks, use [Name.EqualFold]. -func (r Name) CompareFold(o Name) int { - return slices.CompareFunc(r.parts[:], o.parts[:], compareFold) -} - -func compareFold(a, b string) int { - return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int { - return cmp.Compare(downcase(a), downcase(b)) - }) -} - -func downcase(r rune) rune { - if r >= 'A' && r <= 'Z' { - return r - 'A' + 'a' - } - return r -} - -func (r Name) Host() string { return r.parts[PartHost] } -func (r Name) Namespace() string { return r.parts[PartNamespace] } -func (r Name) Model() string { return r.parts[PartModel] } -func (r Name) Build() string { return r.parts[PartBuild] } -func (r Name) Tag() string { return r.parts[PartTag] } - -// iter_Seq2 is a iter.Seq2 defined here to avoid the current build -// restrictions in the go1.22 iter package requiring the -// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag, -// which we are not yet ready to support. -// -// Once we are ready to support rangefunc, this can be removed and replaced -// with the iter.Seq2 type. -type iter_Seq2[A, B any] func(func(A, B) bool) - -// Parts returns a sequence of the parts of a Name string from most specific -// to least specific. -// -// It normalizes the input string by removing "http://" and "https://" only. -// No other normalizations are performed. -func parts(s string) iter_Seq2[PartKind, string] { - return func(yield func(PartKind, string) bool) { - if strings.HasPrefix(s, "http://") { - s = strings.TrimPrefix(s, "http://") - } else { - s = strings.TrimPrefix(s, "https://") - } - - if len(s) > MaxNamePartLen || len(s) == 0 { - return - } - - numConsecutiveDots := 0 - partLen := 0 - state, j := PartDigest, len(s) - for i := len(s) - 1; i >= 0; i-- { - if partLen++; partLen > MaxNamePartLen { - // catch a part that is too long early, so - // we don't keep spinning on it, waiting for - // an isInValidPart check which would scan - // over it again. 
- yield(state, s[i+1:j]) - return - } - - switch s[i] { - case '@': - switch state { - case PartDigest: - if !yield(PartDigest, s[i+1:j]) { - return - } - if i == 0 { - // This is the form - // "@" which is valid. - // - // We're done. - return - } - state, j, partLen = PartBuild, i, 0 - default: - yield(PartExtraneous, s[i+1:j]) - return - } - case '+': - switch state { - case PartBuild, PartDigest: - if !yield(PartBuild, s[i+1:j]) { - return - } - state, j, partLen = PartTag, i, 0 - default: - yield(PartExtraneous, s[i+1:j]) - return - } - case ':': - switch state { - case PartTag, PartBuild, PartDigest: - if !yield(PartTag, s[i+1:j]) { - return - } - state, j, partLen = PartModel, i, 0 - case PartHost: - // noop: support for host:port - default: - yield(PartExtraneous, s[i+1:j]) - return - } - case '/': - switch state { - case PartModel, PartTag, PartBuild, PartDigest: - if !yield(PartModel, s[i+1:j]) { - return - } - state, j = PartNamespace, i - case PartNamespace: - if !yield(PartNamespace, s[i+1:j]) { - return - } - state, j, partLen = PartHost, i, 0 - default: - yield(PartExtraneous, s[i+1:j]) - return - } - default: - if s[i] == '.' { - if numConsecutiveDots++; numConsecutiveDots > 1 { - yield(state, "") - return - } - } else { - numConsecutiveDots = 0 - } - } - } - - if state <= PartNamespace { - yield(state, s[:j]) - } else { - yield(PartModel, s[:j]) - } - } -} - -func (r Name) IsZero() bool { - return r.parts == [NumParts]string{} -} - -// IsValid reports if a model has at minimum a valid model part. -func (r Name) IsValid() bool { - // Parts ensures we only have valid parts, so no need to validate - // them here, only check if we have a name or not. - return r.parts[PartModel] != "" -} - -// ParseNameFromURLPath parses forms of a URL path into a Name. Specifically, -// it trims any leading "/" and then calls [ParseName] with fill. 
-func ParseNameFromURLPath(s, fill string) Name { - s = strings.TrimPrefix(s, "/") - return ParseNameFill(s, fill) -} - -func ParseNameFromURLPathFill(s, fill string) Name { - return ParseNameFill(s, fill) -} - -// URLPath returns a complete, canonicalized, relative URL path using the parts of a -// complete Name. -// -// The parts maintain their original case. -// -// Example: -// -// ParseName("example.com/namespace/model:tag+build").URLPath() // returns "/example.com/namespace/model:tag" -func (r Name) DisplayURLPath() string { - return r.DisplayShortest(MaskNothing) -} - -// URLPath returns a complete, canonicalized, relative URL path using the parts of a -// complete Name in the form: -// -// /// -// -// The parts are downcased. -func (r Name) URLPath() string { - return strings.ToLower(path.Join(r.parts[:PartBuild]...)) -} - -// ParseNameFromFilepath parses a file path into a Name. The input string must be a -// valid file path representation of a model name in the form: -// -// host/namespace/model/tag/build -// -// The zero valid is returned if s does not contain all path elements -// leading up to the model part, or if any path element is an invalid part -// for the its corresponding part kind. -// -// The fill string is used to fill in missing parts of any constructed Name. -// See [ParseName] for more information on the fill string. -func ParseNameFromFilepath(s, fill string) Name { - var r Name - for i := range PartBuild + 1 { - part, rest, _ := strings.Cut(s, string(filepath.Separator)) - if !IsValidNamePart(i, part) { - return Name{} - } - r.parts[i] = part - s = rest - if s == "" { - break - } - } - if s != "" { - return Name{} - } - if !r.IsValid() { - return Name{} - } - return fillName(r, fill) -} - -// Filepath returns a complete, canonicalized, relative file path using the -// parts of a complete Name. -// -// Each parts is downcased, except for the build part which is upcased. 
-// -// Example: -// -// ParseName("example.com/namespace/model:tag+build").Filepath() // returns "example.com/namespace/model/tag/BUILD" -func (r Name) Filepath() string { - for i := range r.parts { - if PartKind(i) == PartBuild { - r.parts[i] = strings.ToUpper(r.parts[i]) - } else { - r.parts[i] = strings.ToLower(r.parts[i]) - } - } - return filepath.Join(r.parts[:]...) -} - -// FilepathNoBuild returns a complete, canonicalized, relative file path using -// the parts of a complete Name, but without the build part. -func (r Name) FilepathNoBuild() string { - for i := range PartBuild { - r.parts[i] = strings.ToLower(r.parts[i]) - } - return filepath.Join(r.parts[:PartBuild]...) -} - -// IsValidNamePart reports if s contains all valid characters for the given -// part kind and is under MaxNamePartLen bytes. -func IsValidNamePart(kind PartKind, s string) bool { - if len(s) > MaxNamePartLen { +// IsValid reports whether all parts of the name are present and valid. The +// digest is a special case, and is checked for validity only if present. +func (n Name) IsValid() bool { + if n.RawDigest != "" && !ParseDigest(n.RawDigest).IsValid() { return false } - if s == "" { - return false + return n.IsFullyQualified() +} + +// IsFullyQualified returns true if all parts of the name are present and +// valid without the digest. +func (n Name) IsFullyQualified() bool { + var parts = []string{ + n.Host, + n.Namespace, + n.Model, + n.Tag, } - var consecutiveDots int - for _, c := range []byte(s) { - if c == '.' { - if consecutiveDots++; consecutiveDots >= 2 { - return false - } - } else { - consecutiveDots = 0 - } - if !isValidByteFor(kind, c) { + for i, part := range parts { + if !isValidPart(partKind(i), part) { return false } } return true } -func isValidByteFor(kind PartKind, c byte) bool { - if kind == PartNamespace && c == '.' 
{ +// Filepath returns a canonical filepath that represents the name with each part from +// host to tag as a directory in the form: +// +// {host}/{namespace}/{model}/{tag} +// +// It uses the system's filepath separator and ensures the path is clean. +// +// It panics if the name is not fully qualified. Use [Name.IsFullyQualified] +// to check if the name is fully qualified. +func (n Name) Filepath() string { + if !n.IsFullyQualified() { + panic("illegal attempt to get filepath of invalid name") + } + return filepath.Join( + strings.ToLower(n.Host), + strings.ToLower(n.Namespace), + strings.ToLower(n.Model), + strings.ToLower(n.Tag), + ) +} + +// LogValue returns a slog.Value that represents the name as a string. +func (n Name) LogValue() slog.Value { + return slog.StringValue(n.String()) +} + +func isValidLen(kind partKind, s string) bool { + switch kind { + case kindHost: + return len(s) >= 1 && len(s) <= 350 + case kindTag: + return len(s) >= 1 && len(s) <= 80 + default: + return len(s) >= 2 && len(s) <= 80 + } +} + +func isValidPart(kind partKind, s string) bool { + if !isValidLen(kind, s) { return false } - if kind == PartHost && c == ':' { - return true + for i := range s { + if i == 0 { + if !isAlphanumeric(s[i]) { + return false + } + continue + } + switch s[i] { + case '_', '-': + case '.': + if kind == kindNamespace { + return false + } + case ':': + if kind != kindHost { + return false + } + default: + if !isAlphanumeric(s[i]) { + return false + } + } } - if c == '.' 
|| c == '-' { - return true - } - if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' { - return true - } - return false + return true +} + +func isAlphanumeric(c byte) bool { + return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9' +} + +func cutLast(s, sep string) (before, after string, ok bool) { + i := strings.LastIndex(s, sep) + if i >= 0 { + return s[:i], s[i+len(sep):], true + } + return s, "", false +} + +// cutPromised cuts the last part of s at the last occurrence of sep. If sep is +// found, the part before and after sep are returned as-is unless empty, in +// which case they are returned as MissingPart, which will cause +// [Name.IsValid] to return false. +func cutPromised(s, sep string) (before, after string, ok bool) { + before, after, ok = cutLast(s, sep) + if !ok { + return before, after, false + } + return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true +} + +type DigestType int + +const ( + DigestTypeInvalid DigestType = iota + DigestTypeSHA256 +) + +func (t DigestType) String() string { + if t == DigestTypeSHA256 { + return "sha256" + } + return "unknown" +} + +// Digest represents a type and hash of a digest. It is comparable and can +// be used as a map key. +type Digest struct { + Type DigestType + Hash [32]byte +} + +// ParseDigest parses a digest string into a Digest struct. It accepts both +// the forms: +// +// sha256:deadbeef +// sha256-deadbeef +// +// The hash part must be exactly 64 characters long. +// +// The form "type:hash" does not round trip through [Digest.String]. 
+func ParseDigest(s string) Digest { + typ, hash, ok := cutLast(s, ":") + if !ok { + typ, hash, ok = cutLast(s, "-") + if !ok { + return Digest{} + } + } + if typ != "sha256" { + return Digest{} + } + var d Digest + n, err := hex.Decode(d.Hash[:], []byte(hash)) + if err != nil || n != 32 { + return Digest{} + } + return Digest{Type: DigestTypeSHA256, Hash: d.Hash} +} + +// IsValid returns true if the digest has a valid Type and Hash. +func (d Digest) IsValid() bool { + if d.Type != DigestTypeSHA256 { + return false + } + return d.Hash != [32]byte{} +} + +// String returns the digest as a string in the form "type-hash". The hash +// is encoded as a hex string. +func (d Digest) String() string { + var b strings.Builder + b.WriteString(d.Type.String()) + b.WriteByte('-') + b.WriteString(hex.EncodeToString(d.Hash[:])) + return b.String() +} + +// LogValue returns a slog.Value that represents the digest as a string. +func (d Digest) LogValue() slog.Value { + return slog.StringValue(d.String()) } diff --git a/types/model/name_test.go b/types/model/name_test.go index 7299f5d0..81e9d44c 100644 --- a/types/model/name_test.go +++ b/types/model/name_test.go @@ -1,715 +1,295 @@ package model import ( - "bytes" - "cmp" - "fmt" - "log/slog" - "path/filepath" - "slices" + "reflect" "strings" "testing" ) -type fields struct { - host, namespace, model, tag, build string - digest string -} +const ( + part80 = "88888888888888888888888888888888888888888888888888888888888888888888888888888888" + part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333" +) -func fieldsFromName(p Name) fields { - return fields{ - host: p.parts[PartHost], - namespace: p.parts[PartNamespace], - model: 
p.parts[PartModel], - tag: p.parts[PartTag], - build: p.parts[PartBuild], - digest: p.parts[PartDigest], - } -} - -var testNames = map[string]fields{ - "mistral:latest": {model: "mistral", tag: "latest"}, - "mistral": {model: "mistral"}, - "mistral:30B": {model: "mistral", tag: "30B"}, - "mistral:7b": {model: "mistral", tag: "7b"}, - "mistral:7b+Q4_0": {model: "mistral", tag: "7b", build: "Q4_0"}, - "mistral+KQED": {model: "mistral", build: "KQED"}, - "mistral.x-3:7b+Q4_0": {model: "mistral.x-3", tag: "7b", build: "Q4_0"}, - "mistral:7b+q4_0": {model: "mistral", tag: "7b", build: "q4_0"}, - "llama2": {model: "llama2"}, - "user/model": {namespace: "user", model: "model"}, - "example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"}, - "example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"}, - "localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"}, - - // invalid digest - "mistral:latest@invalid256-": {}, - "mistral:latest@-123": {}, - "mistral:latest@!-123": {}, - "mistral:latest@1-!": {}, - "mistral:latest@": {}, - - // resolved - "x@sha123-12": {model: "x", digest: "sha123-12"}, - "@sha456-22": {digest: "sha456-22"}, - "@sha456-1": {}, - "@@sha123-22": {}, - - // preserves case for build - "x+b": {model: "x", build: "b"}, - - // invalid (includes fuzzing trophies) - " / / : + ": {}, - " / : + ": {}, - " : + ": {}, - " + ": {}, - " : ": {}, - " / ": {}, - " /": {}, - "/ ": {}, - "/": {}, - ":": {}, - "+": {}, - - // (".") in namepsace is not allowed - "invalid.com/7b+x": {}, - - "invalid:7b+Q4_0:latest": {}, - "in valid": {}, - "invalid/y/z/foo": {}, - "/0": {}, - "0 /0": {}, - "0 /": {}, - "0/": {}, - ":/0": {}, - "+0/00000": {}, - 
"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {}, - "0//0": {}, - "m+^^^": {}, - "file:///etc/passwd": {}, - "file:///etc/passwd:latest": {}, - "file:///etc/passwd:latest+u": {}, - - ":x": {}, - "+x": {}, - "x+": {}, - - // Disallow ("\.+") in any part to prevent path traversal anywhere - // we convert the name to a path. - "../etc/passwd": {}, - ".../etc/passwd": {}, - "./../passwd": {}, - "./0+..": {}, - - strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)}, - strings.Repeat("a", MaxNamePartLen+1): {}, -} - -func TestIsValidNameLen(t *testing.T) { - if IsValidNamePart(PartNamespace, strings.Repeat("a", MaxNamePartLen+1)) { - t.Errorf("unexpectedly valid long name") - } -} - -// TestConsecutiveDots tests that consecutive dots are not allowed in any -// part, to avoid path traversal. There also are some tests in testNames, but -// this test is more exhaustive and exists to emphasize the importance of -// preventing path traversal. 
-func TestNameConsecutiveDots(t *testing.T) { - for i := 1; i < 10; i++ { - s := strings.Repeat(".", i) - if i > 1 { - if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" { - t.Errorf("ParseName(%q) = %q; want empty string", s, g) - } - } else { - if g := ParseNameFill(s, FillNothing).DisplayLong(); g != s { - t.Errorf("ParseName(%q) = %q; want %q", s, g, s) - } - } - } -} - -func TestNameParts(t *testing.T) { - var p Name - if w, g := int(NumParts), len(p.parts); w != g { - t.Errorf("Parts() = %d; want %d", g, w) - } -} - -func TestNamePartString(t *testing.T) { - if g := PartKind(-2).String(); g != "Unknown" { - t.Errorf("Unknown part = %q; want %q", g, "Unknown") - } - for kind, name := range kindNames { - if g := kind.String(); g != name { - t.Errorf("%s = %q; want %q", kind, g, name) - } - } -} - -func TestParseName(t *testing.T) { - for baseName, want := range testNames { - for _, prefix := range []string{"", "https://", "http://"} { - // We should get the same results with or without the - // http(s) prefixes - s := prefix + baseName - - t.Run(s, func(t *testing.T) { - name := ParseNameFill(s, FillNothing) - got := fieldsFromName(name) - if got != want { - t.Errorf("ParseName(%q) = %q; want %q", s, got, want) - } - - // test round-trip - if !ParseNameFill(name.DisplayLong(), FillNothing).EqualFold(name) { - t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName) - } - }) - } - } -} - -func TestParseNameFill(t *testing.T) { - cases := []struct { - in string - fill string - want string - }{ - {"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"}, - {"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"}, - {"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"}, - - // Invalid - {"", "example.com/library/?:latest+Q4_0", ""}, - {"llama2:?", "example.com/library/?:latest+Q4_0", ""}, - } - - for _, tt := range cases { - t.Run(tt.in, 
func(t *testing.T) { - name := ParseNameFill(tt.in, tt.fill) - if g := name.DisplayLong(); g != tt.want { - t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want) - } - }) - } - - t.Run("invalid fill", func(t *testing.T) { - defer func() { - if recover() == nil { - t.Fatal("expected panic") - } - }() - ParseNameFill("x", "^") - }) -} - -func TestParseNameHTTPDoublePrefixStrip(t *testing.T) { - cases := []string{ - "http://https://valid.com/valid/valid:latest", - "https://http://valid.com/valid/valid:latest", - } - for _, s := range cases { - t.Run(s, func(t *testing.T) { - name := ParseNameFill(s, FillNothing) - if name.IsValid() { - t.Errorf("expected invalid path; got %#v", name) - } - }) - } - -} - -func TestCompleteWithAndWithoutBuild(t *testing.T) { +func TestParseNameParts(t *testing.T) { cases := []struct { in string - complete bool - completeNoBuild bool + want Name + wantValidDigest bool }{ - {"", false, false}, - {"incomplete/mistral:7b+x", false, false}, - {"incomplete/mistral:7b+Q4_0", false, false}, - {"incomplete:7b+x", false, false}, - {"complete.com/x/mistral:latest+Q4_0", true, true}, - {"complete.com/x/mistral:latest", false, true}, + { + in: "host/namespace/model:tag", + want: Name{ + Host: "host", + Namespace: "namespace", + Model: "model", + Tag: "tag", + }, + }, + { + in: "host/namespace/model", + want: Name{ + Host: "host", + Namespace: "namespace", + Model: "model", + }, + }, + { + in: "namespace/model", + want: Name{ + Namespace: "namespace", + Model: "model", + }, + }, + { + in: "model", + want: Name{ + Model: "model", + }, + }, + { + in: "h/nn/mm:t", + want: Name{ + Host: "h", + Namespace: "nn", + Model: "mm", + Tag: "t", + }, + }, + { + in: part80 + "/" + part80 + "/" + part80 + ":" + part80, + want: Name{ + Host: part80, + Namespace: part80, + Model: part80, + Tag: part80, + }, + }, + { + in: part350 + "/" + part80 + "/" + part80 + ":" + part80, + want: Name{ + Host: part350, + Namespace: part80, + Model: part80, + Tag: 
part80, + }, + }, + { + in: "@digest", + want: Name{ + RawDigest: "digest", + }, + wantValidDigest: false, + }, + { + in: "model@sha256:" + validSHA256Hex, + want: Name{ + Model: "model", + RawDigest: "sha256:" + validSHA256Hex, + }, + wantValidDigest: true, + }, } for _, tt := range cases { t.Run(tt.in, func(t *testing.T) { - p := ParseNameFill(tt.in, FillNothing) - t.Logf("ParseName(%q) = %#v", tt.in, p) - if g := p.IsComplete(); g != tt.complete { - t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete) + got := parseName(tt.in) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want) } - if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild { - t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild) - } - }) - } - - // Complete uses Parts which returns a slice, but it should be - // inlined when used in Complete, preventing any allocations or - // escaping to the heap. - allocs := testing.AllocsPerRun(1000, func() { - keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete()) - }) - if allocs > 0 { - t.Errorf("Complete allocs = %v; want 0", allocs) - } -} - -func TestNameLogValue(t *testing.T) { - cases := []string{ - "example.com/library/mistral:latest+Q4_0", - "mistral:latest", - "mistral:7b+Q4_0", - } - for _, s := range cases { - t.Run(s, func(t *testing.T) { - var b bytes.Buffer - log := slog.New(slog.NewTextHandler(&b, nil)) - name := ParseNameFill(s, FillNothing) - log.Info("", "name", name) - want := fmt.Sprintf("name=%s", name.GoString()) - got := b.String() - if !strings.Contains(got, want) { - t.Errorf("expected log output to contain %q; got %q", want, got) + if got.Digest().IsValid() != tt.wantValidDigest { + t.Errorf("parseName(%q).Digest().IsValid() = %v; want %v", tt.in, got.Digest().IsValid(), tt.wantValidDigest) } }) } } -func TestNameGoString(t *testing.T) { +var testCases = map[string]bool{ // name -> valid + "host/namespace/model:tag": true, + 
"host/namespace/model": false, + "namespace/model": false, + "model": false, + "@sha256-1000000000000000000000000000000000000000000000000000000000000000": false, + "model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false, + "model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false, + + // long (but valid) + part80 + "/" + part80 + "/" + part80 + ":" + part80: true, + part350 + "/" + part80 + "/" + part80 + ":" + part80: true, + + "h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes + "h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes + + "m": false, // model too short + "n/mm:": false, // namespace too short + "h/n/mm:t": false, // namespace too short + "@t": false, // digest too short + "mm@d": false, // digest too short + + // invalids + "^": false, + "mm:": false, + "/nn/mm": false, + "//": false, + "//mm": false, + "hh//": false, + "//mm:@": false, + "00@": false, + "@": false, + + // not starting with alphanum + "-hh/nn/mm:tt@dd": false, + "hh/-nn/mm:tt@dd": false, + "hh/nn/-mm:tt@dd": false, + "hh/nn/mm:-tt@dd": false, + "hh/nn/mm:tt@-dd": false, + + "": false, + + // hosts + "host:https/namespace/model:tag": true, + + // colon in non-host part before tag + "host/name:space/model:tag": false, +} + +func TestNameparseNameDefault(t *testing.T) { + const name = "xx" + n := ParseName(name) + got := n.String() + want := "registry.ollama.ai/library/xx:latest" + if got != want { + t.Errorf("parseName(%q).String() = %q; want %q", name, got, want) + } +} + +func TestNameIsValid(t *testing.T) { + var numStringTests int + for s, want := range testCases { + n := parseName(s) + t.Logf("n: %#v", n) + got := n.IsValid() + if got != want { + t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want) + } + + // Test roundtrip with String + if got { + got := parseName(s).String() + if 
got != s { + t.Errorf("parseName(%q).String() = %q; want %q", s, got, s) + } + numStringTests++ + } + } + + if numStringTests == 0 { + t.Errorf("no tests for Name.String") + } +} + +func TestNameIsValidPart(t *testing.T) { cases := []struct { - name string - in string - wantString string - wantGoString string // default is tt.in + kind partKind + s string + want bool }{ - { - name: "Complete Name", - in: "example.com/library/mistral:latest+Q4_0", - wantGoString: "example.com/library/mistral:latest+Q4_0@?", - }, - { - name: "Short Name", - in: "mistral:latest", - wantGoString: "?/?/mistral:latest+?@?", - }, - { - name: "Long Name", - in: "library/mistral:latest", - wantGoString: "?/library/mistral:latest+?@?", - }, - { - name: "Case Preserved", - in: "Library/Mistral:Latest", - wantGoString: "?/Library/Mistral:Latest+?@?", - }, - { - name: "With digest", - in: "Library/Mistral:Latest@sha256-123456", - wantGoString: "?/Library/Mistral:Latest+?@sha256-123456", - }, + {kind: kindHost, s: "", want: false}, + {kind: kindHost, s: "a", want: true}, + {kind: kindHost, s: "a.", want: true}, + {kind: kindHost, s: "a.b", want: true}, + {kind: kindHost, s: "a:123", want: true}, + {kind: kindHost, s: "a:123/aa/bb", want: false}, + {kind: kindNamespace, s: "bb", want: true}, + {kind: kindNamespace, s: "a.", want: false}, + {kind: kindModel, s: "-h", want: false}, + {kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true}, } - for _, tt := range cases { - t.Run(tt.name, func(t *testing.T) { - p := ParseNameFill(tt.in, FillNothing) - tt.wantGoString = cmp.Or(tt.wantGoString, tt.in) - if g := fmt.Sprintf("%#v", p); g != tt.wantGoString { - t.Errorf("GoString() = %q; want %q", g, tt.wantGoString) + t.Run(tt.s, func(t *testing.T) { + got := isValidPart(tt.kind, tt.s) + if got != tt.want { + t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want) } }) } + } -func TestDisplayLongest(t *testing.T) { - g := 
ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest() - if g != "example.com/library/mistral:latest" { - t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest") +func FuzzName(f *testing.F) { + for s := range testCases { + f.Add(s) } -} - -func TestDisplayShortest(t *testing.T) { - cases := []struct { - in string - mask string - want string - wantPanic bool - }{ - {"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false}, - {"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false}, - {"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false}, - {"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false}, - - // case-insensitive - {"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false}, - {"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false}, - {"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false}, - {"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false}, - {"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false}, - - // zero value - {"", MaskDefault, "", true}, - - // invalid mask - {"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true}, - - // DefaultMask - {"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false}, - - // Auto-Fill - {"x", "example.com/library/_:latest", "x", false}, - {"x", "example.com/library/_:latest+Q4_0", "x", false}, - {"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false}, - {"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false}, - } - - for _, tt := range cases { - t.Run("", func(t *testing.T) { - defer func() { - if tt.wantPanic { - if recover() == nil { - t.Errorf("expected panic") - } + f.Fuzz(func(t *testing.T, s string) { 
+ n := parseName(s) + if n.IsValid() { + parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest} + for _, part := range parts { + if part == ".." { + t.Errorf("unexpected .. as valid part") } - }() + if len(part) > 350 { + t.Errorf("part too long: %q", part) + } + } + if n.String() != s { + t.Errorf("String() = %q; want %q", n.String(), s) + } + } - p := ParseNameFill(tt.in, FillNothing) - t.Logf("ParseName(%q) = %#v", tt.in, p) - if g := p.DisplayShortest(tt.mask); g != tt.want { - t.Errorf("got = %q; want %q", g, tt.want) + }) +} + +const validSHA256Hex = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" + +func TestParseDigest(t *testing.T) { + cases := map[string]bool{ + "sha256-1000000000000000000000000000000000000000000000000000000000000000": true, + "sha256:1000000000000000000000000000000000000000000000000000000000000000": true, + "sha256:0000000000000000000000000000000000000000000000000000000000000000": false, + + "sha256:" + validSHA256Hex: true, + "sha256-" + validSHA256Hex: true, + + "": false, + "sha134:" + validSHA256Hex: false, + "sha256:" + validSHA256Hex + "x": false, + "sha256:x" + validSHA256Hex: false, + "sha256-" + validSHA256Hex + "x": false, + "sha256-x": false, + } + + for s, want := range cases { + t.Run(s, func(t *testing.T) { + d := ParseDigest(s) + if d.IsValid() != want { + t.Errorf("ParseDigest(%q).IsValid() = %v; want %v", s, d.IsValid(), want) + } + norm := strings.ReplaceAll(s, ":", "-") + if d.IsValid() && d.String() != norm { + t.Errorf("ParseDigest(%q).String() = %q; want %q", s, d.String(), norm) } }) } } -func TestParseNameAllocs(t *testing.T) { - allocs := testing.AllocsPerRun(1000, func() { - keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing)) - }) - if allocs > 0 { - t.Errorf("ParseName allocs = %v; want 0", allocs) - } -} - -func BenchmarkParseName(b *testing.B) { - b.ReportAllocs() - - for range b.N { - keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing)) - } -} - -func 
FuzzParseNameFromFilepath(f *testing.F) { - f.Add("example.com/library/mistral/7b/Q4_0") - f.Add("example.com/../mistral/7b/Q4_0") - f.Add("example.com/x/../7b/Q4_0") - f.Add("example.com/x/../7b") - f.Fuzz(func(t *testing.T, s string) { - name := ParseNameFromFilepath(s, FillNothing) - if strings.Contains(s, "..") && !name.IsZero() { - t.Fatalf("non-zero value for path with '..': %q", s) - } - if name.IsValid() == name.IsZero() { - t.Errorf("expected valid path to be non-zero value; got %#v", name) - } - }) -} - -func FuzzParseName(f *testing.F) { - f.Add("example.com/mistral:7b+Q4_0") - f.Add("example.com/mistral:7b+q4_0") - f.Add("example.com/mistral:7b+x") - f.Add("x/y/z:8n+I") - f.Add(":x") - f.Add("@sha256-123456") - f.Add("example.com/mistral:latest+Q4_0@sha256-123456") - f.Add(":@!@") - f.Add("...") - f.Fuzz(func(t *testing.T, s string) { - r0 := ParseNameFill(s, FillNothing) - - if strings.Contains(s, "..") && !r0.IsZero() { - t.Fatalf("non-zero value for path with '..': %q", s) - } - - if !r0.IsValid() && !r0.IsResolved() { - if !r0.EqualFold(Name{}) { - t.Errorf("expected invalid path to be zero value; got %#v", r0) - } - t.Skipf("invalid path: %q", s) - } - - for _, p := range r0.parts { - if len(p) > MaxNamePartLen { - t.Errorf("part too long: %q", p) - } - } - - if !strings.EqualFold(r0.DisplayLong(), s) { - t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, r0.DisplayLong(), s) - } - - r1 := ParseNameFill(r0.DisplayLong(), FillNothing) - if !r0.EqualFold(r1) { - t.Errorf("round-trip mismatch: %+v != %+v", r0, r1) - } - }) -} - -func TestNameStringAllocs(t *testing.T) { - name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing) - allocs := testing.AllocsPerRun(1000, func() { - keep(name.DisplayLong()) - }) - if allocs > 1 { - t.Errorf("String allocs = %v; want 0", allocs) - } -} - -func TestNamePath(t *testing.T) { +func TestDigestString(t *testing.T) { cases := []struct { in string want string 
}{ - {"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"}, - - // incomplete - {"example.com/library/mistral:latest", "example.com/library/mistral:latest"}, - {"", ""}, + {in: "sha256:" + validSHA256Hex, want: "sha256-" + validSHA256Hex}, + {in: "sha256-" + validSHA256Hex, want: "sha256-" + validSHA256Hex}, + {in: "", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"}, + {in: "blah-100000000000000000000000000000000000000000000000000000000000000", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"}, } + for _, tt := range cases { t.Run(tt.in, func(t *testing.T) { - p := ParseNameFill(tt.in, FillNothing) - t.Logf("ParseName(%q) = %#v", tt.in, p) - if g := p.DisplayURLPath(); g != tt.want { - t.Errorf("got = %q; want %q", g, tt.want) + d := ParseDigest(tt.in) + if d.String() != tt.want { + t.Errorf("ParseDigest(%q).String() = %q; want %q", tt.in, d.String(), tt.want) } }) } } - -func TestNameFilepath(t *testing.T) { - cases := []struct { - in string - want string - wantNoBuild string - }{ - { - in: "example.com/library/mistral:latest+Q4_0", - want: "example.com/library/mistral/latest/Q4_0", - wantNoBuild: "example.com/library/mistral/latest", - }, - { - in: "Example.Com/Library/Mistral:Latest+Q4_0", - want: "example.com/library/mistral/latest/Q4_0", - wantNoBuild: "example.com/library/mistral/latest", - }, - { - in: "Example.Com/Library/Mistral:Latest+Q4_0", - want: "example.com/library/mistral/latest/Q4_0", - wantNoBuild: "example.com/library/mistral/latest", - }, - { - in: "example.com/library/mistral:latest", - want: "example.com/library/mistral/latest", - wantNoBuild: "example.com/library/mistral/latest", - }, - { - in: "", - want: "", - wantNoBuild: "", - }, - } - for _, tt := range cases { - t.Run(tt.in, func(t *testing.T) { - p := ParseNameFill(tt.in, FillNothing) - t.Logf("ParseName(%q) = %#v", tt.in, p) - g := p.Filepath() - g = filepath.ToSlash(g) - if g != tt.want { - 
t.Errorf("got = %q; want %q", g, tt.want) - } - g = p.FilepathNoBuild() - g = filepath.ToSlash(g) - if g != tt.wantNoBuild { - t.Errorf("got = %q; want %q", g, tt.wantNoBuild) - } - }) - } -} - -func TestParseNameFilepath(t *testing.T) { - cases := []struct { - in string - fill string // default is FillNothing - want string - }{ - { - in: "example.com/library/mistral/latest/Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "example.com/library/mistral/latest", - fill: "?/?/?:latest+Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "example.com/library/mistral", - fill: "?/?/?:latest+Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "example.com/library", - want: "", - }, - { - in: "example.com/", - want: "", - }, - { - in: "example.com/^/mistral/latest/Q4_0", - want: "", - }, - { - in: "example.com/library/mistral/../Q4_0", - want: "", - }, - { - in: "example.com/library/mistral/latest/Q4_0/extra", - want: "", - }, - } - for _, tt := range cases { - t.Run(tt.in, func(t *testing.T) { - in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator)) - fill := cmp.Or(tt.fill, FillNothing) - want := ParseNameFill(tt.want, fill) - if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) { - t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want) - } - }) - } -} - -func TestParseNameFromPath(t *testing.T) { - cases := []struct { - in string - want string - fill string // default is FillNothing - }{ - { - in: "example.com/library/mistral:latest+Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "/example.com/library/mistral:latest+Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "/example.com/library/mistral", - want: "example.com/library/mistral", - }, - { - in: "/example.com/library/mistral", - fill: "?/?/?:latest+Q4_0", - want: "example.com/library/mistral:latest+Q4_0", - }, - { - in: "/example.com/library", - want: "", - }, - { - in: "/example.com/", 
- want: "", - }, - { - in: "/example.com/^/mistral/latest", - want: "", - }, - } - for _, tt := range cases { - t.Run(tt.in, func(t *testing.T) { - fill := cmp.Or(tt.fill, FillNothing) - if g := ParseNameFromURLPath(tt.in, fill); g.DisplayLong() != tt.want { - t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want) - } - }) - } -} - -func ExampleName_MapHash() { - m := map[uint64]bool{} - - // key 1 - m[ParseNameFill("mistral:latest+q4", FillNothing).MapHash()] = true - m[ParseNameFill("miSTRal:latest+Q4", FillNothing).MapHash()] = true - m[ParseNameFill("mistral:LATest+Q4", FillNothing).MapHash()] = true - - // key 2 - m[ParseNameFill("mistral:LATest", FillNothing).MapHash()] = true - - fmt.Println(len(m)) - // Output: - // 2 -} - -func ExampleName_CompareFold_sort() { - names := []Name{ - ParseNameFill("mistral:latest", FillNothing), - ParseNameFill("mistRal:7b+q4", FillNothing), - ParseNameFill("MIstral:7b", FillNothing), - } - - slices.SortFunc(names, Name.CompareFold) - - for _, n := range names { - fmt.Println(n.DisplayLong()) - } - - // Output: - // MIstral:7b - // mistRal:7b+q4 - // mistral:latest -} - -func ExampleName_completeAndResolved() { - for _, s := range []string{ - "x/y/z:latest+q4_0@sha123-abc", - "x/y/z:latest+q4_0", - "@sha123-abc", - } { - name := ParseNameFill(s, FillNothing) - fmt.Printf("complete:%v resolved:%v digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest()) - } - - // Output: - // complete:true resolved:true digest:sha123-abc - // complete:true resolved:false digest: - // complete:false resolved:true digest:sha123-abc -} - -func ExampleName_DisplayShortest() { - name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing) - - fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest")) - fmt.Println(name.DisplayShortest("example.com/_/_:latest")) - fmt.Println(name.DisplayShortest("example.com/_/_:_")) - fmt.Println(name.DisplayShortest("_/_/_:_")) - - // Default - name = 
ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing) - fmt.Println(name.DisplayShortest("")) - - // Output: - // mistral - // jmorganca/mistral - // jmorganca/mistral:latest - // example.com/jmorganca/mistral:latest - // mistral -} - -func keep[T any](v T) T { return v } diff --git a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608 b/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab similarity index 53% rename from types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608 rename to types/model/testdata/fuzz/FuzzName/d37463aa416f6bab index 0a66beb8..0034d9f5 100644 --- a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608 +++ b/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab @@ -1,2 +1,2 @@ go test fuzz v1 -string(":") +string("00@") diff --git a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa b/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa deleted file mode 100644 index 0cdf1eac..00000000 --- a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -string("/0") diff --git a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6 b/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6 deleted file mode 100644 index c5d09a4c..00000000 --- a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -string("0//0") diff --git a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d b/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d deleted file mode 100644 index 880ce7a3..00000000 --- a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -string("0 /0") diff --git a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab b/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab deleted file mode 100644 index fa981c52..00000000 --- a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -string("+0/00000") diff 
--git a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948 b/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948 deleted file mode 100644 index db07727d..00000000 --- a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")