Merge branch 'ollama:main' into custom-gpu-defs
This commit is contained in:
commit
3934c15895
5 changed files with 25 additions and 9 deletions
|
@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||||
)
|
)
|
||||||
|
|
||||||
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
|
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
|
||||||
|
case "stablelm":
|
||||||
|
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
|
||||||
|
partialOffload = max(
|
||||||
|
4*batch*(vocab+2*embedding),
|
||||||
|
fullOffload,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
21
llm/gguf.go
21
llm/gguf.go
|
@ -248,13 +248,17 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
padding := llm.padding(offset, int64(alignment))
|
padding := llm.padding(offset, int64(alignment))
|
||||||
if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
|
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tensor := range llm.tensors {
|
for _, tensor := range llm.tensors {
|
||||||
padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
|
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
||||||
if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
padding := llm.padding(int64(tensor.size()), int64(alignment))
|
||||||
|
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -623,8 +627,9 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
padding := llm.padding(offset, 32)
|
var alignment int64 = 32
|
||||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
|
padding := llm.padding(offset, alignment)
|
||||||
|
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -638,8 +643,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
padding := llm.padding(offset, 32)
|
padding := llm.padding(offset, alignment)
|
||||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
|
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -648,5 +653,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (gguf) padding(offset, align int64) int64 {
|
func (gguf) padding(offset, align int64) int64 {
|
||||||
return (offset + align - 1) / align * align
|
return (align - offset%align) % align
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,7 +112,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||||
var memoryLayerOutput uint64
|
var memoryLayerOutput uint64
|
||||||
for k, v := range layers {
|
for k, v := range layers {
|
||||||
if !strings.HasPrefix(k, "blk.") {
|
if !strings.HasPrefix(k, "blk.") {
|
||||||
slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
|
|
||||||
memoryLayerOutput += v.size()
|
memoryLayerOutput += v.size()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -521,6 +521,8 @@ func parts(s string) iter_Seq2[PartKind, string] {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
state, j, partLen = PartModel, i, 0
|
state, j, partLen = PartModel, i, 0
|
||||||
|
case PartHost:
|
||||||
|
// noop: support for host:port
|
||||||
default:
|
default:
|
||||||
yield(PartExtraneous, s[i+1:j])
|
yield(PartExtraneous, s[i+1:j])
|
||||||
return
|
return
|
||||||
|
@ -678,6 +680,9 @@ func isValidByteFor(kind PartKind, c byte) bool {
|
||||||
if kind == PartNamespace && c == '.' {
|
if kind == PartNamespace && c == '.' {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if kind == PartHost && c == ':' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
if c == '.' || c == '-' {
|
if c == '.' || c == '-' {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,7 @@ var testNames = map[string]fields{
|
||||||
"user/model": {namespace: "user", model: "model"},
|
"user/model": {namespace: "user", model: "model"},
|
||||||
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
|
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
|
||||||
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
|
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
|
||||||
|
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
|
||||||
|
|
||||||
// invalid digest
|
// invalid digest
|
||||||
"mistral:latest@invalid256-": {},
|
"mistral:latest@invalid256-": {},
|
||||||
|
|
Loading…
Reference in a new issue