fix concurrency test
parent 50ee8b5f56
commit 7ed367419e
4 changed files with 17 additions and 18 deletions
@@ -5,6 +5,7 @@ package integration
 import (
 	"context"
 	"log/slog"
+	"os"
 	"strconv"
 	"sync"
 	"testing"
@@ -13,7 +14,6 @@ import (
 	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )

@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 			},
 		}
 		resp = [2][]string{
-			[]string{"sunlight"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"sunlight"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
 		}
 	)
 	var wg sync.WaitGroup
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	reqLimit := len(req)
 	iterLimit := 5

-	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
-	if vram != "" {
-		max, err := strconv.ParseUint(vram, 10, 64)
+	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
+		maxVram, err := strconv.ParseUint(s, 10, 64)
 		require.NoError(t, err)
 		// Don't hammer on small VRAM cards...
-		if max < 4*1024*1024*1024 {
+		if maxVram < 4*format.GibiByte {
 			reqLimit = min(reqLimit, 2)
 			iterLimit = 2
 		}
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
 	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > vram {
-			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+		if i > 1 && consumed > maxVram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))

 		wg.Add(1)
 		go func(i int) {
@@ -35,8 +35,8 @@ var (
 		},
 	}
 	resp = [2][]string{
-		[]string{"sunlight"},
-		[]string{"england", "english", "massachusetts", "pilgrims"},
+		{"sunlight"},
+		{"england", "english", "massachusetts", "pilgrims"},
 	}
 )

@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
 	if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
-		threadCount = maxQueue
+		threadCount = int(maxQueue)
 	} else {
 		t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
 	}
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			[]string{"sunlight"},
-			[]string{"soil", "organic", "earth", "black", "tan"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
-			[]string{"fourth", "july", "declaration", "independence"},
-			[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
+			{"sunlight"},
+			{"soil", "organic", "earth", "black", "tan"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"fourth", "july", "declaration", "independence"},
+			{"nitrogen", "oxygen", "carbon", "dioxide"},
 		}
 }