Merge pull request #6190 from ollama/mxyng/fix-integration

fix concurrency test
This commit is contained in:
Michael Yang 2024-08-05 16:45:49 -07:00 committed by GitHub
commit 10d49bce70
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 17 additions and 18 deletions

View file

@ -5,6 +5,7 @@ package integration
import ( import (
"context" "context"
"log/slog" "log/slog"
"os"
"strconv" "strconv"
"sync" "sync"
"testing" "testing"
@ -13,7 +14,6 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
) )
@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
} }
) )
var wg sync.WaitGroup var wg sync.WaitGroup
@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
reqLimit := len(req) reqLimit := len(req)
iterLimit := 5 iterLimit := 5
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
if vram != "" { maxVram, err := strconv.ParseUint(s, 10, 64)
max, err := strconv.ParseUint(vram, 10, 64)
require.NoError(t, err) require.NoError(t, err)
// Don't hammer on small VRAM cards... // Don't hammer on small VRAM cards...
if max < 4*1024*1024*1024 { if maxVram < 4*format.GibiByte {
reqLimit = min(reqLimit, 2) reqLimit = min(reqLimit, 2)
iterLimit = 2 iterLimit = 2
} }
@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
for i := 0; i < len(req); i++ { for i := 0; i < len(req); i++ {
// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long // Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
if i > 1 && consumed > vram { if i > 1 && consumed > maxVram {
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
break break
} }
consumed += chosenModels[i].size consumed += chosenModels[i].size
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
wg.Add(1) wg.Add(1)
go func(i int) { go func(i int) {

View file

@ -35,8 +35,8 @@ var (
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims"}, {"england", "english", "massachusetts", "pilgrims"},
} }
) )

View file

@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount := 32 threadCount := 32
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 { if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
threadCount = maxQueue threadCount = int(maxQueue)
} else { } else {
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount)) t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
} }

View file

@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
}, },
}, },
[][]string{ [][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"soil", "organic", "earth", "black", "tan"}, {"soil", "organic", "earth", "black", "tan"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
[]string{"fourth", "july", "declaration", "independence"}, {"fourth", "july", "declaration", "independence"},
[]string{"nitrogen", "oxygen", "carbon", "dioxide"}, {"nitrogen", "oxygen", "carbon", "dioxide"},
} }
} }