refined test timing
adjust timing on some tests so they don't time out on small/slow GPUs
parent 5e8ff556cb
commit 68dfc6236a

4 changed files with 59 additions and 34 deletions
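The core of the change to TestIntegrationConcurrentPredictOrcaMini is to scale the test load to the GPU memory advertised via OLLAMA_MAX_VRAM instead of only stretching timeouts. A minimal standalone sketch of that pattern follows; the helper name concurrencyForVRAM and the main function are illustrative assumptions, not part of this commit:

	// Illustrative sketch (not part of the commit): scale test concurrency to the
	// VRAM advertised via OLLAMA_MAX_VRAM so small cards are not overloaded.
	package main

	import (
		"fmt"
		"os"
		"strconv"
	)

	// concurrencyForVRAM is a hypothetical helper mirroring the logic added in the
	// diff below: with less than 4 GiB of VRAM, cap concurrent requests at 2 and
	// run fewer iterations per request.
	func concurrencyForVRAM(defaultReqs, defaultIters int) (int, int) {
		reqs, iters := defaultReqs, defaultIters
		if vram := os.Getenv("OLLAMA_MAX_VRAM"); vram != "" {
			vramBytes, err := strconv.ParseUint(vram, 10, 64)
			if err != nil {
				return reqs, iters // malformed value: keep the defaults
			}
			if vramBytes < 4*1024*1024*1024 {
				if reqs > 2 {
					reqs = 2
				}
				iters = 2
			}
		}
		return reqs, iters
	}

	func main() {
		reqs, iters := concurrencyForVRAM(4, 5)
		fmt.Printf("running %d concurrent requests, %d iterations each\n", reqs, iters)
	}

Reading OLLAMA_MAX_VRAM keeps the decision in the test harness, so CI machines with small cards can opt into a lighter run without code changes.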
@@ -19,17 +19,19 @@ func TestMultiModelConcurrency(t *testing.T) {
 	var (
 		req = [2]api.GenerateRequest{
 			{
 				Model: "orca-mini",
 				Prompt: "why is the ocean blue?",
 				Stream: &stream,
+				KeepAlive: &api.Duration{Duration: 10 * time.Second},
 				Options: map[string]interface{}{
 					"seed": 42,
 					"temperature": 0.0,
 				},
 			}, {
 				Model: "tinydolphin",
 				Prompt: "what is the origin of the us thanksgiving holiday?",
 				Stream: &stream,
+				KeepAlive: &api.Duration{Duration: 10 * time.Second},
 				Options: map[string]interface{}{
 					"seed": 42,
 					"temperature": 0.0,
@@ -43,7 +45,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 	)
 	var wg sync.WaitGroup
 	wg.Add(len(req))
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
 	defer cancel()

 	client, _, cleanup := InitServerConnection(ctx, t)
@@ -56,32 +58,46 @@ func TestMultiModelConcurrency(t *testing.T) {
 	for i := 0; i < len(req); i++ {
 		go func(i int) {
 			defer wg.Done()
-			DoGenerate(ctx, t, client, req[i], resp[i], 30*time.Second, 10*time.Second)
+			DoGenerate(ctx, t, client, req[i], resp[i], 60*time.Second, 10*time.Second)
 		}(i)
 	}
 	wg.Wait()
 }

 func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) // GTX 750 2G card takes ~9 minutes
+	req, resp := GenerateRequests()
+	reqLimit := len(req)
+	iterLimit := 5
+
+	vram := os.Getenv("OLLAMA_MAX_VRAM")
+	if vram != "" {
+		max, err := strconv.ParseUint(vram, 10, 64)
+		require.NoError(t, err)
+		// Don't hammer on small VRAM cards...
+		if max < 4*1024*1024*1024 {
+			reqLimit = min(reqLimit, 2)
+			iterLimit = 2
+		}
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 9*time.Minute)
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()

-	req, resp := GenerateRequests()
 	// Get the server running (if applicable) warm the model up with a single initial request
-	DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 5*time.Second)
+	DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 10*time.Second)

 	var wg sync.WaitGroup
-	wg.Add(len(req))
-	for i := 0; i < len(req); i++ {
+	wg.Add(reqLimit)
+	for i := 0; i < reqLimit; i++ {
 		go func(i int) {
 			defer wg.Done()
-			for j := 0; j < 5; j++ {
+			for j := 0; j < iterLimit; j++ {
 				slog.Info("Starting", "req", i, "iter", j)
-				// On slower GPUs it can take a while to process the 4 concurrent requests
+				// On slower GPUs it can take a while to process the concurrent requests
 				// so we allow a much longer initial timeout
-				DoGenerate(ctx, t, client, req[i], resp[i], 90*time.Second, 5*time.Second)
+				DoGenerate(ctx, t, client, req[i], resp[i], 120*time.Second, 20*time.Second)
 			}
 		}(i)
 	}

@@ -11,7 +11,7 @@ import (
 )

 func TestContextExhaustion(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) // Longer needed for small footprint GPUs
+	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute) // Longer needed for small footprint GPUs
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{

@@ -32,7 +32,11 @@ func TestIntegrationMultimodal(t *testing.T) {
 	resp := "the ollam"
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancel()
-	GenerateTestHelper(ctx, t, req, []string{resp})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	require.NoError(t, PullIfMissing(ctx, client, req.Model))
+	// llava models on CPU can be quite slow to start,
+	DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
 }

 const imageEncoding = `iVBORw0KGgoAAAANSUhEUgAAANIAAAB4CAYAAACHHqzKAAAAAXNSR0IArs4c6QAAAIRlWElmTU0AKgAAAAgABQESAAMAAAABAAEAAAEaAAUAAAABAAAASgEb

@@ -140,7 +140,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er

 	showCtx, cancel := context.WithDeadlineCause(
 		ctx,
-		time.Now().Add(5*time.Second),
+		time.Now().Add(10*time.Second),
 		fmt.Errorf("show for existing model %s took too long", modelName),
 	)
 	defer cancel()
@@ -287,41 +287,46 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 	return []api.GenerateRequest{
 		{
 			Model: "orca-mini",
 			Prompt: "why is the ocean blue?",
 			Stream: &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed": 42,
 				"temperature": 0.0,
 			},
 		}, {
 			Model: "orca-mini",
 			Prompt: "why is the color of dirt brown?",
 			Stream: &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed": 42,
 				"temperature": 0.0,
 			},
 		}, {
 			Model: "orca-mini",
 			Prompt: "what is the origin of the us thanksgiving holiday?",
 			Stream: &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed": 42,
 				"temperature": 0.0,
 			},
 		}, {
 			Model: "orca-mini",
 			Prompt: "what is the origin of independence day?",
 			Stream: &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed": 42,
 				"temperature": 0.0,
 			},
 		}, {
 			Model: "orca-mini",
 			Prompt: "what is the composition of air?",
 			Stream: &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed": 42,
 				"temperature": 0.0,