6fd04ca922
Still not complete: the prediction needs further refinement to account for each discrete GPU's available space individually, so we can see how many layers fit on each one. Since we can't split one layer across multiple GPUs, we can't treat the combined free space as one logical block.
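A minimal sketch of the per-GPU fitting the message describes (hypothetical helper, not ollama's actual scheduler code, and assuming a uniform layer size for illustration): because one layer cannot be split across GPUs, each GPU's free space is checked on its own rather than summing all free VRAM into one logical block.

package main

import "fmt"

// layersPerGPU returns how many whole layers of layerBytes fit in each GPU's
// free space. Checking each GPU separately is what makes the count differ from
// treating the total free VRAM as a single block.
func layersPerGPU(freeBytes []uint64, layerBytes uint64) []int {
	fits := make([]int, len(freeBytes))
	for i, free := range freeBytes {
		fits[i] = int(free / layerBytes) // whole layers only; leftover space can't hold another layer
	}
	return fits
}

func main() {
	// Two GPUs with different amounts free; 900 MiB per layer (made-up numbers).
	free := []uint64{6 << 30, 2 << 30}
	fmt.Println(layersPerGPU(free, 900<<20)) // [6 2]: 8 layers total, not the 9 a single 8 GiB block would suggest
}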
//go:build integration

package integration

import (
	"context"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
)

func TestContextExhaustion(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) // Longer needed for small footprint GPUs
	defer cancel()
	// Set up the test data
	req := api.GenerateRequest{
		Model:  "llama2",
		Prompt: "Write me a story with a ton of emojis?",
		Stream: &stream, // stream and GenerateTestHelper below are defined elsewhere in the integration package
		Options: map[string]interface{}{
			"temperature": 0,
			"seed":        123,
			"num_ctx":     128,
		},
	}
	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
}
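Because of the //go:build integration constraint, this test is skipped by ordinary builds; it would presumably be run with the tag enabled, e.g. go test -tags integration -run TestContextExhaustion ./integration/ (package path assumed).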