diff --git a/integration/llm_image_test.go b/integration/llm_image_test.go
index d0c861cc..c7b56890 100644
--- a/integration/llm_image_test.go
+++ b/integration/llm_image_test.go
@@ -12,7 +12,7 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestIntegrationMultimodal(t *testing.T) {
+func TestIntegrationLlava(t *testing.T) {
 	image, err := base64.StdEncoding.DecodeString(imageEncoding)
 	require.NoError(t, err)
 	req := api.GenerateRequest{
@@ -39,6 +39,33 @@ func TestIntegrationMultimodal(t *testing.T) {
 	DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
 }
 
+func TestIntegrationMllama(t *testing.T) {
+	image, err := base64.StdEncoding.DecodeString(imageEncoding)
+	require.NoError(t, err)
+	req := api.GenerateRequest{
+		// TODO fix up once we publish the final image
+		Model:  "x/llama3.2-vision",
+		Prompt: "what does the text in this image say?",
+		Stream: &stream,
+		Options: map[string]interface{}{
+			"seed":        42,
+			"temperature": 0.0,
+		},
+		Images: []api.ImageData{
+			image,
+		},
+	}
+
+	resp := "the ollamas"
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+	defer cancel()
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	require.NoError(t, PullIfMissing(ctx, client, req.Model))
+	// mllama models on CPU can be quite slow to start,
+	DoGenerate(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
+}
+
 const imageEncoding = `iVBORw0KGgoAAAANSUhEUgAAANIAAAB4CAYAAACHHqzKAAAAAXNSR0IArs4c6QAAAIRlWElmTU0AKgAAAAgABQESAAMAAAABAAEAAAEaAAUAAAABAAAASgEb
 AAUAAAABAAAAUgEoAAMAAAABAAIAAIdpAAQAAAABAAAAWgAAAAAAAABIAAAAAQAAAEgAAAABAAOgAQADAAAAAQABAACgAgAEAAAAAQAAANKgAwAEAAAAAQAA
 AHgAAAAAXdsepgAAAAlwSFlzAAALEwAACxMBAJqcGAAAAVlpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6