diff --git a/server/sched_test.go b/server/sched_test.go index 7991e7c5..9ddd1fab 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -7,7 +7,6 @@ import ( "fmt" "log/slog" "os" - "runtime" "testing" "time" @@ -356,42 +355,6 @@ func TestRequestsMultipleLoadedModels(t *testing.T) { s.loadedMu.Unlock() } -func TestRequestsModelTooBigForSystem(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) - defer done() - s := InitScheduler(ctx) - s.getGpuFn = func() gpu.GpuInfoList { - g := gpu.GpuInfo{Library: "metal"} - g.TotalMemory = 4 * format.MebiByte - g.FreeMemory = 3 * format.MebiByte - return []gpu.GpuInfo{g} - } - - s.getCpuFn = func() gpu.GpuInfoList { - g := gpu.GpuInfo{Library: "cpu"} - g.TotalMemory = 4 * format.MebiByte - g.FreeMemory = 2 * format.MebiByte - return []gpu.GpuInfo{g} - } - a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond}) - - s.newServerFn = a.newServer - slog.Info("a") - s.pendingReqCh <- a.req - require.Len(t, s.pendingReqCh, 1) - s.Run(ctx) - select { - case <-a.req.successCh: - if runtime.GOOS == "linux" { - t.Fatal("request should have been rejected with out of space") - } - // else - Darwin and Windows don't reject right now - case err := <-a.req.errCh: - require.Contains(t, err.Error(), "too large") - case <-ctx.Done(): - t.Fatal("timeout") - } -} func TestGetRunner(t *testing.T) { ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done()