diff --git a/server/sched_test.go b/server/sched_test.go
index 7991e7c5..9ddd1fab 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
-	"runtime"
 	"testing"
 	"time"
 
@@ -356,42 +355,6 @@ func TestRequestsMultipleLoadedModels(t *testing.T) {
 	s.loadedMu.Unlock()
 }
 
-func TestRequestsModelTooBigForSystem(t *testing.T) {
-	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
-	defer done()
-	s := InitScheduler(ctx)
-	s.getGpuFn = func() gpu.GpuInfoList {
-		g := gpu.GpuInfo{Library: "metal"}
-		g.TotalMemory = 4 * format.MebiByte
-		g.FreeMemory = 3 * format.MebiByte
-		return []gpu.GpuInfo{g}
-	}
-
-	s.getCpuFn = func() gpu.GpuInfoList {
-		g := gpu.GpuInfo{Library: "cpu"}
-		g.TotalMemory = 4 * format.MebiByte
-		g.FreeMemory = 2 * format.MebiByte
-		return []gpu.GpuInfo{g}
-	}
-	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond})
-
-	s.newServerFn = a.newServer
-	slog.Info("a")
-	s.pendingReqCh <- a.req
-	require.Len(t, s.pendingReqCh, 1)
-	s.Run(ctx)
-	select {
-	case <-a.req.successCh:
-		if runtime.GOOS == "linux" {
-			t.Fatal("request should have been rejected with out of space")
-		}
-		// else - Darwin and Windows don't reject right now
-	case err := <-a.req.errCh:
-		require.Contains(t, err.Error(), "too large")
-	case <-ctx.Done():
-		t.Fatal("timeout")
-	}
-}
 func TestGetRunner(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
 	defer done()