From 0ad0e738cd7ed1266b3c210ad54dcd2b70142563 Mon Sep 17 00:00:00 2001
From: Richard Lyons <frob@cloudstaff.com>
Date: Sun, 18 Aug 2024 01:43:26 +0200
Subject: [PATCH 1/3] Override numParallel only if unset.

---
 server/sched.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/server/sched.go b/server/sched.go
index 9947fd32..4d9c0296 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -734,7 +734,9 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
 
 // If multiple Libraries are detected, pick the Library which loads the most layers for the model
 func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
-	*numParallel = 1
+	if *numParallel <= 0 {
+		*numParallel = 1
+        }
 	byLibrary := gpus.ByLibrary()
 	if len(byLibrary) <= 1 {
 		return gpus

From 9352eeb752531decccc7c6b91a07bc3dd5efa67e Mon Sep 17 00:00:00 2001
From: Richard Lyons <frob@cloudstaff.com>
Date: Sun, 18 Aug 2024 02:55:01 +0200
Subject: [PATCH 2/3] Reset NumCtx to origNumCtx when numParallel is unset.

---
 server/sched.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/sched.go b/server/sched.go
index 4d9c0296..3fe6d7fc 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -736,6 +736,7 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
 func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
 	if *numParallel <= 0 {
 		*numParallel = 1
+                req.opts.NumCtx = req.origNumCtx
         }
 	byLibrary := gpus.ByLibrary()
 	if len(byLibrary) <= 1 {

From 885cf45087863aa2e064a05da99e8bd07d69970a Mon Sep 17 00:00:00 2001
From: Richard Lyons <frob@cloudstaff.com>
Date: Sun, 18 Aug 2024 03:07:16 +0200
Subject: [PATCH 3/3] Fix whitespace: indent with tabs instead of spaces.

---
 server/sched.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 3fe6d7fc..9d8c4144 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -736,8 +736,8 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
 func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
 	if *numParallel <= 0 {
 		*numParallel = 1
-                req.opts.NumCtx = req.origNumCtx
-        }
+		req.opts.NumCtx = req.origNumCtx
+	}
 	byLibrary := gpus.ByLibrary()
 	if len(byLibrary) <= 1 {
 		return gpus