From 6cd566872b4fdf531b5a3be203e3eb61b75a6b14 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 30 Oct 2024 17:09:42 -0700
Subject: [PATCH] sched: Lift parallel restriction for multimodal models except mllama

The Go runner does not have a problem with supporting parallel
requests for most multimodal models. Now that we won't be potentially
falling back to server.cpp, this restriction can be lifted.

However, the new mllama model can't support parallel requests, so we
will need to keep a restriction for that.
---
 server/sched.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 1409ff07..1e34e4b9 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -130,11 +130,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				continue
 			}
 			numParallel := int(envconfig.NumParallel())
-			// TODO (jmorganca): multimodal models don't support parallel yet
+			// TODO (jmorganca): mllama doesn't support parallel yet
 			// see https://github.com/ollama/ollama/issues/4165
-			if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
+			if checkMllamaModelFamily(pending.model) && numParallel != 1 {
 				numParallel = 1
-				slog.Warn("multimodal models don't support parallel requests yet")
+				slog.Warn("mllama doesn't support parallel requests yet")
 			}
 
 			for {