fix: parallel queueing race condition caused silent failure (#1445)

* fix: queued request failures

- increase parallel requests to 2 so that a queued request can complete; queueing itself is managed in ollama

* log stream errors
This commit is contained in:
Bruce MacDonald 2023-12-09 14:14:02 -05:00 committed by GitHub
parent 9e1406e4ed
commit bbe41ce41a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -341,6 +341,7 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--parallel", "2",
"--embedding", "--embedding",
} }
@ -631,34 +632,37 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
continue continue
} }
if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok { evt, ok := bytes.CutPrefix(line, []byte("data: "))
var p prediction if !ok {
if err := json.Unmarshal(evt, &p); err != nil { return fmt.Errorf("error parsing llm response stream: %s", line)
return fmt.Errorf("error unmarshaling llm prediction response: %v", err) }
}
if p.Content != "" { var p prediction
fn(PredictResult{ if err := json.Unmarshal(evt, &p); err != nil {
Model: predict.Model, return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
CreatedAt: time.Now().UTC(), }
Content: p.Content,
})
}
if p.Stop { if p.Content != "" {
fn(PredictResult{ fn(PredictResult{
Model: predict.Model, Model: predict.Model,
CreatedAt: time.Now().UTC(), CreatedAt: time.Now().UTC(),
TotalDuration: time.Since(predict.CheckpointStart), Content: p.Content,
})
}
Done: true, if p.Stop {
PromptEvalCount: p.Timings.PromptN, fn(PredictResult{
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS), Model: predict.Model,
EvalCount: p.Timings.PredictedN, CreatedAt: time.Now().UTC(),
EvalDuration: parseDurationMs(p.Timings.PredictedMS), TotalDuration: time.Since(predict.CheckpointStart),
})
return nil Done: true,
} PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
EvalCount: p.Timings.PredictedN,
EvalDuration: parseDurationMs(p.Timings.PredictedMS),
})
return nil
} }
} }
} }