do not use --parallel 2
for old runners
This commit is contained in:
parent
bbe41ce41a
commit
2dd040d04c
1 changed file with 16 additions and 9 deletions
25
llm/llama.go
25
llm/llama.go
|
@@ -59,6 +59,7 @@ ws ::= ([ \t\n] ws)?
|
||||||
var llamaCppEmbed embed.FS
|
var llamaCppEmbed embed.FS
|
||||||
|
|
||||||
type ModelRunner struct {
|
type ModelRunner struct {
|
||||||
|
Type string // "gguf" or "ggml"
|
||||||
Path string // path to the model runner executable
|
Path string // path to the model runner executable
|
||||||
Accelerated bool
|
Accelerated bool
|
||||||
}
|
}
|
||||||
|
@@ -72,25 +73,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "darwin":
|
case "darwin":
|
||||||
if runtime.GOARCH == "arm64" {
|
if runtime.GOARCH == "arm64" {
|
||||||
runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
|
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
|
||||||
} else {
|
} else {
|
||||||
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
|
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
|
||||||
}
|
}
|
||||||
case "linux":
|
case "linux":
|
||||||
runners = []ModelRunner{
|
runners = []ModelRunner{
|
||||||
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
|
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
|
||||||
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||||
}
|
}
|
||||||
case "windows":
|
case "windows":
|
||||||
// TODO: select windows GPU runner here when available
|
// TODO: select windows GPU runner here when available
|
||||||
runners = []ModelRunner{
|
runners = []ModelRunner{
|
||||||
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
|
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
|
||||||
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
|
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
||||||
runners = []ModelRunner{
|
runners = []ModelRunner{
|
||||||
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -148,6 +149,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||||
for _, r := range runners {
|
for _, r := range runners {
|
||||||
// clean the ModelRunner paths so that they match the OS we are running on
|
// clean the ModelRunner paths so that they match the OS we are running on
|
||||||
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
|
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
|
||||||
|
Type: r.Type,
|
||||||
Path: filepath.Clean(path.Join(workDir, r.Path)),
|
Path: filepath.Clean(path.Join(workDir, r.Path)),
|
||||||
Accelerated: r.Accelerated,
|
Accelerated: r.Accelerated,
|
||||||
})
|
})
|
||||||
|
@@ -341,7 +343,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
|
||||||
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
||||||
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
||||||
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
|
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
|
||||||
"--parallel", "2",
|
|
||||||
"--embedding",
|
"--embedding",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -403,11 +404,17 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
|
||||||
}
|
}
|
||||||
|
|
||||||
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
|
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
|
||||||
|
params := append(params, "--port", strconv.Itoa(port))
|
||||||
|
|
||||||
|
if runner.Type == "gguf" {
|
||||||
|
params = append(params, "--parallel", "2")
|
||||||
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
cmd := exec.CommandContext(
|
cmd := exec.CommandContext(
|
||||||
ctx,
|
ctx,
|
||||||
runner.Path,
|
runner.Path,
|
||||||
append(params, "--port", strconv.Itoa(port))...,
|
params...,
|
||||||
)
|
)
|
||||||
|
|
||||||
var libraryPaths []string
|
var libraryPaths []string
|
||||||
|
|
Loading…
Reference in a new issue