runner.go: Remove unused arguments
Now that server.cpp is gone, we no longer need to pass arguments that were ignored and kept only for compatibility.
This commit is contained in:
parent
6cd566872b
commit
a909417602
5 changed files with 1 addition and 15 deletions
|
@ -236,7 +236,7 @@ type Runner struct {
|
|||
NumGPU int `json:"num_gpu,omitempty"`
|
||||
MainGPU int `json:"main_gpu,omitempty"`
|
||||
LowVRAM bool `json:"low_vram,omitempty"`
|
||||
F16KV bool `json:"f16_kv,omitempty"`
|
||||
F16KV bool `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
|
||||
LogitsAll bool `json:"logits_all,omitempty"`
|
||||
VocabOnly bool `json:"vocab_only,omitempty"`
|
||||
UseMMap *bool `json:"use_mmap,omitempty"`
|
||||
|
@ -613,7 +613,6 @@ func DefaultOptions() Options {
|
|||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||
NumThread: 0, // let the runtime decide
|
||||
LowVRAM: false,
|
||||
F16KV: true,
|
||||
UseMLock: false,
|
||||
UseMMap: nil,
|
||||
},
|
||||
|
|
|
@ -355,7 +355,6 @@ curl http://localhost:11434/api/generate -d '{
|
|||
"num_gpu": 1,
|
||||
"main_gpu": 0,
|
||||
"low_vram": false,
|
||||
"f16_kv": true,
|
||||
"vocab_only": false,
|
||||
"use_mmap": true,
|
||||
"use_mlock": false,
|
||||
|
|
|
@ -837,14 +837,8 @@ func main() {
|
|||
mlock := flag.Bool("mlock", false, "force system to keep model in RAM rather than swapping or compressing")
|
||||
tensorSplit := flag.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
|
||||
multiUserCache := flag.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
|
||||
// Expose requirements as a JSON output to stdout
|
||||
requirements := flag.Bool("requirements", false, "print json requirement information")
|
||||
|
||||
// These are either ignored by llama.cpp or have no significance to us
|
||||
_ = flag.Bool("embedding", false, "enable embedding vector output (default: disabled)")
|
||||
_ = flag.Bool("log-disable", false, "disables logging to a file")
|
||||
_ = flag.Bool("memory-f32", false, "use f32 instead of f16 for memory key+value (default: disabled) not recommended: doubles context memory required and no measurable increase in quality")
|
||||
|
||||
flag.Parse()
|
||||
if *requirements {
|
||||
printRequirements(os.Stdout)
|
||||
|
|
|
@ -186,7 +186,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
|||
"--model", model,
|
||||
"--ctx-size", strconv.Itoa(opts.NumCtx),
|
||||
"--batch-size", strconv.Itoa(opts.NumBatch),
|
||||
"--embedding",
|
||||
}
|
||||
|
||||
if opts.NumGPU >= 0 {
|
||||
|
@ -218,10 +217,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
|||
params = append(params, "--threads", strconv.Itoa(defaultThreads))
|
||||
}
|
||||
|
||||
if !opts.F16KV {
|
||||
params = append(params, "--memory-f32")
|
||||
}
|
||||
|
||||
flashAttnEnabled := envconfig.FlashAttention()
|
||||
|
||||
for _, g := range gpus {
|
||||
|
|
|
@ -440,7 +440,6 @@ func TestParseFileParameters(t *testing.T) {
|
|||
"num_gpu 1": {"num_gpu", "1"},
|
||||
"main_gpu 1": {"main_gpu", "1"},
|
||||
"low_vram true": {"low_vram", "true"},
|
||||
"f16_kv true": {"f16_kv", "true"},
|
||||
"logits_all true": {"logits_all", "true"},
|
||||
"vocab_only true": {"vocab_only", "true"},
|
||||
"use_mmap true": {"use_mmap", "true"},
|
||||
|
|
Loading…
Reference in a new issue