add NumGQA

This commit is contained in:
Michael Yang 2023-07-27 14:04:30 -07:00
parent 18ffeeec45
commit ad3a7d0e2c
2 changed files with 3 additions and 0 deletions

View file

@@ -153,6 +153,7 @@ type Options struct {
NumCtx int `json:"num_ctx,omitempty"`
NumKeep int `json:"num_keep,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
+ NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
@@ -190,6 +191,7 @@ func DefaultOptions() Options {
NumCtx: 2048,
NumBatch: 1024,
NumGPU: 1,
+ NumGQA: 1,
LowVRAM: false,
F16KV: true,
UseMMap: true,

View file

@@ -127,6 +127,7 @@ func New(model string, opts api.Options) (*LLM, error) {
params.seed = C.uint(llm.Seed)
params.n_ctx = C.int(llm.NumCtx)
params.n_batch = C.int(llm.NumBatch)
+ params.n_gqa = C.int(llm.NumGQA)
params.n_gpu_layers = C.int(llm.NumGPU)
params.main_gpu = C.int(llm.MainGPU)
params.low_vram = C.bool(llm.LowVRAM)