add NumGQA

2023-07-27 14:04:30 -07:00 · 2023-07-27 14:04:30 -07:00 · ad3a7d0e2c
commit ad3a7d0e2c
parent 18ffeeec45
2 changed files with 3 additions and 0 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -153,6 +153,7 @@ type Options struct {
 	NumCtx        int  `json:"num_ctx,omitempty"`
 	NumKeep       int  `json:"num_keep,omitempty"`
 	NumBatch      int  `json:"num_batch,omitempty"`
+	NumGQA        int  `json:"num_gqa,omitempty"`
 	NumGPU        int  `json:"num_gpu,omitempty"`
 	MainGPU       int  `json:"main_gpu,omitempty"`
 	LowVRAM       bool `json:"low_vram,omitempty"`
@@ -190,6 +191,7 @@ func DefaultOptions() Options {
 		NumCtx:   2048,
 		NumBatch: 1024,
 		NumGPU:   1,
+		NumGQA:   1,
 		LowVRAM:  false,
 		F16KV:    true,
 		UseMMap:  true,
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -127,6 +127,7 @@ func New(model string, opts api.Options) (*LLM, error) {
 	params.seed = C.uint(llm.Seed)
 	params.n_ctx = C.int(llm.NumCtx)
 	params.n_batch = C.int(llm.NumBatch)
+	params.n_gqa = C.int(llm.NumGQA)
 	params.n_gpu_layers = C.int(llm.NumGPU)
 	params.main_gpu = C.int(llm.MainGPU)
 	params.low_vram = C.bool(llm.LowVRAM)