From ad3a7d0e2cad1d9cdfe9ad72f14d3db1cbc4c720 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Thu, 27 Jul 2023 14:04:30 -0700
Subject: [PATCH] add NumGQA

---
 api/types.go | 2 ++
 llama/llama.go | 1 +
 2 files changed, 3 insertions(+)

diff --git a/api/types.go b/api/types.go
index 8f12b5f9..9e5991dc 100644
--- a/api/types.go
+++ b/api/types.go
@@ -153,6 +153,7 @@ type Options struct {
 	NumCtx int `json:"num_ctx,omitempty"`
 	NumKeep int `json:"num_keep,omitempty"`
 	NumBatch int `json:"num_batch,omitempty"`
+	NumGQA int `json:"num_gqa,omitempty"`
 	NumGPU int `json:"num_gpu,omitempty"`
 	MainGPU int `json:"main_gpu,omitempty"`
 	LowVRAM bool `json:"low_vram,omitempty"`
@@ -190,6 +191,7 @@ func DefaultOptions() Options {
 	NumCtx: 2048,
 	NumBatch: 1024,
 	NumGPU: 1,
+	NumGQA: 1,
 	LowVRAM: false,
 	F16KV: true,
 	UseMMap: true,
diff --git a/llama/llama.go b/llama/llama.go
index e2c30f1f..c7bf194a 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -127,6 +127,7 @@ func New(model string, opts api.Options) (*LLM, error) {
 	params.seed = C.uint(llm.Seed)
 	params.n_ctx = C.int(llm.NumCtx)
 	params.n_batch = C.int(llm.NumBatch)
+	params.n_gqa = C.int(llm.NumGQA)
 	params.n_gpu_layers = C.int(llm.NumGPU)
 	params.main_gpu = C.int(llm.MainGPU)
 	params.low_vram = C.bool(llm.LowVRAM)