From d018c7b01dd08518b59ebcedc603111243a39391 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Sat, 12 Aug 2023 18:41:47 +0800
Subject: [PATCH] Add doc string for n_gpu_layers argument

---
 llama_cpp/llama.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index a996d5c..20a5e0c 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -239,6 +239,7 @@ class Llama:
             n_ctx: Maximum context size.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
+            n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
             f16_kv: Use half-precision for key/value cache.
             logits_all: Return logits for all tokens, not just the last token.
             vocab_only: Only load the vocabulary no weights.
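
For illustration, a minimal usage sketch of the parameter this patch documents. The model path below is a placeholder, and the comment reflects the docstring's stated semantics; a GPU-enabled build of llama-cpp-python is assumed.

from llama_cpp import Llama

# n_gpu_layers=-1 offloads all layers to the GPU (per the docstring above);
# a smaller positive value, e.g. 20, offloads only the first 20 layers,
# which can help when VRAM is limited. Path below is a placeholder.
llm = Llama(model_path="./models/7B/model.bin", n_gpu_layers=-1)
output = llm("Q: What is the capital of France? A:", max_tokens=16)
print(output["choices"][0]["text"])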