From 6e298d8fca1ee5f25239e54aa5f3eed2eee4651e Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 14 Apr 2023 22:21:19 -0400
Subject: [PATCH] Set kv cache size to f16 by default

---
 llama_cpp/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index ecfd2f4..cd737c5 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -21,7 +21,7 @@ class Llama:
         n_ctx: int = 512,
         n_parts: int = -1,
         seed: int = 1337,
-        f16_kv: bool = False,
+        f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
         use_mmap: bool = True,
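
For context (not part of the patch itself): `f16_kv` tells llama.cpp to store the key/value attention cache in float16 rather than float32, roughly halving the cache's memory footprint. A minimal usage sketch of the behavior after this patch; the model path is a placeholder, not something taken from the patch:

```python
from llama_cpp import Llama

# With this patch applied, the KV cache defaults to float16,
# so no explicit flag is needed for the lower-memory behavior.
# "./models/7B/ggml-model.bin" is a hypothetical placeholder path.
llm = Llama(model_path="./models/7B/ggml-model.bin")  # f16_kv=True implicitly

# Callers who want the old float32 KV cache must now opt out explicitly:
llm_f32 = Llama(model_path="./models/7B/ggml-model.bin", f16_kv=False)
```

Note that this is a behavior change for existing callers who relied on the previous `f16_kv=False` default; they now need to pass the flag explicitly to keep a float32 cache.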