From 29b6e9a5c832b7d148044b45fb3cd6f3f923d96b Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 30 Apr 2024 09:32:47 -0400
Subject: [PATCH] fix: wrong parameter for flash attention in pickle
 __getstate__

---
 llama_cpp/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 172f4c6..f927f0c 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1777,7 +1777,7 @@ class Llama:
             logits_all=self.context_params.logits_all,
             embedding=self.context_params.embeddings,
             offload_kqv=self.context_params.offload_kqv,
-            flash_offload=self.context_params.flash_offload,
+            flash_attn=self.context_params.flash_attn,
             # Sampling Params
             last_n_tokens_size=self.last_n_tokens_size,
             # LoRA Params
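
The one-line change matters because Llama.__getstate__ returns the keyword
arguments used to rebuild the object when it is unpickled; a key that matches
no __init__ parameter (here the stale flash_offload, a name llama_context_params
does not define either) breaks the pickle round trip. Below is a minimal sketch
of that pattern, not the library's actual implementation: the Model class,
model_path, and "weights.gguf" are illustrative stand-ins, with only flash_attn
taken from the patch.

    import pickle


    class Model:
        """Simplified stand-in for Llama: __getstate__ captures the constructor
        kwargs and __setstate__ rebuilds the object by re-calling __init__."""

        def __init__(self, model_path: str, flash_attn: bool = False):
            self.model_path = model_path
            self.flash_attn = flash_attn

        def __getstate__(self):
            # Every key here must name a real __init__ parameter; a stale key
            # such as "flash_offload" would make __setstate__ raise TypeError.
            return dict(model_path=self.model_path, flash_attn=self.flash_attn)

        def __setstate__(self, state):
            self.__init__(**state)


    # Round trip succeeds because the keys match __init__'s parameter names.
    m = pickle.loads(pickle.dumps(Model("weights.gguf", flash_attn=True)))
    assert m.flash_attn is True

In this sketch, renaming the flash_attn key in __getstate__ to a misspelled one
makes pickle.loads fail with "TypeError: __init__() got an unexpected keyword
argument", which is the failure mode the patch guards against.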