diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 172f4c6..f927f0c 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -1777,7 +1777,7 @@ class Llama: logits_all=self.context_params.logits_all, embedding=self.context_params.embeddings, offload_kqv=self.context_params.offload_kqv, - flash_offload=self.context_params.flash_offload, + flash_attn=self.context_params.flash_attn, # Sampling Params last_n_tokens_size=self.last_n_tokens_size, # LoRA Params