fix(server): Propagate flash_attn to model load. (#1424)

This commit is contained in:
Daniel Thuerck 2024-05-03 18:17:07 +02:00 committed by GitHub
parent 2117122396
commit 2138561fab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -242,6 +242,7 @@ class LlamaProxy:
logits_all=settings.logits_all, logits_all=settings.logits_all,
embedding=settings.embedding, embedding=settings.embedding,
offload_kqv=settings.offload_kqv, offload_kqv=settings.offload_kqv,
flash_attn=settings.flash_attn,
# Sampling Params # Sampling Params
last_n_tokens_size=settings.last_n_tokens_size, last_n_tokens_size=settings.last_n_tokens_size,
# LoRA Params # LoRA Params