fix(server): Propagate flash_attn to model load. (#1424)
parent 2117122396
commit 2138561fab
1 changed file with 1 addition and 0 deletions
@@ -242,6 +242,7 @@ class LlamaProxy:
             logits_all=settings.logits_all,
             embedding=settings.embedding,
             offload_kqv=settings.offload_kqv,
+            flash_attn=settings.flash_attn,
             # Sampling Params
             last_n_tokens_size=settings.last_n_tokens_size,
             # LoRA Params
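For context, a minimal sketch of the pattern this one-line fix restores: the server's per-model settings must each be forwarded explicitly when the Llama model is constructed, otherwise the library default (flash attention disabled) silently applies even when the setting requests it. The simplified ModelSettings dataclass and load_model helper below are illustrative assumptions, not the project's actual definitions; only the keyword-argument names mirror the diff.

# Sketch only: ModelSettings and load_model are assumed simplifications,
# not the real llama_cpp/server definitions.
from dataclasses import dataclass


@dataclass
class ModelSettings:
    model: str
    logits_all: bool = False
    embedding: bool = False
    offload_kqv: bool = True
    flash_attn: bool = False  # the flag this commit starts propagating
    last_n_tokens_size: int = 64


def load_model(settings: ModelSettings):
    from llama_cpp import Llama

    return Llama(
        model_path=settings.model,
        logits_all=settings.logits_all,
        embedding=settings.embedding,
        offload_kqv=settings.offload_kqv,
        # Before this fix, the flash_attn kwarg was omitted here, so
        # flash attention stayed off regardless of the server setting.
        flash_attn=settings.flash_attn,
        last_n_tokens_size=settings.last_n_tokens_size,
    )

The design point is that the settings object is not passed through wholesale; each field is mapped to a constructor keyword by hand, so any field left out of this mapping is silently ignored.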