Bugfix: enable embeddings for fastapi server

Andrei Betlen 2023-04-01 15:12:25 -04:00
parent c25b7dfc86
commit f28bf3f13d

@@ -31,6 +31,7 @@ llama = llama_cpp.Llama(
     settings.model,
     f16_kv=True,
     use_mlock=True,
+    embedding=True,
     n_threads=6,
     n_batch=2048,
 )
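
For context, this flag matters wherever create_embedding() is called: a Llama instance only produces embeddings if it was constructed with embedding=True, which is what this hunk adds to the server. A minimal sketch of direct use, assuming a hypothetical local model path and the OpenAI-style response shape:

import llama_cpp

# Hypothetical model path; embedding=True is the flag this commit adds
# to the server's Llama constructor.
llama = llama_cpp.Llama(
    "./models/ggml-model.bin",
    embedding=True,
)

result = llama.create_embedding("The quick brown fox")
# Assumed OpenAI-style response: {"data": [{"embedding": [...], ...}], ...}
vector = result["data"][0]["embedding"]
print(len(vector))  # dimensionality of the embedding vector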
@@ -93,4 +94,6 @@ CreateEmbeddingResponse = create_model_from_typeddict(llama_cpp.Embedding)
     response_model=CreateEmbeddingResponse,
 )
 def create_embedding(request: CreateEmbeddingRequest):
-    return llama.create_embedding(**request.dict())
+    # print(request)
+    # return llama.create_embedding(**request.dict(exclude={"model", "user"}))
+    return llama.create_embedding(request.input)
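
The handler now forwards only request.input to create_embedding(), dropping request fields such as model and user that the method does not accept; the commented-out exclude={...} line records the alternative that was considered. A hedged usage sketch against the running server; the /v1/embeddings route and port 8000 are assumptions about the server's OpenAI-compatible layout, not shown in this diff:

import requests

# Assumed route and port; adjust to however the server is actually mounted.
response = requests.post(
    "http://localhost:8000/v1/embeddings",
    json={"input": "The quick brown fox"},
)
response.raise_for_status()

# Assumed OpenAI-style response body: {"data": [{"embedding": [...]}], ...}
vector = response.json()["data"][0]["embedding"]
print(len(vector))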