Use model_name and index in response

This commit is contained in:
Andrei Betlen 2023-05-21 21:30:03 -04:00
parent 922b5b2bfd
commit 0adb9ec37a

View file

@@ -177,7 +177,6 @@ class Llama:
if self.verbose: if self.verbose:
print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr) print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
n_vocab = self.n_vocab() n_vocab = self.n_vocab()
n_ctx = self.n_ctx() n_ctx = self.n_ctx()
data = (llama_cpp.llama_token_data * n_vocab)( data = (llama_cpp.llama_token_data * n_vocab)(
@@ -575,9 +574,9 @@ class Llama:
else: else:
inputs = input inputs = input
data = [] data: List[EmbeddingData] = []
total_tokens = 0 total_tokens = 0
for input in inputs: for index, input in enumerate(inputs):
tokens = self.tokenize(input.encode("utf-8")) tokens = self.tokenize(input.encode("utf-8"))
self.reset() self.reset()
self.eval(tokens) self.eval(tokens)
@@ -587,20 +586,20 @@ class Llama:
: llama_cpp.llama_n_embd(self.ctx) : llama_cpp.llama_n_embd(self.ctx)
] ]
if self.verbose:
llama_cpp.llama_print_timings(self.ctx)
data.append( data.append(
{ {
"object": "embedding", "object": "embedding",
"embedding": embedding, "embedding": embedding,
"index": 0, "index": index,
} }
) )
if self.verbose:
llama_cpp.llama_print_timings(self.ctx)
return { return {
"object": "list", "object": "list",
"data": data, "data": data,
"model": self.model_path, "model": model_name,
"usage": { "usage": {
"prompt_tokens": total_tokens, "prompt_tokens": total_tokens,
"total_tokens": total_tokens, "total_tokens": total_tokens,