Use model_name and index in response
This commit is contained in:
parent
922b5b2bfd
commit
0adb9ec37a
1 changed file with 6 additions and 7 deletions
|
@@ -177,7 +177,6 @@ class Llama:
|
|||
if self.verbose:
|
||||
print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
|
||||
|
||||
|
||||
n_vocab = self.n_vocab()
|
||||
n_ctx = self.n_ctx()
|
||||
data = (llama_cpp.llama_token_data * n_vocab)(
|
||||
|
@@ -575,9 +574,9 @@ class Llama:
|
|||
else:
|
||||
inputs = input
|
||||
|
||||
data = []
|
||||
data: List[EmbeddingData] = []
|
||||
total_tokens = 0
|
||||
for input in inputs:
|
||||
for index, input in enumerate(inputs):
|
||||
tokens = self.tokenize(input.encode("utf-8"))
|
||||
self.reset()
|
||||
self.eval(tokens)
|
||||
|
@@ -587,20 +586,20 @@ class Llama:
|
|||
: llama_cpp.llama_n_embd(self.ctx)
|
||||
]
|
||||
|
||||
if self.verbose:
|
||||
llama_cpp.llama_print_timings(self.ctx)
|
||||
data.append(
|
||||
{
|
||||
"object": "embedding",
|
||||
"embedding": embedding,
|
||||
"index": 0,
|
||||
"index": index,
|
||||
}
|
||||
)
|
||||
if self.verbose:
|
||||
llama_cpp.llama_print_timings(self.ctx)
|
||||
|
||||
return {
|
||||
"object": "list",
|
||||
"data": data,
|
||||
"model": self.model_path,
|
||||
"model": model_name,
|
||||
"usage": {
|
||||
"prompt_tokens": total_tokens,
|
||||
"total_tokens": total_tokens,
|
||||
|
|
Loading…
Reference in a new issue