Use model_name and index in response
commit 0adb9ec37a
parent 922b5b2bfd

1 changed file with 6 additions and 7 deletions
@@ -176,7 +176,6 @@ class Llama:
 
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
 
-
         n_vocab = self.n_vocab()
         n_ctx = self.n_ctx()
@@ -575,9 +574,9 @@ class Llama:
         else:
             inputs = input
 
-        data = []
+        data: List[EmbeddingData] = []
         total_tokens = 0
-        for input in inputs:
+        for index, input in enumerate(inputs):
             tokens = self.tokenize(input.encode("utf-8"))
             self.reset()
             self.eval(tokens)
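Note: the two replacements above work together. Annotating data as List[EmbeddingData] documents the element shape (it assumes from typing import List and the package's EmbeddingData type are already in scope), and enumerate supplies the position that the next hunk stores under "index". A minimal, self-contained sketch of the new loop shape, using a plain dict in place of EmbeddingData and a stub vector in place of real model output (both assumptions, for illustration only):

from typing import Any, Dict, List

inputs = ["first input", "second input"]

data: List[Dict[str, Any]] = []  # Dict stands in for EmbeddingData here
for index, text in enumerate(inputs):
    data.append(
        {
            "object": "embedding",
            "embedding": [float(len(text))],  # stub; the real loop reads the model's embeddings
            "index": index,  # previously hard-coded to 0 for every input
        }
    )

assert [d["index"] for d in data] == [0, 1]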
@@ -587,20 +586,20 @@ class Llama:
                 : llama_cpp.llama_n_embd(self.ctx)
             ]
 
-            if self.verbose:
-                llama_cpp.llama_print_timings(self.ctx)
             data.append(
                 {
                     "object": "embedding",
                     "embedding": embedding,
-                    "index": 0,
+                    "index": index,
                 }
             )
+        if self.verbose:
+            llama_cpp.llama_print_timings(self.ctx)
 
         return {
             "object": "list",
             "data": data,
-            "model": self.model_path,
+            "model": model_name,
             "usage": {
                 "prompt_tokens": total_tokens,
                 "total_tokens": total_tokens,
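Taken together, the diff changes the response in two visible ways: each element of "data" now carries its input's list position instead of a constant 0, and "model" reports model_name rather than the raw model_path. This hunk also moves the verbose timing printout out of the per-input loop so it runs once per call. A hedged usage sketch of the effect; it assumes the surrounding method is create_embedding, that model_name falls back from an optional model argument to self.model_path (implied but not shown by this diff), and a hypothetical model file path:

from llama_cpp import Llama  # requires llama-cpp-python

# Hypothetical model path; embedding=True enables the embedding call.
llm = Llama(model_path="./models/ggml-model.bin", embedding=True)

resp = llm.create_embedding(["hello", "world"], model="my-model-alias")

print([e["index"] for e in resp["data"]])  # -> [0, 1], one entry per input
print(resp["model"])                       # -> "my-model-alias" via model_name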