Change ctypes.pointer to lower-overhead ctypes.byref
This commit is contained in:
parent
14da46f16e
commit
e72f58614b
1 changed file with 8 additions and 8 deletions
|
@@ -295,47 +295,47 @@ class Llama:
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
last_tokens_data=last_n_tokens_data,
|
last_tokens_data=last_n_tokens_data,
|
||||||
last_tokens_size=last_n_tokens_size,
|
last_tokens_size=last_n_tokens_size,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
penalty=repeat_penalty,
|
penalty=repeat_penalty,
|
||||||
)
|
)
|
||||||
if float(temp.value) == 0.0:
|
if float(temp.value) == 0.0:
|
||||||
return llama_cpp.llama_sample_token_greedy(
|
return llama_cpp.llama_sample_token_greedy(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
llama_cpp.llama_sample_top_k(
|
llama_cpp.llama_sample_top_k(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
k=top_k,
|
k=top_k,
|
||||||
min_keep=llama_cpp.c_size_t(1),
|
min_keep=llama_cpp.c_size_t(1),
|
||||||
)
|
)
|
||||||
llama_cpp.llama_sample_tail_free(
|
llama_cpp.llama_sample_tail_free(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
z=llama_cpp.c_float(1.0),
|
z=llama_cpp.c_float(1.0),
|
||||||
min_keep=llama_cpp.c_size_t(1),
|
min_keep=llama_cpp.c_size_t(1),
|
||||||
)
|
)
|
||||||
llama_cpp.llama_sample_typical(
|
llama_cpp.llama_sample_typical(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
p=llama_cpp.c_float(1.0),
|
p=llama_cpp.c_float(1.0),
|
||||||
min_keep=llama_cpp.c_size_t(1),
|
min_keep=llama_cpp.c_size_t(1),
|
||||||
)
|
)
|
||||||
llama_cpp.llama_sample_top_p(
|
llama_cpp.llama_sample_top_p(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
p=top_p,
|
p=top_p,
|
||||||
min_keep=llama_cpp.c_size_t(1),
|
min_keep=llama_cpp.c_size_t(1),
|
||||||
)
|
)
|
||||||
llama_cpp.llama_sample_temperature(
|
llama_cpp.llama_sample_temperature(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
temp=temp,
|
temp=temp,
|
||||||
)
|
)
|
||||||
return llama_cpp.llama_sample_token(
|
return llama_cpp.llama_sample_token(
|
||||||
ctx=self.ctx,
|
ctx=self.ctx,
|
||||||
candidates=llama_cpp.ctypes.pointer(candidates),
|
candidates=llama_cpp.ctypes.byref(candidates), # type: ignore
|
||||||
)
|
)
|
||||||
|
|
||||||
def sample(
|
def sample(
|
||||||
|
|
Loading…
Reference in a new issue