From af3ed503e9ce60fe6b5365031abad4176a3536b3 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Sat, 1 Jun 2024 18:09:24 -0400 Subject: [PATCH] fix: Use numpy recarray for candidates data, fixes bug with temp < 0 --- llama_cpp/_internals.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py index b404601..e3f539b 100644 --- a/llama_cpp/_internals.py +++ b/llama_cpp/_internals.py @@ -545,13 +545,12 @@ class _LlamaBatch: class _LlamaTokenDataArray: def __init__(self, *, n_vocab: int): self.n_vocab = n_vocab - self.candidates_data = np.array( - [], + self.candidates_data = np.recarray( + (self.n_vocab,), dtype=np.dtype( [("id", np.intc), ("logit", np.single), ("p", np.single)], align=True ), ) - self.candidates_data.resize(3, self.n_vocab, refcheck=False) self.candidates = llama_cpp.llama_token_data_array( data=self.candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p), size=self.n_vocab, @@ -561,14 +560,11 @@ class _LlamaTokenDataArray: self.default_candidates_data_p = np.zeros(self.n_vocab, dtype=np.single) def copy_logits(self, logits: npt.NDArray[np.single]): - self.candidates_data["id"][:] = self.default_candidates_data_id - self.candidates_data["logit"][:] = logits - self.candidates_data["p"][:] = self.default_candidates_data_p - self.candidates.data = self.candidates_data.ctypes.data_as( - llama_cpp.llama_token_data_p - ) - self.candidates.sorted = ctypes.c_bool(False) - self.candidates.size = ctypes.c_size_t(self.n_vocab) + self.candidates_data.id[:] = self.default_candidates_data_id + self.candidates_data.logit[:] = logits + self.candidates_data.p[:] = self.default_candidates_data_p + self.candidates.sorted = False + self.candidates.size = self.n_vocab # Python wrappers over common/common @@ -759,14 +755,14 @@ class _LlamaSamplingContext: self.params.penalty_present, ) if not self.params.penalize_nl: - token_data_array.candidates_data["logit"][nl_token] = nl_logit + token_data_array.candidates_data.logit[nl_token] = nl_logit if self.grammar is not None: ctx_main.sample_grammar(token_data_array, self.grammar) if self.params.temp < 0: ctx_main.sample_softmax(token_data_array) - id = token_data_array.candidates_data["id"][0] + id = token_data_array.candidates_data.id[0] elif self.params.temp == 0: id = ctx_main.sample_token_greedy(token_data_array) else: