Update llama.cpp

This commit is contained in:
Andrei Betlen 2023-10-19 02:55:08 -04:00
parent d989ac86e6
commit ff580031d2
3 changed files with 59 additions and 13 deletions

View file

@ -444,7 +444,7 @@ class Llama:
maxlen=self._n_ctx if self.context_params.logits_all else 1, maxlen=self._n_ctx if self.context_params.logits_all else 1,
) )
def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]: def tokenize(self, text: bytes, add_bos: bool = True, special: bool = False) -> List[int]:
"""Tokenize a string. """Tokenize a string.
Args: Args:
@ -466,6 +466,7 @@ class Llama:
tokens, tokens,
n_ctx, n_ctx,
add_bos, add_bos,
special
) )
if n_tokens < 0: if n_tokens < 0:
n_tokens = abs(n_tokens) n_tokens = abs(n_tokens)
@ -477,6 +478,7 @@ class Llama:
tokens, tokens,
n_tokens, n_tokens,
add_bos, add_bos,
special
) )
if n_tokens < 0: if n_tokens < 0:
raise RuntimeError( raise RuntimeError(

View file

@ -240,11 +240,11 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
# typedef struct llama_batch { # typedef struct llama_batch {
# int32_t n_tokens; # int32_t n_tokens;
# llama_token * token; # llama_token * token;
# float * embd; # float * embd;
# llama_pos * pos; # llama_pos * pos;
# llama_seq_id * seq_id; # llama_seq_id ** seq_id;
# int8_t * logits; # int8_t * logits;
# // NOTE: helpers for smooth API transition - can be deprecated in the future # // NOTE: helpers for smooth API transition - can be deprecated in the future
@ -262,7 +262,7 @@ class llama_batch(Structure):
("token", POINTER(llama_token)), ("token", POINTER(llama_token)),
("embd", c_float_p), ("embd", c_float_p),
("pos", POINTER(llama_pos)), ("pos", POINTER(llama_pos)),
("seq_id", POINTER(llama_seq_id)), ("seq_id", POINTER(POINTER(llama_seq_id))),
("logits", POINTER(c_int8)), ("logits", POINTER(c_int8)),
("all_pos_0", llama_pos), ("all_pos_0", llama_pos),
("all_pos_1", llama_pos), ("all_pos_1", llama_pos),
@ -1069,7 +1069,8 @@ _lib.llama_batch_get_one.argtypes = [
_lib.llama_batch_get_one.restype = llama_batch _lib.llama_batch_get_one.restype = llama_batch
# // Allocates a batch of tokens on the heap # // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
# // Each token can be assigned up to n_seq_max sequence ids
# // The batch has to be freed with llama_batch_free() # // The batch has to be freed with llama_batch_free()
# // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float) # // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
# // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token # // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
@ -1077,14 +1078,17 @@ _lib.llama_batch_get_one.restype = llama_batch
# // All members are left uninitialized # // All members are left uninitialized
# LLAMA_API struct llama_batch llama_batch_init( # LLAMA_API struct llama_batch llama_batch_init(
# int32_t n_tokens, # int32_t n_tokens,
# int32_t embd); # int32_t embd,
# int32_t n_seq_max);
def llama_batch_init( def llama_batch_init(
n_tokens: Union[c_int, int], embd: Union[c_int, int] n_tokens: Union[c_int32, int],
embd: Union[c_int32, int],
n_seq_max: Union[c_int32, int],
) -> llama_batch: ) -> llama_batch:
return _lib.llama_batch_init(n_tokens, embd) return _lib.llama_batch_init(n_tokens, embd, n_seq_max)
_lib.llama_batch_init.argtypes = [c_int, c_int] _lib.llama_batch_init.argtypes = [c_int32, c_int32, c_int32]
_lib.llama_batch_init.restype = llama_batch _lib.llama_batch_init.restype = llama_batch
@ -1308,6 +1312,46 @@ _lib.llama_tokenize.argtypes = [
_lib.llama_tokenize.restype = c_int _lib.llama_tokenize.restype = c_int
# /// @details Convert the provided text into tokens.
# /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
# /// @return Returns the number of tokens on success, no more than n_max_tokens
# /// @return Returns a negative number on failure - the number of tokens that would have been returned
# /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
# /// Does not insert a leading space.
# LLAMA_API int llama_tokenize(
# const struct llama_model * model,
# const char * text,
# int text_len,
# llama_token * tokens,
# int n_max_tokens,
# bool add_bos,
# bool special);
def llama_tokenize(
    model: llama_model_p,
    text: bytes,
    text_len: Union[c_int, int],
    tokens,  # type: Array[llama_token]
    n_max_tokens: Union[c_int, int],
    add_bos: Union[c_bool, bool],
    special: Union[c_bool, bool],
) -> int:
    """Convert ``text`` into tokens by forwarding to the native ``llama_tokenize``.

    Args:
        model: Model handle, passed through as the first native argument.
        text: Bytes to tokenize.
        text_len: Length value handed to the native call for ``text``.
        tokens: Output token buffer; per the C declaration it must be large
            enough to hold the resulting tokens.
        n_max_tokens: Upper bound on the number of tokens written.
        add_bos: Forwarded unchanged as the native ``add_bos`` flag.
        special: Allow tokenizing special and/or control tokens, which are
            otherwise treated as plaintext (per the C declaration).

    Returns:
        The number of tokens on success. A negative number signals failure;
        per the C declaration it is the number of tokens that would have
        been returned.
    """
    # Arguments are forwarded positionally, in the order of the C signature.
    native_args = (model, text, text_len, tokens, n_max_tokens, add_bos, special)
    return _lib.llama_tokenize(*native_args)
# ctypes signature for the native llama_tokenize symbol.
# The result is a plain C int.
_lib.llama_tokenize.restype = c_int
# One entry per parameter of the C declaration, in declaration order.
_lib.llama_tokenize.argtypes = [
    llama_model_p,  # const struct llama_model * model
    c_char_p,  # const char * text
    c_int,  # int text_len
    llama_token_p,  # llama_token * tokens
    c_int,  # int n_max_tokens
    c_bool,  # bool add_bos
    c_bool,  # bool special
]
# // Token Id -> Piece. # // Token Id -> Piece.
# // Uses the vocabulary in the provided context. # // Uses the vocabulary in the provided context.
# // Does not write null terminator to the buffer. # // Does not write null terminator to the buffer.

2
vendor/llama.cpp vendored

@ -1 +1 @@
Subproject commit 11bff290458f12f020b588792707f76ec658a27a Subproject commit 004797f6ac135383f8c1d1f5bd415ddee2f79318