use int32_t for call to tokenize (#4738)

* use `int32_t` for call to tokenize

* variable naming

* cleanup

* fix crash
This commit is contained in:
Jeffrey Morgan 2024-05-30 21:43:30 -07:00 committed by GitHub
parent 7ca9605f54
commit 763bb65dbb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -63,12 +63,27 @@ func (llm *llamaModel) Tokenize(s string) []int {
cs := C.CString(s) cs := C.CString(s)
defer C.free(unsafe.Pointer(cs)) defer C.free(unsafe.Pointer(cs))
tokens := make([]int, len(s)+2) ltokens := make([]C.llama_token, len(s)+2)
if n := C.llama_tokenize(llm.m, cs, C.int(len(s)), (*C.llama_token)(unsafe.Pointer(&tokens[0])), C.int(len(s)+2), false, true); n > 0 { n := C.llama_tokenize(
return tokens[:n] llm.m,
cs,
C.int32_t(len(s)),
&ltokens[0],
C.int32_t(len(ltokens)),
false,
true,
)
if n < 0 {
return nil
} }
return nil tokens := make([]int, n)
for i := 0; i < int(n); i++ {
tokens[i] = int(ltokens[i])
}
return tokens
} }
func (llm *llamaModel) Detokenize(i32s []int) string { func (llm *llamaModel) Detokenize(i32s []int) string {