fix multibyte responses
This commit is contained in:
parent
0142660bd4
commit
40c9dc0a31
1 changed file with 13 additions and 6 deletions
|
@@ -78,12 +78,14 @@ llama_token llama_sample(
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/jmorganca/ollama/api"
|
"github.com/jmorganca/ollama/api"
|
||||||
|
@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
|
||||||
context.PushLeft(int(in))
|
context.PushLeft(int(in))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
|
for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
|
||||||
if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
|
if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
|
||||||
return errors.New("llama: eval")
|
return errors.New("llama: eval")
|
||||||
|
@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b.WriteString(llm.detokenize(token))
|
||||||
|
if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
|
||||||
// call the callback
|
// call the callback
|
||||||
fn(api.GenerateResponse{
|
fn(api.GenerateResponse{
|
||||||
Response: llm.detokenize(token),
|
Response: b.String(),
|
||||||
})
|
})
|
||||||
|
|
||||||
output.PushLeft(token)
|
output.PushLeft(token)
|
||||||
context.PushLeft(int(token))
|
context.PushLeft(int(token))
|
||||||
|
b.Reset()
|
||||||
|
}
|
||||||
|
|
||||||
input = []C.llama_token{token}
|
input = []C.llama_token{token}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue