fix multibyte responses: buffer detokenized output until it forms valid UTF-8 (or reaches utf8.UTFMax bytes) before emitting it through the response callback

This commit is contained in:
Michael Yang 2023-07-14 18:30:32 -07:00
parent 0142660bd4
commit 40c9dc0a31

View file

@@ -78,12 +78,14 @@ llama_token llama_sample(
*/
import "C"
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"
"time"
"unicode/utf8"
"unsafe"
"github.com/jmorganca/ollama/api"
@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
context.PushLeft(int(in))
}
var b bytes.Buffer
for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
return errors.New("llama: eval")
@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
return err
}
// call the callback
fn(api.GenerateResponse{
Response: llm.detokenize(token),
})
b.WriteString(llm.detokenize(token))
if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
// call the callback
fn(api.GenerateResponse{
Response: b.String(),
})
output.PushLeft(token)
context.PushLeft(int(token))
output.PushLeft(token)
context.PushLeft(int(token))
b.Reset()
}
input = []C.llama_token{token}
}