llama: Improve error handling
Check for NULL return values from llama.cpp in more places and convert them into Go errors, which should make debugging easier in the future rather than having hidden surprises in our data structures.
parent a103dae01e
commit 312d9de1d1

4 changed files with 98 additions and 40 deletions
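The pattern applied throughout is the same: check the pointer a cgo call hands back at the boundary, and turn NULL into an error right there instead of storing it and crashing later. A minimal, self-contained sketch of that pattern (hypothetical model_load C function, not code from this change):

    package main

    /*
    #include <stdlib.h>
    struct model { int id; };
    // Stand-in for a llama.cpp-style loader that returns NULL on failure.
    static struct model *model_load(int ok) { return ok ? malloc(sizeof(struct model)) : NULL; }
    */
    import "C"

    import (
        "errors"
        "fmt"
    )

    func loadModel(ok C.int) (*C.struct_model, error) {
        m := C.model_load(ok)
        if m == nil { // check at the cgo boundary...
            return nil, errors.New("unable to load model") // ...and surface a Go error
        }
        return m, nil
    }

    func main() {
        if _, err := loadModel(0); err != nil {
            fmt.Println(err) // unable to load model
        }
    }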
llama/llama.go

@@ -88,6 +88,7 @@ import (
     "fmt"
     "runtime"
     "runtime/cgo"
+    "slices"
     "strings"
     "unsafe"
 )
@@ -260,7 +261,7 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) {
     }

     m := Model{c: C.llama_load_model_from_file(C.CString(modelPath), cparams)}
-    if m.c == (*C.struct_llama_model)(C.NULL) {
+    if m.c == nil {
         return nil, fmt.Errorf("unable to load model: %s", modelPath)
     }

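A side note on the simplified check: cgo maps a C pointer type like struct llama_model * to an ordinary Go pointer type (*C.struct_llama_model), so it can be compared against untyped nil directly; the old (*C.struct_llama_model)(C.NULL) cast said the same thing more noisily. A tiny sketch with a hypothetical thing struct:

    package main

    /*
    #include <stdlib.h>
    struct thing { int x; };
    static struct thing *maybe_make(int ok) { return ok ? malloc(sizeof(struct thing)) : NULL; }
    */
    import "C"

    import "fmt"

    func main() {
        p := C.maybe_make(0)
        // *C.struct_thing is an ordinary Go pointer type, so the untyped nil
        // comparison and the explicit C.NULL cast are interchangeable.
        fmt.Println(p == nil, p == (*C.struct_thing)(C.NULL)) // true true
    }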
@@ -276,7 +277,7 @@ func NewContextWithModel(model *Model, params ContextParams) (*Context, error) {
         c: C.llama_new_context_with_model(model.c, params.c),
         numThreads: int(params.c.n_threads),
     }
-    if c.c == (*C.struct_llama_context)(C.NULL) {
+    if c.c == nil {
         return nil, errors.New("unable to create llama context")
     }

@@ -300,6 +301,9 @@ func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float
     defer C.free(unsafe.Pointer(cLoraPath))

     loraAdapter := C.llama_lora_adapter_init(m.c, cLoraPath)
+    if loraAdapter == nil {
+        return errors.New("unable to load lora")
+    }

     err := -1
     if loraAdapter != nil {
@@ -322,13 +326,25 @@ type Batch struct {
 // Creates a new batch for either word tokens or image embeddings (if embedSize is non-zero).
 // Batches cannot contain both types at the same time. batchSize is the maximum number of entries
 // that can be added per sequence
-func NewBatch(batchSize int, maxSeq int, embedSize int) *Batch {
-    return &Batch{
+func NewBatch(batchSize int, maxSeq int, embedSize int) (*Batch, error) {
+    b := Batch{
         c: C.llama_batch_init(C.int(batchSize*maxSeq), C.int(embedSize), C.int(maxSeq)),
         batchSize: batchSize,
         maxSeq: maxSeq,
         embedSize: embedSize,
     }
+
+    // Check to see if any of the allocations in llama_batch_init() failed
+    nilPointer := (embedSize == 0 && b.c.token == nil) || (embedSize != 0 && b.c.embd == nil) ||
+        b.c.pos == nil || b.c.n_seq_id == nil || b.c.seq_id == nil || b.c.logits == nil ||
+        slices.Contains(unsafe.Slice(b.c.seq_id, b.allocSize()), nil)
+
+    if nilPointer {
+        C.llama_batch_free(b.c)
+        return nil, fmt.Errorf("unable to allocate batch (batchSize=%v maxSeq=%v embedSize=%v)", batchSize, maxSeq, embedSize)
+    }
+
+    return &b, nil
 }

 func (b *Batch) Size() int {
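The nilPointer expression above is also why the slices import was added: llama_batch_init hands back a C array of per-sequence allocations, and unsafe.Slice re-wraps that raw pointer and length as an ordinary Go slice so slices.Contains can scan it for a failed (NULL) entry. A self-contained sketch of the same technique, using a plain Go array in place of the C allocation:

    package main

    import (
        "fmt"
        "slices"
        "unsafe"
    )

    func main() {
        // Stand-in for the batch's array of per-sequence allocations,
        // where one allocation failed and left a nil behind.
        backing := []*int32{new(int32), nil, new(int32)}
        p := &backing[0] // pretend this pointer came back from C

        // unsafe.Slice turns (pointer, length) back into a []*int32 header...
        ids := unsafe.Slice(p, len(backing))
        // ...so slices.Contains can report whether any allocation is nil.
        fmt.Println(slices.Contains(ids, nil)) // true
    }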
@@ -484,6 +500,9 @@ func NewClipContext(llamaContext *Context, modelPath string) (*ClipContext, erro
     mp := C.CString(modelPath)
     defer C.free(unsafe.Pointer(mp))
     c := C.clip_model_load(mp, 1)
+    if c == nil {
+        return nil, fmt.Errorf("unable to load clip model: %v", modelPath)
+    }

     projEmbedSize := int(C.clip_n_mmproj_embd(c))
     modelEmbedSize := llamaContext.Model().NEmbd()
@@ -498,8 +517,11 @@ func (c *ClipContext) Free() {
     C.clip_free(c.c)
 }

-func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) [][]float32 {
+func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) ([][]float32, error) {
     l := C.llava_image_embed_make_with_bytes(c.c, C.int(llamaContext.numThreads), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(len(data)))
+    if l == nil {
+        return nil, errors.New("unable to make llava embedding from image")
+    }

     numTokens := int(l.n_image_pos)
     numEmbed := llamaContext.Model().NEmbd()
@@ -516,7 +538,7 @@ func (c *ClipContext) NewEmbed(llamaContext *Context, data []byte) [][]float32 {

     C.llava_image_embed_free(l)

-    return embed
+    return embed, nil
 }

 type MllamaContext struct {
@@ -527,6 +549,9 @@ func NewMllamaContext(llamaContext *Context, modelPath string) (*MllamaContext,
     mp := C.CString(modelPath)
     defer C.free(unsafe.Pointer(mp))
     c := C.mllama_model_load(mp, 1)
+    if c == nil {
+        return nil, fmt.Errorf("unable to load mllama model: %v", modelPath)
+    }

     projEmbedSize := int(C.mllama_n_embd(c))
     modelEmbedSize := llamaContext.Model().NEmbd()
@@ -541,19 +566,25 @@ func (m *MllamaContext) Free() {
     C.mllama_free(m.c)
 }

-func (m *MllamaContext) NewEmbed(llamaContext *Context, data []byte, aspectRatioId int) [][]float32 {
+func (m *MllamaContext) NewEmbed(llamaContext *Context, data []byte, aspectRatioId int) ([][]float32, error) {
     img := C.mllama_image_init()
     defer C.mllama_image_free(img)

-    C.mllama_image_load_from_data(unsafe.Pointer(&data[0]), C.int(len(data)), 560, 560, 3, 4, C.int(aspectRatioId), img)
+    ok := bool(C.mllama_image_load_from_data(unsafe.Pointer(&data[0]), C.int(len(data)), 560, 560, 3, 4, C.int(aspectRatioId), img))
+    if !ok {
+        return nil, errors.New("unable to load mllama image data")
+    }

     rows := make([]float32, m.EmbedSize(llamaContext))
-    C.mllama_image_encode(m.c, C.int(llamaContext.numThreads), img, (*C.float)(unsafe.Pointer(&rows[0])))
+    ok = bool(C.mllama_image_encode(m.c, C.int(llamaContext.numThreads), img, (*C.float)(unsafe.Pointer(&rows[0]))))
+    if !ok {
+        return nil, errors.New("unable to make mllama embedding from image")
+    }

     embed := make([][]float32, 1)
     embed[0] = rows

-    return embed
+    return embed, nil
 }

 func (m *MllamaContext) EmbedSize(llamaContext *Context) int {
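Both mllama calls above return a C bool status that the old code discarded. cgo surfaces that as a Go boolean value, so the result can be checked and promoted to an error at the boundary; a sketch with a hypothetical might_fail function (the explicit bool(...) conversion mainly documents intent):

    package main

    /*
    #include <stdbool.h>
    // Hypothetical C call that reports failure through its bool return value.
    static bool might_fail(int x) { return x > 0; }
    */
    import "C"

    import (
        "errors"
        "fmt"
    )

    func callMightFail(x int) error {
        // cgo surfaces the C bool as a Go boolean; checking it here turns a
        // silent status flag into a real Go error at the boundary.
        if ok := bool(C.might_fail(C.int(x))); !ok {
            return errors.New("might_fail reported failure")
        }
        return nil
    }

    func main() {
        fmt.Println(callMightFail(-1)) // might_fail reported failure
        fmt.Println(callMightFail(1))  // <nil>
    }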
@@ -592,7 +623,7 @@ type SamplingParams struct {
     Grammar string
 }

-func NewSamplingContext(model *Model, params SamplingParams) *SamplingContext {
+func NewSamplingContext(model *Model, params SamplingParams) (*SamplingContext, error) {
     var cparams C.struct_gpt_sampler_cparams
     cparams.top_k = C.int32_t(params.TopK)
     cparams.top_p = C.float(params.TopP)
@@ -615,9 +646,13 @@ func NewSamplingContext(model *Model, params SamplingParams) *SamplingContext {

     cparams.grammar = grammar
     context := &SamplingContext{c: C.gpt_sampler_cinit(model.c, &cparams)}
+    if context.c == nil {
+        return nil, errors.New("unable to create sampling context")
+    }

     runtime.SetFinalizer(context, func(s *SamplingContext) { C.gpt_sampler_cfree(s.c) })

-    return context
+    return context, nil
 }

 func (s *SamplingContext) Reset() {
llama/runner/image.go

@@ -63,9 +63,9 @@ func (c *ImageContext) Free(modelPath string) {
     }
 }

-func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) [][]float32 {
+func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) ([][]float32, error) {
     if c == nil {
-        return nil
+        return nil, nil
     }

     hash := c.hashImage(data)
@@ -76,17 +76,23 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
     embed, err := c.findImage(hash)
     if err != nil {
         if c.mllama != nil {
-            embed = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+            embed, err = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+            if err != nil {
+                return nil, err
+            }
         } else if c.clip != nil {
-            embed = c.clip.NewEmbed(llamaContext, data)
+            embed, err = c.clip.NewEmbed(llamaContext, data)
+            if err != nil {
+                return nil, err
+            }
         } else {
-            return nil
+            return nil, errors.New("received image but vision model not loaded")
         }

         c.addImage(hash, embed)
     }

-    return embed
+    return embed, nil
 }

 func (c *ImageContext) BatchSize(configuredBatchSize int) int {
llama/runner/runner.go

@@ -131,7 +131,10 @@ func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequen

     var sc *llama.SamplingContext
     if params.samplingParams != nil {
-        sc = llama.NewSamplingContext(s.model, *params.samplingParams)
+        sc, err = llama.NewSamplingContext(s.model, *params.samplingParams)
+        if err != nil {
+            return nil, err
+        }
         for _, input := range inputs {
             if input.embed == nil {
                 sc.Accept(input.token, false)
@@ -194,7 +197,11 @@ func (s *Server) inputs(prompt string, images []ImageData) ([]input, error) {
             return nil, fmt.Errorf("invalid image index: %d", n)
         }

-        embed := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+        embed, err := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+        if err != nil {
+            return nil, err
+        }
+
         for _, e := range embed {
             inputs = append(inputs, input{embed: e})
         }
@@ -305,13 +312,19 @@ func (s *Server) run(ctx context.Context) {

     // Logically these batches are used only within the context of processBatch
     // but it is better for performance to allocate them once here
-    tokenBatch := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+    tokenBatch, err := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+    if err != nil {
+        panic(err)
+    }
     defer tokenBatch.Free()

     var embedBatch *llama.Batch
     embedBatchSize := s.image.BatchSize(s.batchSize)
     if embedBatchSize != 0 {
-        embedBatch = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+        embedBatch, err = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+        if err != nil {
+            panic(err)
+        }
         defer embedBatch.Free()
     } else {
         embedBatch = &llama.Batch{}
llama/sampling_ext.cpp (vendored): 40 lines changed
@@ -5,24 +5,28 @@
 struct gpt_sampler *gpt_sampler_cinit(
     const struct llama_model *model, struct gpt_sampler_cparams *params)
 {
-    gpt_sampler_params sparams;
-    sparams.top_k = params->top_k;
-    sparams.top_p = params->top_p;
-    sparams.min_p = params->min_p;
-    sparams.tfs_z = params->tfs_z;
-    sparams.typ_p = params->typical_p;
-    sparams.temp = params->temp;
-    sparams.penalty_last_n = params->penalty_last_n;
-    sparams.penalty_repeat = params->penalty_repeat;
-    sparams.penalty_freq = params->penalty_freq;
-    sparams.penalty_present = params->penalty_present;
-    sparams.mirostat = params->mirostat;
-    sparams.mirostat_tau = params->mirostat_tau;
-    sparams.mirostat_eta = params->mirostat_eta;
-    sparams.penalize_nl = params->penalize_nl;
-    sparams.seed = params->seed;
-    sparams.grammar = params->grammar;
-    return gpt_sampler_init(model, sparams);
+    try {
+        gpt_sampler_params sparams;
+        sparams.top_k = params->top_k;
+        sparams.top_p = params->top_p;
+        sparams.min_p = params->min_p;
+        sparams.tfs_z = params->tfs_z;
+        sparams.typ_p = params->typical_p;
+        sparams.temp = params->temp;
+        sparams.penalty_last_n = params->penalty_last_n;
+        sparams.penalty_repeat = params->penalty_repeat;
+        sparams.penalty_freq = params->penalty_freq;
+        sparams.penalty_present = params->penalty_present;
+        sparams.mirostat = params->mirostat;
+        sparams.mirostat_tau = params->mirostat_tau;
+        sparams.mirostat_eta = params->mirostat_eta;
+        sparams.penalize_nl = params->penalize_nl;
+        sparams.seed = params->seed;
+        sparams.grammar = params->grammar;
+        return gpt_sampler_init(model, sparams);
+    } catch (const std::exception & err) {
+        return nullptr;
+    }
 }

 void gpt_sampler_cfree(struct gpt_sampler *sampler)
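For the vendored C++ shim, the motivation is that gpt_sampler_init may throw (presumably on grammar parsing or allocation failures), and a C++ exception must never unwind across the C ABI into cgo frames. Catching at the boundary and returning nullptr hands the failure to Go as a checkable pointer, matching the new context.c == nil check above. A minimal sketch of the same barrier pattern, with a hypothetical widget type:

    #include <exception>
    #include <stdexcept>

    struct widget { int size; };

    // Hypothetical C++ constructor-style helper that may throw.
    static widget *widget_init(int size)
    {
        if (size <= 0) {
            throw std::invalid_argument("size must be positive");
        }
        return new widget{size};
    }

    // Exceptions must not cross the C ABI boundary into cgo, so the exported
    // wrapper catches anything derived from std::exception and signals
    // failure with nullptr for the Go caller to check.
    extern "C" widget *widget_cinit(int size)
    {
        try {
            return widget_init(size);
        } catch (const std::exception &) {
            return nullptr;
        }
    }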