improve api error handling (#781)

- remove new lines from llama.cpp error messages relayed to client - check api option types and return error on wrong type - change num layers from 95% VRAM to 92% VRAM
2023-10-13 16:57:10 -04:00 · 2023-10-13 16:57:10 -04:00 · 6fe178134d
commit 6fe178134d
parent d890890f66
2 changed files with 9 additions and 16 deletions
--- a/api/types.go
+++ b/api/types.go
@ -3,7 +3,6 @@ package api
 import (
 	"encoding/json"
 	"fmt"
-	"log"
 	"math"
 	"os"
 	"reflect"
@ -238,44 +237,39 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 						// when JSON unmarshals numbers, it uses float64, not int
 						field.SetInt(int64(t))
 					default:
-						log.Printf("could not convert model parameter %v of type %T to int, skipped", key, val)
+						return fmt.Errorf("option %q must be of type integer", key)
 					}
 				case reflect.Bool:
 					val, ok := val.(bool)
 					if !ok {
-						log.Printf("could not convert model parameter %v of type %T to bool, skipped", key, val)
-						continue
+						return fmt.Errorf("option %q must be of type boolean", key)
 					}
 					field.SetBool(val)
 				case reflect.Float32:
 					// JSON unmarshals to float64
 					val, ok := val.(float64)
 					if !ok {
-						log.Printf("could not convert model parameter %v of type %T to float32, skipped", key, val)
-						continue
+						return fmt.Errorf("option %q must be of type float32", key)
 					}
 					field.SetFloat(val)
 				case reflect.String:
 					val, ok := val.(string)
 					if !ok {
-						log.Printf("could not convert model parameter %v of type %T to string, skipped", key, val)
-						continue
+						return fmt.Errorf("option %q must be of type string", key)
 					}
 					field.SetString(val)
 				case reflect.Slice:
 					// JSON unmarshals to []interface{}, not []string
 					val, ok := val.([]interface{})
 					if !ok {
-						log.Printf("could not convert model parameter %v of type %T to slice, skipped", key, val)
-						continue
+						return fmt.Errorf("option %q must be of type array", key)
 					}
 					// convert []interface{} to []string
 					slice := make([]string, len(val))
 					for i, item := range val {
 						str, ok := item.(string)
 						if !ok {
-							log.Printf("could not convert model parameter %v of type %T to slice of strings, skipped", key, item)
-							continue
+							return fmt.Errorf("option %q must be of an array of strings", key)
 						}
 						slice[i] = str
 					}
--- a/llm/llama.go
+++ b/llm/llama.go
@ -238,8 +238,8 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 		// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
 		bytesPerLayer := fileSizeBytes / numLayer

-		// max number of layers we can fit in VRAM, subtract 5% to prevent consuming all available VRAM and running out of memory
-		layers := int(freeVramBytes/bytesPerLayer) * 95 / 100
+		// max number of layers we can fit in VRAM, subtract 8% to prevent consuming all available VRAM and running out of memory
+		layers := int(freeVramBytes/bytesPerLayer) * 92 / 100
 		log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)

 		return layers
@ -261,8 +261,7 @@ func NewStatusWriter() *StatusWriter {

 func (w *StatusWriter) Write(b []byte) (int, error) {
 	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		err := fmt.Errorf("llama runner: %s", after)
-		w.ErrCh <- err
+		w.ErrCh <- fmt.Errorf("llama runner: %s", bytes.TrimSpace(after))
 	}
 	return os.Stderr.Write(b)
 }