2023-07-03 19:22:44 +00:00
package server
import (
2023-07-25 21:08:51 +00:00
"context"
2023-07-06 17:40:11 +00:00
"encoding/json"
2023-10-06 20:06:20 +00:00
"errors"
2023-07-22 06:02:12 +00:00
"fmt"
2023-07-03 19:22:44 +00:00
"io"
2023-10-06 20:06:20 +00:00
"io/fs"
2024-01-18 18:52:01 +00:00
"log/slog"
2024-03-13 20:29:40 +00:00
"math"
2023-07-03 19:22:44 +00:00
"net"
"net/http"
2024-03-09 06:23:47 +00:00
"net/netip"
2023-07-07 19:27:43 +00:00
"os"
2023-08-30 20:35:03 +00:00
"os/signal"
2023-07-15 00:27:14 +00:00
"path/filepath"
2023-08-01 01:35:18 +00:00
"reflect"
2023-09-12 15:04:35 +00:00
"runtime"
2024-03-13 20:29:40 +00:00
"strconv"
2023-07-06 17:40:11 +00:00
"strings"
2023-07-18 18:59:42 +00:00
"sync"
2023-08-30 20:35:03 +00:00
"syscall"
2023-07-13 01:18:06 +00:00
"time"
2023-07-03 19:22:44 +00:00
2023-07-22 01:01:24 +00:00
"github.com/gin-contrib/cors"
2023-07-03 19:22:44 +00:00
"github.com/gin-gonic/gin"
2024-02-12 19:16:20 +00:00
"golang.org/x/exp/slices"
2023-07-03 19:22:44 +00:00
2024-03-26 20:04:17 +00:00
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/version"
2023-07-03 19:22:44 +00:00
)
2023-08-22 16:48:35 +00:00
// mode is the gin run mode for the server. init validates it, falling back to
// gin.DebugMode for any unrecognised value, and applies it with gin.SetMode.
var mode string = gin . DebugMode
2023-12-15 00:47:40 +00:00
// Server bundles per-server state. In this section it only carries a network
// address; how that address is used is defined outside this part of the file.
type Server struct {
	// addr is the network address associated with this server.
	addr net.Addr
}
2023-08-22 16:48:35 +00:00
// init normalises mode to one of gin's known modes (defaulting to debug mode
// for anything else) and hands it to gin.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// already a recognised mode; keep it
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}
2023-08-01 01:35:18 +00:00
var loaded struct {
2023-07-19 22:00:28 +00:00
mu sync . Mutex
2024-03-14 17:24:13 +00:00
llama * llm . LlamaServer
2023-07-19 22:00:28 +00:00
expireTimer * time . Timer
2023-08-01 01:35:18 +00:00
2024-03-14 17:24:13 +00:00
model string
adapters [ ] string
projectors [ ] string
2023-10-19 14:39:58 +00:00
* api . Options
2023-07-18 18:59:42 +00:00
}
2023-08-15 13:35:39 +00:00
// defaultSessionDuration is the keep-alive used when neither the request nor
// the OLLAMA_KEEP_ALIVE environment variable supplies a usable value.
var defaultSessionDuration = 5 * time.Minute
2024-04-15 16:09:32 +00:00
// unload closes the loaded llama server, if there is one, and clears the
// bookkeeping describing what was loaded. The expiry timer is left untouched.
// Callers in this file hold loaded.mu when invoking it.
func unload() {
	if srv := loaded.llama; srv != nil {
		srv.Close()
	}

	loaded.llama, loaded.Options = nil, nil
	loaded.model = ""
	loaded.adapters, loaded.projectors = nil, nil
}
2023-08-08 19:13:22 +00:00
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
2024-04-02 23:44:10 +00:00
func load ( c * gin . Context , model * Model , opts api . Options , sessionDuration time . Duration ) error {
2024-03-14 17:24:13 +00:00
ctx , cancel := context . WithTimeout ( c , 10 * time . Second )
defer cancel ( )
needLoad := loaded . llama == nil || // is there a model loaded?
loaded . model != model . ModelPath || // has the base model changed?
! reflect . DeepEqual ( loaded . adapters , model . AdapterPaths ) || // have the adapters changed?
! reflect . DeepEqual ( loaded . projectors , model . ProjectorPaths ) || // have the adapters changed?
! reflect . DeepEqual ( loaded . Options . Runner , opts . Runner ) || // have the runner options changed?
loaded . llama . Ping ( ctx ) != nil
2023-10-19 14:39:58 +00:00
if needLoad {
2024-03-14 17:24:13 +00:00
if loaded . llama != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( "changing loaded model" )
2024-04-15 16:09:32 +00:00
unload ( )
2023-07-18 18:59:42 +00:00
}
2023-07-17 19:08:10 +00:00
2024-03-14 17:24:13 +00:00
llama , err := llm . NewLlamaServer ( model . ModelPath , model . AdapterPaths , model . ProjectorPaths , opts )
2023-07-18 18:59:42 +00:00
if err != nil {
2023-10-19 18:50:45 +00:00
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
// check for model compatibility
2023-11-24 18:58:09 +00:00
if errors . Is ( llm . ErrUnsupportedFormat , err ) || strings . Contains ( err . Error ( ) , "failed to load model" ) {
2023-10-19 18:50:45 +00:00
err = fmt . Errorf ( "%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`" , err , model . ShortName )
}
2024-01-03 17:01:42 +00:00
return err
2023-07-18 18:59:42 +00:00
}
2024-03-14 17:24:13 +00:00
loaded . model = model . ModelPath
loaded . adapters = model . AdapterPaths
loaded . projectors = model . ProjectorPaths
loaded . llama = llama
2024-04-02 23:44:10 +00:00
loaded . Options = & opts
2024-04-15 16:09:32 +00:00
if err = llama . WaitUntilRunning ( ) ; err != nil {
slog . Error ( "error loading llama server" , "error" , err )
unload ( )
return err
}
2023-07-19 22:00:28 +00:00
}
2023-09-21 19:38:49 +00:00
2023-08-01 01:35:18 +00:00
if loaded . expireTimer == nil {
loaded . expireTimer = time . AfterFunc ( sessionDuration , func ( ) {
loaded . mu . Lock ( )
defer loaded . mu . Unlock ( )
2024-04-15 16:09:32 +00:00
unload ( )
2023-07-19 22:00:28 +00:00
} )
2023-07-06 17:40:11 +00:00
}
2023-09-21 19:38:49 +00:00
2023-08-01 01:35:18 +00:00
loaded . expireTimer . Reset ( sessionDuration )
2024-01-03 17:01:42 +00:00
return nil
}
func modelOptions ( model * Model , requestOpts map [ string ] interface { } ) ( api . Options , error ) {
opts := api . DefaultOptions ( )
if err := opts . FromMap ( model . Options ) ; err != nil {
return api . Options { } , err
}
if err := opts . FromMap ( requestOpts ) ; err != nil {
return api . Options { } , err
}
return opts , nil
2023-08-08 19:13:22 +00:00
}
2024-02-12 19:16:20 +00:00
// isSupportedImageType reports whether the sniffed content type of image is
// one of the accepted image types (JPEG or PNG).
func isSupportedImageType(image []byte) bool {
	supported := []string{"image/jpeg", "image/jpg", "image/png"}
	return slices.Contains(supported, http.DetectContentType(image))
}
2023-08-08 19:13:22 +00:00
func GenerateHandler ( c * gin . Context ) {
loaded . mu . Lock ( )
defer loaded . mu . Unlock ( )
checkpointStart := time . Now ( )
var req api . GenerateRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
2023-12-11 21:56:22 +00:00
2023-10-18 23:08:42 +00:00
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-08-08 19:13:22 +00:00
return
}
2023-11-08 22:05:02 +00:00
// validate the request
switch {
case req . Model == "" :
2023-10-18 22:56:34 +00:00
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
return
2023-11-10 00:44:02 +00:00
case len ( req . Format ) > 0 && req . Format != "json" :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "format must be json" } )
return
2023-11-08 22:05:02 +00:00
case req . Raw && ( req . Template != "" || req . System != "" || len ( req . Context ) > 0 ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "raw mode does not support template, system, or context" } )
return
2023-10-18 22:56:34 +00:00
}
2024-02-12 19:16:20 +00:00
for _ , img := range req . Images {
if ! isSupportedImageType ( img ) {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "unsupported image format" } )
return
}
}
2024-01-03 17:01:42 +00:00
model , err := GetModel ( req . Model )
2023-08-08 19:13:22 +00:00
if err != nil {
2023-10-06 20:06:20 +00:00
var pErr * fs . PathError
2024-01-03 17:01:42 +00:00
if errors . As ( err , & pErr ) {
2023-10-06 20:06:20 +00:00
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found, try pulling it first" , req . Model ) } )
2024-01-03 17:01:42 +00:00
return
}
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-02-21 02:37:29 +00:00
if model . IsEmbedding ( ) {
2024-02-21 02:53:45 +00:00
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "embedding models do not support generate" } )
2024-02-21 02:37:29 +00:00
return
}
2024-01-03 17:01:42 +00:00
opts , err := modelOptions ( model , req . Options )
if err != nil {
if errors . Is ( err , api . ErrInvalidOpts ) {
2023-12-05 19:57:33 +00:00
c . JSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2024-01-03 17:01:42 +00:00
return
2023-11-08 22:05:02 +00:00
}
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-01-26 22:28:02 +00:00
var sessionDuration time . Duration
if req . KeepAlive == nil {
2024-03-13 20:29:40 +00:00
sessionDuration = getDefaultSessionDuration ( )
2024-01-26 22:28:02 +00:00
} else {
sessionDuration = req . KeepAlive . Duration
}
2024-04-02 23:44:10 +00:00
if err := load ( c , model , opts , sessionDuration ) ; err != nil {
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
2023-08-08 19:13:22 +00:00
return
}
2023-12-05 19:57:33 +00:00
// an empty request loads the model
2024-02-12 23:06:57 +00:00
// note: for a short while template was used in lieu
// of `raw` mode so we need to check for it too
2023-12-05 19:57:33 +00:00
if req . Prompt == "" && req . Template == "" && req . System == "" {
2023-12-01 19:37:17 +00:00
c . JSON ( http . StatusOK , api . GenerateResponse {
2023-12-09 10:05:43 +00:00
CreatedAt : time . Now ( ) . UTC ( ) ,
Model : req . Model ,
2023-12-15 22:25:12 +00:00
Done : true ,
} )
2023-12-04 23:01:06 +00:00
return
}
checkpointLoaded := time . Now ( )
2023-12-05 19:57:33 +00:00
var prompt string
switch {
case req . Raw :
prompt = req . Prompt
case req . Prompt != "" :
2024-02-12 23:06:57 +00:00
if req . Template == "" {
req . Template = model . Template
2023-12-05 19:57:33 +00:00
}
2024-02-12 23:06:57 +00:00
if req . System == "" {
req . System = model . System
}
slog . Debug ( "generate handler" , "prompt" , req . Prompt )
slog . Debug ( "generate handler" , "template" , req . Template )
slog . Debug ( "generate handler" , "system" , req . System )
var sb strings . Builder
2024-02-29 19:30:14 +00:00
for i := range req . Images {
fmt . Fprintf ( & sb , "[img-%d] " , i )
}
sb . WriteString ( req . Prompt )
p , err := Prompt ( req . Template , req . System , sb . String ( ) , "" , true )
if err != nil {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
sb . Reset ( )
2023-12-05 19:57:33 +00:00
if req . Context != nil {
2024-03-14 17:24:13 +00:00
prev , err := loaded . llama . Detokenize ( c . Request . Context ( ) , req . Context )
2023-12-05 19:57:33 +00:00
if err != nil {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-02-12 23:06:57 +00:00
sb . WriteString ( prev )
2024-01-28 22:15:56 +00:00
}
2024-02-12 23:06:57 +00:00
sb . WriteString ( p )
prompt = sb . String ( )
2023-08-08 19:13:22 +00:00
}
2024-02-01 00:47:26 +00:00
slog . Debug ( "generate handler" , "prompt" , prompt )
2024-01-28 23:22:35 +00:00
2023-12-04 23:01:06 +00:00
ch := make ( chan any )
2023-12-05 19:57:33 +00:00
var generated strings . Builder
2023-12-04 23:01:06 +00:00
go func ( ) {
defer close ( ch )
2024-03-14 17:24:13 +00:00
fn := func ( r llm . CompletionResponse ) {
2023-12-05 19:57:33 +00:00
// Update model expiration
2023-12-04 23:01:06 +00:00
loaded . expireTimer . Reset ( sessionDuration )
2023-12-05 19:57:33 +00:00
// Build up the full response
if _ , err := generated . WriteString ( r . Content ) ; err != nil {
ch <- gin . H { "error" : err . Error ( ) }
return
2023-12-04 23:01:06 +00:00
}
2023-12-05 19:57:33 +00:00
resp := api . GenerateResponse {
2023-12-10 16:42:15 +00:00
Model : req . Model ,
2023-12-14 17:15:50 +00:00
CreatedAt : time . Now ( ) . UTC ( ) ,
2023-12-09 10:05:43 +00:00
Done : r . Done ,
Response : r . Content ,
2023-12-05 19:57:33 +00:00
Metrics : api . Metrics {
PromptEvalCount : r . PromptEvalCount ,
PromptEvalDuration : r . PromptEvalDuration ,
EvalCount : r . EvalCount ,
EvalDuration : r . EvalDuration ,
} ,
2023-12-04 23:01:06 +00:00
}
2023-12-14 17:15:50 +00:00
if r . Done {
resp . TotalDuration = time . Since ( checkpointStart )
resp . LoadDuration = checkpointLoaded . Sub ( checkpointStart )
if ! req . Raw {
2024-02-12 23:06:57 +00:00
p , err := Prompt ( req . Template , req . System , req . Prompt , generated . String ( ) , false )
2023-12-22 22:07:05 +00:00
if err != nil {
2024-02-12 23:06:57 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
2023-12-22 22:07:05 +00:00
return
}
2024-02-12 23:06:57 +00:00
// TODO (jmorganca): encode() should not strip special tokens
2024-03-14 17:24:13 +00:00
tokens , err := loaded . llama . Tokenize ( c . Request . Context ( ) , p )
2023-12-14 17:15:50 +00:00
if err != nil {
ch <- gin . H { "error" : err . Error ( ) }
return
}
2024-02-12 23:06:57 +00:00
resp . Context = append ( req . Context , tokens ... )
2023-12-05 19:57:33 +00:00
}
}
ch <- resp
2023-12-04 23:01:06 +00:00
}
2024-02-01 02:56:12 +00:00
var images [ ] llm . ImageData
2024-02-01 01:39:38 +00:00
for i := range req . Images {
2024-02-01 02:56:12 +00:00
images = append ( images , llm . ImageData {
ID : i ,
Data : req . Images [ i ] ,
} )
2024-02-01 01:39:38 +00:00
}
2023-12-05 19:57:33 +00:00
// Start prediction
2024-03-14 17:24:13 +00:00
req := llm . CompletionRequest {
2024-01-03 17:01:42 +00:00
Prompt : prompt ,
Format : req . Format ,
2024-02-01 01:39:38 +00:00
Images : images ,
2024-01-03 17:01:42 +00:00
Options : opts ,
2023-12-05 19:57:33 +00:00
}
2024-03-14 17:24:13 +00:00
if err := loaded . llama . Completion ( c . Request . Context ( ) , req , fn ) ; err != nil {
2023-12-04 23:01:06 +00:00
ch <- gin . H { "error" : err . Error ( ) }
}
} ( )
if req . Stream != nil && ! * req . Stream {
2023-12-10 15:53:38 +00:00
// Accumulate responses into the final response
var final api . GenerateResponse
2023-12-05 19:57:33 +00:00
var sb strings . Builder
2023-12-04 23:01:06 +00:00
for resp := range ch {
2023-12-10 15:53:38 +00:00
switch r := resp . ( type ) {
case api . GenerateResponse :
sb . WriteString ( r . Response )
final = r
case gin . H :
if errorMsg , ok := r [ "error" ] . ( string ) ; ok {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : errorMsg } )
return
} else {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : "unexpected error format in response" } )
return
}
default :
c . JSON ( http . StatusInternalServerError , gin . H { "error" : "unexpected error" } )
2023-12-04 23:01:06 +00:00
return
}
}
2023-12-10 15:53:38 +00:00
final . Response = sb . String ( )
c . JSON ( http . StatusOK , final )
2023-12-04 23:01:06 +00:00
return
}
streamResponse ( c , ch )
}
2024-03-13 20:29:40 +00:00
// getDefaultSessionDuration returns the keep-alive to use when a request does
// not specify one. It reads OLLAMA_KEEP_ALIVE, accepting either a plain
// integer number of seconds or a time.ParseDuration string. A negative value
// means "keep forever" and yields the maximum duration. An integer too large
// to express in nanoseconds saturates to the maximum duration rather than
// overflowing. If the variable is unset or unparsable, defaultSessionDuration
// is returned.
func getDefaultSessionDuration() time.Duration {
	const forever = time.Duration(math.MaxInt64)

	t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE")
	if !exists {
		return defaultSessionDuration
	}

	// Plain integers are interpreted as seconds.
	if v, err := strconv.Atoi(t); err == nil {
		if v < 0 || int64(v) > math.MaxInt64/int64(time.Second) {
			return forever
		}
		return time.Duration(v) * time.Second
	}

	d, err := time.ParseDuration(t)
	if err != nil {
		return defaultSessionDuration
	}

	if d < 0 {
		return forever
	}

	return d
}
2024-03-01 01:40:56 +00:00
func EmbeddingsHandler ( c * gin . Context ) {
2023-12-04 23:01:06 +00:00
loaded . mu . Lock ( )
defer loaded . mu . Unlock ( )
var req api . EmbeddingRequest
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
if req . Model == "" {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
return
}
2024-01-03 17:01:42 +00:00
model , err := GetModel ( req . Model )
2023-12-04 23:01:06 +00:00
if err != nil {
2023-12-05 19:57:33 +00:00
var pErr * fs . PathError
2024-01-03 17:01:42 +00:00
if errors . As ( err , & pErr ) {
2023-12-05 19:57:33 +00:00
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found, try pulling it first" , req . Model ) } )
2024-01-03 17:01:42 +00:00
return
}
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
opts , err := modelOptions ( model , req . Options )
if err != nil {
if errors . Is ( err , api . ErrInvalidOpts ) {
2023-12-05 19:57:33 +00:00
c . JSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2024-01-03 17:01:42 +00:00
return
2023-12-05 19:57:33 +00:00
}
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-01-26 22:28:02 +00:00
var sessionDuration time . Duration
if req . KeepAlive == nil {
2024-03-13 20:29:40 +00:00
sessionDuration = getDefaultSessionDuration ( )
2024-01-26 22:28:02 +00:00
} else {
sessionDuration = req . KeepAlive . Duration
}
2024-04-02 23:44:10 +00:00
if err := load ( c , model , opts , sessionDuration ) ; err != nil {
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
2023-12-04 23:01:06 +00:00
return
}
2024-03-01 01:40:56 +00:00
// an empty request loads the model
if req . Prompt == "" {
c . JSON ( http . StatusOK , api . EmbeddingResponse { Embedding : [ ] float64 { } } )
2023-08-08 19:13:22 +00:00
return
}
2024-03-14 17:24:13 +00:00
embedding , err := loaded . llama . Embedding ( c . Request . Context ( ) , req . Prompt )
2023-08-08 19:13:22 +00:00
if err != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( fmt . Sprintf ( "embedding generation failed: %v" , err ) )
2023-08-08 19:13:22 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : "failed to generate embedding" } )
return
}
resp := api . EmbeddingResponse {
Embedding : embedding ,
}
c . JSON ( http . StatusOK , resp )
}
2023-07-20 23:09:23 +00:00
func PullModelHandler ( c * gin . Context ) {
2023-07-11 18:54:22 +00:00
var req api . PullRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-07-11 18:54:22 +00:00
return
}
2024-01-11 22:07:54 +00:00
var model string
if req . Model != "" {
model = req . Model
} else if req . Name != "" {
model = req . Name
} else {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
2023-10-18 22:56:34 +00:00
return
}
2023-07-17 00:02:22 +00:00
ch := make ( chan any )
go func ( ) {
defer close ( ch )
2023-07-19 01:51:30 +00:00
fn := func ( r api . ProgressResponse ) {
ch <- r
2023-07-17 00:02:22 +00:00
}
2023-07-19 01:51:30 +00:00
2024-02-14 19:29:49 +00:00
regOpts := & registryOptions {
2023-07-21 22:42:19 +00:00
Insecure : req . Insecure ,
}
2023-07-25 21:08:51 +00:00
ctx , cancel := context . WithCancel ( c . Request . Context ( ) )
defer cancel ( )
2024-01-11 22:07:54 +00:00
if err := PullModel ( ctx , model , regOpts , fn ) ; err != nil {
2023-07-20 19:12:08 +00:00
ch <- gin . H { "error" : err . Error ( ) }
2023-07-17 00:02:22 +00:00
}
} ( )
2023-10-11 16:54:27 +00:00
if req . Stream != nil && ! * req . Stream {
waitForStream ( c , ch )
return
}
2023-07-17 00:02:22 +00:00
streamResponse ( c , ch )
}
2023-07-20 23:09:23 +00:00
func PushModelHandler ( c * gin . Context ) {
2023-07-17 00:02:22 +00:00
var req api . PushRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-07-11 18:54:22 +00:00
return
}
2023-07-06 17:40:11 +00:00
2024-01-11 22:07:54 +00:00
var model string
if req . Model != "" {
model = req . Model
} else if req . Name != "" {
model = req . Name
} else {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
2023-10-18 22:56:34 +00:00
return
}
2023-07-17 00:02:22 +00:00
ch := make ( chan any )
go func ( ) {
defer close ( ch )
2023-07-19 01:51:30 +00:00
fn := func ( r api . ProgressResponse ) {
ch <- r
2023-07-17 00:02:22 +00:00
}
2023-07-19 01:51:30 +00:00
2024-02-14 19:29:49 +00:00
regOpts := & registryOptions {
2023-07-21 22:42:19 +00:00
Insecure : req . Insecure ,
}
2023-10-09 17:24:27 +00:00
ctx , cancel := context . WithCancel ( c . Request . Context ( ) )
defer cancel ( )
2024-01-11 22:07:54 +00:00
if err := PushModel ( ctx , model , regOpts , fn ) ; err != nil {
2023-07-20 19:12:08 +00:00
ch <- gin . H { "error" : err . Error ( ) }
2023-07-17 00:02:22 +00:00
}
} ( )
2023-10-11 16:54:27 +00:00
if req . Stream != nil && ! * req . Stream {
waitForStream ( c , ch )
return
}
2023-07-17 00:02:22 +00:00
streamResponse ( c , ch )
}
2023-07-20 23:09:23 +00:00
func CreateModelHandler ( c * gin . Context ) {
2023-07-17 00:02:22 +00:00
var req api . CreateRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-07-13 02:07:15 +00:00
return
2023-07-17 00:02:22 +00:00
}
2024-01-11 22:07:54 +00:00
var model string
if req . Model != "" {
model = req . Model
} else if req . Name != "" {
model = req . Name
} else {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
2023-10-18 22:56:34 +00:00
return
}
2024-01-11 22:07:54 +00:00
if err := ParseModelPath ( model ) . Validate ( ) ; err != nil {
2023-11-29 20:54:29 +00:00
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-11-29 18:33:45 +00:00
return
}
2023-11-14 21:45:07 +00:00
if req . Path == "" && req . Modelfile == "" {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "path or modelfile are required" } )
2023-11-14 20:30:34 +00:00
return
}
2023-11-14 21:45:07 +00:00
var modelfile io . Reader = strings . NewReader ( req . Modelfile )
if req . Path != "" && req . Modelfile == "" {
2023-11-21 20:43:17 +00:00
mf , err := os . Open ( req . Path )
2023-11-14 21:45:07 +00:00
if err != nil {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : fmt . Sprintf ( "error reading modelfile: %s" , err ) } )
return
}
2023-11-21 20:43:17 +00:00
defer mf . Close ( )
2023-11-14 21:45:07 +00:00
2023-11-21 20:43:17 +00:00
modelfile = mf
2023-11-14 21:45:07 +00:00
}
2023-11-14 20:30:34 +00:00
commands , err := parser . Parse ( modelfile )
if err != nil {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
2023-07-11 18:54:22 +00:00
ch := make ( chan any )
2023-07-14 21:15:53 +00:00
go func ( ) {
defer close ( ch )
2023-07-25 18:25:13 +00:00
fn := func ( resp api . ProgressResponse ) {
ch <- resp
2023-07-17 00:02:22 +00:00
}
2023-07-25 21:08:51 +00:00
ctx , cancel := context . WithCancel ( c . Request . Context ( ) )
defer cancel ( )
2024-04-05 15:49:04 +00:00
if err := CreateModel ( ctx , model , filepath . Dir ( req . Path ) , req . Quantization , commands , fn ) ; err != nil {
2023-07-20 19:12:08 +00:00
ch <- gin . H { "error" : err . Error ( ) }
2023-07-17 00:02:22 +00:00
}
2023-07-14 21:15:53 +00:00
} ( )
2023-07-07 22:29:17 +00:00
2023-10-11 16:54:27 +00:00
if req . Stream != nil && ! * req . Stream {
waitForStream ( c , ch )
return
}
2023-07-14 21:15:53 +00:00
streamResponse ( c , ch )
2023-07-05 19:37:33 +00:00
}
2023-07-20 23:09:23 +00:00
func DeleteModelHandler ( c * gin . Context ) {
var req api . DeleteRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-07-20 23:09:23 +00:00
return
}
2024-01-11 22:07:54 +00:00
var model string
if req . Model != "" {
model = req . Model
} else if req . Name != "" {
model = req . Name
} else {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
2023-10-18 22:56:34 +00:00
return
}
2024-01-11 22:07:54 +00:00
if err := DeleteModel ( model ) ; err != nil {
2023-07-22 06:02:12 +00:00
if os . IsNotExist ( err ) {
2024-01-11 22:07:54 +00:00
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found" , model ) } )
2023-07-22 06:02:12 +00:00
} else {
2023-07-20 23:09:23 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
}
2023-07-22 06:02:12 +00:00
return
}
2023-09-27 00:28:14 +00:00
manifestsPath , err := GetManifestPath ( )
if err != nil {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
if err := PruneDirectory ( manifestsPath ) ; err != nil {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2023-09-11 18:46:35 +00:00
c . JSON ( http . StatusOK , nil )
2023-07-20 23:09:23 +00:00
}
2023-09-06 18:04:17 +00:00
func ShowModelHandler ( c * gin . Context ) {
var req api . ShowRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-09-06 18:04:17 +00:00
return
}
2024-01-11 22:07:54 +00:00
if req . Model != "" {
2024-01-18 23:36:50 +00:00
// noop
2024-01-11 22:07:54 +00:00
} else if req . Name != "" {
2024-01-18 23:36:50 +00:00
req . Model = req . Name
2024-01-11 22:07:54 +00:00
} else {
2024-01-05 01:23:11 +00:00
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
2023-10-18 22:56:34 +00:00
return
}
2024-01-05 01:23:11 +00:00
resp , err := GetModelInfo ( req )
2023-09-06 18:04:17 +00:00
if err != nil {
if os . IsNotExist ( err ) {
2024-01-18 23:36:50 +00:00
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found" , req . Model ) } )
2023-09-06 18:04:17 +00:00
} else {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
}
return
}
c . JSON ( http . StatusOK , resp )
}
2024-01-05 01:23:11 +00:00
func GetModelInfo ( req api . ShowRequest ) ( * api . ShowResponse , error ) {
model , err := GetModel ( req . Model )
2023-09-06 18:04:17 +00:00
if err != nil {
return nil , err
}
2023-12-11 21:56:22 +00:00
modelDetails := api . ModelDetails {
2024-01-25 20:12:36 +00:00
ParentModel : model . ParentModel ,
2023-12-11 21:56:22 +00:00
Format : model . Config . ModelFormat ,
Family : model . Config . ModelFamily ,
Families : model . Config . ModelFamilies ,
ParameterSize : model . Config . ModelType ,
QuantizationLevel : model . Config . FileType ,
}
2024-01-05 01:23:11 +00:00
if req . System != "" {
model . System = req . System
}
if req . Template != "" {
model . Template = req . Template
}
2024-01-25 20:12:36 +00:00
msgs := make ( [ ] api . Message , 0 )
for _ , msg := range model . Messages {
msgs = append ( msgs , api . Message { Role : msg . Role , Content : msg . Content } )
}
2023-09-06 18:04:17 +00:00
resp := & api . ShowResponse {
License : strings . Join ( model . License , "\n" ) ,
System : model . System ,
Template : model . Template ,
2023-12-11 21:56:22 +00:00
Details : modelDetails ,
2024-01-25 20:12:36 +00:00
Messages : msgs ,
2023-09-06 18:04:17 +00:00
}
var params [ ] string
cs := 30
for k , v := range model . Options {
switch val := v . ( type ) {
case [ ] interface { } :
for _ , nv := range val {
2024-01-16 18:34:44 +00:00
params = append ( params , fmt . Sprintf ( "%-*s %#v" , cs , k , nv ) )
2023-09-06 18:04:17 +00:00
}
2024-01-16 18:34:44 +00:00
default :
params = append ( params , fmt . Sprintf ( "%-*s %#v" , cs , k , v ) )
2023-09-06 18:04:17 +00:00
}
}
resp . Parameters = strings . Join ( params , "\n" )
2024-01-05 01:23:11 +00:00
for k , v := range req . Options {
if _ , ok := req . Options [ k ] ; ok {
model . Options [ k ] = v
}
}
mf , err := ShowModelfile ( model )
if err != nil {
return nil , err
}
resp . Modelfile = mf
2023-09-06 18:04:17 +00:00
return resp , nil
}
2023-07-20 23:09:23 +00:00
func ListModelsHandler ( c * gin . Context ) {
2023-10-17 17:02:43 +00:00
models := make ( [ ] api . ModelResponse , 0 )
2023-12-15 23:50:51 +00:00
manifestsPath , err := GetManifestPath ( )
2023-07-18 16:09:45 +00:00
if err != nil {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2023-08-30 18:14:12 +00:00
2023-12-11 21:56:22 +00:00
modelResponse := func ( modelName string ) ( api . ModelResponse , error ) {
model , err := GetModel ( modelName )
if err != nil {
return api . ModelResponse { } , err
}
modelDetails := api . ModelDetails {
Format : model . Config . ModelFormat ,
Family : model . Config . ModelFamily ,
Families : model . Config . ModelFamilies ,
ParameterSize : model . Config . ModelType ,
QuantizationLevel : model . Config . FileType ,
}
return api . ModelResponse {
2024-01-18 22:32:55 +00:00
Model : model . ShortName ,
2023-12-11 21:56:22 +00:00
Name : model . ShortName ,
Size : model . Size ,
Digest : model . Digest ,
Details : modelDetails ,
} , nil
}
2023-08-30 18:14:12 +00:00
walkFunc := func ( path string , info os . FileInfo , _ error ) error {
2023-07-18 16:09:45 +00:00
if ! info . IsDir ( ) {
2023-12-15 23:50:51 +00:00
path , tag := filepath . Split ( path )
model := strings . Trim ( strings . TrimPrefix ( path , manifestsPath ) , string ( os . PathSeparator ) )
modelPath := strings . Join ( [ ] string { model , tag } , ":" )
canonicalModelPath := strings . ReplaceAll ( modelPath , string ( os . PathSeparator ) , "/" )
2023-08-22 04:56:56 +00:00
2023-12-15 23:50:51 +00:00
resp , err := modelResponse ( canonicalModelPath )
2023-07-18 16:09:45 +00:00
if err != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( fmt . Sprintf ( "skipping file: %s" , canonicalModelPath ) )
2023-12-15 22:07:34 +00:00
// nolint: nilerr
2023-07-18 19:39:08 +00:00
return nil
2023-07-18 16:09:45 +00:00
}
2023-08-30 18:14:12 +00:00
2023-12-11 21:56:22 +00:00
resp . ModifiedAt = info . ModTime ( )
models = append ( models , resp )
2023-07-18 16:09:45 +00:00
}
2023-08-30 18:14:12 +00:00
2023-07-18 16:09:45 +00:00
return nil
2023-08-30 18:14:12 +00:00
}
2023-12-15 23:50:51 +00:00
if err := filepath . Walk ( manifestsPath , walkFunc ) ; err != nil {
2023-07-18 16:09:45 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2023-07-19 22:00:28 +00:00
c . JSON ( http . StatusOK , api . ListResponse { Models : models } )
2023-07-18 16:09:45 +00:00
}
2023-07-24 15:27:28 +00:00
func CopyModelHandler ( c * gin . Context ) {
var req api . CopyRequest
2023-10-18 23:08:42 +00:00
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2023-07-24 15:27:28 +00:00
return
}
2023-10-18 22:56:34 +00:00
if req . Source == "" || req . Destination == "" {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "source add destination are required" } )
return
}
2023-11-29 20:54:29 +00:00
if err := ParseModelPath ( req . Destination ) . Validate ( ) ; err != nil {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
2023-07-24 15:27:28 +00:00
if err := CopyModel ( req . Source , req . Destination ) ; err != nil {
if os . IsNotExist ( err ) {
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found" , req . Source ) } )
} else {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
}
return
}
}
2023-11-15 18:59:38 +00:00
func HeadBlobHandler ( c * gin . Context ) {
2023-11-14 22:07:40 +00:00
path , err := GetBlobsPath ( c . Param ( "digest" ) )
if err != nil {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
if _ , err := os . Stat ( path ) ; err != nil {
c . AbortWithStatusJSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "blob %q not found" , c . Param ( "digest" ) ) } )
return
}
2023-11-15 21:55:37 +00:00
c . Status ( http . StatusOK )
2023-11-14 22:07:40 +00:00
}
func CreateBlobHandler ( c * gin . Context ) {
2024-04-05 16:30:09 +00:00
path , err := GetBlobsPath ( c . Param ( "digest" ) )
if err != nil {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
_ , err = os . Stat ( path )
switch {
case errors . Is ( err , os . ErrNotExist ) :
// noop
case err != nil :
c . AbortWithStatusJSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
default :
c . Status ( http . StatusOK )
return
}
2023-11-24 20:01:23 +00:00
layer , err := NewLayer ( c . Request . Body , "" )
2023-11-17 23:21:57 +00:00
if err != nil {
c . AbortWithStatusJSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2023-11-24 20:01:23 +00:00
if layer . Digest != c . Param ( "digest" ) {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : fmt . Sprintf ( "digest mismatch, expected %q, got %q" , c . Param ( "digest" ) , layer . Digest ) } )
2023-11-14 22:07:40 +00:00
return
}
2023-11-24 20:01:23 +00:00
if _ , err := layer . Commit ( ) ; err != nil {
2023-11-14 22:07:40 +00:00
c . AbortWithStatusJSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2023-11-15 21:55:37 +00:00
c . Status ( http . StatusCreated )
2023-11-14 22:07:40 +00:00
}
2023-09-21 16:42:16 +00:00
// defaultAllowOrigins lists the hosts that are always accepted as CORS
// origins. GenerateRoutes expands each entry into http and https origins,
// both with and without a port wildcard.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}
2024-03-09 08:22:08 +00:00
// isLocalIP reports whether ip matches an address assigned to one of this
// machine's network interfaces.
//
// Addresses are compared by their string form. Failures while enumerating
// interfaces, listing an interface's addresses, or parsing an address are
// treated as "no match" rather than reported.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	target := ip.String()
	for _, iface := range ifaces {
		ifaceAddrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, ifaceAddr := range ifaceAddrs {
			// Interface addresses are parsed as CIDR; only the IP part is compared.
			candidate, _, err := net.ParseCIDR(ifaceAddr.String())
			if err != nil {
				continue
			}
			if candidate.String() == target {
				return true
			}
		}
	}

	return false
}
2024-03-09 06:23:47 +00:00
// allowedHost reports whether host (a host name without a port) is considered
// local and may therefore be served.
//
// A host is allowed when it is:
//   - empty or "localhost",
//   - equal to the name returned by os.Hostname, or
//   - a name under one of the local-only TLDs "localhost", "local" or
//     "internal" (for example "box.local").
//
// Matching is case-insensitive because host names are case-insensitive; a
// request for "LOCALHOST" or "Box.Local" is treated like its lowercase form.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	// A failure to read the OS hostname simply skips this check.
	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
2024-03-09 06:23:47 +00:00
2024-03-09 07:23:59 +00:00
func allowedHostsMiddleware ( addr net . Addr ) gin . HandlerFunc {
return func ( c * gin . Context ) {
if addr == nil {
2024-03-09 06:23:47 +00:00
c . Next ( )
return
}
2024-03-09 08:22:08 +00:00
if addr , err := netip . ParseAddrPort ( addr . String ( ) ) ; err == nil && ! addr . Addr ( ) . IsLoopback ( ) {
2024-03-09 06:23:47 +00:00
c . Next ( )
return
}
host , _ , err := net . SplitHostPort ( c . Request . Host )
if err != nil {
host = c . Request . Host
}
2024-03-09 07:23:59 +00:00
if addr , err := netip . ParseAddr ( host ) ; err == nil {
2024-03-09 08:22:08 +00:00
if addr . IsLoopback ( ) || addr . IsPrivate ( ) || addr . IsUnspecified ( ) || isLocalIP ( addr ) {
2024-03-09 07:23:59 +00:00
c . Next ( )
return
}
}
2024-03-09 06:23:47 +00:00
if allowedHost ( host ) {
c . Next ( )
return
}
c . AbortWithStatus ( http . StatusForbidden )
}
2023-12-15 00:47:40 +00:00
}
2023-10-30 15:10:18 +00:00
2023-12-15 00:47:40 +00:00
func ( s * Server ) GenerateRoutes ( ) http . Handler {
2023-07-22 01:01:24 +00:00
config := cors . DefaultConfig ( )
config . AllowWildcard = true
2024-01-05 01:55:47 +00:00
config . AllowBrowserExtensions = true
2023-09-21 16:42:16 +00:00
2024-03-27 22:24:28 +00:00
if allowedOrigins := strings . Trim ( os . Getenv ( "OLLAMA_ORIGINS" ) , "\"'" ) ; allowedOrigins != "" {
config . AllowOrigins = strings . Split ( allowedOrigins , "," )
}
2023-09-21 16:42:16 +00:00
for _ , allowOrigin := range defaultAllowOrigins {
config . AllowOrigins = append ( config . AllowOrigins ,
fmt . Sprintf ( "http://%s" , allowOrigin ) ,
fmt . Sprintf ( "https://%s" , allowOrigin ) ,
fmt . Sprintf ( "http://%s:*" , allowOrigin ) ,
fmt . Sprintf ( "https://%s:*" , allowOrigin ) ,
)
}
2023-07-22 01:01:24 +00:00
2023-07-05 19:37:33 +00:00
r := gin . Default ( )
2023-09-21 19:38:49 +00:00
r . Use (
cors . New ( config ) ,
2024-03-09 06:23:47 +00:00
allowedHostsMiddleware ( s . addr ) ,
2023-09-21 19:38:49 +00:00
)
2023-07-05 19:37:33 +00:00
2023-07-20 23:09:23 +00:00
r . POST ( "/api/pull" , PullModelHandler )
r . POST ( "/api/generate" , GenerateHandler )
2023-12-05 19:57:33 +00:00
r . POST ( "/api/chat" , ChatHandler )
2024-03-01 01:40:56 +00:00
r . POST ( "/api/embeddings" , EmbeddingsHandler )
2023-07-20 23:09:23 +00:00
r . POST ( "/api/create" , CreateModelHandler )
r . POST ( "/api/push" , PushModelHandler )
2023-07-24 15:27:28 +00:00
r . POST ( "/api/copy" , CopyModelHandler )
2023-07-20 23:09:23 +00:00
r . DELETE ( "/api/delete" , DeleteModelHandler )
2023-09-06 18:04:17 +00:00
r . POST ( "/api/show" , ShowModelHandler )
2023-11-14 22:07:40 +00:00
r . POST ( "/api/blobs/:digest" , CreateBlobHandler )
2023-11-15 23:22:12 +00:00
r . HEAD ( "/api/blobs/:digest" , HeadBlobHandler )
2023-07-03 19:22:44 +00:00
2024-02-07 22:24:29 +00:00
// Compatibility endpoints
r . POST ( "/v1/chat/completions" , openai . Middleware ( ) , ChatHandler )
2023-09-21 23:38:03 +00:00
for _ , method := range [ ] string { http . MethodGet , http . MethodHead } {
r . Handle ( method , "/" , func ( c * gin . Context ) {
c . String ( http . StatusOK , "Ollama is running" )
} )
r . Handle ( method , "/api/tags" , ListModelsHandler )
2023-10-12 22:45:07 +00:00
r . Handle ( method , "/api/version" , func ( c * gin . Context ) {
c . JSON ( http . StatusOK , gin . H { "version" : version . Version } )
} )
2023-09-21 23:38:03 +00:00
}
2023-12-15 00:47:40 +00:00
return r
}
func Serve ( ln net . Listener ) error {
2024-01-31 22:59:32 +00:00
level := slog . LevelInfo
2024-01-18 18:52:01 +00:00
if debug := os . Getenv ( "OLLAMA_DEBUG" ) ; debug != "" {
2024-01-31 22:59:32 +00:00
level = slog . LevelDebug
2024-01-18 18:52:01 +00:00
}
2024-01-31 22:59:32 +00:00
handler := slog . NewTextHandler ( os . Stderr , & slog . HandlerOptions {
Level : level ,
AddSource : true ,
ReplaceAttr : func ( _ [ ] string , attr slog . Attr ) slog . Attr {
if attr . Key == slog . SourceKey {
source := attr . Value . Any ( ) . ( * slog . Source )
source . File = filepath . Base ( source . File )
}
return attr
} ,
} )
slog . SetDefault ( slog . New ( handler ) )
2024-03-15 03:18:06 +00:00
blobsDir , err := GetBlobsPath ( "" )
if err != nil {
return err
}
if err := fixBlobs ( blobsDir ) ; err != nil {
return err
}
2023-12-15 00:47:40 +00:00
if noprune := os . Getenv ( "OLLAMA_NOPRUNE" ) ; noprune == "" {
// clean up unused layers and manifests
if err := PruneLayers ( ) ; err != nil {
return err
}
manifestsPath , err := GetManifestPath ( )
if err != nil {
return err
}
if err := PruneDirectory ( manifestsPath ) ; err != nil {
return err
}
}
2024-03-09 06:23:47 +00:00
s := & Server { addr : ln . Addr ( ) }
2023-12-15 00:47:40 +00:00
r := s . GenerateRoutes ( )
2024-01-18 18:52:01 +00:00
slog . Info ( fmt . Sprintf ( "Listening on %s (version %s)" , ln . Addr ( ) , version . Version ) )
2023-12-15 00:47:40 +00:00
srvr := & http . Server {
2023-07-03 19:22:44 +00:00
Handler : r ,
}
2023-08-30 20:35:03 +00:00
// listen for a ctrl+c and stop any loaded llm
signals := make ( chan os . Signal , 1 )
2023-09-21 19:38:49 +00:00
signal . Notify ( signals , syscall . SIGINT , syscall . SIGTERM )
2023-08-30 20:35:03 +00:00
go func ( ) {
<- signals
2024-04-15 16:09:32 +00:00
unload ( )
2024-03-08 17:45:55 +00:00
gpu . Cleanup ( )
2023-08-30 20:35:03 +00:00
os . Exit ( 0 )
} ( )
2024-02-16 01:15:09 +00:00
if err := llm . Init ( ) ; err != nil {
2023-11-29 19:00:37 +00:00
return fmt . Errorf ( "unable to initialize llm library %w" , err )
}
if runtime . GOOS == "linux" { // TODO - windows too
2023-09-12 15:04:35 +00:00
// check compatibility to log warnings
2023-11-29 19:00:37 +00:00
if _ , err := gpu . CheckVRAM ( ) ; err != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( err . Error ( ) )
2023-09-12 15:04:35 +00:00
}
}
2023-12-15 00:47:40 +00:00
return srvr . Serve ( ln )
2023-07-03 19:22:44 +00:00
}
2023-07-06 17:40:11 +00:00
2023-10-11 16:54:27 +00:00
// waitForStream drains ch and writes a single JSON response describing the
// outcome of a progress stream.
//
// Progress updates are skipped until one with Status "success" arrives, which
// is written with 200. A gin.H value is treated as an error report and any
// other value type as unexpected; both end the wait with 500. If ch is closed
// before a success or error is seen, 500 is written as well.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for msg := range ch {
		switch v := msg.(type) {
		case api.ProgressResponse:
			if v.Status == "success" {
				c.JSON(http.StatusOK, v)
				return
			}
		case gin.H:
			// Errors are expected under the "error" key as a string.
			errText, isString := v["error"].(string)
			if !isString {
				errText = "unexpected error format in progress response"
			}
			c.JSON(http.StatusInternalServerError, gin.H{"error": errText})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}
2023-07-14 21:15:53 +00:00
func streamResponse ( c * gin . Context , ch chan any ) {
2023-08-09 04:38:10 +00:00
c . Header ( "Content-Type" , "application/x-ndjson" )
2023-07-11 18:54:22 +00:00
c . Stream ( func ( w io . Writer ) bool {
val , ok := <- ch
if ! ok {
return false
}
bts , err := json . Marshal ( val )
if err != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( fmt . Sprintf ( "streamResponse: json.Marshal failed with %s" , err ) )
2023-07-11 18:54:22 +00:00
return false
}
2023-09-30 04:45:52 +00:00
// Delineate chunks with new-line delimiter
2023-07-11 18:54:22 +00:00
bts = append ( bts , '\n' )
if _ , err := w . Write ( bts ) ; err != nil {
2024-01-18 18:52:01 +00:00
slog . Info ( fmt . Sprintf ( "streamResponse: w.Write failed with %s" , err ) )
2023-07-11 18:54:22 +00:00
return false
}
return true
} )
}
2023-12-05 19:57:33 +00:00
2024-02-12 23:06:57 +00:00
// ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
2024-02-16 19:42:43 +00:00
func chatPrompt ( ctx context . Context , template string , messages [ ] api . Message , numCtx int ) ( string , error ) {
2024-02-12 23:06:57 +00:00
encode := func ( s string ) ( [ ] int , error ) {
2024-03-14 17:24:13 +00:00
return loaded . llama . Tokenize ( ctx , s )
2024-02-12 23:06:57 +00:00
}
2024-02-16 19:42:43 +00:00
prompt , err := ChatPrompt ( template , messages , numCtx , encode )
2024-02-12 23:06:57 +00:00
if err != nil {
return "" , err
}
return prompt , nil
}
2023-12-05 19:57:33 +00:00
func ChatHandler ( c * gin . Context ) {
loaded . mu . Lock ( )
defer loaded . mu . Unlock ( )
checkpointStart := time . Now ( )
var req api . ChatRequest
err := c . ShouldBindJSON ( & req )
switch {
case errors . Is ( err , io . EOF ) :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "missing request body" } )
return
case err != nil :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
// validate the request
switch {
case req . Model == "" :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "model is required" } )
return
case len ( req . Format ) > 0 && req . Format != "json" :
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "format must be json" } )
return
}
2024-01-03 17:01:42 +00:00
model , err := GetModel ( req . Model )
2023-12-05 19:57:33 +00:00
if err != nil {
var pErr * fs . PathError
2024-01-03 17:01:42 +00:00
if errors . As ( err , & pErr ) {
2023-12-05 19:57:33 +00:00
c . JSON ( http . StatusNotFound , gin . H { "error" : fmt . Sprintf ( "model '%s' not found, try pulling it first" , req . Model ) } )
2024-01-03 17:01:42 +00:00
return
}
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-02-21 02:37:29 +00:00
if model . IsEmbedding ( ) {
2024-02-21 02:53:45 +00:00
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "embedding models do not support chat" } )
2024-02-21 02:37:29 +00:00
return
}
2024-01-03 17:01:42 +00:00
opts , err := modelOptions ( model , req . Options )
if err != nil {
if errors . Is ( err , api . ErrInvalidOpts ) {
2023-12-05 19:57:33 +00:00
c . JSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
2024-01-03 17:01:42 +00:00
return
2023-12-05 19:57:33 +00:00
}
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
return
}
2024-01-26 22:28:02 +00:00
var sessionDuration time . Duration
if req . KeepAlive == nil {
2024-03-13 20:29:40 +00:00
sessionDuration = getDefaultSessionDuration ( )
2024-01-26 22:28:02 +00:00
} else {
sessionDuration = req . KeepAlive . Duration
}
2024-04-02 23:44:10 +00:00
if err := load ( c , model , opts , sessionDuration ) ; err != nil {
2024-01-03 17:01:42 +00:00
c . JSON ( http . StatusInternalServerError , gin . H { "error" : err . Error ( ) } )
2023-12-05 19:57:33 +00:00
return
}
checkpointLoaded := time . Now ( )
2024-02-16 19:42:43 +00:00
// if the first message is not a system message, then add the model's default system message
if len ( req . Messages ) > 0 && req . Messages [ 0 ] . Role != "system" {
req . Messages = append ( [ ] api . Message {
{
Role : "system" ,
Content : model . System ,
} ,
} , req . Messages ... )
}
prompt , err := chatPrompt ( c . Request . Context ( ) , model . Template , req . Messages , opts . NumCtx )
2023-12-05 19:57:33 +00:00
if err != nil {
c . JSON ( http . StatusBadRequest , gin . H { "error" : err . Error ( ) } )
return
}
2024-02-01 01:39:38 +00:00
2024-02-08 00:30:33 +00:00
// an empty request loads the model
2024-02-12 23:06:57 +00:00
if len ( req . Messages ) == 0 || prompt == "" {
2024-02-08 00:30:33 +00:00
resp := api . ChatResponse {
CreatedAt : time . Now ( ) . UTC ( ) ,
Model : req . Model ,
Done : true ,
Message : api . Message { Role : "assistant" } ,
}
c . JSON ( http . StatusOK , resp )
return
}
2024-02-12 23:06:57 +00:00
// only send images that are in the prompt
var i int
var images [ ] llm . ImageData
for _ , m := range req . Messages {
for _ , img := range m . Images {
if ! isSupportedImageType ( img ) {
c . AbortWithStatusJSON ( http . StatusBadRequest , gin . H { "error" : "unsupported image format" } )
return
}
if strings . Contains ( prompt , fmt . Sprintf ( "[img-%d]" , i ) ) {
images = append ( images , llm . ImageData { Data : img , ID : i } )
}
i += 1
}
}
slog . Debug ( "chat handler" , "prompt" , prompt , "images" , len ( images ) )
2024-01-28 23:22:35 +00:00
2023-12-05 19:57:33 +00:00
ch := make ( chan any )
go func ( ) {
defer close ( ch )
2024-03-14 17:24:13 +00:00
fn := func ( r llm . CompletionResponse ) {
2023-12-05 19:57:33 +00:00
// Update model expiration
loaded . expireTimer . Reset ( sessionDuration )
resp := api . ChatResponse {
2023-12-10 16:42:15 +00:00
Model : req . Model ,
2023-12-14 17:15:50 +00:00
CreatedAt : time . Now ( ) . UTC ( ) ,
2023-12-18 19:23:38 +00:00
Message : api . Message { Role : "assistant" , Content : r . Content } ,
2023-12-05 19:57:33 +00:00
Done : r . Done ,
Metrics : api . Metrics {
PromptEvalCount : r . PromptEvalCount ,
PromptEvalDuration : r . PromptEvalDuration ,
EvalCount : r . EvalCount ,
EvalDuration : r . EvalDuration ,
} ,
}
2023-12-14 17:15:50 +00:00
if r . Done {
resp . TotalDuration = time . Since ( checkpointStart )
resp . LoadDuration = checkpointLoaded . Sub ( checkpointStart )
2023-12-05 19:57:33 +00:00
}
ch <- resp
}
2024-03-14 17:24:13 +00:00
if err := loaded . llama . Completion ( c . Request . Context ( ) , llm . CompletionRequest {
2024-01-03 17:01:42 +00:00
Prompt : prompt ,
Format : req . Format ,
2024-02-01 03:18:25 +00:00
Images : images ,
2024-01-03 17:01:42 +00:00
Options : opts ,
2024-03-14 17:24:13 +00:00
} , fn ) ; err != nil {
2023-12-05 19:57:33 +00:00
ch <- gin . H { "error" : err . Error ( ) }
}
} ( )
if req . Stream != nil && ! * req . Stream {
2023-12-10 15:53:38 +00:00
// Accumulate responses into the final response
var final api . ChatResponse
2023-12-05 19:57:33 +00:00
var sb strings . Builder
for resp := range ch {
2023-12-10 15:53:38 +00:00
switch r := resp . ( type ) {
case api . ChatResponse :
2023-12-18 19:23:38 +00:00
sb . WriteString ( r . Message . Content )
2023-12-10 15:53:38 +00:00
final = r
case gin . H :
if errorMsg , ok := r [ "error" ] . ( string ) ; ok {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : errorMsg } )
return
} else {
c . JSON ( http . StatusInternalServerError , gin . H { "error" : "unexpected error format in response" } )
return
}
default :
c . JSON ( http . StatusInternalServerError , gin . H { "error" : "unexpected error" } )
return
2023-12-05 19:57:33 +00:00
}
}
2023-12-10 15:53:38 +00:00
2023-12-18 19:23:38 +00:00
final . Message = api . Message { Role : "assistant" , Content : sb . String ( ) }
2023-12-10 15:53:38 +00:00
c . JSON ( http . StatusOK , final )
2023-12-05 19:57:33 +00:00
return
}
streamResponse ( c , ch )
}