ollama/api/types.go

package api

import (
	"fmt"
	"net/http"
	"strings"
)

// Error is the JSON error payload returned by the API. When Message is
// empty, Error() falls back to the HTTP status text for Code.
type Error struct {
	Code    int32  `json:"code"`
	Message string `json:"message"`
}

func (e Error) Error() string {
	if e.Message == "" {
		return fmt.Sprintf("%d %v", e.Code, strings.ToLower(http.StatusText(int(e.Code))))
	}
	return e.Message
}
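
// A minimal sketch of the fallback formatting: with no Message set, the
// error renders as the code followed by the lower-cased status text.
//
//	err := Error{Code: 404}
//	fmt.Println(err.Error()) // prints "404 not found"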

// PullRequest names the model a client wants the server to pull.
type PullRequest struct {
	Model string `json:"model"`
}

// PullProgress reports download progress for an in-flight pull.
type PullProgress struct {
	Total     int64   `json:"total"`
	Completed int64   `json:"completed"`
	Percent   float64 `json:"percent"`
	Error     Error   `json:"error"`
}
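
// A minimal client-side rendering sketch. It assumes Percent is the
// server-computed Completed/Total ratio scaled to 100; that computation
// happens outside this file.
//
//	func renderPull(p PullProgress) string {
//		return fmt.Sprintf("%d/%d bytes (%.1f%%)", p.Completed, p.Total, p.Percent)
//	}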

// GenerateRequest is the payload for a generate call. The embedded option
// structs carry their own JSON tags, so they marshal as nested objects.
type GenerateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`

	ModelOptions   `json:"model_opts,omitempty"`
	PredictOptions `json:"predict_opts,omitempty"`
}
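
// The wire shape this produces, sketched with illustrative values. Note
// that omitempty has no effect on non-pointer struct fields, so both
// option objects are always emitted:
//
//	{
//	    "model": "llama2",
//	    "prompt": "...",
//	    "model_opts": {"context_size": 128, ...},
//	    "predict_opts": {"seed": -1, ...}
//	}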

// ModelOptions controls how the model itself is loaded and laid out in
// memory.
type ModelOptions struct {
	ContextSize int    `json:"context_size,omitempty"`
	Seed        int    `json:"seed,omitempty"`
	NBatch      int    `json:"n_batch,omitempty"`
	F16Memory   bool   `json:"memory_f16,omitempty"`
	MLock       bool   `json:"mlock,omitempty"`
	MMap        bool   `json:"mmap,omitempty"`
	VocabOnly   bool   `json:"vocab_only,omitempty"`
	LowVRAM     bool   `json:"low_vram,omitempty"`
	Embeddings  bool   `json:"embeddings,omitempty"`
	NUMA        bool   `json:"numa,omitempty"`
	NGPULayers  int    `json:"gpu_layers,omitempty"`
	MainGPU     string `json:"main_gpu,omitempty"`
	TensorSplit string `json:"tensor_split,omitempty"`
}

// PredictOptions controls sampling and runtime behavior for a single
// generation. Fields without a JSON tag marshal under their Go names.
type PredictOptions struct {
	Seed        int     `json:"seed,omitempty"`
	Threads     int     `json:"threads,omitempty"`
	Tokens      int     `json:"tokens,omitempty"`
	TopK        int     `json:"top_k,omitempty"`
	Repeat      int     `json:"repeat,omitempty"`
	Batch       int     `json:"batch,omitempty"`
	NKeep       int     `json:"nkeep,omitempty"`
	TopP        float64 `json:"top_p,omitempty"`
	Temperature float64 `json:"temp,omitempty"`
	Penalty     float64 `json:"penalty,omitempty"`

	F16KV       bool
	DebugMode   bool
	StopPrompts []string

	IgnoreEOS         bool    `json:"ignore_eos,omitempty"`
	TailFreeSamplingZ float64 `json:"tfs_z,omitempty"`
	TypicalP          float64 `json:"typical_p,omitempty"`
	FrequencyPenalty  float64 `json:"freq_penalty,omitempty"`
	PresencePenalty   float64 `json:"pres_penalty,omitempty"`
	Mirostat          int     `json:"mirostat,omitempty"`
	MirostatETA       float64 `json:"mirostat_lr,omitempty"`
	MirostatTAU       float64 `json:"mirostat_ent,omitempty"`
	PenalizeNL        bool    `json:"penalize_nl,omitempty"`
	LogitBias         string  `json:"logit_bias,omitempty"`

	PathPromptCache string
	MLock           bool `json:"mlock,omitempty"`
	MMap            bool `json:"mmap,omitempty"`
	PromptCacheAll  bool
	PromptCacheRO   bool
	MainGPU         string
	TensorSplit     string
}

// DefaultModelOptions holds the package defaults for loading a model.
var DefaultModelOptions = ModelOptions{
	ContextSize: 128,
	Seed:        0,
	F16Memory:   true,
	MLock:       false,
	Embeddings:  true,
	MMap:        true,
	LowVRAM:     false,
}

// DefaultPredictOptions holds the package defaults for generation.
var DefaultPredictOptions = PredictOptions{
	Seed:              -1,
	Threads:           -1,
	Tokens:            512,
	Penalty:           1.1,
	Repeat:            64,
	Batch:             512,
	NKeep:             64,
	TopK:              90,
	TopP:              0.86,
	TailFreeSamplingZ: 1.0,
	TypicalP:          1.0,
	Temperature:       0.8,
	FrequencyPenalty:  0.0,
	PresencePenalty:   0.0,
	Mirostat:          0,
	MirostatTAU:       5.0,
	MirostatETA:       0.1,
	MMap:              true,
	StopPrompts:       []string{"llama"},
}
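
// A minimal sketch of building a request from the defaults above and
// overriding a single knob; the model name and prompt are illustrative.
//
//	opts := DefaultPredictOptions
//	opts.Temperature = 0.2
//	req := GenerateRequest{
//		Model:          "llama2",
//		Prompt:         "why is the sky blue?",
//		ModelOptions:   DefaultModelOptions,
//		PredictOptions: opts,
//	}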

// GenerateResponse is the server's reply to a GenerateRequest.
type GenerateResponse struct {
	Response string `json:"response"`
}
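
// A minimal decoding sketch, assuming the server streams JSON objects of
// this shape; the transport itself is not defined in this file.
//
//	dec := json.NewDecoder(resp.Body)
//	for {
//		var gr GenerateResponse
//		if err := dec.Decode(&gr); err != nil {
//			break
//		}
//		fmt.Print(gr.Response)
//	}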