2023-07-03 15:22:44 -04:00
|
|
|
package api
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
type Error struct {
|
|
|
|
Code int32 `json:"code"`
|
|
|
|
Message string `json:"message"`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e Error) Error() string {
|
|
|
|
if e.Message == "" {
|
|
|
|
return fmt.Sprintf("%d %v", e.Code, strings.ToLower(http.StatusText(int(e.Code))))
|
|
|
|
}
|
|
|
|
return e.Message
|
|
|
|
}
|
|
|
|
|
2023-07-06 12:24:49 -04:00
|
|
|
type PullRequest struct {
|
|
|
|
Model string `json:"model"`
|
|
|
|
}
|
|
|
|
|
2023-07-06 14:18:40 -04:00
|
|
|
type PullProgress struct {
|
2023-07-06 14:05:55 -07:00
|
|
|
Total int64 `json:"total"`
|
|
|
|
Completed int64 `json:"completed"`
|
2023-07-06 14:18:40 -04:00
|
|
|
Percent float64 `json:"percent"`
|
|
|
|
}
|
|
|
|
|
2023-07-03 15:22:44 -04:00
|
|
|
type GenerateRequest struct {
|
|
|
|
Model string `json:"model"`
|
|
|
|
Prompt string `json:"prompt"`
|
2023-07-06 17:09:48 -07:00
|
|
|
|
|
|
|
ModelOptions `json:"model_opts"`
|
|
|
|
PredictOptions `json:"predict_opts"`
|
|
|
|
}
|
|
|
|
|
|
|
|
type ModelOptions struct {
|
|
|
|
ContextSize int `json:"context_size"`
|
|
|
|
Seed int `json:"seed"`
|
|
|
|
NBatch int `json:"n_batch"`
|
|
|
|
F16Memory bool `json:"memory_f16"`
|
|
|
|
MLock bool `json:"mlock"`
|
|
|
|
MMap bool `json:"mmap"`
|
|
|
|
VocabOnly bool `json:"vocab_only"`
|
|
|
|
LowVRAM bool `json:"low_vram"`
|
|
|
|
Embeddings bool `json:"embeddings"`
|
|
|
|
NUMA bool `json:"numa"`
|
|
|
|
NGPULayers int `json:"gpu_layers"`
|
|
|
|
MainGPU string `json:"main_gpu"`
|
|
|
|
TensorSplit string `json:"tensor_split"`
|
|
|
|
}
|
|
|
|
|
|
|
|
type PredictOptions struct {
|
|
|
|
Seed int `json:"seed"`
|
|
|
|
Threads int `json:"threads"`
|
|
|
|
Tokens int `json:"tokens"`
|
|
|
|
TopK int `json:"top_k"`
|
|
|
|
Repeat int `json:"repeat"`
|
|
|
|
Batch int `json:"batch"`
|
|
|
|
NKeep int `json:"nkeep"`
|
|
|
|
TopP float64 `json:"top_p"`
|
|
|
|
Temperature float64 `json:"temp"`
|
|
|
|
Penalty float64 `json:"penalty"`
|
|
|
|
F16KV bool
|
|
|
|
DebugMode bool
|
|
|
|
StopPrompts []string
|
|
|
|
IgnoreEOS bool `json:"ignore_eos"`
|
|
|
|
|
|
|
|
TailFreeSamplingZ float64 `json:"tfs_z"`
|
|
|
|
TypicalP float64 `json:"typical_p"`
|
|
|
|
FrequencyPenalty float64 `json:"freq_penalty"`
|
|
|
|
PresencePenalty float64 `json:"pres_penalty"`
|
|
|
|
Mirostat int `json:"mirostat"`
|
|
|
|
MirostatETA float64 `json:"mirostat_lr"`
|
|
|
|
MirostatTAU float64 `json:"mirostat_ent"`
|
|
|
|
PenalizeNL bool `json:"penalize_nl"`
|
|
|
|
LogitBias string `json:"logit_bias"`
|
|
|
|
|
|
|
|
PathPromptCache string
|
|
|
|
MLock bool `json:"mlock"`
|
|
|
|
MMap bool `json:"mmap"`
|
|
|
|
PromptCacheAll bool
|
|
|
|
PromptCacheRO bool
|
|
|
|
MainGPU string
|
|
|
|
TensorSplit string
|
|
|
|
}
|
|
|
|
|
|
|
|
var DefaultModelOptions ModelOptions = ModelOptions{
|
|
|
|
ContextSize: 128,
|
|
|
|
Seed: 0,
|
|
|
|
F16Memory: true,
|
|
|
|
MLock: false,
|
|
|
|
Embeddings: true,
|
|
|
|
MMap: true,
|
|
|
|
LowVRAM: false,
|
|
|
|
}
|
|
|
|
|
|
|
|
var DefaultPredictOptions PredictOptions = PredictOptions{
|
|
|
|
Seed: -1,
|
|
|
|
Threads: -1,
|
|
|
|
Tokens: 512,
|
|
|
|
Penalty: 1.1,
|
|
|
|
Repeat: 64,
|
|
|
|
Batch: 512,
|
|
|
|
NKeep: 64,
|
|
|
|
TopK: 90,
|
|
|
|
TopP: 0.86,
|
|
|
|
TailFreeSamplingZ: 1.0,
|
|
|
|
TypicalP: 1.0,
|
|
|
|
Temperature: 0.8,
|
|
|
|
FrequencyPenalty: 0.0,
|
|
|
|
PresencePenalty: 0.0,
|
|
|
|
Mirostat: 0,
|
|
|
|
MirostatTAU: 5.0,
|
|
|
|
MirostatETA: 0.1,
|
|
|
|
MMap: true,
|
|
|
|
StopPrompts: []string{"llama"},
|
2023-07-03 15:22:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
type GenerateResponse struct {
|
|
|
|
Response string `json:"response"`
|
|
|
|
}
|