add a complete /generate options example (#1035)
This commit is contained in:
parent
6e9bcdb9b3
commit
a49d6acc1e
2 changed files with 111 additions and 43 deletions
87
api/types.go
87
api/types.go
|
@ -42,6 +42,50 @@ type GenerateRequest struct {
|
||||||
Options map[string]interface{} `json:"options"`
|
Options map[string]interface{} `json:"options"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Options specified in GenerateRequest. If you add a new option here, add it to the API docs as well.
|
||||||
|
type Options struct {
|
||||||
|
Runner
|
||||||
|
|
||||||
|
// Predict options used at runtime
|
||||||
|
NumKeep int `json:"num_keep,omitempty"`
|
||||||
|
Seed int `json:"seed,omitempty"`
|
||||||
|
NumPredict int `json:"num_predict,omitempty"`
|
||||||
|
TopK int `json:"top_k,omitempty"`
|
||||||
|
TopP float32 `json:"top_p,omitempty"`
|
||||||
|
TFSZ float32 `json:"tfs_z,omitempty"`
|
||||||
|
TypicalP float32 `json:"typical_p,omitempty"`
|
||||||
|
RepeatLastN int `json:"repeat_last_n,omitempty"`
|
||||||
|
Temperature float32 `json:"temperature,omitempty"`
|
||||||
|
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
|
||||||
|
PresencePenalty float32 `json:"presence_penalty,omitempty"`
|
||||||
|
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
|
||||||
|
Mirostat int `json:"mirostat,omitempty"`
|
||||||
|
MirostatTau float32 `json:"mirostat_tau,omitempty"`
|
||||||
|
MirostatEta float32 `json:"mirostat_eta,omitempty"`
|
||||||
|
PenalizeNewline bool `json:"penalize_newline,omitempty"`
|
||||||
|
Stop []string `json:"stop,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runner options which must be set when the model is loaded into memory
|
||||||
|
type Runner struct {
|
||||||
|
UseNUMA bool `json:"numa,omitempty"`
|
||||||
|
NumCtx int `json:"num_ctx,omitempty"`
|
||||||
|
NumBatch int `json:"num_batch,omitempty"`
|
||||||
|
NumGQA int `json:"num_gqa,omitempty"`
|
||||||
|
NumGPU int `json:"num_gpu,omitempty"`
|
||||||
|
MainGPU int `json:"main_gpu,omitempty"`
|
||||||
|
LowVRAM bool `json:"low_vram,omitempty"`
|
||||||
|
F16KV bool `json:"f16_kv,omitempty"`
|
||||||
|
LogitsAll bool `json:"logits_all,omitempty"`
|
||||||
|
VocabOnly bool `json:"vocab_only,omitempty"`
|
||||||
|
UseMMap bool `json:"use_mmap,omitempty"`
|
||||||
|
UseMLock bool `json:"use_mlock,omitempty"`
|
||||||
|
EmbeddingOnly bool `json:"embedding_only,omitempty"`
|
||||||
|
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
||||||
|
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
||||||
|
NumThread int `json:"num_thread,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type EmbeddingRequest struct {
|
type EmbeddingRequest struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Prompt string `json:"prompt"`
|
Prompt string `json:"prompt"`
|
||||||
|
@ -162,49 +206,6 @@ func (r *GenerateResponse) Summary() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runner options which must be set when the model is loaded into memory
|
|
||||||
type Runner struct {
|
|
||||||
UseNUMA bool `json:"numa,omitempty"`
|
|
||||||
NumCtx int `json:"num_ctx,omitempty"`
|
|
||||||
NumBatch int `json:"num_batch,omitempty"`
|
|
||||||
NumGQA int `json:"num_gqa,omitempty"`
|
|
||||||
NumGPU int `json:"num_gpu,omitempty"`
|
|
||||||
MainGPU int `json:"main_gpu,omitempty"`
|
|
||||||
LowVRAM bool `json:"low_vram,omitempty"`
|
|
||||||
F16KV bool `json:"f16_kv,omitempty"`
|
|
||||||
LogitsAll bool `json:"logits_all,omitempty"`
|
|
||||||
VocabOnly bool `json:"vocab_only,omitempty"`
|
|
||||||
UseMMap bool `json:"use_mmap,omitempty"`
|
|
||||||
UseMLock bool `json:"use_mlock,omitempty"`
|
|
||||||
EmbeddingOnly bool `json:"embedding_only,omitempty"`
|
|
||||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
|
||||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
|
||||||
NumThread int `json:"num_thread,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Options struct {
|
|
||||||
Runner
|
|
||||||
|
|
||||||
// Predict options used at runtime
|
|
||||||
NumKeep int `json:"num_keep,omitempty"`
|
|
||||||
Seed int `json:"seed,omitempty"`
|
|
||||||
NumPredict int `json:"num_predict,omitempty"`
|
|
||||||
TopK int `json:"top_k,omitempty"`
|
|
||||||
TopP float32 `json:"top_p,omitempty"`
|
|
||||||
TFSZ float32 `json:"tfs_z,omitempty"`
|
|
||||||
TypicalP float32 `json:"typical_p,omitempty"`
|
|
||||||
RepeatLastN int `json:"repeat_last_n,omitempty"`
|
|
||||||
Temperature float32 `json:"temperature,omitempty"`
|
|
||||||
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
|
|
||||||
PresencePenalty float32 `json:"presence_penalty,omitempty"`
|
|
||||||
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
|
|
||||||
Mirostat int `json:"mirostat,omitempty"`
|
|
||||||
MirostatTau float32 `json:"mirostat_tau,omitempty"`
|
|
||||||
MirostatEta float32 `json:"mirostat_eta,omitempty"`
|
|
||||||
PenalizeNewline bool `json:"penalize_newline,omitempty"`
|
|
||||||
Stop []string `json:"stop,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
var ErrInvalidOpts = fmt.Errorf("invalid options")
|
var ErrInvalidOpts = fmt.Errorf("invalid options")
|
||||||
|
|
||||||
func (opts *Options) FromMap(m map[string]interface{}) error {
|
func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||||
|
|
67
docs/api.md
67
docs/api.md
|
@ -167,6 +167,73 @@ curl -X POST http://localhost:11434/api/generate -d '{
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Request
|
||||||
|
|
||||||
|
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl -X POST http://localhost:11434/api/generate -d '{
|
||||||
|
"model": "llama2:7b",
|
||||||
|
"prompt": "Why is the sky blue?",
|
||||||
|
"stream": false,
|
||||||
|
"options": {
|
||||||
|
"num_keep": 5,
|
||||||
|
"seed": 42,
|
||||||
|
"num_predict": 100,
|
||||||
|
"top_k": 20,
|
||||||
|
"top_p": 0.9,
|
||||||
|
"tfs_z": 0.5,
|
||||||
|
"typical_p": 0.7,
|
||||||
|
"repeat_last_n": 33,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"repeat_penalty": 1.2,
|
||||||
|
"presence_penalty": 1.5,
|
||||||
|
"frequency_penalty": 1.0,
|
||||||
|
"mirostat": 1,
|
||||||
|
"mirostat_tau": 0.8,
|
||||||
|
"mirostat_eta": 0.6,
|
||||||
|
"penalize_newline": true,
|
||||||
|
"stop": ["\n", "user:"],
|
||||||
|
"numa": false,
|
||||||
|
"num_ctx": 4,
|
||||||
|
"num_batch": 2,
|
||||||
|
"num_gqa": 1,
|
||||||
|
"num_gpu": 1,
|
||||||
|
"main_gpu": 0,
|
||||||
|
"low_vram": false,
|
||||||
|
"f16_kv": true,
|
||||||
|
"logits_all": false,
|
||||||
|
"vocab_only": false,
|
||||||
|
"use_mmap": true,
|
||||||
|
"use_mlock": false,
|
||||||
|
"embedding_only": false,
|
||||||
|
"rope_frequency_base": 1.1,
|
||||||
|
"rope_frequency_scale": 0.8,
|
||||||
|
"num_thread": 8
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "llama2:7b",
|
||||||
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
|
"response": "The sky is blue because it is the color of the sky.",
|
||||||
|
"context": [1, 2, 3],
|
||||||
|
"done": true,
|
||||||
|
"total_duration": 5589157167,
|
||||||
|
"load_duration": 3013701500,
|
||||||
|
"sample_count": 114,
|
||||||
|
"sample_duration": 81442000,
|
||||||
|
"prompt_eval_count": 46,
|
||||||
|
"prompt_eval_duration": 1160282000,
|
||||||
|
"eval_count": 13,
|
||||||
|
"eval_duration": 1325948000
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Create a Model
|
## Create a Model
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
Loading…
Reference in a new issue