diff --git a/docs/api.md b/docs/api.md index be00557e..355bfef4 100644 --- a/docs/api.md +++ b/docs/api.md @@ -62,27 +62,28 @@ A stream of JSON objects: The final response in the stream also includes additional data about the generation: +- `total_duration`: time spent in nanoseconds generating the response +- `load_duration`: time spent in nanoseconds loading the model +- `sample_count`: number of samples generated +- `sample_duration`: time spent generating samples +- `prompt_eval_count`: number of tokens in the prompt +- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt +- `eval_count`: number of tokens in the response +- `eval_duration`: time in nanoseconds spent generating the response + +To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration` and multiply by 10^9 (since `eval_duration` is in nanoseconds). + ```json { "model": "llama2:7b", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, - - // total time in nanoseconds spent generating the response "total_duration": 5589157167, - - // time spent in nanoseconds loading the model "load_duration": 3013701500, - - // Sample: how fast tokens were sampled "sample_count": 114, "sample_duration": 81442000, - - // Prompt stats: how fast the prompt was evaluated "prompt_eval_count": 46, "prompt_eval_duration": 1160282000, - - // Eval stats: how fast tokens were generated by the model "eval_count": 113, "eval_duration": 1325948000 }