From 83a9b5271a68c7d1f8443f91c8d8b7d24ab581a9 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Mon, 9 Sep 2024 22:47:16 -0700 Subject: [PATCH] docs: update examples to use llama3.1 (#6718) --- docs/api.md | 48 +++++++++++++++++++++++------------------------ docs/faq.md | 6 +++--- docs/modelfile.md | 10 +++++----- docs/openai.md | 22 +++++++++++----------- docs/template.md | 2 +- docs/windows.md | 2 +- 6 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/api.md b/docs/api.md index aed2b69f..1ae60dc7 100644 --- a/docs/api.md +++ b/docs/api.md @@ -69,7 +69,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?" }' ``` @@ -80,7 +80,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "response": "The", "done": false @@ -102,7 +102,7 @@ To calculate how fast the response is generated in tokens per second (token/s), ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "", "done": true, @@ -124,7 +124,7 @@ A response can be received in one reply when streaming is off. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "stream": false }' @@ -136,7 +136,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -194,7 +194,7 @@ curl http://localhost:11434/api/generate -d '{ ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "What color is the sky at different times of the day? Respond using JSON", "format": "json", "stream": false @@ -205,7 +205,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-11-09T21:07:55.186497Z", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "done": true, @@ -327,7 +327,7 @@ If you want to set custom options for the model at runtime rather than in the Mo ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -368,7 +368,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -390,7 +390,7 @@ If an empty prompt is provided, the model will be loaded into memory. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3" + "model": "llama3.1" }' ``` @@ -400,7 +400,7 @@ A single JSON object is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-12-18T19:52:07.071755Z", "response": "", "done": true @@ -445,7 +445,7 @@ Send a chat message with a streaming response. 
```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -461,7 +461,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -476,7 +476,7 @@ Final response: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 4883583458, @@ -494,7 +494,7 @@ Final response: ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -509,7 +509,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama3:latest", + "model": "llama3.1", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -533,7 +533,7 @@ Send a chat message with a conversation history. You can use this same approach ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -557,7 +557,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -571,7 +571,7 @@ Final response: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 8113331500, @@ -629,7 +629,7 @@ curl http://localhost:11434/api/chat -d '{ ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -647,7 +647,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama3:latest", + "model": "llama3.1", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -904,7 +904,7 @@ Show information about a model including details, modelfile, template, parameter ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama3" + "name": "llama3.1" }' ``` @@ -965,7 +965,7 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama3", + "source": "llama3.1", "destination": "llama3-backup" }' ``` @@ -1020,7 +1020,7 @@ Download a model from the ollama library. 
Cancelled pulls are resumed from where ```shell curl http://localhost:11434/api/pull -d '{ - "name": "llama3" + "name": "llama3.1" }' ``` diff --git a/docs/faq.md b/docs/faq.md index 356d5105..6267ad2b 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter: ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "options": { "num_ctx": 4096 @@ -247,12 +247,12 @@ The `keep_alive` parameter can be set to: For example, to preload a model and leave it in memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": -1}' ``` To unload the model and free up memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": 0}' ``` Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. diff --git a/docs/modelfile.md b/docs/modelfile.md index 92df22ef..a33f180b 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -11,7 +11,7 @@ A model file is the blueprint to create and share models with Ollama. - [Examples](#examples) - [Instructions](#instructions) - [FROM (Required)](#from-required) - - [Build from llama3.1](#build-from-llama31) + - [Build from existing model](#build-from-existing-model) - [Build from a Safetensors model](#build-from-a-safetensors-model) - [Build from a GGUF file](#build-from-a-gguf-file) - [PARAMETER](#parameter) @@ -50,7 +50,7 @@ INSTRUCTION arguments An example of a `Modelfile` creating a mario blueprint: ```modelfile -FROM llama3 +FROM llama3.1 # sets the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token @@ -72,10 +72,10 @@ More examples are available in the [examples directory](../examples). To view the Modelfile of a given model, use the `ollama show --modelfile` command. ```bash - > ollama show --modelfile llama3 + > ollama show --modelfile llama3.1 # Modelfile generated by "ollama show" # To build a new Modelfile based on this one, replace the FROM line with: - # FROM llama3:latest + # FROM llama3.1:latest FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> @@ -100,7 +100,7 @@ The `FROM` instruction defines the base model to use when creating a model. 
FROM : ``` -#### Build from llama3.1 +#### Build from existing model ```modelfile FROM llama3.1 diff --git a/docs/openai.md b/docs/openai.md index 0cbea6cc..c6df0fec 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama3', + model='llama3.1', ) response = client.chat.completions.create( @@ -46,13 +46,13 @@ response = client.chat.completions.create( ) completion = client.completions.create( - model="llama3", + model="llama3.1", prompt="Say this is a test", ) list_completion = client.models.list() -model = client.models.retrieve("llama3") +model = client.models.retrieve("llama3.1") embeddings = client.embeddings.create( model="all-minilm", @@ -74,7 +74,7 @@ const openai = new OpenAI({ const chatCompletion = await openai.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'llama3', + model: 'llama3.1', }) const response = await openai.chat.completions.create({ @@ -94,13 +94,13 @@ const response = await openai.chat.completions.create({ }) const completion = await openai.completions.create({ - model: "llama3", + model: "llama3.1", prompt: "Say this is a test.", }) const listCompletion = await openai.models.list() -const model = await openai.models.retrieve("llama3") +const model = await openai.models.retrieve("llama3.1") const embedding = await openai.embeddings.create({ model: "all-minilm", @@ -114,7 +114,7 @@ const embedding = await openai.embeddings.create({ curl http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "system", @@ -154,13 +154,13 @@ curl http://localhost:11434/v1/chat/completions \ curl http://localhost:11434/v1/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Say this is a test" }' curl http://localhost:11434/v1/models -curl http://localhost:11434/v1/models/llama3 +curl http://localhost:11434/v1/models/llama3.1 curl http://localhost:11434/v1/embeddings \ -H "Content-Type: application/json" \ @@ -274,7 +274,7 @@ curl http://localhost:11434/v1/embeddings \ Before using a model, pull it locally `ollama pull`: ```shell -ollama pull llama3 +ollama pull llama3.1 ``` ### Default model names @@ -282,7 +282,7 @@ ollama pull llama3 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name: ``` -ollama cp llama3 gpt-3.5-turbo +ollama cp llama3.1 gpt-3.5-turbo ``` Afterwards, this new model name can be specified the `model` field: diff --git a/docs/template.md b/docs/template.md index 1d7104de..192d878d 100644 --- a/docs/template.md +++ b/docs/template.md @@ -33,7 +33,7 @@ Omitting a template in these models puts the responsibility of correctly templat To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3. 
```dockerfile -FROM llama3 +FROM llama3.1 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|> diff --git a/docs/windows.md b/docs/windows.md index f681ffac..372a35aa 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -29,7 +29,7 @@ Ollama uses unicode characters for progress indication, which may render as unkn Here's a quick example showing API access from `powershell` ```powershell -(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json +(Invoke-WebRequest -method POST -Body '{"model":"llama3.1", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json ``` ## Troubleshooting
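
For anyone applying this change by hand rather than through the PR, here is a minimal sketch using stock git, assuming the patch above has been saved as `0001-docs-update-examples-to-use-llama3.1.patch` inside a local ollama checkout (the filename is illustrative):

```shell
# Preview which docs files the patch touches and the insertion/deletion counts.
git apply --stat 0001-docs-update-examples-to-use-llama3.1.patch

# Dry run: confirm the patch still applies cleanly to the current checkout.
git apply --check 0001-docs-update-examples-to-use-llama3.1.patch

# Apply it as a commit, preserving the original author, date, and message.
git am 0001-docs-update-examples-to-use-llama3.1.patch
```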