Compare commits
No commits in common. "9fb5f4446a8a6485e065358862914d21d40f04ec" and "4dca98681071820222f2ab676c87874bf72a54be" have entirely different histories.
9fb5f4446a
...
4dca986810
33 changed files with 111 additions and 125 deletions
2
.github/workflows/release.yaml
vendored
2
.github/workflows/release.yaml
vendored
|
@ -354,7 +354,7 @@ jobs:
|
||||||
- name: Set Version
|
- name: Set Version
|
||||||
run: |
|
run: |
|
||||||
$ver=${env:GITHUB_REF_NAME}.trim("v")
|
$ver=${env:GITHUB_REF_NAME}.trim("v")
|
||||||
echo VERSION=$ver | Out-File -FilePath ${env:GITHUB_ENV} -Encoding utf8 -Append
|
write-host VERSION=$ver | Out-File -FilePath ${env:GITHUB_ENV} -Encoding utf8 -Append
|
||||||
- uses: 'google-github-actions/auth@v2'
|
- uses: 'google-github-actions/auth@v2'
|
||||||
with:
|
with:
|
||||||
project_id: 'ollama'
|
project_id: 'ollama'
|
||||||
|
|
33
README.md
33
README.md
|
@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
|
||||||
|
|
||||||
## Quickstart
|
## Quickstart
|
||||||
|
|
||||||
To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2):
|
To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama run llama3.2
|
ollama run llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Model library
|
## Model library
|
||||||
|
@ -49,8 +49,6 @@ Here are some example models that can be downloaded:
|
||||||
|
|
||||||
| Model | Parameters | Size | Download |
|
| Model | Parameters | Size | Download |
|
||||||
| ------------------ | ---------- | ----- | ------------------------------ |
|
| ------------------ | ---------- | ----- | ------------------------------ |
|
||||||
| Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` |
|
|
||||||
| Llama 3.2 | 1B | 1.3GB | `ollama run llama3.2:1b` |
|
|
||||||
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
|
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
|
||||||
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
|
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
|
||||||
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
||||||
|
@ -101,16 +99,16 @@ See the [guide](docs/import.md) on importing models for more information.
|
||||||
|
|
||||||
### Customize a prompt
|
### Customize a prompt
|
||||||
|
|
||||||
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model:
|
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
Create a `Modelfile`:
|
Create a `Modelfile`:
|
||||||
|
|
||||||
```
|
```
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
|
|
||||||
# set the temperature to 1 [higher is more creative, lower is more coherent]
|
# set the temperature to 1 [higher is more creative, lower is more coherent]
|
||||||
PARAMETER temperature 1
|
PARAMETER temperature 1
|
||||||
|
@ -145,7 +143,7 @@ ollama create mymodel -f ./Modelfile
|
||||||
### Pull a model
|
### Pull a model
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
> This command can also be used to update a local model. Only the diff will be pulled.
|
> This command can also be used to update a local model. Only the diff will be pulled.
|
||||||
|
@ -153,13 +151,13 @@ ollama pull llama3.2
|
||||||
### Remove a model
|
### Remove a model
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama rm llama3.2
|
ollama rm llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Copy a model
|
### Copy a model
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama cp llama3.2 my-model
|
ollama cp llama3.1 my-model
|
||||||
```
|
```
|
||||||
|
|
||||||
### Multiline input
|
### Multiline input
|
||||||
|
@ -183,14 +181,14 @@ The image features a yellow smiley face, which is likely the central focus of th
|
||||||
### Pass the prompt as an argument
|
### Pass the prompt as an argument
|
||||||
|
|
||||||
```
|
```
|
||||||
$ ollama run llama3.2 "Summarize this file: $(cat README.md)"
|
$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
|
||||||
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||||
```
|
```
|
||||||
|
|
||||||
### Show model information
|
### Show model information
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama show llama3.2
|
ollama show llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### List models on your computer
|
### List models on your computer
|
||||||
|
@ -208,7 +206,7 @@ ollama ps
|
||||||
### Stop a model which is currently running
|
### Stop a model which is currently running
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama stop llama3.2
|
ollama stop llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Start Ollama
|
### Start Ollama
|
||||||
|
@ -230,7 +228,7 @@ Next, start the server:
|
||||||
Finally, in a separate shell, run a model:
|
Finally, in a separate shell, run a model:
|
||||||
|
|
||||||
```
|
```
|
||||||
./ollama run llama3.2
|
./ollama run llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
## REST API
|
## REST API
|
||||||
|
@ -241,7 +239,7 @@ Ollama has a REST API for running and managing models.
|
||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt":"Why is the sky blue?"
|
"prompt":"Why is the sky blue?"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -250,7 +248,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{ "role": "user", "content": "why is the sky blue?" }
|
{ "role": "user", "content": "why is the sky blue?" }
|
||||||
]
|
]
|
||||||
|
@ -327,7 +325,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||||
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
|
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
|
||||||
- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
|
- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
|
||||||
- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
|
- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
|
||||||
- [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
|
|
||||||
|
|
||||||
### Terminal
|
### Terminal
|
||||||
|
|
||||||
|
@ -380,7 +377,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||||
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
||||||
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
||||||
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
|
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
|
||||||
- [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama)
|
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
|
||||||
- [LiteLLM](https://github.com/BerriAI/litellm)
|
- [LiteLLM](https://github.com/BerriAI/litellm)
|
||||||
- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
|
- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
|
||||||
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
|
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
|
||||||
|
|
|
@ -142,7 +142,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
|
||||||
|
|
||||||
|
|
||||||
;FinishedHeadingLabel=Run your first model
|
;FinishedHeadingLabel=Run your first model
|
||||||
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.2
|
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1
|
||||||
;ClickFinish=%n
|
;ClickFinish=%n
|
||||||
|
|
||||||
[Registry]
|
[Registry]
|
||||||
|
|
|
@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
|
||||||
write-host ""
|
write-host ""
|
||||||
write-host "Run your first model:"
|
write-host "Run your first model:"
|
||||||
write-host ""
|
write-host ""
|
||||||
write-host "`tollama run llama3.2"
|
write-host "`tollama run llama3.1"
|
||||||
write-host ""
|
write-host ""
|
64
docs/api.md
64
docs/api.md
|
@ -69,7 +69,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "Why is the sky blue?"
|
"prompt": "Why is the sky blue?"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -80,7 +80,7 @@ A stream of JSON objects is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
||||||
"response": "The",
|
"response": "The",
|
||||||
"done": false
|
"done": false
|
||||||
|
@ -102,7 +102,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T19:22:45.499127Z",
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
"response": "",
|
"response": "",
|
||||||
"done": true,
|
"done": true,
|
||||||
|
@ -124,7 +124,7 @@ A response can be received in one reply when streaming is off.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "Why is the sky blue?",
|
"prompt": "Why is the sky blue?",
|
||||||
"stream": false
|
"stream": false
|
||||||
}'
|
}'
|
||||||
|
@ -136,7 +136,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T19:22:45.499127Z",
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
"response": "The sky is blue because it is the color of the sky.",
|
"response": "The sky is blue because it is the color of the sky.",
|
||||||
"done": true,
|
"done": true,
|
||||||
|
@ -194,7 +194,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "What color is the sky at different times of the day? Respond using JSON",
|
"prompt": "What color is the sky at different times of the day? Respond using JSON",
|
||||||
"format": "json",
|
"format": "json",
|
||||||
"stream": false
|
"stream": false
|
||||||
|
@ -205,7 +205,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-11-09T21:07:55.186497Z",
|
"created_at": "2023-11-09T21:07:55.186497Z",
|
||||||
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
|
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
|
||||||
"done": true,
|
"done": true,
|
||||||
|
@ -327,7 +327,7 @@ If you want to set custom options for the model at runtime rather than in the Mo
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "Why is the sky blue?",
|
"prompt": "Why is the sky blue?",
|
||||||
"stream": false,
|
"stream": false,
|
||||||
"options": {
|
"options": {
|
||||||
|
@ -368,7 +368,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T19:22:45.499127Z",
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
"response": "The sky is blue because it is the color of the sky.",
|
"response": "The sky is blue because it is the color of the sky.",
|
||||||
"done": true,
|
"done": true,
|
||||||
|
@ -390,7 +390,7 @@ If an empty prompt is provided, the model will be loaded into memory.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2"
|
"model": "llama3.1"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -400,7 +400,7 @@ A single JSON object is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-12-18T19:52:07.071755Z",
|
"created_at": "2023-12-18T19:52:07.071755Z",
|
||||||
"response": "",
|
"response": "",
|
||||||
"done": true
|
"done": true
|
||||||
|
@ -415,7 +415,7 @@ If an empty prompt is provided and the `keep_alive` parameter is set to `0`, a m
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"keep_alive": 0
|
"keep_alive": 0
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -426,7 +426,7 @@ A single JSON object is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2024-09-12T03:54:03.516566Z",
|
"created_at": "2024-09-12T03:54:03.516566Z",
|
||||||
"response": "",
|
"response": "",
|
||||||
"done": true,
|
"done": true,
|
||||||
|
@ -472,7 +472,7 @@ Send a chat message with a streaming response.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -488,7 +488,7 @@ A stream of JSON objects is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -503,7 +503,7 @@ Final response:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T19:22:45.499127Z",
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"total_duration": 4883583458,
|
"total_duration": 4883583458,
|
||||||
|
@ -521,7 +521,7 @@ Final response:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -536,7 +536,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-12-12T14:13:43.416799Z",
|
"created_at": "2023-12-12T14:13:43.416799Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -560,7 +560,7 @@ Send a chat message with a conversation history. You can use this same approach
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -584,7 +584,7 @@ A stream of JSON objects is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -598,7 +598,7 @@ Final response:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-08-04T19:22:45.499127Z",
|
"created_at": "2023-08-04T19:22:45.499127Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"total_duration": 8113331500,
|
"total_duration": 8113331500,
|
||||||
|
@ -656,7 +656,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -674,7 +674,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2023-12-12T14:13:43.416799Z",
|
"created_at": "2023-12-12T14:13:43.416799Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -696,7 +696,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -735,7 +735,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at": "2024-07-22T20:33:28.123648Z",
|
"created_at": "2024-07-22T20:33:28.123648Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -771,7 +771,7 @@ If the messages array is empty, the model will be loaded into memory.
|
||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": []
|
"messages": []
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -779,7 +779,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
##### Response
|
##### Response
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at":"2024-09-12T21:17:29.110811Z",
|
"created_at":"2024-09-12T21:17:29.110811Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -798,7 +798,7 @@ If the messages array is empty and the `keep_alive` parameter is set to `0`, a m
|
||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [],
|
"messages": [],
|
||||||
"keep_alive": 0
|
"keep_alive": 0
|
||||||
}'
|
}'
|
||||||
|
@ -810,7 +810,7 @@ A single JSON object is returned:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"created_at":"2024-09-12T21:33:17.547535Z",
|
"created_at":"2024-09-12T21:33:17.547535Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
|
@ -989,7 +989,7 @@ Show information about a model including details, modelfile, template, parameter
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/show -d '{
|
curl http://localhost:11434/api/show -d '{
|
||||||
"name": "llama3.2"
|
"name": "llama3.1"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -1050,7 +1050,7 @@ Copy a model. Creates a model with another name from an existing model.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/copy -d '{
|
curl http://localhost:11434/api/copy -d '{
|
||||||
"source": "llama3.2",
|
"source": "llama3.1",
|
||||||
"destination": "llama3-backup"
|
"destination": "llama3-backup"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -1105,7 +1105,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/pull -d '{
|
curl http://localhost:11434/api/pull -d '{
|
||||||
"name": "llama3.2"
|
"name": "llama3.1"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114
|
||||||
Now you can run a model:
|
Now you can run a model:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker exec -it ollama ollama run llama3.2
|
docker exec -it ollama ollama run llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Try different models
|
### Try different models
|
||||||
|
|
10
docs/faq.md
10
docs/faq.md
|
@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{
|
curl http://localhost:11434/api/generate -d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "Why is the sky blue?",
|
"prompt": "Why is the sky blue?",
|
||||||
"options": {
|
"options": {
|
||||||
"num_ctx": 4096
|
"num_ctx": 4096
|
||||||
|
@ -232,7 +232,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
|
||||||
|
|
||||||
To preload a model using the CLI, use the command:
|
To preload a model using the CLI, use the command:
|
||||||
```shell
|
```shell
|
||||||
ollama run llama3.2 ""
|
ollama run llama3.1 ""
|
||||||
```
|
```
|
||||||
|
|
||||||
## How do I keep a model loaded in memory or make it unload immediately?
|
## How do I keep a model loaded in memory or make it unload immediately?
|
||||||
|
@ -240,7 +240,7 @@ ollama run llama3.2 ""
|
||||||
By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command:
|
By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
ollama stop llama3.2
|
ollama stop llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to:
|
If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to:
|
||||||
|
@ -251,12 +251,12 @@ If you're using the API, use the `keep_alive` parameter with the `/api/generate`
|
||||||
|
|
||||||
For example, to preload a model and leave it in memory use:
|
For example, to preload a model and leave it in memory use:
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": -1}'
|
curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": -1}'
|
||||||
```
|
```
|
||||||
|
|
||||||
To unload the model and free up memory use:
|
To unload the model and free up memory use:
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": 0}'
|
curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": 0}'
|
||||||
```
|
```
|
||||||
|
|
||||||
Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
|
Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
|
||||||
|
|
|
@ -50,7 +50,7 @@ INSTRUCTION arguments
|
||||||
An example of a `Modelfile` creating a mario blueprint:
|
An example of a `Modelfile` creating a mario blueprint:
|
||||||
|
|
||||||
```modelfile
|
```modelfile
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
# sets the temperature to 1 [higher is more creative, lower is more coherent]
|
# sets the temperature to 1 [higher is more creative, lower is more coherent]
|
||||||
PARAMETER temperature 1
|
PARAMETER temperature 1
|
||||||
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
|
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
|
||||||
|
@ -72,10 +72,10 @@ More examples are available in the [examples directory](../examples).
|
||||||
To view the Modelfile of a given model, use the `ollama show --modelfile` command.
|
To view the Modelfile of a given model, use the `ollama show --modelfile` command.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
> ollama show --modelfile llama3.2
|
> ollama show --modelfile llama3.1
|
||||||
# Modelfile generated by "ollama show"
|
# Modelfile generated by "ollama show"
|
||||||
# To build a new Modelfile based on this one, replace the FROM line with:
|
# To build a new Modelfile based on this one, replace the FROM line with:
|
||||||
# FROM llama3.2:latest
|
# FROM llama3.1:latest
|
||||||
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
|
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
|
||||||
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ FROM <model name>:<tag>
|
||||||
#### Build from existing model
|
#### Build from existing model
|
||||||
|
|
||||||
```modelfile
|
```modelfile
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
A list of available base models:
|
A list of available base models:
|
||||||
|
|
|
@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create(
|
||||||
'content': 'Say this is a test',
|
'content': 'Say this is a test',
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
model='llama3.2',
|
model='llama3.1',
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
|
@ -46,13 +46,13 @@ response = client.chat.completions.create(
|
||||||
)
|
)
|
||||||
|
|
||||||
completion = client.completions.create(
|
completion = client.completions.create(
|
||||||
model="llama3.2",
|
model="llama3.1",
|
||||||
prompt="Say this is a test",
|
prompt="Say this is a test",
|
||||||
)
|
)
|
||||||
|
|
||||||
list_completion = client.models.list()
|
list_completion = client.models.list()
|
||||||
|
|
||||||
model = client.models.retrieve("llama3.2")
|
model = client.models.retrieve("llama3.1")
|
||||||
|
|
||||||
embeddings = client.embeddings.create(
|
embeddings = client.embeddings.create(
|
||||||
model="all-minilm",
|
model="all-minilm",
|
||||||
|
@ -74,7 +74,7 @@ const openai = new OpenAI({
|
||||||
|
|
||||||
const chatCompletion = await openai.chat.completions.create({
|
const chatCompletion = await openai.chat.completions.create({
|
||||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||||
model: 'llama3.2',
|
model: 'llama3.1',
|
||||||
})
|
})
|
||||||
|
|
||||||
const response = await openai.chat.completions.create({
|
const response = await openai.chat.completions.create({
|
||||||
|
@ -94,13 +94,13 @@ const response = await openai.chat.completions.create({
|
||||||
})
|
})
|
||||||
|
|
||||||
const completion = await openai.completions.create({
|
const completion = await openai.completions.create({
|
||||||
model: "llama3.2",
|
model: "llama3.1",
|
||||||
prompt: "Say this is a test.",
|
prompt: "Say this is a test.",
|
||||||
})
|
})
|
||||||
|
|
||||||
const listCompletion = await openai.models.list()
|
const listCompletion = await openai.models.list()
|
||||||
|
|
||||||
const model = await openai.models.retrieve("llama3.2")
|
const model = await openai.models.retrieve("llama3.1")
|
||||||
|
|
||||||
const embedding = await openai.embeddings.create({
|
const embedding = await openai.embeddings.create({
|
||||||
model: "all-minilm",
|
model: "all-minilm",
|
||||||
|
@ -114,7 +114,7 @@ const embedding = await openai.embeddings.create({
|
||||||
curl http://localhost:11434/v1/chat/completions \
|
curl http://localhost:11434/v1/chat/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
@ -154,13 +154,13 @@ curl http://localhost:11434/v1/chat/completions \
|
||||||
curl http://localhost:11434/v1/completions \
|
curl http://localhost:11434/v1/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "llama3.2",
|
"model": "llama3.1",
|
||||||
"prompt": "Say this is a test"
|
"prompt": "Say this is a test"
|
||||||
}'
|
}'
|
||||||
|
|
||||||
curl http://localhost:11434/v1/models
|
curl http://localhost:11434/v1/models
|
||||||
|
|
||||||
curl http://localhost:11434/v1/models/llama3.2
|
curl http://localhost:11434/v1/models/llama3.1
|
||||||
|
|
||||||
curl http://localhost:11434/v1/embeddings \
|
curl http://localhost:11434/v1/embeddings \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
|
@ -274,7 +274,7 @@ curl http://localhost:11434/v1/embeddings \
|
||||||
Before using a model, pull it locally `ollama pull`:
|
Before using a model, pull it locally `ollama pull`:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Default model names
|
### Default model names
|
||||||
|
@ -282,7 +282,7 @@ ollama pull llama3.2
|
||||||
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
|
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama cp llama3.2 gpt-3.5-turbo
|
ollama cp llama3.1 gpt-3.5-turbo
|
||||||
```
|
```
|
||||||
|
|
||||||
Afterwards, this new model name can be specified the `model` field:
|
Afterwards, this new model name can be specified the `model` field:
|
||||||
|
|
|
@ -33,7 +33,7 @@ Omitting a template in these models puts the responsibility of correctly templat
|
||||||
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
|
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
|
||||||
|
|
||||||
```dockerfile
|
```dockerfile
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
|
|
||||||
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
|
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
|
||||||
|
|
||||||
const ollama = new Ollama({
|
const ollama = new Ollama({
|
||||||
baseUrl: "http://localhost:11434",
|
baseUrl: "http://localhost:11434",
|
||||||
model: "llama3.2",
|
model: "llama3.1",
|
||||||
});
|
});
|
||||||
|
|
||||||
const answer = await ollama.invoke(`why is the sky blue?`);
|
const answer = await ollama.invoke(`why is the sky blue?`);
|
||||||
|
@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
|
||||||
console.log(answer);
|
console.log(answer);
|
||||||
```
|
```
|
||||||
|
|
||||||
That will get us the same thing as if we ran `ollama run llama3.2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
|
That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm install cheerio
|
npm install cheerio
|
||||||
|
|
|
@ -29,7 +29,7 @@ Ollama uses unicode characters for progress indication, which may render as unkn
|
||||||
|
|
||||||
Here's a quick example showing API access from `powershell`
|
Here's a quick example showing API access from `powershell`
|
||||||
```powershell
|
```powershell
|
||||||
(Invoke-WebRequest -method POST -Body '{"model":"llama3.2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
(Invoke-WebRequest -method POST -Body '{"model":"llama3.1", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
||||||
```
|
```
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
|
@ -35,7 +35,7 @@ func main() {
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
req := &api.ChatRequest{
|
req := &api.ChatRequest{
|
||||||
Model: "llama3.2",
|
Model: "llama3.1",
|
||||||
Messages: messages,
|
Messages: messages,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,10 @@ This example provides an interface for asking questions to a PDF document.
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
|
||||||
1. Ensure you have the `llama3.2` model installed:
|
1. Ensure you have the `llama3.1` model installed:
|
||||||
|
|
||||||
```
|
```
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the Python Requirements.
|
2. Install the Python Requirements.
|
||||||
|
|
|
@ -51,7 +51,7 @@ while True:
|
||||||
template=template,
|
template=template,
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = Ollama(model="llama3.2", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
|
llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
|
||||||
qa_chain = RetrievalQA.from_chain_type(
|
qa_chain = RetrievalQA.from_chain_type(
|
||||||
llm,
|
llm,
|
||||||
retriever=vectorstore.as_retriever(),
|
retriever=vectorstore.as_retriever(),
|
||||||
|
|
|
@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
1. Ensure you have the `llama3.2` model installed:
|
1. Ensure you have the `llama3.1` model installed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the Python Requirements.
|
2. Install the Python Requirements.
|
||||||
|
|
|
@ -5,7 +5,7 @@ from langchain.chains.summarize import load_summarize_chain
|
||||||
loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
|
loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
|
|
||||||
llm = Ollama(model="llama3.2")
|
llm = Ollama(model="llama3.1")
|
||||||
chain = load_summarize_chain(llm, chain_type="stuff")
|
chain = load_summarize_chain(llm, chain_type="stuff")
|
||||||
|
|
||||||
result = chain.invoke(docs)
|
result = chain.invoke(docs)
|
||||||
|
|
|
@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
1. Ensure you have the `llama3.2` model installed:
|
1. Ensure you have the `llama3.1` model installed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the Python Requirements.
|
2. Install the Python Requirements.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from langchain.llms import Ollama
|
from langchain.llms import Ollama
|
||||||
|
|
||||||
input = input("What is your question?")
|
input = input("What is your question?")
|
||||||
llm = Ollama(model="llama3.2")
|
llm = Ollama(model="llama3.1")
|
||||||
res = llm.predict(input)
|
res = llm.predict(input)
|
||||||
print (res)
|
print (res)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
PARAMETER temperature 1
|
PARAMETER temperature 1
|
||||||
SYSTEM """
|
SYSTEM """
|
||||||
You are Mario from super mario bros, acting as an assistant.
|
You are Mario from super mario bros, acting as an assistant.
|
||||||
|
|
|
@ -2,12 +2,12 @@
|
||||||
|
|
||||||
# Example character: Mario
|
# Example character: Mario
|
||||||
|
|
||||||
This example shows how to create a basic character using Llama 3.2 as the base model.
|
This example shows how to create a basic character using Llama3.1 as the base model.
|
||||||
|
|
||||||
To run this example:
|
To run this example:
|
||||||
|
|
||||||
1. Download the Modelfile
|
1. Download the Modelfile
|
||||||
2. `ollama pull llama3.2` to get the base model used in the model file.
|
2. `ollama pull llama3.1` to get the base model used in the model file.
|
||||||
3. `ollama create NAME -f ./Modelfile`
|
3. `ollama create NAME -f ./Modelfile`
|
||||||
4. `ollama run NAME`
|
4. `ollama run NAME`
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
|
||||||
What the model file looks like:
|
What the model file looks like:
|
||||||
|
|
||||||
```
|
```
|
||||||
FROM llama3.2
|
FROM llama3.1
|
||||||
PARAMETER temperature 1
|
PARAMETER temperature 1
|
||||||
SYSTEM """
|
SYSTEM """
|
||||||
You are Mario from Super Mario Bros, acting as an assistant.
|
You are Mario from Super Mario Bros, acting as an assistant.
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
# RAG Hallucination Checker using Bespoke-Minicheck
|
# RAG Hallucination Checker using Bespoke-Minicheck
|
||||||
|
|
||||||
This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.2` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations.
|
This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.1` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations.
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
1. Ensure `all-minilm` (embedding) `llama3.2` (chat) and `bespoke-minicheck` (check) models installed:
|
1. Ensure `all-minilm` (embedding) `llama3.1` (chat) and `bespoke-minicheck` (check) models installed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull all-minilm
|
ollama pull all-minilm
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
ollama pull bespoke-minicheck
|
ollama pull bespoke-minicheck
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ import nltk
|
||||||
warnings.filterwarnings(
|
warnings.filterwarnings(
|
||||||
"ignore", category=FutureWarning, module="transformers.tokenization_utils_base"
|
"ignore", category=FutureWarning, module="transformers.tokenization_utils_base"
|
||||||
)
|
)
|
||||||
nltk.download("punkt_tab", quiet=True)
|
nltk.download("punkt", quiet=True)
|
||||||
|
|
||||||
|
|
||||||
def getArticleText(url):
|
def getArticleText(url):
|
||||||
|
@ -119,7 +119,7 @@ if __name__ == "__main__":
|
||||||
system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
|
system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
|
||||||
|
|
||||||
ollama_response = ollama.generate(
|
ollama_response = ollama.generate(
|
||||||
model="llama3.2",
|
model="llama3.1",
|
||||||
prompt=question,
|
prompt=question,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
options={"stream": False},
|
options={"stream": False},
|
||||||
|
|
|
@ -2,7 +2,7 @@ import requests
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
|
||||||
model = "llama3.2"
|
model = "llama3.1"
|
||||||
template = {
|
template = {
|
||||||
"firstName": "",
|
"firstName": "",
|
||||||
"lastName": "",
|
"lastName": "",
|
||||||
|
|
|
@ -12,7 +12,7 @@ countries = [
|
||||||
"France",
|
"France",
|
||||||
]
|
]
|
||||||
country = random.choice(countries)
|
country = random.choice(countries)
|
||||||
model = "llama3.2"
|
model = "llama3.1"
|
||||||
|
|
||||||
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
|
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
|
||||||
|
|
||||||
|
|
|
@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
1. Ensure you have the `llama3.2` model installed:
|
1. Ensure you have the `llama3.1` model installed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the Python Requirements.
|
2. Install the Python Requirements.
|
||||||
|
|
|
@ -2,7 +2,7 @@ import json
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
|
# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
|
||||||
model = "llama3.2" # TODO: update this for whatever model you wish to use
|
model = "llama3.1" # TODO: update this for whatever model you wish to use
|
||||||
|
|
||||||
|
|
||||||
def chat(messages):
|
def chat(messages):
|
||||||
|
|
|
@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
|
||||||
|
|
||||||
## Running the Example
|
## Running the Example
|
||||||
|
|
||||||
1. Ensure you have the `llama3.2` model installed:
|
1. Ensure you have the `llama3.1` model installed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull llama3.2
|
ollama pull llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the Python Requirements.
|
2. Install the Python Requirements.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import * as readline from "readline";
|
import * as readline from "readline";
|
||||||
|
|
||||||
const model = "llama3.2";
|
const model = "llama3.1";
|
||||||
type Message = {
|
type Message = {
|
||||||
role: "assistant" | "user" | "system";
|
role: "assistant" | "user" | "system";
|
||||||
content: string;
|
content: string;
|
||||||
|
|
13
gpu/gpu.go
13
gpu/gpu.go
|
@ -205,16 +205,13 @@ func GetGPUInfo() GpuInfoList {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("error looking up system memory", "error", err)
|
slog.Warn("error looking up system memory", "error", err)
|
||||||
}
|
}
|
||||||
depPath := LibraryDir()
|
|
||||||
|
|
||||||
cpus = []CPUInfo{
|
cpus = []CPUInfo{
|
||||||
{
|
{
|
||||||
GpuInfo: GpuInfo{
|
GpuInfo: GpuInfo{
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: cpuCapability.String(),
|
Variant: cpuCapability.String(),
|
||||||
ID: "0",
|
ID: "0",
|
||||||
DependencyPath: depPath,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -227,6 +224,8 @@ func GetGPUInfo() GpuInfoList {
|
||||||
return GpuInfoList{cpus[0].GpuInfo}
|
return GpuInfoList{cpus[0].GpuInfo}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
depPath := LibraryDir()
|
||||||
|
|
||||||
// Load ALL libraries
|
// Load ALL libraries
|
||||||
cHandles = initCudaHandles()
|
cHandles = initCudaHandles()
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,7 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const CREATE_DEFAULT_ERROR_MODE = 0x04000000
|
||||||
CREATE_DEFAULT_ERROR_MODE = 0x04000000
|
|
||||||
ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
|
|
||||||
)
|
|
||||||
|
|
||||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||||
// Wire up the default error handling logic If for some reason a DLL is
|
// Wire up the default error handling logic If for some reason a DLL is
|
||||||
|
@ -15,8 +12,5 @@ var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||||
// the user can either fix their PATH, or report a bug. Without this
|
// the user can either fix their PATH, or report a bug. Without this
|
||||||
// setting, the process exits immediately with a generic exit status but no
|
// setting, the process exits immediately with a generic exit status but no
|
||||||
// way to (easily) figure out what the actual missing DLL was.
|
// way to (easily) figure out what the actual missing DLL was.
|
||||||
//
|
CreationFlags: CREATE_DEFAULT_ERROR_MODE,
|
||||||
// Setting Above Normal priority class ensures when running as a "background service"
|
|
||||||
// with "programs" given best priority, we aren't starved of cpu cycles
|
|
||||||
CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS,
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ export default function () {
|
||||||
const [step, setStep] = useState<Step>(Step.WELCOME)
|
const [step, setStep] = useState<Step>(Step.WELCOME)
|
||||||
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
||||||
|
|
||||||
const command = 'ollama run llama3.2'
|
const command = 'ollama run llama3.1'
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className='drag'>
|
<div className='drag'>
|
||||||
|
|
|
@ -1025,8 +1025,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case resp.StatusCode == http.StatusUnauthorized:
|
case resp.StatusCode == http.StatusUnauthorized:
|
||||||
resp.Body.Close()
|
|
||||||
|
|
||||||
// Handle authentication error with one retry
|
// Handle authentication error with one retry
|
||||||
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
||||||
token, err := getAuthorizationToken(ctx, challenge)
|
token, err := getAuthorizationToken(ctx, challenge)
|
||||||
|
@ -1042,10 +1040,8 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case resp.StatusCode == http.StatusNotFound:
|
case resp.StatusCode == http.StatusNotFound:
|
||||||
resp.Body.Close()
|
|
||||||
return nil, os.ErrNotExist
|
return nil, os.ErrNotExist
|
||||||
case resp.StatusCode >= http.StatusBadRequest:
|
case resp.StatusCode >= http.StatusBadRequest:
|
||||||
defer resp.Body.Close()
|
|
||||||
responseBody, err := io.ReadAll(resp.Body)
|
responseBody, err := io.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
|
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
|
||||||
|
|
Loading…
Reference in a new issue