diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py
new file mode 100644
index 00000000..3c480f97
--- /dev/null
+++ b/examples/python-simplechat/client.py
@@ -0,0 +1,46 @@
+import json
+import requests
+
+# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
+model = "llama2"  # TODO: update this for whatever model you wish to use
+
+
+def chat(messages):
+    r = requests.post(
+        "http://0.0.0.0:11434/api/chat",
+        json={"model": model, "messages": messages, "stream": True},
+    )
+    r.raise_for_status()
+    output = ""
+
+    for line in r.iter_lines():
+        body = json.loads(line)
+        if "error" in body:
+            raise Exception(body["error"])
+        if body.get("done") is False:
+            message = body.get("message", {})
+            content = message.get("content", "")
+            output += content
+            # the response streams one token at a time, print that as we receive it
+            print(content, end="", flush=True)
+
+
+        if body.get("done", False):
+            message["content"] = output
+            return message
+
+
+def main():
+    messages = []
+
+    while True:
+        user_input = input("Enter a prompt: ")
+        print()
+        messages.append({"role": "user", "content": user_input})
+        message = chat(messages)
+        messages.append(message)
+        print("\n\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md
new file mode 100644
index 00000000..abbdfe7e
--- /dev/null
+++ b/examples/python-simplechat/readme.md
@@ -0,0 +1,24 @@
+# Simple Chat Example
+
+The **chat** endpoint is one of two ways to generate text from an LLM with Ollama. At a high level, you provide the endpoint with an array of objects, each specifying a role and content. With each prompt and each response, you add more of those role/content objects, building up the conversation history.
+
+## Review the Code
+
+You can see in the **chat** function that calling the endpoint comes down to:
+
+```python
+r = requests.post(
+    "http://0.0.0.0:11434/api/chat",
+    json={"model": model, "messages": messages, "stream": True},
+)
+```
+
+With the **generate** endpoint you provide a `prompt`, but with **chat** you provide `messages`. The resulting stream of responses includes a `message` object with a `content` field.
+
+The final JSON object doesn't provide the full content, so you need to build up the content yourself by concatenating the streamed chunks.
+
+In the **main** function, we collect `user_input`, append it to the list of messages, and pass that list to the **chat** function. When the LLM is done responding, its output is appended to the list as another message.
+
+## Next Steps
+
+In this example, all generations are kept in the history. You might want to experiment with summarizing everything older than 10 conversations to enable a longer history while using less of the context window.
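
To make the request shape described in the readme concrete, here is a rough illustration of the `messages` list that `main` builds up over a short exchange. The prompt and reply text are invented for illustration only.

```python
# Each user prompt and each assistant reply is appended to the same list,
# and the whole list is sent to /api/chat on every request.
messages = [
    {"role": "user", "content": "Why is the sky blue?"},
    {"role": "assistant", "content": "Sunlight is scattered by the atmosphere..."},
    {"role": "user", "content": "Does the same thing happen at sunset?"},
]
```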
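
The readme's Next Steps section suggests summarizing older turns rather than resending them all verbatim. A minimal sketch of that idea, reusing the `chat` function from `client.py`, might look like the following; the `compact_history` helper, the `MAX_MESSAGES` cutoff, and the summarization prompt are illustrative choices, not part of the example above.

```python
MAX_MESSAGES = 20  # arbitrary cutoff; tune for your model's context window


def compact_history(messages):
    """Replace everything but the most recent turns with a single summary message."""
    if len(messages) <= MAX_MESSAGES:
        return messages
    older, recent = messages[:-MAX_MESSAGES], messages[-MAX_MESSAGES:]
    # Ask the model to summarize the older turns. Note that chat() streams and
    # prints the summary as it is generated, so a non-streaming request may be
    # preferable here.
    summary = chat(older + [{"role": "user", "content": "Briefly summarize the conversation so far."}])
    return [{"role": "assistant", "content": summary["content"]}] + recent
```

`main` could then call `messages = compact_history(messages)` before each `chat(messages)` call so the request never grows without bound.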