ollama/examples/python-simplegenerate/client.py

41 lines
1.3 KiB
Python
Raw Normal View History

2023-08-14 15:27:13 -03:00
import json
import requests
# NOTE: ollama must be running for this to work; start the ollama app or run `ollama serve`.
# Name of the model placed in the 'model' field of each generate request.
model = 'stablelm-zephyr' # TODO: update this for whatever model you wish to use
2023-08-14 15:27:13 -03:00
2023-08-14 16:38:44 -03:00
def generate(prompt, context):
    """Stream a completion for *prompt* from the local Ollama server.

    Every streamed fragment is printed to stdout as soon as it arrives.

    Args:
        prompt: The text to send to the model.
        context: The value returned by a previous call (an empty list for a
            fresh conversation); it is forwarded unchanged in the request.

    Returns:
        The ``context`` value carried by the final (``done``) message. If
        that message has no ``context`` key, or the stream ends without a
        ``done`` message, the ``context`` argument is returned unchanged so
        the caller never ends up holding ``None``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        Exception: If a streamed message contains an ``error`` field.
    """
    payload = {
        'model': model,
        'prompt': prompt,
        'context': context,
    }

    # Using the response as a context manager releases the connection even
    # when we return or raise before the stream has been fully consumed.
    with requests.post('http://localhost:11434/api/generate',
                       json=payload,
                       stream=True) as r:
        r.raise_for_status()

        for line in r.iter_lines():
            if not line:
                # iter_lines() may yield empty keep-alive lines; they are
                # not JSON documents, so skip them.
                continue

            body = json.loads(line)

            # Surface a server-side error before printing anything from
            # this message.
            if 'error' in body:
                raise Exception(body['error'])

            # the response streams one token at a time, print that as we receive it
            print(body.get('response', ''), end='', flush=True)

            if body.get('done', False):
                return body.get('context', context)

    # The stream ended without a final 'done' message: keep the old context.
    return context
2023-08-14 15:27:13 -03:00
def main():
    """Run an interactive prompt loop against the model.

    Reads prompts from stdin and passes each one, together with the context
    returned by the previous call, to ``generate``. The loop ends when the
    user submits an empty line or stdin reaches end-of-file.
    """
    context = []  # the context stores a conversation history, you can use this to make the model more context aware
    while True:
        try:
            user_input = input("Enter a prompt: ")
        except EOFError:
            # Ctrl-D or a closed pipe: finish the prompt line and stop
            # instead of crashing with a traceback.
            print()
            return

        if not user_input:
            # Plain return instead of exit(): exit() is a helper that the
            # site module installs for interactive sessions and is not
            # guaranteed to exist in every interpreter configuration.
            return

        print()
        new_context = generate(user_input, context)
        # Never replace the conversation history with None.
        if new_context is not None:
            context = new_context
        print()
# Start the interactive loop only when executed as a script, not on import.
if __name__ == "__main__":
    main()