import json
import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
model = 'stablelm-zephyr'  # TODO: update this for whatever model you wish to use
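# the model must be available locally before running, e.g. pulled with `ollama pull stablelm-zephyr`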


def generate(prompt, context):
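    # stream a completion for `prompt` from the local Ollama server; `context`
    # carries the conversation state returned by the previous call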
    r = requests.post('http://localhost:11434/api/generate',
                      json={
                          'model': model,
                          'prompt': prompt,
                          'context': context,
                      },
                      stream=True)
    r.raise_for_status()

    for line in r.iter_lines():
        body = json.loads(line)
        response_part = body.get('response', '')
        # the response streams one token at a time, print that as we receive it
        print(response_part, end='', flush=True)

        if 'error' in body:
            raise Exception(body['error'])

        if body.get('done', False):
            return body['context']

def main():
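    # simple REPL: read a prompt, stream the model's reply, and feed the returned context into the next turn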
    context = []  # the context stores the conversation history; passing it back keeps the model aware of earlier exchanges
    while True:
        user_input = input("Enter a prompt: ")
        if not user_input:
            exit()
        print()
        context = generate(user_input, context)
        print()

if __name__ == "__main__":
    main()