diff --git a/ollama/cmd/cli.py b/ollama/cmd/cli.py
index a6287ea3..e3dc5d3d 100644
--- a/ollama/cmd/cli.py
+++ b/ollama/cmd/cli.py
@@ -1,6 +1,5 @@
 import os
 import sys
-import json
 from pathlib import Path
 from argparse import ArgumentParser
 
@@ -71,7 +70,6 @@ def generate_oneshot(*args, **kwargs):
     print(flush=True)
 
     for output in engine.generate(*args, **kwargs):
-        output = json.loads(output)
         choices = output.get("choices", [])
         if len(choices) > 0:
             print(choices[0].get("text", ""), end="", flush=True)
diff --git a/ollama/cmd/server.py b/ollama/cmd/server.py
index 5a99e937..11e478bd 100644
--- a/ollama/cmd/server.py
+++ b/ollama/cmd/server.py
@@ -1,5 +1,6 @@
-from aiohttp import web
+import json
 import aiohttp_cors
+from aiohttp import web
 
 from ollama import engine
 
@@ -89,7 +90,8 @@ async def generate(request):
     }
 
     for output in engine.generate(model, prompt, **kwargs):
-        await response.write(output.encode("utf-8"))
+        output = json.dumps(output).encode('utf-8')
+        await response.write(output)
         await response.write(b"\n")
 
     return response
diff --git a/ollama/engine.py b/ollama/engine.py
index 91c298e8..efcb460f 100644
--- a/ollama/engine.py
+++ b/ollama/engine.py
@@ -33,7 +33,7 @@ def generate(model, prompt, models_home=".", llms={}, *args, **kwargs):
     kwargs.update({"stream": True})
 
     for output in llm(prompt, *args, **kwargs):
-        yield json.dumps(output)
+        yield output
 
 
 def load(model, models_home=".", llms={}):
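
The net effect of these three changes is that engine.generate yields plain dicts and JSON serialization happens only at the HTTP boundary, where the server frames each output as one json.dumps(...) payload followed by b"\n" (newline-delimited JSON). A minimal client sketch consuming that stream follows; the /generate route, host, port, model name, and JSON request-body shape are assumptions for illustration and are not shown in this diff:

    # Minimal client sketch, assuming the generate handler above is routed
    # at POST http://localhost:8080/generate and reads "model" and "prompt"
    # from a JSON body (route, host, port, and request shape are placeholders).
    import json

    import requests  # third-party HTTP client (pip install requests)

    response = requests.post(
        "http://localhost:8080/generate",
        json={"model": "some-model", "prompt": "Why is the sky blue?"},
        stream=True,
    )

    # Each line of the response body is one JSON-encoded output dict,
    # matching the json.dumps(output) + b"\n" framing in server.generate.
    for line in response.iter_lines():
        if not line:
            continue
        output = json.loads(line)
        choices = output.get("choices", [])
        if choices:
            print(choices[0].get("text", ""), end="", flush=True)

This mirrors the cli.py one-shot loop: after this change the CLI consumes dicts directly from the engine, while a remote client recovers the same dicts by decoding one JSON document per line.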