llama.cpp/llama_cpp/server/__main__.py

"""Example FastAPI server for llama.cpp.
To run this example:
```bash
pip install fastapi uvicorn sse-starlette
export MODEL=../models/7B/...
```
Then run:
```
uvicorn llama_cpp.server.app:app --reload
```
or
```
python3 -m llama_cpp.server
```
Then visit http://localhost:8000/docs to see the interactive API docs.
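Once the server is running, you can exercise the API directly (a minimal
sketch, assuming the default host/port and the OpenAI-compatible
/v1/completions route served by llama_cpp.server.app):
```bash
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Q: What is the capital of France? A:", "max_tokens": 16}'
```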
"""
import os
import argparse
import uvicorn

from llama_cpp.server.app import create_app, Settings

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # Expose every field of the pydantic Settings model as a CLI flag, so
    # the server options track the model's definition automatically.
    for name, field in Settings.__fields__.items():
        parser.add_argument(
            f"--{name}",
            dest=name,
            type=field.type_,
            default=field.default,
            help=field.field_info.description,
        )
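    # Example invocation (a sketch; the generated flags depend on the
    # fields Settings actually defines, e.g. a --model flag for the
    # model path):
    #   python3 -m llama_cpp.server --model ../models/7B/...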
    args = parser.parse_args()
    # Drop arguments left at None so pydantic can apply its own defaults
    # (e.g. values from environment variables) for those fields.
    settings = Settings(**{k: v for k, v in vars(args).items() if v is not None})
    app = create_app(settings=settings)

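    # A sketch of overriding the bind address via environment variables
    # (HOST and PORT are read by the uvicorn.run call below; the values
    # here are illustrative):
    #   HOST=0.0.0.0 PORT=8080 python3 -m llama_cpp.server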
    uvicorn.run(
        app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
    )