"""Example FastAPI server for llama.cpp.
To run this example:
```bash
pip install fastapi uvicorn sse-starlette
export MODEL=../models/7B/...
```
Then run (the --factory flag tells uvicorn to call create_app() to build the app):
```bash
uvicorn --factory llama_cpp.server.app:create_app --reload
```
or
```bash
python3 -m llama_cpp.server
```
Then visit http://localhost:8000/docs to see the interactive API docs.
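
Once the server is running you can exercise the API directly. As a quick
smoke test, the request below assumes the OpenAI-compatible /v1/completions
route served by this app (see the interactive docs for the exact schema):

```bash
curl -X POST http://localhost:8000/v1/completions -H "Content-Type: application/json" -d '{"prompt": "Hello, my name is", "max_tokens": 16}'
```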
For the implementation of the server itself, see llama_cpp/server/app.py.
"""
import os
import uvicorn
from llama_cpp.server.app import create_app
if __name__ == "__main__":
    app = create_app()

    # Host and port are configurable via the HOST and PORT environment variables.
    uvicorn.run(
        app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", "8000"))
    )
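
# A possible variation (a sketch, not part of the example above): configure the
# model from code instead of the MODEL environment variable. This assumes that
# create_app() accepts a pydantic Settings object; the exact API may differ
# between versions, so check llama_cpp/server/app.py before relying on it:
#
#     from llama_cpp.server.app import Settings
#     app = create_app(settings=Settings(model="../models/7B/..."))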