efe8e6f879
Define an init_llama function that starts llama with supplied settings, instead of doing it in the global context of app.py. This makes tests less brittle: they no longer need to mutate os.environ before importing the app.
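A minimal sketch of what such an init_llama could look like inside app.py. The Settings field names, the pydantic BaseSettings base, and the module layout are assumptions for illustration, not necessarily the actual app.py code:

```python
# Hypothetical sketch of app.py after this change; names are assumptions.
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseSettings

import llama_cpp

app = FastAPI()
llama: Optional[llama_cpp.Llama] = None  # no longer created at import time


class Settings(BaseSettings):
    # BaseSettings reads these from the environment (MODEL, N_CTX) by default.
    model: str
    n_ctx: int = 2048


def init_llama(settings: Optional[Settings] = None) -> None:
    """Create the global Llama instance from explicit or env-derived settings."""
    global llama
    if settings is None:
        settings = Settings()  # falls back to environment variables
    llama = llama_cpp.Llama(model_path=settings.model, n_ctx=settings.n_ctx)
```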
34 lines · 555 B · Python
"""Example FastAPI server for llama.cpp.
|
|
|
|
To run this example:
|
|
|
|
```bash
|
|
pip install fastapi uvicorn sse-starlette
|
|
export MODEL=../models/7B/...
|
|
```
|
|
|
|
Then run:
|
|
```
|
|
uvicorn llama_cpp.server.app:app --reload
|
|
```
|
|
|
|
or
|
|
|
|
```
|
|
python3 -m llama_cpp.server
|
|
```
|
|
|
|
Then visit http://localhost:8000/docs to see the interactive API docs.
|
|
|
|
"""
|
|
import os
|
|
import uvicorn
|
|
|
|
from llama_cpp.server.app import app, init_llama
|
|
|
|
if __name__ == "__main__":
|
|
init_llama()
|
|
|
|
uvicorn.run(
|
|
app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
|
|
)
|
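With init_llama taking explicit settings, a test can construct the app without touching os.environ before the import. A sketch assuming the hypothetical Settings shape above; the model path is a placeholder, not a real file:

```python
# Hypothetical test; Settings fields and the model path are assumptions.
from fastapi.testclient import TestClient

from llama_cpp.server.app import Settings, app, init_llama

# Pass settings directly rather than exporting MODEL into the environment.
init_llama(Settings(model="/path/to/model.bin", n_ctx=512))

client = TestClient(app)
response = client.get("/docs")
assert response.status_code == 200
```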