"""Example FastAPI server for llama.cpp. To run this example: ```bash pip install fastapi uvicorn sse-starlette pydantic-settings export MODEL=../models/7B/... ``` Then run: ``` uvicorn llama_cpp.server.app:app --reload ``` or ``` python3 -m llama_cpp.server ``` Then visit http://localhost:8000/docs to see the interactive API docs. """ import os import argparse import uvicorn from llama_cpp.server.app import create_app, Settings if __name__ == "__main__": parser = argparse.ArgumentParser() for name, field in Settings.__model_fields__.items(): description = field.field_info.description if field.default is not None and description is not None: description += f" (default: {field.default})" parser.add_argument( f"--{name}", dest=name, type=field.type_, help=description, ) args = parser.parse_args() settings = Settings(**{k: v for k, v in vars(args).items() if v is not None}) app = create_app(settings=settings) uvicorn.run( app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)) )