"""Example FastAPI server for llama.cpp.
|
|
|
|
To run this example:
|
|
|
|
```bash
|
|
pip install fastapi uvicorn sse-starlette pydantic-settings
|
|
export MODEL=../models/7B/...
|
|
```
|
|
|
|
Then run:
|
|
```
|
|
uvicorn llama_cpp.server.app:create_app --reload
|
|
```
|
|
|
|
or
|
|
|
|
```
|
|
python3 -m llama_cpp.server
|
|
```
|
|
|
|
Then visit http://localhost:8000/docs to see the interactive API docs.
|
|
|
|
"""
from __future__ import annotations

import os
import sys
import argparse

import uvicorn

from llama_cpp.server.app import create_app
from llama_cpp.server.settings import (
    Settings,
    ServerSettings,
    ModelSettings,
    ConfigFileSettings,
)
from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
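# A hedged sketch of the JSON layout that ConfigFileSettings is expected to
# accept (top-level server fields plus a "models" list of ModelSettings);
# the values below are illustrative assumptions, not documented defaults:
#
# {
#   "host": "0.0.0.0",
#   "port": 8000,
#   "models": [
#     {
#       "model": "models/7B/llama-model.gguf",
#       "model_alias": "llama-7b"
#     }
#   ]
# }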


def main():
    description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
    parser = argparse.ArgumentParser(description=description)

    add_args_from_model(parser, Settings)
    parser.add_argument(
        "--config_file",
        type=str,
        help="Path to a config file to load.",
    )
    server_settings: ServerSettings | None = None
    model_settings: list[ModelSettings] = []
    args = parser.parse_args()
    try:
        # Load server settings from config_file if provided
        config_file = os.environ.get("CONFIG_FILE", args.config_file)
        if config_file:
            if not os.path.exists(config_file):
                raise ValueError(f"Config file {config_file} not found!")
            with open(config_file, "rb") as f:
                config_file_settings = ConfigFileSettings.model_validate_json(f.read())
                server_settings = ServerSettings.model_validate(config_file_settings)
                model_settings = config_file_settings.models
        else:
            server_settings = parse_model_from_args(ServerSettings, args)
            model_settings = [parse_model_from_args(ModelSettings, args)]
    except Exception as e:
        print(e, file=sys.stderr)
        parser.print_help()
        sys.exit(1)
    assert server_settings is not None
    assert model_settings is not None
    app = create_app(
        server_settings=server_settings,
        model_settings=model_settings,
    )
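    # HOST and PORT environment variables, when set, take precedence over the
    # values in server_settings (see the os.getenv fallbacks below).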
    uvicorn.run(
        app,
        host=os.getenv("HOST", server_settings.host),
        port=int(os.getenv("PORT", server_settings.port)),
        ssl_keyfile=server_settings.ssl_keyfile,
        ssl_certfile=server_settings.ssl_certfile,
    )


if __name__ == "__main__":
    main()