llama.cpp/llama_cpp/server/__main__.py

"""Example FastAPI server for llama.cpp.
To run this example:
```bash
pip install fastapi uvicorn sse-starlette pydantic-settings
export MODEL=../models/7B/...
```
Then run:
```
uvicorn llama_cpp.server.app:create_app --reload
```
or
```
python3 -m llama_cpp.server
```
Then visit http://localhost:8000/docs to see the interactive API docs.
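
A config file describing one or more models can also be passed via the
--config_file flag or the CONFIG_FILE environment variable (both handled
in main() below; config.json is a placeholder path):

```
python3 -m llama_cpp.server --config_file config.json
```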
"""
from __future__ import annotations

import os
import sys
import argparse

import uvicorn

from llama_cpp.server.app import create_app
from llama_cpp.server.settings import (
    Settings,
    ServerSettings,
    ModelSettings,
    ConfigFileSettings,
)
from llama_cpp.server.cli import add_args_from_model, parse_model_from_args


def main():
    description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
    parser = argparse.ArgumentParser(description=description)

    add_args_from_model(parser, Settings)
    parser.add_argument(
        "--config_file",
        type=str,
        help="Path to a config file to load.",
    )
    server_settings: ServerSettings | None = None
    model_settings: list[ModelSettings] = []
    args = parser.parse_args()

    try:
        # Load server settings from config_file if provided
        config_file = os.environ.get("CONFIG_FILE", args.config_file)
        if config_file:
            if not os.path.exists(config_file):
                raise ValueError(f"Config file {config_file} not found!")
            with open(config_file, "rb") as f:
                config_file_settings = ConfigFileSettings.model_validate_json(f.read())
                server_settings = ServerSettings.model_validate(config_file_settings)
                model_settings = config_file_settings.models
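            # Sketch of the JSON shape ConfigFileSettings is assumed to
            # accept: top-level ServerSettings fields plus a "models" list
            # of ModelSettings (the inner field names are illustrative):
            #   {
            #     "host": "0.0.0.0",
            #     "port": 8000,
            #     "models": [
            #       {"model": "models/7B/model.gguf", "model_alias": "7b"}
            #     ]
            #   }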
        else:
            server_settings = parse_model_from_args(ServerSettings, args)
            model_settings = [parse_model_from_args(ModelSettings, args)]
    except Exception as e:
        print(e, file=sys.stderr)
        parser.print_help()
        sys.exit(1)
    assert server_settings is not None
    assert model_settings is not None

    app = create_app(
        server_settings=server_settings,
        model_settings=model_settings,
    )
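    # HOST/PORT environment variables, when set, override the corresponding
    # server settings.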
    uvicorn.run(
        app,
        host=os.getenv("HOST", server_settings.host),
        port=int(os.getenv("PORT", server_settings.port)),
        ssl_keyfile=server_settings.ssl_keyfile,
        ssl_certfile=server_settings.ssl_certfile,
    )


if __name__ == "__main__":
    main()