2023-04-05 16:23:25 -04:00
|
|
|
"""Example FastAPI server for llama.cpp.
|
|
|
|
|
|
|
|
To run this example:
|
|
|
|
|
|
|
|
```bash
|
2023-07-07 21:38:46 -04:00
|
|
|
pip install fastapi uvicorn sse-starlette pydantic-settings
|
2023-04-05 16:23:25 -04:00
|
|
|
export MODEL=../models/7B/...
|
|
|
|
```
|
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
Then run:
|
|
|
|
```
|
|
|
|
uvicorn llama_cpp.server.app:app --reload
|
|
|
|
```
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
or
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
```
|
|
|
|
python3 -m llama_cpp.server
|
|
|
|
```
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
Then visit http://localhost:8000/docs to see the interactive API docs.
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
"""
|
|
|
|
import os
|
2023-05-05 12:08:28 -04:00
|
|
|
import argparse
|
2023-09-13 20:00:42 -04:00
|
|
|
from typing import List, Literal, Union
|
2023-05-05 12:08:28 -04:00
|
|
|
|
2023-04-28 22:43:37 -07:00
|
|
|
import uvicorn
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-05-05 12:08:28 -04:00
|
|
|
from llama_cpp.server.app import create_app, Settings
|
2023-04-05 16:23:25 -04:00
|
|
|
|
2023-09-13 18:06:12 -04:00
|
|
|
def get_base_type(annotation):
    """Reduce a type annotation to a single underlying type.

    The result is intended to be usable as a plain callable/type (for example
    as an ``argparse`` ``type=`` converter).

    Rules, applied recursively:

    * ``Literal[...]``       -> the type of the first literal value.
    * ``Union[...]`` / ``Optional[...]`` / ``X | Y`` (PEP 604)
                             -> the base type of the first non-``None`` member.
    * ``List[X]`` / ``list[X]`` -> the base type of ``X``.
    * anything else          -> the annotation itself, unchanged.

    Args:
        annotation: A type or ``typing`` construct.

    Returns:
        The reduced type, or ``None`` if the annotation is a union whose
        members are all ``NoneType``.
    """
    # Local import: the module only imports names from ``typing`` at top level.
    import types

    origin = getattr(annotation, "__origin__", None)

    # ``X | Y`` builds a ``types.UnionType`` (Python 3.10+), which is not
    # reliably recognisable through ``__origin__`` the way ``typing.Union`` is,
    # so it is checked explicitly.
    pep604_union = getattr(types, "UnionType", None)
    is_union = origin is Union or (
        pep604_union is not None and isinstance(annotation, pep604_union)
    )

    if origin is Literal:
        return type(annotation.__args__[0])

    if is_union:
        non_optional_args = [
            arg for arg in annotation.__args__ if arg is not type(None)
        ]
        if non_optional_args:
            return get_base_type(non_optional_args[0])
        # Every member was NoneType: there is no base type to report.
        return None

    if origin is list or origin is List:
        return get_base_type(annotation.__args__[0])

    return annotation
|
|
|
|
|
2023-09-13 20:00:42 -04:00
|
|
|
def contains_list_type(annotation) -> bool:
    """Return whether *annotation* is, or contains, a list type.

    ``List[X]`` / ``list[X]`` count directly. ``Literal[...]``,
    ``Union[...]`` / ``Optional[...]`` and PEP 604 unions (``X | Y``) are
    searched recursively through their arguments. Any other annotation
    yields ``False``.

    Args:
        annotation: A type or ``typing`` construct (``None`` is accepted).

    Returns:
        ``True`` if a list type is found, ``False`` otherwise.
    """
    # Local import: the module only imports names from ``typing`` at top level.
    import types

    origin = getattr(annotation, "__origin__", None)

    if origin is list or origin is List:
        return True

    # ``X | Y`` builds a ``types.UnionType`` (Python 3.10+); check for it
    # explicitly so ``list[int] | None`` is treated like ``Optional[List[int]]``.
    pep604_union = getattr(types, "UnionType", None)
    is_pep604_union = pep604_union is not None and isinstance(
        annotation, pep604_union
    )

    if origin is Literal or origin is Union or is_pep604_union:
        return any(contains_list_type(arg) for arg in annotation.__args__)

    return False
|
|
|
|
|
2023-09-13 23:09:57 -04:00
|
|
|
def parse_bool_arg(arg):
    """Convert a command-line style value into a ``bool``.

    ``bytes`` input is decoded as UTF-8 first. The value is then stringified,
    lower-cased and stripped of surrounding whitespace before matching.

    Accepted spellings:
        true:  ``1``, ``on``, ``t``, ``true``, ``y``, ``yes``
        false: ``0``, ``off``, ``f``, ``false``, ``n``, ``no``

    Raises:
        ValueError: If the value matches none of the accepted spellings.
    """
    value = arg.decode('utf-8') if isinstance(arg, bytes) else arg

    # Single lookup table mapping every accepted spelling to its result.
    spellings = dict.fromkeys(('1', 'on', 't', 'true', 'y', 'yes'), True)
    spellings.update(dict.fromkeys(('0', 'off', 'f', 'false', 'n', 'no'), False))

    key = str(value).lower().strip()
    if key not in spellings:
        raise ValueError(f'Invalid boolean argument: {value}')
    return spellings[key]
|
2023-09-13 20:00:42 -04:00
|
|
|
|
2023-04-05 16:23:25 -04:00
|
|
|
if __name__ == "__main__":
    # Build a command-line interface with one ``--<field>`` option per
    # ``Settings`` field, construct the settings from the parsed options,
    # and serve the resulting app with uvicorn.
    parser = argparse.ArgumentParser()
    for name, field in Settings.model_fields.items():
        description = field.description
        if field.default is not None and description is not None:
            description += f" (default: {field.default})"

        annotation = field.annotation
        base_type = get_base_type(annotation) if annotation is not None else str

        if base_type is bool:
            # Booleans take an explicit value (e.g. ``--flag true``) that is
            # parsed by ``parse_bool_arg``. Pass ``description`` as-is so a
            # field without a description shows no help text instead of the
            # literal string "None".
            parser.add_argument(
                f"--{name}",
                dest=name,
                type=parse_bool_arg,
                help=description,
            )
        else:
            parser.add_argument(
                f"--{name}",
                dest=name,
                # List-typed fields accept zero or more values.
                nargs="*" if contains_list_type(annotation) else None,
                type=base_type,
                help=description,
            )

    args = parser.parse_args()

    # Options not given on the command line parse to None; drop them so that
    # only explicitly supplied values are passed to ``Settings``.
    settings = Settings(**{k: v for k, v in vars(args).items() if v is not None})
    app = create_app(settings=settings)

    # The HOST / PORT environment variables, when set, take precedence over
    # the corresponding values in ``settings``.
    uvicorn.run(
        app,
        host=os.getenv("HOST", settings.host),
        port=int(os.getenv("PORT", settings.port)),
    )
|