Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
commit 36679a58ef
2 changed files with 24 additions and 0 deletions

@@ -283,6 +283,15 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the
To bind to `0.0.0.0` to enable remote connections, use `python3 -m llama_cpp.server --host 0.0.0.0`.

Similarly, to change the port (default is 8000), use `--port`.

You probably also want to set the prompt format. For chatml, use

```bash
python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml
```

That will format the prompt according to how the model expects it. You can find the prompt format in the model card.

For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format".
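
The same format names also apply when using the library directly. A minimal sketch, assuming the high-level `Llama` API accepts a `chat_format` name as the registration mechanism suggests (the model path is a placeholder):

```python
from llama_cpp import Llama

# chat_format selects one of the registered formatters by name.
llm = Llama(model_path="models/7B/llama-model.gguf", chat_format="chatml")
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])
```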

## Docker image

A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:

@@ -456,6 +456,21 @@ def format_oasst_llama(
    return ChatFormatterResponse(prompt=_prompt)


@register_chat_format("baichuan-2")
def format_baichuan2(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    _system_template = "{system_message}"
    _roles = dict(user="<reserved_106>", assistant="<reserved_107>")
    _sep = ""
    system_message = _get_system_message(messages)
    system_message = _system_template.format(system_message=system_message)
    _messages = _map_roles(messages, _roles)
    _messages.append((_roles["assistant"], None))
    _prompt = _format_no_colon_single(system_message, _messages, _sep)
    return ChatFormatterResponse(prompt=_prompt)


@register_chat_format("openbuddy")
def format_openbuddy(
    messages: List[llama_types.ChatCompletionRequestMessage],
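
For reference, the prompt string the new `format_baichuan2` produces can be sketched standalone. This assumes `_format_no_colon_single` simply concatenates the system message and each role tag/content pair with `_sep`, leaving a role tag open when its content is `None`; the helper below is hypothetical and only mirrors that layout:

```python
from typing import List, Optional, Tuple

def sketch_baichuan2_prompt(
    system_message: str,
    turns: List[Tuple[str, Optional[str]]],
) -> str:
    # Mirror of the assumed _format_no_colon_single layout with sep="":
    # the system message comes first, then each role tag immediately
    # followed by its content; a tag whose content is None is left open.
    sep = ""
    prompt = system_message + sep
    for role_tag, content in turns:
        prompt += role_tag + (content + sep if content is not None else "")
    return prompt

# User turns map to <reserved_106> and assistant turns to <reserved_107>;
# format_baichuan2 appends a final (assistant, None) pair so the model
# continues generating from the open assistant tag.
print(sketch_baichuan2_prompt(
    "You are a helpful assistant.",
    [("<reserved_106>", "Hi there"), ("<reserved_107>", None)],
))
# You are a helpful assistant.<reserved_106>Hi there<reserved_107>
```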