Configurable Chat Formats (#711)

* Add configurable default chat completion format.

* Remove chat_template file to avoid circular import

* Update llama_types

* Add chat format
This commit is contained in:
Andrei 2023-09-29 19:52:04 -04:00 committed by GitHub
parent a945404b4a
commit 3bca7708fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 330 additions and 19 deletions

View file

@ -24,6 +24,7 @@ import ctypes
from . import llama_cpp
from .llama_types import *
from .llama_grammar import LlamaGrammar
from . import llama_chat_format
import numpy as np
import numpy.typing as npt
@ -243,6 +244,8 @@ class Llama:
lora_path: Optional[str] = None,
# Backend Params
numa: bool = False,
# Chat Format Params
chat_format: str = "llama-2",
# Misc
verbose: bool = True,
# Extra Params
@ -273,6 +276,7 @@ class Llama:
lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
lora_path: Path to a LoRA file to apply to the model.
numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
chat_format: String specifying the chat format to use when calling create_chat_completion.
verbose: Print verbose output to stderr.
kwargs: Unused keyword arguments (for additional backwards compatibility).
@ -389,6 +393,8 @@ class Llama:
if self.verbose:
print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
self.chat_format = chat_format
self._n_vocab = self.n_vocab()
self._n_ctx = self.n_ctx()
size = self._n_vocab
@ -1565,9 +1571,21 @@ class Llama:
],
}
def _convert_completion_to_chat(
    self,
    completion_or_chunks: Union[Completion, Iterator[CompletionChunk]],
    stream: bool = False,
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
    """Turn a text-completion result into its chat-completion counterpart.

    Args:
        completion_or_chunks: Either a finished completion or an iterator of
            completion chunks; which one is decided by ``stream``.
        stream: When truthy the input is handled as an iterator of chunks,
            otherwise as a single completion.

    Returns:
        Whatever the matching ``_convert_text_completion*_to_chat`` helper
        returns for the given input.
    """
    if not stream:
        # Non-streaming: the argument is one complete completion object.
        completion: Completion = completion_or_chunks  # type: ignore
        return self._convert_text_completion_to_chat(completion)

    # Streaming: the argument is an iterator of chunks.
    chunks: Iterator[CompletionChunk] = completion_or_chunks  # type: ignore
    return self._convert_text_completion_chunks_to_chat(chunks)
def create_chat_completion(
self,
messages: List[ChatCompletionMessage],
messages: List[ChatCompletionRequestMessage],
functions: Optional[List[ChatCompletionFunction]] = None,
function_call: Optional[Union[str, ChatCompletionFunctionCall]] = None,
temperature: float = 0.2,
@ -1602,26 +1620,28 @@ class Llama:
Returns:
Generated chat completion or a stream of chat completion chunks.
"""
stop = (
stop if isinstance(stop, list) else [stop] if isinstance(stop, str) else []
format = llama_chat_format.get_chat_format(self.chat_format)
result = format(
messages=messages,
)
chat_history = "".join(
f'### {"Human" if message["role"] == "user" else "Assistant"}:{message["content"]}'
for message in messages
)
PROMPT = chat_history + "### Assistant:"
PROMPT_STOP = ["### Assistant:", "### Human:"]
completion_or_chunks = self(
prompt=PROMPT,
stop=PROMPT_STOP + stop,
prompt = result.prompt
if result.stop is not None:
stop = [] if stop is None else [stop] if isinstance(stop, str) else stop
rstop = result.stop if isinstance(result.stop, list) else [result.stop]
stop = stop + rstop
completion_or_chunks = self.create_completion(
prompt=prompt,
temperature=temperature,
top_p=top_p,
top_k=top_k,
stream=stream,
stop=stop,
max_tokens=max_tokens,
repeat_penalty=repeat_penalty,
presence_penalty=presence_penalty,
frequency_penalty=frequency_penalty,
repeat_penalty=repeat_penalty,
tfs_z=tfs_z,
mirostat_mode=mirostat_mode,
mirostat_tau=mirostat_tau,
@ -1630,12 +1650,7 @@ class Llama:
logits_processor=logits_processor,
grammar=grammar,
)
if stream:
chunks: Iterator[CompletionChunk] = completion_or_chunks # type: ignore
return self._convert_text_completion_chunks_to_chat(chunks)
else:
completion: Completion = completion_or_chunks # type: ignore
return self._convert_text_completion_to_chat(completion)
return self._convert_completion_to_chat(completion_or_chunks, stream=stream) # type: ignore
def __del__(self):
if hasattr(self, "model") and self.model is not None:
@ -1675,6 +1690,8 @@ class Llama:
lora_path=self.lora_path,
# Backend Params
numa=self.numa,
# Chat Format Params
chat_format=self.chat_format,
# Misc
verbose=self.verbose,
)
@ -1708,6 +1725,8 @@ class Llama:
lora_path=state["lora_path"],
# Backend Params
numa=state["numa"],
# Chat Format Params
chat_format=state["chat_format"],
# Misc
verbose=state["verbose"],
)

View file

@ -0,0 +1,292 @@
import dataclasses
from typing import Any, Dict, List, Optional, Tuple, Union, Protocol
from . import llama_types
def _get_system_message(
messages: List[llama_types.ChatCompletionRequestMessage],
) -> str:
"""Get the first system message."""
for message in messages:
if message["role"] == "system":
return message["content"] or ""
return ""
def _map_roles(
messages: List[llama_types.ChatCompletionRequestMessage], role_map: Dict[str, str]
) -> List[Tuple[str, Optional[str]]]:
"""Map the message roles."""
output: List[Tuple[str, Optional[str]]] = []
for message in messages:
role = message["role"]
if role in role_map:
output.append((role_map[role], message["content"]))
return output
def _format_llama2(
system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
) -> str:
"""Format the prompt with the llama2 style."""
ret = system_message + sep
for role, message in messages:
if message:
ret += message + " "
else:
ret += role + " "
return ret
def _format_add_colon_single(
system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
) -> str:
"""Format the prompt with the add-colon-single style."""
ret = system_message + sep
for role, message in messages:
if message:
ret += role + ": " + message + sep
else:
ret += role + ":"
return ret
def _format_add_colon_two(
system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str
) -> str:
"""Format the prompt with the add-colon-two style."""
seps = [sep, sep2]
ret = system_message + seps[0]
for i, (role, message) in enumerate(messages):
if message:
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
def _format_no_colon_single(
system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
) -> str:
"""Format the prompt with the no-colon-single style."""
ret = system_message
for role, message in messages:
if message:
ret += role + message + sep
else:
ret += role
return ret
def _format_add_colon_space_single(
system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
) -> str:
"""Format the prompt with the add-colon-space-single style."""
ret = system_message + sep
for role, message in messages:
if message:
ret += role + ": " + message + sep
else:
ret += role + ": " # must be end with a space
return ret
@dataclasses.dataclass
class ChatFormatterResponse:
    """Value returned by a chat formatter: a prompt plus optional stop text."""

    # The fully rendered prompt string produced by the formatter.
    prompt: str
    # Optional stop sequence(s) supplied by the formatter: a single string,
    # a list of strings, or None when the format defines none.
    stop: Optional[Union[str, List[str]]] = None
class ChatFormatter(Protocol):
    """Structural type for chat formatters.

    A chat formatter is any callable that accepts a ``messages`` list plus
    arbitrary keyword arguments and returns a ``ChatFormatterResponse``.
    """

    def __call__(
        self,
        messages: List[llama_types.ChatCompletionRequestMessage],
        **kwargs: Any,
    ) -> ChatFormatterResponse:
        ...
# Registry mapping a chat format name to its formatter callable. Filled by
# `register_chat_format` and read by `get_chat_format`.
_CHAT_FORMATS: Dict[str, ChatFormatter] = {}
def register_chat_format(name: str):
    """Return a decorator that registers a formatter under ``name``.

    The decorator stores the decorated callable in ``_CHAT_FORMATS[name]``
    (replacing any existing entry with that name) and returns the callable
    unchanged.
    """

    def _store(formatter: ChatFormatter):
        _CHAT_FORMATS[name] = formatter
        return formatter

    return _store
def get_chat_format(name: str):
    """Look up the formatter registered under ``name``.

    Raises:
        ValueError: If no formatter is registered under ``name``. The message
            lists the names that are currently registered.
    """
    try:
        formatter = _CHAT_FORMATS[name]
    except KeyError:
        known_formats = list(_CHAT_FORMATS)
        raise ValueError(
            f"Invalid chat format: {name} (valid formats: {known_formats})"
        )
    return formatter
@register_chat_format("llama-2")
def format_llama2(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as a "llama-2" prompt.

    The first system message (or "" when absent) is wrapped in the
    ``<<SYS>>`` template, user/assistant messages are mapped to the
    ``[INST]`` / ``[/INST]`` tags, and an empty assistant turn is appended
    before the prompt is assembled with ``_format_llama2``. No stop sequence
    is returned.
    """
    role_tags = dict(user="[INST]", assistant="[/INST]")
    system_block = "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n".format(
        system_message=_get_system_message(messages)
    )
    turns = _map_roles(messages, role_tags)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((role_tags["assistant"], None))
    return ChatFormatterResponse(prompt=_format_llama2(system_block, turns, "\n\n"))
@register_chat_format("alpaca")
def format_alpaca(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as an "alpaca" prompt.

    The first system message (or "" when absent) opens the prompt. User and
    assistant messages are labelled "### Instruction" / "### Response" and
    joined by ``_format_add_colon_two`` with the separators "\\n\\n" and
    "</s>". No stop sequence is returned.

    Args:
        messages: Chat messages to render.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        A ``ChatFormatterResponse`` holding the rendered prompt.
    """
    _roles = dict(user="### Instruction", assistant="### Response")
    _sep = "\n\n"
    _sep2 = "</s>"
    system_message = _get_system_message(messages)
    _messages = _map_roles(messages, _roles)
    # Open the assistant turn so the prompt ends with "### Response:", as the
    # other conversational formatters in this module do. Without it the
    # prompt stops after the last message and gives no cue to respond.
    _messages.append((_roles["assistant"], None))
    _prompt = _format_add_colon_two(system_message, _messages, _sep, _sep2)
    return ChatFormatterResponse(prompt=_prompt)
# NOTE(review): this function's name shadows the builtin `format` inside this
# module; the sibling formatters are all named `format_<model>`. The name is
# kept here so existing references keep working.
@register_chat_format("vicuna")
def format(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as a "vicuna" prompt.

    A fixed system preamble is always used (system messages in ``messages``
    are not consulted). User and assistant messages are labelled "USER" /
    "ASSISTANT", an empty assistant turn is appended, and the prompt is
    assembled by ``_format_add_colon_two`` with the separators " " and
    "</s>". No stop sequence is returned.
    """
    preamble = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
    labels = dict(user="USER", assistant="ASSISTANT")
    turns = _map_roles(messages, labels)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((labels["assistant"], None))
    return ChatFormatterResponse(
        prompt=_format_add_colon_two(preamble, turns, " ", "</s>")
    )
@register_chat_format("oasst_llama")
def format_oasst_llama(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as an "oasst_llama" prompt.

    The first system message (or "" when absent) is wrapped in the
    ``<<SYS>>`` template, user/assistant messages are tagged
    ``<|prompter|>`` / ``<|assistant|>``, an empty assistant turn is
    appended, and the prompt is assembled by ``_format_no_colon_single``
    with "</s>" as separator. No stop sequence is returned.
    """
    tags = dict(user="<|prompter|>", assistant="<|assistant|>")
    system_block = "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n".format(
        system_message=_get_system_message(messages)
    )
    turns = _map_roles(messages, tags)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((tags["assistant"], None))
    return ChatFormatterResponse(
        prompt=_format_no_colon_single(system_block, turns, "</s>")
    )
@register_chat_format("openbuddy")
def format_openbuddy(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as an "openbuddy" prompt.

    A fixed system preamble is always used (system messages in ``messages``
    are not consulted). User and assistant messages are labelled "User" /
    "Assistant", an empty assistant turn is appended, and the prompt is
    assembled by ``_format_add_colon_single`` with "\\n" as separator. No
    stop sequence is returned.
    """
    preamble = """Consider a conversation between User (a human) and Assistant (named Buddy).
Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy
Buddy cannot access the Internet.
Buddy can fluently speak the user's language (e.g. English, Chinese).
Buddy can generate poems, stories, code, essays, songs, parodies, and more.
Buddy possesses vast knowledge about the world, history, and culture.
Buddy's responses are always safe, creative, high-quality, human-like, and interesting.
Buddy strictly refuses to discuss political, NSFW, or other unsafe topics.
User: Hi.
Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?"""
    labels = dict(user="User", assistant="Assistant")
    turns = _map_roles(messages, labels)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((labels["assistant"], None))
    return ChatFormatterResponse(
        prompt=_format_add_colon_single(preamble, turns, "\n")
    )
@register_chat_format("redpajama-incite")
def format_redpajama_incite(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as a "redpajama-incite" prompt.

    The first system message (or "" when absent) opens the prompt. User and
    assistant messages are tagged ``<human>`` / ``<bot>``, an empty assistant
    turn is appended, and the prompt is assembled by
    ``_format_add_colon_single`` with "\\n" as separator. The returned stop
    sequence is "<human>".
    """
    tags = dict(user="<human>", assistant="<bot>")
    turns = _map_roles(messages, tags)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((tags["assistant"], None))
    rendered = _format_add_colon_single(_get_system_message(messages), turns, "\n")
    return ChatFormatterResponse(prompt=rendered, stop="<human>")
@register_chat_format("snoozy")
def format_snoozy(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as a "snoozy" prompt.

    The first system message, or a built-in default when that is empty, is
    placed under a "### Instruction:" header. User and assistant messages are
    labelled "### Prompt" / "### Response", an empty assistant turn is
    appended, and the prompt is assembled by ``_format_add_colon_single``
    with "\\n" as separator.

    Args:
        messages: Chat messages to render.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        A ``ChatFormatterResponse`` with the rendered prompt and "###" as the
        stop sequence.
    """
    system_template = "### Instruction:\n{system_message}"
    default_system_message = "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response."
    _system_message = _get_system_message(messages)
    _system_message = (
        _system_message if _system_message != "" else default_system_message
    )
    # Keep the templated value. The previous code reassigned `system_message`
    # to the raw text afterwards, which silently dropped the
    # "### Instruction:" header from the prompt.
    system_message = system_template.format(system_message=_system_message)
    _roles = dict(user="### Prompt", assistant="### Response")
    _sep = "\n"
    _stop = "###"
    _messages = _map_roles(messages, _roles)
    # Trailing assistant entry with no content: the slot the model fills in.
    _messages.append((_roles["assistant"], None))
    _prompt = _format_add_colon_single(system_message, _messages, _sep)
    return ChatFormatterResponse(prompt=_prompt, stop=_stop)
@register_chat_format("phind")
def format_phind(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as a "phind" prompt.

    A fixed system prompt is always used (system messages in ``messages``
    are not consulted). User and assistant messages are labelled
    "### User Message" / "### Assistant", an empty assistant turn is
    appended, and the prompt is assembled by ``_format_add_colon_single``
    with "\\n\\n" as separator. No stop sequence is returned.
    """
    labels = dict(user="### User Message", assistant="### Assistant")
    turns = _map_roles(messages, labels)
    # Trailing assistant entry with no content: the slot the model fills in.
    turns.append((labels["assistant"], None))
    rendered = _format_add_colon_single(
        "### System Prompt\nYou are an intelligent programming assistant.",
        turns,
        "\n\n",
    )
    return ChatFormatterResponse(prompt=rendered)
@register_chat_format("open-orca")
def format_open_orca(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render ``messages`` as an "open-orca" prompt.

    A fixed system prompt is always used (system messages in ``messages``
    are not consulted). User and assistant messages are labelled "User" /
    "Assistant", an empty assistant turn is appended, and the prompt is
    assembled by ``_format_add_colon_space_single`` with
    "<|end_of_turn|>\\n" as separator.

    Args:
        messages: Chat messages to render.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        A ``ChatFormatterResponse`` with the rendered prompt and "User" as
        the stop sequence.
    """
    system_template = "{system_message}"
    # All fragments must sit inside one pair of parentheses so that implicit
    # string-literal concatenation joins them. Previously the parenthesis
    # closed after the first fragment, so the rest were discarded no-op
    # expression statements and the system prompt was truncated.
    system_message = (
        "You are a helpful assistant. Please answer truthfully and write out your "
        "thinking step by step to be sure you get the right answer. If you make a mistake or encounter "
        "an error in your thinking, say so out loud and attempt to correct it. If you don't know or "
        "aren't sure about something, say so clearly. You will act as a professional logician, mathematician, "
        "and physicist. You will also act as the most appropriate type of expert to answer any particular "
        "question or solve the relevant problem; state which expert type your are, if so. Also think of "
        "any particular named expert that would be ideal to answer the relevant question or solve the "
        "relevant problem; name and act as them, if appropriate."
    )
    roles = ("User", "Assistant")
    sep = "<|end_of_turn|>\n"
    # stop_token_ids=[32000, 32001],  # "<|end_of_turn|>"
    stop_str = "User"
    system_message = system_template.format(system_message=system_message)
    # `_map_roles` looks roles up by the message's own role string, so the
    # keys must be the lowercase request roles, as in the sibling formatters.
    # The previous `dict(zip(roles, roles))` keyed on "User"/"Assistant" and
    # therefore matched nothing, dropping every message.
    _messages = _map_roles(messages, dict(user=roles[0], assistant=roles[1]))
    # Trailing assistant entry with no content: the slot the model fills in.
    _messages.append((roles[1], None))
    _prompt = _format_add_colon_space_single(system_message, _messages, sep)
    return ChatFormatterResponse(prompt=_prompt, stop=stop_str)