Co-authored-by: Andrei <abetlen@gmail.com>
This commit is contained in:
parent
3580e2c5df
commit
4ff8def4d0
1 changed file with 20 additions and 0 deletions
|
@ -510,6 +510,26 @@ def format_chatml(
|
|||
_prompt = _format_chatml(system_message, _messages, _sep)
|
||||
return ChatFormatterResponse(prompt=_prompt)
|
||||
|
||||
# eg, export HF_MODEL=mistralai/Mistral-7B-Instruct-v0.1
@register_chat_format("autotokenizer")
def format_autotokenizer(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Format a chat prompt using a HuggingFace tokenizer's chat template.

    The model id is read from the ``HF_MODEL`` environment variable
    (e.g. ``mistralai/Mistral-7B-Instruct-v0.1``) and its tokenizer's
    chat template is applied to *messages*.

    Raises:
        ValueError: if ``HF_MODEL`` is not set in the environment.
    """
    # https://huggingface.co/docs/transformers/main/chat_templating
    # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1#instruction-format
    # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json
    import os

    # Lazy import: transformers is an optional dependency, only needed
    # when this chat format is actually selected.
    from transformers import AutoTokenizer

    hf_model = os.getenv("HF_MODEL")  # eg, mistralai/Mistral-7B-Instruct-v0.1
    if not hf_model:
        raise ValueError("HF_MODEL needs to be set in env to use chat format 'autotokenizer'")
    tokenizer = AutoTokenizer.from_pretrained(hf_model)
    # Suppress the tokenizer's built-in default system prompt so only the
    # caller-supplied messages shape the prompt.
    tokenizer.use_default_system_prompt = False
    _prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    # Return formatted prompt and eos token by default
    return ChatFormatterResponse(prompt=_prompt, stop=tokenizer.eos_token)
|
||||
|
||||
@register_chat_completion_handler("functionary")
|
||||
def functionary_chat_handler(
|
||||
|
|
Loading…
Reference in a new issue