From 4ff8def4d0896ff3ecf800d2a7a738cdd012aa34 Mon Sep 17 00:00:00 2001
From: Joe
Date: Sun, 5 Nov 2023 17:06:36 -0600
Subject: [PATCH] #717: Add support for Hugging Face AutoTokenizer (#790)

Co-authored-by: Andrei
---
 llama_cpp/llama_chat_format.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 36382a4..ef61d66 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -510,6 +510,26 @@ def format_chatml(
     _prompt = _format_chatml(system_message, _messages, _sep)
     return ChatFormatterResponse(prompt=_prompt)
 
+# eg, export HF_MODEL=mistralai/Mistral-7B-Instruct-v0.1
+@register_chat_format("autotokenizer")
+def format_autotokenizer(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    # https://huggingface.co/docs/transformers/main/chat_templating
+    # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1#instruction-format
+    # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json
+    import os
+    from transformers import AutoTokenizer
+    huggingface_model = os.getenv("HF_MODEL")  # eg, mistralai/Mistral-7B-Instruct-v0.1
+    if not huggingface_model:
+        raise ValueError("HF_MODEL must be set in the environment to use chat format 'autotokenizer'")
+    tokenizer = AutoTokenizer.from_pretrained(huggingface_model)
+    # Use only the model's own chat template; skip the tokenizer's default system prompt.
+    tokenizer.use_default_system_prompt = False
+    _prompt = tokenizer.apply_chat_template(messages, tokenize=False)
+    # Return the formatted prompt and the tokenizer's EOS token as the stop sequence.
+    return ChatFormatterResponse(prompt=_prompt, stop=tokenizer.eos_token)
 
 @register_chat_completion_handler("functionary")
 def functionary_chat_handler(
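
Usage sketch for the new chat format, assuming a local GGUF build of the same model whose
chat template is fetched via HF_MODEL; the model path below is hypothetical, while `Llama`
and `create_chat_completion` are the existing llama-cpp-python entry points:

    import os

    from llama_cpp import Llama

    # Point the "autotokenizer" chat format at the HF repo whose chat template to apply.
    os.environ["HF_MODEL"] = "mistralai/Mistral-7B-Instruct-v0.1"

    # Hypothetical path to a local GGUF build of the same model.
    llm = Llama(
        model_path="./mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        chat_format="autotokenizer",
    )

    # The prompt is rendered by the HF tokenizer's chat template, and generation
    # stops at the tokenizer's EOS token returned by format_autotokenizer.
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": "What does a chat template do?"}]
    )
    print(response["choices"][0]["message"]["content"])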