From bd4ec2e612cdba200a5e38544ae2e1b0eda132f6 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sun, 25 Feb 2024 21:09:13 -0500
Subject: [PATCH] docs(examples): Add gradio chat example

Add two variants of a gradio chat UI: local.py runs the model
in-process through llama-cpp-python's OpenAI-compatible chat API,
while server.py connects to a running llama-cpp-python server with
the openai client.
---
 examples/gradio_chat/local.py  | 65 ++++++++++++++++++++++++++++++++++
 examples/gradio_chat/server.py | 62 ++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 examples/gradio_chat/local.py
 create mode 100644 examples/gradio_chat/server.py

diff --git a/examples/gradio_chat/local.py b/examples/gradio_chat/local.py
new file mode 100644
index 0000000..a7de8e8
--- /dev/null
+++ b/examples/gradio_chat/local.py
@@ -0,0 +1,65 @@
+import llama_cpp
+import llama_cpp.llama_tokenizer
+
+import gradio as gr
+
+# Download the GGUF weights from the Hugging Face Hub and run them in-process.
+# The HF tokenizer keeps chat templating consistent with the original model.
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+    verbose=False
+)
+
+# Placeholder model name in the OpenAI style; it does not select a model here.
+model = "gpt-3.5-turbo"
+
+def predict(message, history):
+    # Rebuild the OpenAI-style message list from gradio's (user, assistant) pairs.
+    messages = []
+
+    for user_message, assistant_message in history:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+
+    messages.append({"role": "user", "content": message})
+
+    response = llama.create_chat_completion_openai_v1(
+        model=model,
+        messages=messages,
+        stream=True
+    )
+
+    # Accumulate streamed deltas and yield the growing reply to the chat UI.
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        if content:
+            text += content
+            yield text
+
+
+# Force gradio's dark theme by appending the theme query parameter to the URL.
+js = """function () {
+  const gradioURL = window.location.href;
+  if (!gradioURL.endsWith('?__theme=dark')) {
+    window.location.replace(gradioURL + '?__theme=dark');
+  }
+}"""
+
+css = """
+footer {
+    visibility: hidden;
+}
+.full-height {
+    height: 100%;
+}
+"""
+
+with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
+    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/examples/gradio_chat/server.py b/examples/gradio_chat/server.py
new file mode 100644
index 0000000..36fa43f
--- /dev/null
+++ b/examples/gradio_chat/server.py
@@ -0,0 +1,62 @@
+import gradio as gr
+
+from openai import OpenAI
+
+# Point the OpenAI client at a local llama-cpp-python server. The API key is
+# arbitrary: a server started without an API key accepts any value.
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="llama.cpp"
+)
+
+# Placeholder model name in the OpenAI style; a single-model server ignores it.
+model = "gpt-3.5-turbo"
+
+def predict(message, history):
+    # Rebuild the OpenAI-style message list from gradio's (user, assistant) pairs.
+    messages = []
+
+    for user_message, assistant_message in history:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+
+    messages.append({"role": "user", "content": message})
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        stream=True
+    )
+
+    # Accumulate streamed deltas and yield the growing reply to the chat UI.
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        if content:
+            text += content
+            yield text
+
+
+# Force gradio's dark theme by appending the theme query parameter to the URL.
+js = """function () {
+  const gradioURL = window.location.href;
+  if (!gradioURL.endsWith('?__theme=dark')) {
+    window.location.replace(gradioURL + '?__theme=dark');
+  }
+}"""
+
+css = """
+footer {
+    visibility: hidden;
+}
+.full-height {
+    height: 100%;
+}
+"""
+
+with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
+    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+
+
+if __name__ == "__main__":
+    demo.launch()
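--
Usage note: local.py is self-contained; on first run it downloads the
Qwen1.5 0.5B chat GGUF weights from the Hugging Face Hub. server.py
assumes an OpenAI-compatible llama-cpp-python server is already
listening on http://localhost:8000 (the server's default address). A
sketch of one way to start such a server; the model path here is
illustrative and not part of this patch:

    python -m llama_cpp.server --model ./qwen1_5-0_5b-chat-q8_0.gguf

With a single loaded model the server answers regardless of the
requested model name, so the "gpt-3.5-turbo" placeholder works as-is,
and the "llama.cpp" api_key only needs to be non-empty.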