docs(examples): Add gradio chat example

2024-02-25 21:09:13 -05:00 · 2024-02-25 21:09:13 -05:00 · bd4ec2e612
commit bd4ec2e612
parent dcf38f6141
2 changed files with 115 additions and 0 deletions
--- a/examples/gradio_chat/local.py
+++ b/examples/gradio_chat/local.py
@@ -0,0 +1,59 @@
 import llama_cpp
 import llama_cpp.llama_tokenizer
 import gradio as gr
 # In-process model used by predict(). It is built with Llama.from_pretrained
 # from the "Qwen/Qwen1.5-0.5B-Chat-GGUF" repo; the filename pattern selects the
 # q8_0 GGUF file.
 # NOTE(review): the tokenizer is loaded from "Qwen/Qwen1.5-0.5B" (no "-Chat"
 # suffix) — presumably it shares the chat model's vocabulary; confirm.
 llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
    verbose=False
 )
 # Model name forwarded to create_chat_completion_openai_v1 by predict().
 # NOTE(review): "gpt-3.5-turbo" does not match the Qwen weights loaded above —
 # presumably a placeholder label; confirm.
 model = "gpt-3.5-turbo"
 def predict(message, history):
    """Stream a chat reply from the in-process model.

    Args:
        message: The newest user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs
            from earlier turns.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries non-empty content.
    """
    conversation = []
    for past_user, past_assistant in history:
        conversation.extend(
            (
                {"role": "user", "content": past_user},
                {"role": "assistant", "content": past_assistant},
            )
        )
    conversation.append({"role": "user", "content": message})

    stream = llama.create_chat_completion_openai_v1(
        model=model, messages=conversation, stream=True
    )

    reply = ""
    for piece in stream:
        fragment = piece.choices[0].delta.content
        # Chunks with empty or missing content add nothing to the reply.
        if not fragment:
            continue
        reply += fragment
        yield reply
 # Client-side hook handed to gr.Blocks(js=...): reloads the page with the
 # `__theme=dark` query parameter unless it is already set to "dark".
 # The URL API is used so that an existing query string or #fragment is kept
 # intact instead of having a second "?" appended after it, and `const` keeps
 # the variable from leaking into the global scope.
 js = """function () {
  const url = new URL(window.location.href);
  if (url.searchParams.get('__theme') !== 'dark') {
    url.searchParams.set('__theme', 'dark');
    window.location.replace(url.href);
  }
 }"""
 # Extra stylesheet passed to gr.Blocks(css=...); the first rule hides the
 # page's <footer> element.
 # NOTE(review): `full-height` is written as an element selector (no leading
 # "." or "#") and nothing in this file assigns that name — confirm whether a
 # class selector `.full-height` was intended.
 css = """
 footer {
    visibility: hidden;
 }
 full-height {
    height: 100%;
 }
 """
 # Assemble the UI: a Blocks page (Soft theme, custom js/css) that wraps a
 # single ChatInterface driven by predict(), with two example prompts.
 with gr.Blocks(
    theme=gr.themes.Soft(),
    js=js,
    css=css,
    fill_height=True,
 ) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )

 # Launch the app only when this file is executed as a script.
 if __name__ == "__main__":
    demo.launch()
--- a/examples/gradio_chat/server.py
+++ b/examples/gradio_chat/server.py
@@ -0,0 +1,56 @@
 import gradio as gr
 from openai import OpenAI
 # OpenAI client used by predict(), pointed at an OpenAI-compatible endpoint
 # on localhost port 8000 instead of the default API host.
 # NOTE(review): the api_key value looks like a dummy — presumably the local
 # server does not validate it; confirm.
 client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="llama.cpp"
 )
 # Model name sent with every chat completion request made by predict().
 # NOTE(review): presumably the local server maps or ignores this name; confirm.
 model = "gpt-3.5-turbo"
 def predict(message, history):
    """Stream a chat reply obtained through the OpenAI client.

    Args:
        message: The newest user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs
            from earlier turns.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries non-empty content.
    """
    # Flatten each past (user, assistant) pair into two role-tagged entries,
    # then add the new user message as the final turn.
    turns = [
        {"role": role, "content": text}
        for user_text, assistant_text in history
        for role, text in (("user", user_text), ("assistant", assistant_text))
    ]
    turns.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=model, messages=turns, stream=True
    )

    so_far = ""
    for event in stream:
        delta_text = event.choices[0].delta.content
        # Chunks with empty or missing content add nothing to the reply.
        if not delta_text:
            continue
        so_far += delta_text
        yield so_far
 # Client-side hook handed to gr.Blocks(js=...): reloads the page with the
 # `__theme=dark` query parameter unless it is already set to "dark".
 # The URL API is used so that an existing query string or #fragment is kept
 # intact instead of having a second "?" appended after it, and `const` keeps
 # the variable from leaking into the global scope.
 js = """function () {
  const url = new URL(window.location.href);
  if (url.searchParams.get('__theme') !== 'dark') {
    url.searchParams.set('__theme', 'dark');
    window.location.replace(url.href);
  }
 }"""
 # Extra stylesheet passed to gr.Blocks(css=...); the first rule hides the
 # page's <footer> element.
 # NOTE(review): `full-height` is written as an element selector (no leading
 # "." or "#") and nothing in this file assigns that name — confirm whether a
 # class selector `.full-height` was intended.
 css = """
 footer {
    visibility: hidden;
 }
 full-height {
    height: 100%;
 }
 """
 # Assemble the UI: a Blocks page (Soft theme, custom js/css) that wraps a
 # single ChatInterface driven by predict(), with two example prompts.
 with gr.Blocks(
    theme=gr.themes.Soft(),
    js=js,
    css=css,
    fill_height=True,
 ) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )

 # Launch the app only when this file is executed as a script.
 if __name__ == "__main__":
    demo.launch()