diff --git a/examples/gradio_chat/local.py b/examples/gradio_chat/local.py
new file mode 100644
index 0000000..a7de8e8
--- /dev/null
+++ b/examples/gradio_chat/local.py
@@ -0,0 +1,66 @@
+import llama_cpp
+import llama_cpp.llama_tokenizer
+
+import gradio as gr
+
+# Load the GGUF weights from the Hugging Face Hub, pairing them with the
+# original HF tokenizer so chat templating matches the base model.
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+    verbose=False
+)
+
+# Placeholder model name; llama-cpp-python answers for whichever model was loaded above.
+model = "gpt-3.5-turbo"
+
+
+def predict(message, history):
+    # Rebuild the OpenAI-style message list from Gradio's (user, assistant) history pairs.
+    messages = []
+
+    for user_message, assistant_message in history:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+
+    messages.append({"role": "user", "content": message})
+
+    response = llama.create_chat_completion_openai_v1(
+        model=model,
+        messages=messages,
+        stream=True
+    )
+
+    # Stream the reply, yielding the accumulated text so Gradio updates the message in place.
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        if content:
+            text += content
+            yield text
+
+
+# Force Gradio's dark theme by appending the query parameter on first load.
+js = """function () {
+    const gradioURL = window.location.href;
+    if (!gradioURL.endsWith('?__theme=dark')) {
+        window.location.replace(gradioURL + '?__theme=dark');
+    }
+}"""
+
+css = """
+footer {
+    visibility: hidden;
+}
+.full-height {
+    height: 100%;
+}
+"""
+
+with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
+    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/examples/gradio_chat/server.py b/examples/gradio_chat/server.py
new file mode 100644
index 0000000..36fa43f
--- /dev/null
+++ b/examples/gradio_chat/server.py
@@ -0,0 +1,63 @@
+import gradio as gr
+
+from openai import OpenAI
+
+# Point the OpenAI client at a local OpenAI-compatible server
+# (e.g. llama-cpp-python's); the api_key is a placeholder, not a secret.
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="llama.cpp"
+)
+
+# Placeholder model name; the local server answers for whichever model it loaded.
+model = "gpt-3.5-turbo"
+
+
+def predict(message, history):
+    # Rebuild the OpenAI-style message list from Gradio's (user, assistant) history pairs.
+    messages = []
+
+    for user_message, assistant_message in history:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+
+    messages.append({"role": "user", "content": message})
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        stream=True
+    )
+
+    # Stream the reply, yielding the accumulated text so Gradio updates the message in place.
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        if content:
+            text += content
+            yield text
+
+
+# Force Gradio's dark theme by appending the query parameter on first load.
+js = """function () {
+    const gradioURL = window.location.href;
+    if (!gradioURL.endsWith('?__theme=dark')) {
+        window.location.replace(gradioURL + '?__theme=dark');
+    }
+}"""
+
+css = """
+footer {
+    visibility: hidden;
+}
+.full-height {
+    height: 100%;
+}
+"""
+
+with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
+    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+
+
+if __name__ == "__main__":
+    demo.launch()
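
Note for running these examples: local.py needs no server, since it loads the model in-process via Llama.from_pretrained. server.py, by contrast, assumes an OpenAI-compatible endpoint at http://localhost:8000/v1. One minimal way to provide that endpoint (an assumption, not part of this patch; the model path below is a placeholder for a locally downloaded GGUF file) is llama-cpp-python's bundled server:

    python3 -m llama_cpp.server --model <path/to/qwen1_5-0_5b-chat-q8_0.gguf> --port 8000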