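"""Gradio chat UI that streams replies from an OpenAI-compatible endpoint.

A minimal streaming chat front-end; it assumes an OpenAI-compatible server
(for example, a llama.cpp HTTP server) is already listening at
http://localhost:8000/v1.
"""
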
import gradio as gr
from openai import OpenAI

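# Point the client at the local server. The api_key is a placeholder: local
# OpenAI-compatible servers typically accept any non-empty key unless one has
# been configured.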
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="llama.cpp"
)

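# The model name is passed through to the backend; a single-model local server
# will often ignore it, so any identifier the server accepts will do.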
model = "gpt-3.5-turbo"
|
||
|
|
||
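# Chat handler for gr.ChatInterface: rebuild the conversation in OpenAI chat
# format from the (user, assistant) pairs in Gradio's default history, append
# the new user message, then stream the assistant's reply.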
def predict(message, history):
    messages = []

    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})

    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True
    )

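    # Accumulate streamed deltas and yield the growing reply so the chat
    # window updates incrementally.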
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text


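# Small JS snippet run on page load: force Gradio's dark theme by reloading
# the page with ?__theme=dark appended to the URL.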
js = """function () {
    const gradioURL = window.location.href
    if (!gradioURL.endsWith('?__theme=dark')) {
        window.location.replace(gradioURL + '?__theme=dark');
    }
}"""

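# Page styling: hide the Gradio footer.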
css = """
footer {
    visibility: hidden;
}
full-height {
    height: 100%;
}
"""

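# Wrap the ChatInterface in a Blocks context so the page-level theme, js, and
# css settings apply to the whole app.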
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=["What is the capital of France?", "Who was the first person on the moon?"],
    )

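# Run the app (Gradio serves on http://127.0.0.1:7860 by default).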
if __name__ == "__main__":
    demo.launch()