"""Gradio chat UI that streams replies from an OpenAI-compatible endpoint.

The script builds a ``gr.ChatInterface`` whose handler forwards the whole
conversation to the chat-completions API and yields the reply as it streams.
"""

from typing import Any, Dict, Iterator, List

import gradio as gr
from openai import OpenAI

# Client for the chat-completions endpoint served at localhost:8000.
# NOTE(review): the api_key looks like a placeholder for a local server that
# does not validate it -- confirm against the server configuration.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="llama.cpp",
)

# Model name sent with every request.
# NOTE(review): presumably an alias the local server maps to whatever model it
# has loaded -- confirm.
model = "gpt-3.5-turbo"


def _history_to_messages(history: Any) -> List[Dict[str, Any]]:
    """Convert Gradio chat history into a chat-completions ``messages`` list.

    Two entry shapes are accepted:

    * two-item ``(user_message, assistant_message)`` turns, expanded into one
      ``user`` and one ``assistant`` message each;
    * dicts carrying ``role`` and ``content`` keys, forwarded with only those
      two keys.

    A ``None`` message (for example a turn with no assistant reply yet) is
    dropped rather than sent as ``content: None``.
    """
    messages: List[Dict[str, Any]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
            continue

        user_message, assistant_message = turn
        if user_message is not None:
            messages.append({"role": "user", "content": user_message})
        if assistant_message is not None:
            messages.append({"role": "assistant", "content": assistant_message})
    return messages


def predict(message: str, history: list) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Earlier conversation turns as supplied by the chat UI.

    Yields:
        The reply accumulated so far, once for every streamed chunk that
        carries new text, so the UI can render it incrementally.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )

    text = ""
    for chunk in response:
        # A chunk may arrive with an empty ``choices`` list; indexing it
        # would raise IndexError and abort the whole stream.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text


# Runs in the browser on page load: force the dark theme by setting the
# ``__theme`` query parameter. The URL API keeps any existing query string and
# fragment intact instead of appending a second "?" to the address.
js = """function () {
    const url = new URL(window.location.href);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.replace(url.href);
    }
}"""

# NOTE(review): ``full-height`` below is a type selector (it matches a
# <full-height> element). If a CSS class was intended it should be
# ``.full-height``; nothing in this file applies such a class -- confirm
# before changing.
css = """
footer {
    visibility: hidden;
}
full-height {
    height: 100%;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )


if __name__ == "__main__":
    demo.launch()