import gradio as gr
import torch
from transformers import pipeline

# Load Dolly V2 3B as a text-generation pipeline.
generate_text = pipeline(
    model="databricks/dolly-v2-3b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# Red-tinted variant of the Soft theme for the Gradio UI.
theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        "#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444",
        "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e",
    ),
    neutral_hue="red",
)

title = """
Chat with awesome Dolly V2 3B model

🏃‍♂️💨Streaming with Transformers & Gradio💪
"""

with gr.Blocks(theme=theme) as demo:
    gr.HTML(title)
    gr.HTML(
        "This runs on CPU, so it might be a bit slow. The model is great with "
        "language but may fail at questions involving numbers. If you want to "
        "skip the wait, just clone the app."
    )
    Dolly = gr.Chatbot().style(height=500)
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        # Generate a reply and append the (user, bot) pair to the chat history.
        res = generate_text(message)
        bot_message = str(res[0]["generated_text"])
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, Dolly], [msg, Dolly])
    clear.click(lambda: None, None, Dolly, queue=False)

demo.launch(debug=True)