"""Minimal streaming chat UI for a local Ollama model, built with Gradio.

Launches a Blocks app with a chatbot pane, a prompt textbox, and a send
button.  Replies are streamed token-by-token from the Ollama server and
rendered incrementally.  Conversation history is kept per browser session
by passing the Chatbot component's value through the event handlers
(NOT in a module-level global, which Gradio would share across all users).
"""
import ollama
import gradio

# Model served by the local Ollama daemon; change here to swap models.
MODEL_NAME = "tinydolphin"


def _history_to_messages(history):
    """Convert Gradio (user, assistant) history pairs to Ollama chat messages.

    Args:
        history: list of (user_text, assistant_text) tuples, possibly empty.

    Returns:
        Flat list of {"role": ..., "content": ...} dicts, user/assistant
        interleaved in conversation order.
    """
    messages = []
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    return messages


def predict(prompt, history):
    """Stream a model reply for *prompt*, yielding UI updates per chunk.

    Args:
        prompt: the user's new message.
        history: current Chatbot value — list of (user, assistant) pairs
            for THIS session only (may be None on first call).

    Yields:
        ("", updated_history) tuples: the empty string clears the textbox,
        and the growing history re-renders the chat pane after every chunk.
    """
    history = list(history or [])  # copy: never mutate the component value in place
    response = ollama.chat(
        model=MODEL_NAME,
        messages=[
            *_history_to_messages(history),
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    # Placeholder entry that the streaming loop fills in below.
    history.append((prompt, ""))
    message = ""
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history


with gradio.Blocks(theme="abidlabs/Lime", fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        # Explicit label: the unlabeled original rendered Gradio's default
        # "Run" text, which is unclear next to a chat box.
        button = gradio.Button("Send", scale=1)
    # Both clicking the button and pressing Enter send the message.
    # `chat` is wired as an input so each session carries its own history.
    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt, chat], outputs=[prompt, chat])

if __name__ == "__main__":
    demo.launch()