"""Gradio chat front-end for the Dolphin 2.9.1 Mixtral 1x22b model.

Loads a Hugging Face ``text-generation`` pipeline once at import time and
serves it through ``gr.ChatInterface``.
"""

# NOTE: `spaces` is deliberately kept as the first import, as in the original
# script. ZeroGPU Spaces presumably need it loaded before any CUDA-using
# library is initialised -- confirm against the ZeroGPU docs before reordering.
import spaces
import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b"
SYSTEM_PROMPT = "You are Dolphin, a helpful AI assistant."
MAX_NEW_TOKENS = 1024

torch.set_default_device("cuda")
pipe = pipeline("text-generation", model=MODEL_ID)


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content messages.

    Accepts both history layouts a chat callback may receive:

    * pair style: ``[[user_text, assistant_text], ...]``
    * message style: ``[{"role": ..., "content": ...}, ...]``

    Entries whose content is ``None`` (e.g. a turn with no assistant reply)
    are skipped so the chat template never receives a null message body.
    """
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            # Message-style entry: copy only the keys the chat template needs
            # and ignore roles other than user/assistant.
            role = item.get("role")
            content = item.get("content")
            if role in ("user", "assistant") and content is not None:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style entry: (user_text, assistant_text).
        user_text, assistant_text = item[0], item[1]
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


@spaces.GPU(duration=120)
def predict(message, history):
    """Generate the assistant's reply to ``message`` given prior ``history``.

    Args:
        message: The latest user message.
        history: Previous turns as supplied by ``gr.ChatInterface``, either
            as ``[user, assistant]`` pairs or as ``{"role", "content"}``
            dicts.

    Returns:
        The content of the final message produced by the pipeline, i.e. the
        newly generated assistant reply.
    """
    conv = [{"role": "system", "content": SYSTEM_PROMPT}]
    conv.extend(_history_to_messages(history))
    conv.append({"role": "user", "content": message})

    # With chat-style input the pipeline returns the whole conversation under
    # "generated_text"; the last entry is the newly generated reply.
    outputs = pipe(conv, max_new_tokens=MAX_NEW_TOKENS)
    return outputs[0]["generated_text"][-1]["content"]


gr.ChatInterface(predict).launch()