import spaces
import gradio as gr
import torch
from transformers import pipeline

# Make CUDA the default device for tensors created from here on.
torch.set_default_device("cuda")

# Hugging Face Hub id of the chat model this app serves.
MODEL_ID = "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b"

# Built once at import time; `predict` reuses this pipeline for every request.
pipe = pipeline(task="text-generation", model=MODEL_ID)

@spaces.GPU(duration=120)
def predict(message: str, history: list) -> str:
    """Generate the assistant's reply to *message* given the prior chat.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation. Two layouts are
            accepted: pair-style entries ``(user_text, assistant_text)``
            and message-style dicts carrying ``"role"`` and ``"content"``
            keys.

    Returns:
        The ``content`` of the last message in the pipeline's
        ``generated_text``, i.e. the newly generated assistant turn.
    """
    # NOTE(review): `spaces.GPU(duration=120)` presumably requests a GPU for
    # up to 120 s per call on ZeroGPU Spaces; confirm against the Spaces docs.
    conv = [{"role": "system", "content": "You are Dolphin, a helpful AI assistant."}]
    for item in history:
        if isinstance(item, dict):
            # Message-style entry: forward only the fields the chat template
            # needs, dropping any extra UI keys.
            conv.append({"role": item["role"], "content": item["content"]})
            continue
        # Pair-style entry; either side may be None when that turn is absent.
        user_text, assistant_text = item[0], item[1]
        if user_text is not None:
            conv.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            conv.append({"role": "assistant", "content": assistant_text})
    conv.append({"role": "user", "content": message})

    # Given a list of chat messages, the pipeline returns the conversation
    # with the model's reply appended as the final message.
    outputs = pipe(conv, max_new_tokens=1024)
    return outputs[0]["generated_text"][-1]["content"]

# Wrap `predict` in a chat UI and start serving it.
demo = gr.ChatInterface(fn=predict)
demo.launch()