from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Load the pretrained DialoGPT-large checkpoint and its tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

def predict(user_input, history=None):
    # Encode the user input, appending the end-of-text token as a turn separator
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")

    # Append the new turn to the accumulated chat history, if any
    if history:
        bot_input_ids = torch.cat([torch.tensor(history), new_input_ids], dim=-1)
    else:
        bot_input_ids = new_input_ids

    # Generate a response; note that max_length counts the full sequence
    # (history + new reply), not just the generated tokens
    chat_history_ids = model.generate(
        bot_input_ids,
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens (everything after the prompt)
    response = tokenizer.decode(
        chat_history_ids[:, bot_input_ids.shape[-1]:][0],
        skip_special_tokens=True,
    )

    # Return the reply plus the updated history as a plain list so Gradio can store it as state
    return response, chat_history_ids.tolist()

iface = gr.Interface(
    fn=predict,
    inputs=["text", "state"],
    outputs=["text", "state"],
    title="DialoGPT Chatbot",
    description="Chat with our intelligent agent. Your chat history is preserved.",
)
iface.launch(share=True, show_error=True, inline=True)
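
# A quick sanity check (a minimal sketch; the prompt strings are illustrative,
# not from the original): you could call predict() directly before launching
# the UI, feeding the returned state back in to simulate a two-turn chat.
# Left commented out because launch() above blocks the script.
#
#   reply, state = predict("Hello! How are you today?")
#   print(reply)
#   reply, state = predict("What do you like to do for fun?", state)
#   print(reply)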