from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Load the pretrained DialoGPT-large checkpoint and its tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

def predict(user_input, history=None):
    # Encode the user input, appending the end-of-text token as a turn separator
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")

    # Append the new turn to the accumulated chat history, if any
    if history:
        bot_input_ids = torch.cat([torch.tensor(history), new_input_ids], dim=-1)
    else:
        bot_input_ids = new_input_ids

    # Generate a response; note that max_length counts the full sequence
    # (history + new reply), not just the generated tokens
    chat_history_ids = model.generate(
        bot_input_ids,
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens (everything after the prompt)
    response = tokenizer.decode(
        chat_history_ids[:, bot_input_ids.shape[-1]:][0],
        skip_special_tokens=True,
    )

    # Return the reply plus the updated history as a plain list so Gradio can store it as state
    return response, chat_history_ids.tolist()

iface = gr.Interface(
    fn=predict,
    inputs=["text", "state"],
    outputs=["text", "state"],
    title="DialoGPT Chatbot",
    description="Chat with our intelligent agent. Your chat history is preserved.",
)
iface.launch(share=True, show_error=True, inline=True)
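
# A quick sanity check (a minimal sketch; the prompt strings are illustrative,
# not from the original): you could call predict() directly before launching
# the UI, feeding the returned state back in to simulate a two-turn chat.
# Left commented out because launch() above blocks the script.
#
#   reply, state = predict("Hello! How are you today?")
#   print(reply)
#   reply, state = predict("What do you like to do for fun?", state)
#   print(reply)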