"""Gradio demo that summarizes news text with a pretrained seq2seq checkpoint."""

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hugging Face Hub checkpoint used for both the tokenizer and the model.
model_ckpt = "GenzNepal/mt5-summarize-nepali"
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Loaded once at import time so every request reuses the same objects.
t5_tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt).to(device)


def summarize(text):
    """Return a generated summary of *text*.

    The text is tokenized (truncated to at most 1024 tokens), passed through
    beam-search generation on ``device``, and the single returned sequence is
    decoded back to a string with special tokens removed.

    Args:
        text: The article text to summarize.

    Returns:
        The decoded summary, or an empty string when *text* is ``None``,
        empty, or whitespace-only.
    """
    # With nothing to summarize, min_length=100 would still force the model
    # to emit at least 100 tokens of made-up output, so skip generation.
    if text is None or not text.strip():
        return ""

    # A single sequence needs no padding; padding it out to 1024 tokens only
    # makes the encoder process positions that the attention mask hides.
    inputs = t5_tokenizer(
        text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
        add_special_tokens=True,
    )

    generation = model.generate(
        input_ids=inputs["input_ids"].to(device),
        attention_mask=inputs["attention_mask"].to(device),
        num_beams=6,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        repetition_penalty=1.0,
        min_length=100,
        max_length=250,
        length_penalty=2.0,
        early_stopping=True,
    )

    # Convert the generated token ids back to text.
    output = t5_tokenizer.decode(
        generation[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
    return output


demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(placeholder="Enter news ", lines=5, max_lines=20, label="News"),
    outputs=gr.Textbox(label="Generated Summary"),
)

if __name__ == "__main__":
    demo.launch()