"""Gradio app: clean an English document, summarize it, and translate the summary to Hindi."""

import re

import gradio as gr
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, pipeline

_MT_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"

# Models are loaded once at import time so every request reuses them.
model = MBartForConditionalGeneration.from_pretrained(_MT_CHECKPOINT)
tokenizer = MBart50TokenizerFast.from_pretrained(_MT_CHECKPOINT, src_lang="en_XX")
pipe2 = pipeline('summarization', model="Tiju1996/conversation-summ")

# Patterns used by process_text, compiled once at module level.
_MD_IMAGE_RE = re.compile(r'!\[[^\]]*\]\([^)]*\)')
_MD_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
_BRACKETED_RE = re.compile(r'\[[^\]]*\]')
_NON_ASCII_RE = re.compile(r'[^\x00-\x7F]+')
_HTML_TAG_RE = re.compile(r'<.*?>')
_URL_RE = re.compile(r'http\S+')


def process_text(text):
    """Return *text* with markup, citations, URLs and non-ASCII characters removed.

    The steps run in this order:

    1. Markdown images (``![alt](target)``) are dropped entirely.
    2. Markdown links (``[label](target)``) are replaced by their label.
    3. Any remaining ``[...]`` span is removed; this covers numeric
       citations and footnotes such as ``[12]``.
    4. Every non-ASCII character is removed (this includes emojis).
    5. HTML tags are removed.
    6. Bare URLs starting with ``http`` are removed.
    7. Leading and trailing whitespace is stripped.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; may be empty.
    """
    # Links must be unwrapped BEFORE the generic bracket removal, otherwise
    # the label is deleted first and only a dangling "(url)" is left behind.
    text = _MD_IMAGE_RE.sub('', text)
    text = _MD_LINK_RE.sub(r'\1', text)
    text = _BRACKETED_RE.sub('', text)
    # Dropping everything outside ASCII also removes emojis, so no separate
    # emoji pass is needed.
    text = _NON_ASCII_RE.sub('', text)
    text = _HTML_TAG_RE.sub('', text)
    text = _URL_RE.sub('', text)
    return text.strip()


def summarize(article_en_raw):
    """Summarize an English document and return the summary translated to Hindi.

    Args:
        article_en_raw: The raw English text entered by the user.

    Returns:
        The Hindi translation of the generated summary, or an empty string
        when nothing is left of the input after cleaning.
    """
    article_en = process_text(article_en_raw or "")
    if not article_en:
        # Nothing to summarize; skip both models.
        return ""

    # truncation=True keeps over-long documents within the summarizer's
    # maximum input length instead of failing inside the model.
    summary_en = pipe2(article_en, truncation=True)

    model_inputs = tokenizer(summary_en[0]['summary_text'], return_tensors="pt")
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
    )
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return translation[0]


if hasattr(gr, "Textbox"):
    # Prefer the top-level component; gr.inputs / gr.outputs are deprecated.
    input_text = gr.Textbox(lines=20, label="Enter text document to be summarized")
    output_text = gr.Textbox(label="Summarized Text")
else:
    # Fallback for Gradio releases that lack the top-level component.
    input_text = gr.inputs.Textbox(lines=20, label="Enter text document to be summarized")
    output_text = gr.outputs.Textbox(label="Summarized Text")

gradio_interface = gr.Interface(
    fn=summarize,
    inputs=input_text,
    outputs=output_text,
    title="DistilBART Text Summarization App",
    description="Enter a text document and get its summarized version.",
)

if __name__ == "__main__":
    gradio_interface.launch()