"""Gradio app that summarizes a Vietnamese news article given its URL.

The article body is fetched and parsed with ``newspaper``, summarized with a
pretrained seq2seq checkpoint loaded through ``transformers``, and exposed
through a simple text-in / text-out Gradio interface.
"""

from newspaper import Article
from newspaper import Config
import gradio as gr
from transformers import MBartForConditionalGeneration
from transformers import AutoTokenizer

model_name = "haotieu/vietnamese-summarization"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

# Browser-like user agent sent with the article request.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
    'Gecko/20100101 Firefox/78.0'
)
# Timeout handed to newspaper's request configuration.
REQUEST_TIMEOUT = 20

# Tokenization / generation limits (in tokens).
MAX_INPUT_TOKENS = 512
MAX_SUMMARY_TOKENS = 128
MIN_SUMMARY_TOKENS = 64


def extract_article_text(url: str) -> str:
    """Download the page at *url* and return the article's main text.

    Args:
        url: Address of the news article to fetch.

    Returns:
        The text extracted by ``newspaper``; may be empty when nothing
        could be extracted from the page.
    """
    config = Config()
    config.browser_user_agent = USER_AGENT
    config.request_timeout = REQUEST_TIMEOUT

    article = Article(url, config=config)
    article.download()
    article.parse()
    return article.text


def news_summarizer(url: str) -> str:
    """Return a generated summary of the news article located at *url*.

    Args:
        url: Address of the news article to summarize.

    Returns:
        The decoded summary text with special tokens removed.

    Raises:
        ValueError: If *url* is blank, or if no text could be extracted
            from the page (summarizing an empty input would only make the
            model invent content).
    """
    url = (url or "").strip()
    if not url:
        raise ValueError("Please provide the URL of a news article.")

    text = extract_article_text(url)
    if not text or not text.strip():
        raise ValueError(f"No article text could be extracted from {url!r}.")

    # Input is truncated to MAX_INPUT_TOKENS, so only the beginning of a
    # long article contributes to the summary.
    encoded = tokenizer(
        text,
        return_tensors='pt',
        max_length=MAX_INPUT_TOKENS,
        truncation=True,
        padding=True,
    )
    # Pass the attention mask explicitly instead of letting generate() infer it.
    summary_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=MAX_SUMMARY_TOKENS,
        min_length=MIN_SUMMARY_TOKENS,
    )
    # A single article is encoded, so only the first sequence is relevant.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


sample_url = 'https://vnexpress.net/them-hai-nuoc-rut-nhan-vien-su-quan-tai-ukraine-4420581.html'
desc = 'This app uses BARTpho model by VinAI to summarize the text of a news article.'

summarizer_interface = gr.Interface(
    fn=news_summarizer,
    inputs="text",
    outputs="text",
    title="vietnamese news summarizer",
    theme='huggingface',
    examples=[sample_url],
    description=desc,
)
summarizer_interface.launch(inline=False)