|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from transformers import pipeline
|
|
import gradio as gr
|
|
# Hugging Face summarization pipeline. t5-small keeps the model download and
# CPU inference cost modest enough for an interactive demo.
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
# BBC RSS feeds, keyed by the human-readable category shown in the dropdown.
# All URLs use HTTPS and keys use consistent Title Case (the original mixed
# http/https schemes and lower-cased "business").
RSS_FEEDS = {
    "Technology": "https://feeds.bbci.co.uk/news/technology/rss.xml",
    "World": "https://feeds.bbci.co.uk/news/world/rss.xml",
    "Sports": "https://feeds.bbci.co.uk/sport/rss.xml",
    "Politics": "https://feeds.bbci.co.uk/news/politics/rss.xml",
    "Health": "https://feeds.bbci.co.uk/news/health/rss.xml",
    "Education": "https://feeds.bbci.co.uk/news/education/rss.xml",
    "Science and Environment": "https://feeds.bbci.co.uk/news/science_and_environment/rss.xml",
    "Entertainment and Arts": "https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml",
    "Business": "https://feeds.bbci.co.uk/news/business/rss.xml",
}
|
|
def summarize_latest_article(category):
    """Fetch the newest article from the selected BBC RSS feed and summarize it.

    Args:
        category: A key of ``RSS_FEEDS`` chosen from the UI dropdown.

    Returns:
        A string with the article title, link, and generated summary, or a
        human-readable error message when the feed or article is unavailable.
    """
    feed_url = RSS_FEEDS[category]

    # Fetch the RSS feed; a timeout keeps the Gradio UI from hanging forever,
    # and raise_for_status surfaces HTTP errors instead of parsing error pages.
    try:
        response = requests.get(feed_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Could not fetch the RSS feed: {exc}"

    soup = BeautifulSoup(response.content, features="xml")
    items = soup.find_all("item")  # find_all: modern bs4 spelling of findAll
    if not items:  # guard the previously unchecked [0] index
        return "No articles found in this feed."

    first_item = items[0]
    title = first_item.title.text
    link = first_item.link.text

    # Download the article page itself and pull out its paragraph text.
    try:
        article = requests.get(link, timeout=10)
        article.raise_for_status()
    except requests.RequestException as exc:
        return f"Could not fetch the article: {exc}"

    article_soup = BeautifulSoup(article.content, "html.parser")
    paragraphs = article_soup.find_all("p")
    article_text = " ".join(p.get_text() for p in paragraphs)

    if not article_text.strip():
        return f"Title: {title}\n link: {link}\n Summary:\n(no article text found)"

    # t5-small has a small input window; a rough character cap keeps the
    # input within range without tokenizing twice.
    article_text = article_text[:1000]

    summary = summarizer(article_text, max_length=200, min_length=20, do_sample=False)
    return f"Title: {title}\n link: {link}\n Summary:\n{summary[0]['summary_text']}"
|
|
|
|
|
|
|
|
|
|
# Wire the summarizer into a minimal Gradio UI: one dropdown in, one textbox out.
iface = gr.Interface(
    fn=summarize_latest_article,
    inputs=gr.Dropdown(choices=list(RSS_FEEDS.keys()), label="Choose a category"),
    outputs=gr.Textbox(),
    title="AI News summarizer bot",
    # Typo fixed: "asummarized" -> "a summarized".
    description="Select a category to get a summarized news article.",
)

# Launch only when run as a script, so the module stays importable
# (e.g. for tests) without starting a web server as a side effect.
if __name__ == "__main__":
    iface.launch()
|
|
|