# Karthik001291546's picture
# Rename news_bot.py to app.py
# c83b3c4 verified
import requests #fetch data from internet
from bs4 import BeautifulSoup #to parse rss/xml from article HTML
from transformers import pipeline # to load summarization model
import gradio as gr # for interface
# Build the t5-small summarization pipeline once at import time; the same
# object is then reused by every call to summarize_latest_article.
summarizer = pipeline(
    task="summarization",
    model="t5-small",
    tokenizer="t5-small",
)
# RSS (Really Simple Syndication): a machine-readable XML file that lists the latest articles from a website.
# Maps each category label shown in the UI dropdown to its BBC RSS feed URL.
# Every feed is requested over HTTPS so all categories use the same scheme
# and none is fetched in plaintext.
RSS_FEEDS = {
    "Technology": "https://feeds.bbci.co.uk/news/technology/rss.xml",
    "World": "https://feeds.bbci.co.uk/news/world/rss.xml",
    "Sports": "https://feeds.bbci.co.uk/sport/rss.xml",
    "Politics": "https://feeds.bbci.co.uk/news/politics/rss.xml",
    "Health": "https://feeds.bbci.co.uk/news/health/rss.xml",
    "Education": "https://feeds.bbci.co.uk/news/education/rss.xml",
    "Science and Environment": "https://feeds.bbci.co.uk/news/science_and_environment/rss.xml",
    "Entertainment and arts": "https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml",
    "business": "https://feeds.bbci.co.uk/news/business/rss.xml",
}
# requests waits indefinitely for a response unless a timeout is supplied.
_REQUEST_TIMEOUT_SECONDS = 10
# Upper bound on the number of article characters handed to the summarizer.
_MAX_ARTICLE_CHARS = 1000


def summarize_latest_article(category):
    """Summarize the first article listed in the RSS feed for *category*.

    Looks up the feed URL in the module-level ``RSS_FEEDS`` mapping, downloads
    the feed, follows the link of its first ``<item>``, collects the text of
    the page's ``<p>`` elements and passes the first 1000 characters to the
    module-level ``summarizer`` pipeline.

    Args:
        category: A key of ``RSS_FEEDS`` (the value chosen in the dropdown).

    Returns:
        A string with the article title, link and generated summary. When the
        category is unknown, a download fails, the feed has no usable item, or
        the article page has no paragraph text, a short explanatory message is
        returned instead so the UI can display it.
    """
    feed_url = RSS_FEEDS.get(category)
    if feed_url is None:
        # Reached e.g. when the form is submitted with nothing selected.
        return "Please choose a valid category."

    # Fetch the RSS feed; a timeout and status check keep a slow or failing
    # server from hanging the request or feeding an error page to the parser.
    try:
        response = requests.get(feed_url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Could not fetch the RSS feed: {exc}"

    # Parse the XML and take the first <item>, which is treated as the
    # latest article.
    soup = BeautifulSoup(response.content, features="xml")
    first_item = soup.find("item")
    if first_item is None or first_item.title is None or first_item.link is None:
        return "No articles were found in the RSS feed."
    title = first_item.title.get_text(strip=True)
    link = first_item.link.get_text(strip=True)

    # Download the article page itself.
    try:
        article = requests.get(link, timeout=_REQUEST_TIMEOUT_SECONDS)
        article.raise_for_status()
    except requests.RequestException as exc:
        return f"Could not fetch the article '{title}' ({link}): {exc}"

    # Join all paragraph texts into one string, then cap its length before
    # summarizing.
    article_soup = BeautifulSoup(article.content, "html.parser")
    article_text = " ".join(p.get_text() for p in article_soup.find_all("p"))
    article_text = article_text[:_MAX_ARTICLE_CHARS]
    if not article_text.strip():
        return f"No article text could be extracted for '{title}' ({link})."

    summary = summarizer(article_text, max_length=200, min_length=20, do_sample=False)
    return f"Title: {title}\n link: {link}\n Summary:\n{summary[0]['summary_text']}"
# Completed so far: imported the libraries, loaded the summarizer model, set up the RSS feed sources, and defined the function that parses the RSS feed, gets the title, visits the article, extracts its text, runs the summarizer and returns the result.
# Gradio UI: a dropdown of the RSS_FEEDS categories is passed to
# summarize_latest_article and the returned text is shown in a text box.
iface = gr.Interface(
    fn=summarize_latest_article,
    inputs=gr.Dropdown(choices=list(RSS_FEEDS.keys()), label="Choose a category"),
    outputs=gr.Textbox(),
    title="AI News summarizer bot",
    # Fixed the missing space in the user-facing text ("asummarized").
    description="Select a category to get a summarized news article.",
)
# Start the web server for the interface when the script runs.
iface.launch()