Spaces:

Karthik001291546
/

AI_news_summarizer_bot

Sleeping

App Files Files Community

AI_news_summarizer_bot / app.py

Karthik001291546

Rename news_bot.py to app.py

c83b3c4 verified 5 months ago

raw

history blame contribute delete

2.45 kB

	import requests #fetch data from internet
	from bs4 import BeautifulSoup #to parse rss/xml from article HTML
	from transformers import pipeline # to load summarization model
	import gradio as gr # for interface
	# Summarization pipeline built on the "t5-small" checkpoint (same name for
	# model and tokenizer). Created once at module load.
	summarizer = pipeline(
	    "summarization",
	    model="t5-small",
	    tokenizer="t5-small",
	)
	# RSS (Really Simple Syndication): a machine-readable XML file that lists the latest articles from a website.
	# Maps a category name to the URL of the matching BBC RSS feed.
	# Every feed uses HTTPS so all requests to the host go over TLS.
	RSS_FEEDS = {
	    "Technology": "https://feeds.bbci.co.uk/news/technology/rss.xml",
	    "World": "https://feeds.bbci.co.uk/news/world/rss.xml",
	    "Sports": "https://feeds.bbci.co.uk/sport/rss.xml",
	    "Politics": "https://feeds.bbci.co.uk/news/politics/rss.xml",
	    "Health": "https://feeds.bbci.co.uk/news/health/rss.xml",
	    "Education": "https://feeds.bbci.co.uk/news/education/rss.xml",
	    "Science and Environment": "https://feeds.bbci.co.uk/news/science_and_environment/rss.xml",
	    "Entertainment and arts": "https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml",
	    "business": "https://feeds.bbci.co.uk/news/business/rss.xml",
	}
	def summarize_latest_article(category):
	    """Fetch the first article of a category's RSS feed and summarize it.

	    Args:
	        category: A key of ``RSS_FEEDS``.

	    Returns:
	        A text block with the article title, its link and the model's
	        summary. When the category is unknown, a download fails, the feed
	        has no usable item, or no paragraph text is found, a short
	        human-readable message is returned instead of raising.
	    """
	    feed_url = RSS_FEEDS.get(category)
	    if feed_url is None:
	        # Covers a missing selection (None) as well as an unknown key.
	        return "Please choose a category."

	    # requests has no default timeout; without one a stalled server would
	    # block this call indefinitely.
	    timeout_seconds = 10

	    try:
	        feed_response = requests.get(feed_url, timeout=timeout_seconds)
	        feed_response.raise_for_status()
	    except requests.RequestException as exc:
	        return f"Could not fetch the news feed: {exc}"

	    # Parse the RSS XML and take the first <item>, treated as the latest article.
	    feed = BeautifulSoup(feed_response.content, features="xml")
	    first_item = feed.find("item")
	    if first_item is None or first_item.title is None or first_item.link is None:
	        return "No articles were found in this feed."

	    title = first_item.title.text
	    link = first_item.link.text.strip()

	    try:
	        article_response = requests.get(link, timeout=timeout_seconds)
	        article_response.raise_for_status()
	    except requests.RequestException as exc:
	        return f"Title: {title}\n link: {link}\n Could not fetch the article: {exc}"

	    # Parse the article page and join the text of all <p> elements.
	    article_soup = BeautifulSoup(article_response.content, "html.parser")
	    article_text = " ".join(p.get_text() for p in article_soup.find_all("p"))

	    # Only the first 1000 characters are passed to the model.
	    article_text = article_text[:1000]
	    if not article_text.strip():
	        return f"Title: {title}\n link: {link}\n Summary:\nNo article text could be extracted."

	    summary = summarizer(article_text, max_length=200, min_length=20, do_sample=False)
	    return f"Title: {title}\n link: {link}\n Summary:\n{summary[0]['summary_text']}"


	# Completed so far: imported the libraries, loaded the summarizer model, set up the RSS feed sources, and defined the function that parses the RSS feed, gets the title, visits the article, extracts its text, runs the summarizer and returns the result.

	# Gradio UI: a dropdown of the RSS_FEEDS category names as input and a text
	# box showing whatever summarize_latest_article returns as output.
	iface = gr.Interface(
	    fn=summarize_latest_article,
	    inputs=gr.Dropdown(choices=list(RSS_FEEDS), label="Choose a category"),
	    outputs=gr.Textbox(),
	    title="AI News summarizer bot",
	    description="Select a category to get a summarized news article.",
	)
	# Start the web app when this file is executed.
	iface.launch()