# Spaces: Sleeping (hosting-page status text captured with the listing; not part of the program)
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import html
import io
from urllib.parse import quote, quote_plus
from urllib.request import urlopen

import nltk
import streamlit as st
from bs4 import BeautifulSoup as soup
from newspaper import Article
from nltk.sentiment import SentimentIntensityAnalyzer
from PIL import Image
from rouge import Rouge
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

nltk.download('punkt')
# In[2]: | |
# Set the browser-tab title and icon for the app.
st.set_page_config(
    page_title='InNews: A Summarised News📰 Portal',
    page_icon="newspaper.ico",
)
# In[3]: | |
def fetch_news_search_topic(topic):
    """Fetch Google News RSS search results for a free-text topic.

    Args:
        topic: Search phrase. It is URL-encoded before being placed in the
            query string, so spaces, ``&``, ``#`` and non-ASCII characters
            no longer corrupt the request URL.

    Returns:
        The ``<item>`` elements found in the feed (empty when there are none).

    Raises:
        urllib.error.URLError: If the feed cannot be retrieved.
    """
    site = 'https://news.google.com/rss/search?q={}'.format(quote_plus(str(topic)))
    # The context manager closes the connection even when read() raises.
    with urlopen(site) as response:
        raw_feed = response.read()
    sp_page = soup(raw_feed, 'xml')  # parse the RSS document
    return sp_page.find_all('item')
# In[4]: | |
def fetch_top_news():
    """Fetch the Google News top-stories RSS feed.

    Returns:
        The ``<item>`` elements found in the feed (empty when there are none).

    Raises:
        urllib.error.URLError: If the feed cannot be retrieved.
    """
    site = 'https://news.google.com/news/rss'
    # The context manager closes the connection even when read() raises.
    with urlopen(site) as response:
        raw_feed = response.read()
    sp_page = soup(raw_feed, 'xml')  # parse the RSS document
    return sp_page.find_all('item')
# In[5]: | |
def fetch_category_news(topic):
    """Fetch the Google News RSS headlines for one section.

    Args:
        topic: Section name used as the last URL path segment (for example
            ``'WORLD'`` or ``'SPORTS'``). It is percent-encoded so an
            unexpected character cannot alter the request path.

    Returns:
        The ``<item>`` elements found in the feed (empty when there are none).

    Raises:
        urllib.error.URLError: If the feed cannot be retrieved.
    """
    site = 'https://news.google.com/news/rss/headlines/section/topic/{}'.format(
        quote(str(topic), safe='')
    )
    # The context manager closes the connection even when read() raises.
    with urlopen(site) as response:
        raw_feed = response.read()
    sp_page = soup(raw_feed, 'xml')  # parse the RSS document
    return sp_page.find_all('item')
# In[6]: | |
def fetch_news_poster(poster_link):
    """Show an article's lead image, falling back to a local placeholder.

    Args:
        poster_link: URL of the image to download and display.

    Any failure while downloading, decoding or rendering the remote image
    results in ``no_image.jpg`` being displayed instead.
    """
    try:
        # Close the connection as soon as the bytes have been read.
        with urlopen(poster_link) as response:
            raw_data = response.read()
        image = Image.open(io.BytesIO(raw_data))
        st.image(image, use_column_width=True)
    except Exception:
        # Deliberately broad (bad URL, network error, undecodable image...),
        # but no longer a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate.
        image = Image.open("no_image.jpg")
        st.image(image, use_column_width=True)
# In[7]: | |
from nltk.sentiment import SentimentIntensityAnalyzer | |
def get_sentiment_label(sentiment_score):
    """Map a compound sentiment score to a coarse label.

    Scores of 0.05 or more are "Positive", scores of -0.05 or less are
    "Negative", and everything strictly in between is "Neutral".
    """
    threshold = 0.05
    if sentiment_score >= threshold:
        return "Positive"
    if sentiment_score <= -threshold:
        return "Negative"
    return "Neutral"
def _load_sentiment_analyzer():
    """Return a VADER analyzer, fetching its lexicon if it is not installed."""
    try:
        return SentimentIntensityAnalyzer()
    except LookupError:
        # NLTK signals a missing resource with LookupError; the analyzer
        # needs 'vader_lexicon', which the file never downloads elsewhere.
        nltk.download('vader_lexicon')
        return SentimentIntensityAnalyzer()


def _summarize_text(text, tokenizer, model):
    """Produce an abstractive T5 summary of *text* as plain text."""
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=512,  # together with truncation=True, caps the input length
        truncation=True,
    )
    outputs = model.generate(
        inputs,
        max_length=500,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Drop special tokens such as <pad> and </s>; otherwise they end up in the
    # rendered summary and skew the ROUGE and sentiment inputs.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def _show_rouge_scores(rouge, summary, reference_summary):
    """Display ROUGE-1/2/L F-scores of *summary* against *reference_summary*."""
    if not (summary.strip() and reference_summary.strip()):
        # ROUGE cannot be computed when either side is empty (for example
        # after a failed download), so skip it instead of raising.
        return
    rouge_scores = rouge.get_scores(summary, reference_summary)[0]
    for label, key in (("ROUGE-1", 'rouge-1'), ("ROUGE-2", 'rouge-2'), ("ROUGE-L", 'rouge-l')):
        st.success("{} Score: {:.2f}".format(label, rouge_scores[key]['f']))


def _show_sentiment(sentiment_analyzer, summary):
    """Display the compound sentiment score of *summary* and its label."""
    sentiment_score = sentiment_analyzer.polarity_scores(summary)['compound']
    st.write("Sentiment Score:", sentiment_score)
    st.write("Sentiment Label:", get_sentiment_label(sentiment_score))


def _display_article(news, tokenizer, model, rouge, sentiment_analyzer):
    """Download one RSS item's article and render summary, image and metrics."""
    news_data = Article(news.link.text)
    try:
        news_data.download()
        news_data.parse()
        news_data.nlp()
    except Exception as e:
        # Best-effort: report the problem and render whatever is available.
        st.error(e)

    # Abstractive summarization (skipped when there is no article body).
    input_text = news_data.text
    summary = _summarize_text(input_text, tokenizer, model) if input_text.strip() else ""

    fetch_news_poster(news_data.top_image)
    with st.expander(news.title.text):
        # The summary derives from scraped web content, so escape it before
        # embedding it in markup rendered with unsafe_allow_html.
        st.markdown(
            '''<h6 style='text-align: justify;'>{}</h6>'''.format(html.escape(summary)),
            unsafe_allow_html=True)
        st.markdown("[Read more at {}...]({})".format(news.source.text, news.link.text))

    if summary.strip():
        # Score the T5 summary against newspaper's own extractive summary.
        _show_rouge_scores(rouge, summary, news_data.summary)
        _show_sentiment(sentiment_analyzer, summary)
    st.success("Published Date: " + news.pubDate.text)


def display_news(list_of_news, news_quantity):
    """Render up to *news_quantity* news items with summaries and metrics.

    For each item the title is written, the linked article is downloaded and
    summarised with T5, and the lead image, summary, source link, ROUGE
    scores, sentiment and publication date are displayed.

    Args:
        list_of_news: RSS ``<item>`` elements exposing ``title``, ``link``,
            ``source`` and ``pubDate`` children.
        news_quantity: Maximum number of items to render. The limit is
            checked after each item, so a non-empty list always renders at
            least one item.

    Returns:
        None. Output is written to the Streamlit page.
    """
    if not list_of_news:
        # Nothing to render: avoid loading the T5 checkpoint for no reason.
        return

    # NOTE(review): the tokenizer and model are re-created on every call;
    # consider caching them if the installed Streamlit version offers a
    # resource cache.
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = T5ForConditionalGeneration.from_pretrained('t5-base')
    rouge = Rouge()
    sentiment_analyzer = _load_sentiment_analyzer()

    for count, news in enumerate(list_of_news, start=1):
        st.write('**({}) {}**'.format(count, news.title.text))
        _display_article(news, tokenizer, model, rouge, sentiment_analyzer)
        if count >= news_quantity:
            break
# In[8]: | |
def run():
    """Render the InNews page: header, category picker and the chosen feed.

    The user picks between trending news, a fixed list of sections, or a
    free-text search; the matching feed is fetched and passed to
    ``display_news`` together with the number of items chosen on the slider.
    """
    st.title("InNews: A Summarised News📰")
    image = Image.open("newspaper.png")

    # Three columns; the logo goes in the wider middle one and the outer two
    # only receive an empty write.
    col1, col2, col3 = st.columns([3, 5, 3])
    with col1:
        st.write("")
    with col2:
        st.image(image, use_column_width=False)
    with col3:
        st.write("")

    category = ['--Select--', 'Trending🔥 News', 'Favourite💙 Topics', 'Search🔍 Topic']
    cat_op = st.selectbox('Select your Category', category)

    if cat_op == category[0]:
        st.warning('Please select Type!!')

    elif cat_op == category[1]:
        st.subheader("✅ Here is the Trending🔥 news for you")
        no_of_news = st.slider('Number of News:', min_value=5, max_value=25, step=1)
        news_list = fetch_top_news()
        display_news(news_list, no_of_news)

    elif cat_op == category[2]:
        av_topics = ['Choose Topic', 'WORLD', 'NATION', 'BUSINESS', 'TECHNOLOGY', 'ENTERTAINMENT', 'SPORTS', 'SCIENCE',
                     'HEALTH']
        st.subheader("Choose your favourite Topic")
        chosen_topic = st.selectbox("Choose your favourite Topic", av_topics)
        if chosen_topic == av_topics[0]:
            st.warning("Please Choose the Topic")
        else:
            no_of_news = st.slider('Number of News:', min_value=5, max_value=25, step=1)
            news_list = fetch_category_news(chosen_topic)
            if news_list:
                st.subheader("✅ Here are the some {} News for you".format(chosen_topic))
                display_news(news_list, no_of_news)
            else:
                st.error("No News found for {}".format(chosen_topic))

    elif cat_op == category[3]:
        user_topic = st.text_input("Enter your Topic🔍")
        no_of_news = st.slider('Number of News:', min_value=5, max_value=15, step=1)
        # All whitespace (not only plain spaces) is removed so the topic can be
        # embedded in the request URL as a single token. Doing this before the
        # emptiness check means a whitespace-only entry is treated as empty
        # instead of triggering a search with an empty query.
        user_topic_pr = "".join(user_topic.split())
        # st.button stays the first operand so the button is always rendered.
        if st.button("Search") and user_topic_pr != '':
            news_list = fetch_news_search_topic(topic=user_topic_pr)
            if news_list:
                st.subheader("✅ Here are the some {} News for you".format(user_topic.capitalize()))
                display_news(news_list, no_of_news)
            else:
                st.error("No News found for {}".format(user_topic))
        else:
            st.warning("Please write Topic Name to Search🔍")
# Start the app only when executed as a script (which is how
# `streamlit run` executes it), not when the module is merely imported.
if __name__ == "__main__":
    run()