ttphong68 committed on
Commit
cc6170a
1 Parent(s): 97a7a7c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +212 -0
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ import streamlit as st
8
+ from PIL import Image
9
+ from bs4 import BeautifulSoup as soup
10
+ from urllib.request import urlopen
11
+ from newspaper import Article
12
+ import io
13
+ import nltk
14
+ nltk.download('punkt')
15
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
16
+ from transformers import pipeline
17
+ from rouge import Rouge
18
+ from nltk.sentiment import SentimentIntensityAnalyzer
19
+
20
+
21
+ # In[2]:
22
+
23
+
24
# Browser-tab title and icon for the app.
st.set_page_config(
    page_title='InNews: A Summarised News📰 Portal',
    page_icon="newspaper.ico",
)
25
+
26
+
27
+ # In[3]:
28
+
29
+
30
def fetch_news_search_topic(topic):
    """Fetch Google News RSS search results for ``topic``.

    Args:
        topic: Search phrase placed in the ``q`` query parameter.

    Returns:
        The ``item`` elements found in the feed, as returned by
        ``find_all('item')`` on the parsed XML.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote

    # Percent-encode the phrase so spaces, '&', '#' and non-ASCII text cannot
    # corrupt the URL. '+' is left intact so words joined with '+' still act
    # as a multi-word query.
    site = 'https://news.google.com/rss/search?q={}'.format(quote(str(topic), safe='+'))
    # The context manager closes the connection even if read() raises.
    with urlopen(site) as response:
        feed_xml = response.read()
    sp_page = soup(feed_xml, 'xml')  # parse the RSS feed as XML
    return sp_page.find_all('item')  # one <item> per news entry
38
+
39
+
40
+ # In[4]:
41
+
42
+
43
def fetch_top_news():
    """Fetch the Google News top-stories RSS feed.

    Returns:
        The ``item`` elements found in the feed, as returned by
        ``find_all('item')`` on the parsed XML.
    """
    site = 'https://news.google.com/news/rss'
    # The context manager closes the connection even if read() raises.
    with urlopen(site) as response:
        feed_xml = response.read()
    sp_page = soup(feed_xml, 'xml')  # parse the RSS feed as XML
    return sp_page.find_all('item')  # one <item> per news entry
51
+
52
+
53
+ # In[5]:
54
+
55
+
56
def fetch_category_news(topic):
    """Fetch the Google News RSS headlines feed for one section ``topic``.

    Args:
        topic: Section name appended to the headlines URL path.

    Returns:
        The ``item`` elements found in the feed, as returned by
        ``find_all('item')`` on the parsed XML.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote

    # Encode the section name so it can never alter the path or query string.
    site = 'https://news.google.com/news/rss/headlines/section/topic/{}'.format(
        quote(str(topic), safe=''))
    # The context manager closes the connection even if read() raises.
    with urlopen(site) as response:
        feed_xml = response.read()
    sp_page = soup(feed_xml, 'xml')  # parse the RSS feed as XML
    return sp_page.find_all('item')  # one <item> per news entry
64
+
65
+
66
+ # In[6]:
67
+
68
+
69
def fetch_news_poster(poster_link):
    """Show the image at ``poster_link``, or a placeholder if it cannot be shown.

    Args:
        poster_link: URL of the image to download and render.
    """
    try:
        # Close the HTTP response as soon as the bytes have been read.
        with urlopen(poster_link) as response:
            raw_data = response.read()
        image = Image.open(io.BytesIO(raw_data))
        st.image(image, use_column_width=True)
    except Exception:
        # Best-effort: any download or decoding problem falls back to the
        # local placeholder. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt/SystemExit propagate.
        image = Image.open("no_image.jpg")
        st.image(image, use_column_width=True)
78
+
79
+
80
+ # In[7]:
81
+
82
+
83
+ from nltk.sentiment import SentimentIntensityAnalyzer
84
+
85
def get_sentiment_label(sentiment_score):
    """Translate a numeric sentiment score into a coarse label.

    Scores of 0.05 or more are "Positive", scores of -0.05 or less are
    "Negative", and anything in between is "Neutral".
    """
    if sentiment_score >= 0.05:
        return "Positive"
    return "Negative" if sentiment_score <= -0.05 else "Neutral"
92
+
93
def display_news(list_of_news, news_quantity):
    """Render up to ``news_quantity`` news items with summary, ROUGE and sentiment.

    For each item the article is downloaded and parsed, summarised with the
    ``t5-base`` model, and shown with its lead image, an expander holding the
    summary and source link, ROUGE F-scores of the T5 summary against the
    article's ``summary`` attribute, a sentiment score and label for the T5
    summary, and the item's publication date.

    Args:
        list_of_news: Feed items exposing ``title``, ``link``, ``source`` and
            ``pubDate`` children, each with a ``.text`` attribute.
        news_quantity: Maximum number of items to process. Items whose
            article cannot be fetched still count towards this limit.
    """
    # Imported locally so the function carries its own dependency.
    import html

    # Nothing to show: return before the expensive model load below.
    if not list_of_news or news_quantity <= 0:
        return

    # NOTE(review): the tokenizer and model are loaded on every call;
    # consider caching them if the page feels slow.
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = T5ForConditionalGeneration.from_pretrained('t5-base')
    rouge = Rouge()
    try:
        sentiment_analyzer = SentimentIntensityAnalyzer()
    except LookupError:
        # Only 'punkt' is downloaded at import time, so the VADER lexicon may
        # be missing; fetch it on first use and retry.
        nltk.download('vader_lexicon')
        sentiment_analyzer = SentimentIntensityAnalyzer()

    for position, news in enumerate(list_of_news, start=1):
        if position > news_quantity:
            break

        st.write('**({}) {}**'.format(position, news.title.text))
        news_data = Article(news.link.text)
        try:
            news_data.download()
            news_data.parse()
            news_data.nlp()
        except Exception as e:
            # Without article text there is nothing to summarise or score,
            # so report the problem and move on to the next item.
            st.error(e)
            continue

        # Abstractive summarization
        input_text = news_data.text
        inputs = tokenizer.encode(
            "summarize: " + input_text,
            return_tensors="pt", max_length=512, truncation=True)
        outputs = model.generate(
            inputs, max_length=500, min_length=40, length_penalty=2.0,
            num_beams=4, early_stopping=True)
        # skip_special_tokens keeps markers such as <pad> and </s> out of the
        # displayed text and out of the ROUGE/sentiment inputs.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        fetch_news_poster(news_data.top_image)
        with st.expander(news.title.text):
            # The summary derives from scraped web content, so escape it
            # before embedding it in raw HTML.
            st.markdown(
                "<h6 style='text-align: justify;'>{}</h6>".format(html.escape(summary)),
                unsafe_allow_html=True)
            st.markdown("[Read more at {}...]({})".format(news.source.text, news.link.text))

        # ROUGE of the T5 summary against the article's own summary attribute.
        reference_summary = news_data.summary
        scores = None
        if summary.strip() and reference_summary.strip():
            try:
                scores = rouge.get_scores(summary, reference_summary)[0]
            except ValueError:
                # presumably raised when a text has no scorable sentence;
                # treat it as "no score" rather than aborting the page.
                scores = None
        if scores is not None:
            st.success("ROUGE-1 Score: {:.2f}".format(scores['rouge-1']['f']))
            st.success("ROUGE-2 Score: {:.2f}".format(scores['rouge-2']['f']))
            st.success("ROUGE-L Score: {:.2f}".format(scores['rouge-l']['f']))
        else:
            st.warning("ROUGE scores unavailable for this article.")

        # Sentiment of the generated summary.
        sentiment_score = sentiment_analyzer.polarity_scores(summary)['compound']
        sentiment_label = get_sentiment_label(sentiment_score)

        st.write("Sentiment Score:", sentiment_score)
        st.write("Sentiment Label:", sentiment_label)

        st.success("Published Date: " + news.pubDate.text)
146
+
147
+
148
+ # In[8]:
149
+
150
+
151
def run():
    """Build the page: title, logo, category selector and the chosen news view."""
    st.title("InNews: A Summarised News📰")
    image = Image.open("newspaper.png")

    # The outer columns only hold empty text; the logo goes in the middle one.
    col1, col2, col3 = st.columns([3, 5, 3])
    with col1:
        st.write("")
    with col2:
        st.image(image, use_column_width=False)
    with col3:
        st.write("")

    category = ['--Select--', 'Trending🔥 News', 'Favourite💙 Topics', 'Search🔍 Topic']
    cat_op = st.selectbox('Select your Category', category)

    if cat_op == category[0]:
        st.warning('Please select Type!!')

    elif cat_op == category[1]:
        st.subheader("✅ Here is the Trending🔥 news for you")
        no_of_news = st.slider('Number of News:', min_value=5, max_value=25, step=1)
        news_list = fetch_top_news()
        display_news(news_list, no_of_news)

    elif cat_op == category[2]:
        av_topics = ['Choose Topic', 'WORLD', 'NATION', 'BUSINESS', 'TECHNOLOGY',
                     'ENTERTAINMENT', 'SPORTS', 'SCIENCE', 'HEALTH']
        st.subheader("Choose your favourite Topic")
        chosen_topic = st.selectbox("Choose your favourite Topic", av_topics)
        if chosen_topic == av_topics[0]:
            st.warning("Please Choose the Topic")
        else:
            no_of_news = st.slider('Number of News:', min_value=5, max_value=25, step=1)
            news_list = fetch_category_news(chosen_topic)
            if news_list:
                st.subheader("✅ Here are the some {} News for you".format(chosen_topic))
                display_news(news_list, no_of_news)
            else:
                st.error("No News found for {}".format(chosen_topic))

    elif cat_op == category[3]:
        user_topic = st.text_input("Enter your Topic🔍")
        no_of_news = st.slider('Number of News:', min_value=5, max_value=15, step=1)

        # Join the words with '+' instead of deleting the spaces, so that
        # "climate change" is searched as two words rather than as
        # "climatechange". A whitespace-only entry collapses to an empty
        # query and is treated as "no topic".
        user_topic_pr = '+'.join(user_topic.split())

        if st.button("Search") and user_topic_pr:
            news_list = fetch_news_search_topic(topic=user_topic_pr)
            if news_list:
                st.subheader("✅ Here are the some {} News for you".format(user_topic.capitalize()))
                display_news(news_list, no_of_news)
            else:
                st.error("No News found for {}".format(user_topic))
        else:
            st.warning("Please write Topic Name to Search🔍")
204
+
205
+
206
# Entry point: build the page.
run()
207
+
208
+
209
+
210
+
211
+
212
+