Faizan15 committed on
Commit
f25846c
1 Parent(s): 7b82169

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -0
app.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, send_file, redirect
2
+ import json
3
+ import requests
4
+ from urllib.request import urlopen
5
+ from bs4 import BeautifulSoup
6
+ import re
7
+ import joblib
8
+ from nltk.sentiment import SentimentIntensityAnalyzer
9
+ import matplotlib.pyplot as plt
10
+ from matplotlib import font_manager
11
+ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
12
+ plt.switch_backend('Agg')
13
+ import numpy as np
14
+ from io import BytesIO
15
+ import base64
16
+ from dotenv import load_dotenv
17
+ import os
18
+ import threading
19
+
20
+ import nltk
21
+ nltk.download('vader_lexicon')
22
+
23
def load_classifier_and_vectorizer():
    """Load the pickled sentiment model files into module globals.

    Reads ``./data/sentiment_classifier.pkl`` and
    ``./data/sentiment_vectorizer.pkl`` with joblib and publishes the
    results as the globals ``classifier`` and ``vectorizer``.
    """
    global classifier
    global vectorizer
    classifier = joblib.load('./data/sentiment_classifier.pkl')
    vectorizer = joblib.load('./data/sentiment_vectorizer.pkl')
27
+
28
# Load the model files on a background thread; analyze_sentiment() joins
# this thread before it touches the classifier or vectorizer.
loading_thread = threading.Thread(target=load_classifier_and_vectorizer)
loading_thread.start()

app = Flask(__name__)
# Read a local .env file so os.environ lookups (api_key, cx) can succeed.
load_dotenv()
34
+
35
# Function to get Google search links
def search_links(query):
    """Look up *query* with the Google Custom Search JSON API.

    The API key and search-engine id are read from the ``api_key`` and
    ``cx`` environment variables.

    Args:
        query: the search text.

    Returns:
        A two-item list ``[links, error]``. ``links`` is a list of
        single-entry ``{title: link}`` dicts (empty when nothing was found)
        and ``error`` is ``""`` or a message meant for the user.

    Raises:
        KeyError: if ``api_key`` or ``cx`` is missing from the environment.
    """
    base_url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'q': query,
        'key': os.environ['api_key'],
        'cx': os.environ['cx'],
    }
    response = requests.get(base_url, params=params)
    if response.status_code in (429, 403):
        print("API limit reached.")
        # Keep the [links, error] shape here too: the caller indexes both
        # positions, so a bare [] would raise IndexError and lose the message.
        return [[], "Daily API limit reached"]

    results = response.json().get('items', [])
    # One {title: link} dict per search hit.
    links = [{item.get('title', ''): item.get('link', '')} for item in results]
    return [links, ""]
57
+
58
def _page_field(html, key):
    """Return the quoted string value that follows ``"key":`` inside *html*."""
    match = re.search(rf'"{key}":\s*"([^"]+)"', html, re.IGNORECASE)
    if match is None:
        raise ValueError(f'"{key}" was not found in the page')
    return match.group(1)


# Function to fetch reviews
def get_reviews(movie_url, review_type):
    """Collect review quotes for the Rotten Tomatoes title at *movie_url*.

    The title page is downloaded to read its ``emsId``, ``titleName``,
    ``titleType`` and ``vanity`` fields (plus ``tvSeason`` for TV titles),
    the title's reviews page is scanned for a poster image, and the review
    API is then paged through until it reports no further page or at least
    100 quotes have been gathered.

    Side effect: sets the module global ``show_info`` to
    ``[title, poster_link]``.

    Args:
        movie_url: URL of the title page.
        review_type: path segment of the review API URL (callers in this
            file pass ``'user'`` and ``'all'``).

    Returns:
        A list of review quote strings.

    Raises:
        ValueError: if one of the required fields is not present in the page.
    """
    global show_info

    # Close every HTTP response as soon as its body has been consumed.
    with urlopen(movie_url) as page:
        html = page.read().decode("utf-8")

    ems_id = _page_field(html, "emsId")
    title = _page_field(html, "titleName")
    is_tv = _page_field(html, "titleType") == "Tv"
    vanity = _page_field(html, "vanity")

    # TV titles are addressed per season, so the season value joins the path.
    season_path = f'{_page_field(html, "tvSeason")}/' if is_tv else ""
    section = "tv" if is_tv else "m"
    poster_page_url = f'https://www.rottentomatoes.com/{section}/{vanity}/{season_path}reviews'
    with urlopen(poster_page_url) as poster_page:
        soup = BeautifulSoup(poster_page, 'html.parser')
    poster_img = soup.find('img', {'data-qa': 'sidebar-poster-img'})
    # Fall back to the site icon when the page carries no poster image.
    poster_link = poster_img.get('src') if poster_img else '/data/favicon.ico'

    api_kind = "season" if is_tv else "movie"
    reviews = []
    after = ""
    has_next_page = True
    while has_next_page and len(reviews) < 100:
        url = f'https://www.rottentomatoes.com/napi/{api_kind}/{ems_id}/reviews/{review_type}?after={after}'
        with urlopen(url) as response:
            payload = json.loads(response.read().decode('utf-8'))
        reviews.extend(review['quote'] for review in payload['reviews'])
        page_info = payload['pageInfo']
        has_next_page = page_info['hasNextPage']
        if has_next_page:
            # The cursor of this page selects the next one.
            after = page_info['endCursor']

    show_info = [title, poster_link]
    return reviews
96
+
97
+
98
# Function to clean text
def clean(input_string):
    """Collapse each run of whitespace to one space and trim both ends."""
    return re.sub(r'\s+', ' ', input_string).strip()
102
+
103
def analyze_sentiment(reviews):
    """Run the loaded classifier over *reviews*.

    Each review is whitespace-normalised with ``clean``, vectorised with
    the global ``vectorizer`` and passed to the global ``classifier``.

    Returns:
        Whatever ``classifier.predict`` returns for the vectorised reviews
        (presumably one label per review; confirm against the model).
    """
    # Block until the background model load has finished.
    loading_thread.join()

    features = vectorizer.transform([clean(phrase) for phrase in reviews])
    return classifier.predict(features)
113
+
114
# Function to get polarity scores using NLTK
def get_polarity_scores(reviews):
    """Score every review with NLTK's SentimentIntensityAnalyzer.

    Args:
        reviews: iterable of review strings.

    Returns:
        A list holding one ``polarity_scores`` result per review, in input
        order (``plot_bar`` reads its 'neg', 'neu', 'pos' and 'compound'
        entries).
    """
    sia = SentimentIntensityAnalyzer()
    # Score the same whitespace-normalised text that analyze_sentiment()
    # classifies, rather than building the cleaned list and ignoring it.
    return [sia.polarity_scores(clean(phrase)) for phrase in reviews]
123
+
124
# Register the bundled font file with matplotlib; the plots request the
# 'HP Simplified' family by name.
custom_font_path = 'data/HPSimplified.ttf'
font_manager.fontManager.addfont(custom_font_path)
127
+
128
# Function to plot a bar chart
def plot_bar(data, title):
    """Draw per-review polarity scores as a stacked bar chart.

    Args:
        data: sequence of dicts with 'neg', 'neu', 'pos' and 'compound'
            entries, one dict per review.
        title: text shown as the chart title.

    Returns:
        The rendered PNG, base64-encoded, as a str.
    """
    # Extracting values for each sentiment
    neg_values = np.array([entry['neg'] for entry in data])
    neu_values = np.array([entry['neu'] for entry in data])
    pos_values = np.array([entry['pos'] for entry in data])
    compound_values = [entry['compound'] for entry in data]

    positions = range(len(data))
    width = 0.7
    axis_font = {'fontname': 'HP Simplified', 'fontsize': 30, 'weight': 'bold', 'color': '#fff'}

    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        # Stack order from the bottom: negative (red), neutral (gray), positive (green).
        ax.bar(positions, neg_values, width=width, color='#961e1e', label='Negative')
        ax.bar(positions, neu_values, width=width, bottom=neg_values, color='#999', label='Neutral')
        ax.bar(positions, pos_values, width=width, bottom=neg_values + neu_values, color='#015501', label='Positive')

        # Adding labels and title
        plt.xlabel('Reviews', fontdict=axis_font, labelpad=20)
        plt.ylabel('Polarity Scores', fontdict=axis_font, labelpad=20)
        plt.title(title, fontdict={'fontname': 'HP Simplified', 'fontsize': 40, 'weight': 'bold', 'color': '#fff'}, pad=20)
        plt.yticks(fontname='HP Simplified', fontsize=24, color="#fff")

        # One bar per review, so X tick labels carry no information.
        ax.set_xticks([])
        ax.set_facecolor('#000')

        # Legend sits outside the axes, to the right.
        plt.legend(loc='upper right', bbox_to_anchor=(1.25, 1), prop={'family': 'HP Simplified', 'size': 32})

        # Caption and value of the mean compound score, placed in figure
        # coordinates below the legend.
        average = np.mean(compound_values)
        plt.text(0.91, 0.66, 'Average Compound Score', fontsize=20, fontname='HP Simplified', weight="bold", color="white", ha='center', va='center', transform=fig.transFigure)
        plt.text(0.91, 0.64, f'\n{average:.2f}', fontsize=36, fontname='HP Simplified', weight="bold", color="white", ha='center', va='center', transform=fig.transFigure)

        # Set the background color
        fig.set_facecolor('#1e1e1e')
        plt.tight_layout()

        # Render the figure to PNG bytes in memory.
        output = BytesIO()
        FigureCanvas(fig).print_png(output)
    finally:
        # Close exactly this figure, even when drawing failed, so pyplot
        # does not keep it registered.
        plt.close(fig)
    return base64.b64encode(output.getvalue()).decode('utf-8')
180
+
181
def plot_pie(data, title):
    """Draw the share of each predicted label as a pie chart.

    Args:
        data: array-like of labels; "Negative" and "Positive" get fixed
            red/green colours, any other label set uses matplotlib's
            default colours.
        title: text shown as the chart title.

    Returns:
        The rendered PNG, base64-encoded, as a str.
    """
    unique, counts = np.unique(data, return_counts=True)

    # Pull the second wedge out slightly. Built per label so the tuple
    # always has one entry per wedge, whatever the number of labels.
    explode = tuple(0.1 if index == 1 else 0 for index in range(len(unique)))

    # Colour by label rather than by position.
    palette = {'Negative': '#961e1e', 'Positive': '#024d0f'}
    colors = [palette.get(str(label)) for label in unique]
    if None in colors:
        # Unknown label: let matplotlib choose instead of handing pie()
        # an incomplete colour list.
        colors = None

    fig, ax = plt.subplots()
    try:
        fig.patch.set_facecolor('#1e1e1e')
        ax.pie(counts, explode=explode, labels=unique, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)

        # Set custom fonts for title, labels, and autopct
        ax.set_title(title, fontdict={'family': 'HP Simplified', 'color': 'White', 'weight': 'bold', 'size': 28})
        for text in ax.texts:
            text.set_fontfamily('HP Simplified')
            text.set_fontsize('20')
            text.set_fontweight('bold')
            text.set_color('White')

        # Equal aspect ratio ensures that pie is drawn as a circle
        ax.axis('equal')
        plt.tight_layout()

        output = BytesIO()
        FigureCanvas(fig).print_png(output)
    finally:
        # Close exactly this figure, even when drawing failed.
        plt.close(fig)
    return base64.b64encode(output.getvalue()).decode('utf-8')
217
+
218
@app.route('/data/bg.png')
def bg():
    """Serve the background image file."""
    return send_file('data/bg.png')
221
+
222
@app.route('/data/search-icon.svg')
def search_icon():
    """Serve the search icon file."""
    return send_file('data/search-icon.svg')
225
+
226
@app.route('/data/favicon.ico')
def favicon():
    """Serve the site icon file."""
    return send_file('data/favicon.ico')
229
+
230
@app.route('/data/HPSimplified.ttf')
def font():
    """Serve the HP Simplified font file."""
    return send_file('data/HPSimplified.ttf')
233
+
234
@app.route('/data/search.gif')
def searchload():
    """Serve the search GIF file."""
    return send_file('./data/search.gif')
237
+
238
@app.route('/')
def home():
    """Render the ``index.html`` template for the site root."""
    return render_template('index.html')
241
+
242
@app.route('/search')
def search():
    """Render search results for the ``query`` request argument.

    Redirects to the home page when no query is supplied. Otherwise
    renders ``search.html`` with the found links and an error message
    (empty when there is nothing to report).
    """
    query = request.args.get('query')
    if not query:
        return redirect('/')

    search_data = search_links(query)
    if search_data:
        links, err = search_data
    else:
        # search_links() may hand back a bare [] instead of [links, error]
        # when the API refuses the request; indexing it would raise.
        links, err = [], "Daily API limit reached"

    # Only report "nothing found" when no more specific error is known.
    if not links and not err:
        err = "No links found for your query"
    return render_template('search.html', links=links, Err=err)
252
+
253
def _review_charts(reviews, label):
    """Return ``(bar_html, pie_html)`` for *reviews*; a notice for both when empty."""
    if not reviews:
        notice = f"No {label} Reviews Found"
        return notice, notice

    polarity = get_polarity_scores(reviews)
    predictions = analyze_sentiment(reviews)
    bar = plot_bar(polarity, f"{label} Sentiments")
    pie = plot_pie(predictions, f"{label} Reviews")
    return (
        f'<img src="data:image/png;base64,{bar}" alt="{label} Reviews Bar Plot">',
        f'<img src="data:image/png;base64,{pie}" alt="{label} Reviews Pie Plot">',
    )


@app.route('/review')
def review():
    """Render sentiment charts for the title given by the ``url`` argument.

    User and critic reviews are fetched separately; each set yields a bar
    chart and a pie chart embedded as base64 ``<img>`` tags, or a notice
    when that set is empty. Redirects to the home page when ``url`` is
    missing or is not an http(s) URL on rottentomatoes.com.
    """
    from urllib.parse import urlsplit

    url = request.args.get('url')
    if not url:
        return redirect('/')

    # get_reviews() passes this value to urlopen(), which also handles
    # file: URLs and arbitrary hosts, so accept only Rotten Tomatoes pages.
    try:
        parts = urlsplit(url)
        host = (parts.hostname or '').lower()
    except ValueError:
        return redirect('/')
    on_site = host == 'rottentomatoes.com' or host.endswith('.rottentomatoes.com')
    if parts.scheme not in ('http', 'https') or not on_site:
        return redirect('/')

    user = get_reviews(url, 'user')
    critic = get_reviews(url, 'all')

    user_bar_img, user_pie_img = _review_charts(user, "User")
    critic_bar_img, critic_pie_img = _review_charts(critic, "Critic")

    # get_reviews() leaves [title, poster_link] in the global show_info.
    return render_template('review.html', user_bar=user_bar_img, user_pie=user_pie_img, critic_bar=critic_bar_img, critic_pie=critic_pie_img, title=show_info[0], url=url, poster=show_info[1])
288
+
289
@app.errorhandler(404)
def not_found_error(error):
    """Render ``404.html`` with a 404 status for unknown routes."""
    page = render_template('404.html')
    return page, 404
292
+
293
if __name__ == '__main__':
    # The server listens on every interface, and Flask's debug mode exposes
    # an interactive debugger that can run code, so debug is opt-in through
    # the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=3000, debug=debug_enabled)