Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request, send_file, redirect
|
2 |
+
import json
|
3 |
+
import requests
|
4 |
+
from urllib.request import urlopen
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
import re
|
7 |
+
import joblib
|
8 |
+
from nltk.sentiment import SentimentIntensityAnalyzer
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
from matplotlib import font_manager
|
11 |
+
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
12 |
+
plt.switch_backend('Agg')
|
13 |
+
import numpy as np
|
14 |
+
from io import BytesIO
|
15 |
+
import base64
|
16 |
+
from dotenv import load_dotenv
|
17 |
+
import os
|
18 |
+
import threading
|
19 |
+
|
20 |
+
import nltk
|
21 |
+
nltk.download('vader_lexicon')
|
22 |
+
|
23 |
+
def load_classifier_and_vectorizer():
    """Load the sentiment classifier and its vectorizer into module globals.

    Both objects are read with joblib from the ``./data`` directory and
    published as the module-level names ``classifier`` and ``vectorizer``.
    """
    global classifier, vectorizer
    # Classifier first, then the vectorizer, each from its own pickle file.
    classifier = joblib.load('./data/sentiment_classifier.pkl')
    vectorizer = joblib.load('./data/sentiment_vectorizer.pkl')
|
27 |
+
|
28 |
+
# Kick off model loading on a background thread; analyze_sentiment() joins
# this thread before it touches the classifier or vectorizer.
loading_thread = threading.Thread(target=load_classifier_and_vectorizer)
loading_thread.start()

# Flask application object used by every route below.
app = Flask(__name__)

# Read a .env file (if any) into the process environment; search_links()
# looks up its 'api_key' and 'cx' settings there.
load_dotenv()
|
34 |
+
|
35 |
+
# Function to get Google search links
|
36 |
+
def search_links(query):
    """Query the Google Custom Search JSON API for *query*.

    The API key and search-engine id are read from the ``api_key`` and ``cx``
    environment variables (``KeyError`` if either is missing).

    Returns:
        A two-item list ``[links, error]``. ``links`` is a list of
        single-entry dicts mapping a result title to its URL; ``error`` is an
        empty string on success or a human-readable message on failure. The
        same shape is returned on every path so callers can always index
        ``[0]`` and ``[1]``.
    """
    base_url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'q': query,
        'key': os.environ['api_key'],
        'cx': os.environ['cx'],
    }

    try:
        # A timeout keeps a stalled upstream from hanging the request forever.
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Search request failed: {exc}")
        return [[], "Search service is unavailable"]

    if response.status_code in (429, 403):
        print("API limit reached.")
        # Previously a bare [] was returned here, which dropped the message
        # and broke callers that index the [links, error] pair.
        return [[], "Daily API limit reached"]

    try:
        results = response.json().get('items', [])
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page).
        return [[], "Search service returned an invalid response"]

    # One {title: link} dict per search result.
    links_with_titles = [
        {item.get('title', ''): item.get('link', '')} for item in results
    ]
    return [links_with_titles, ""]
|
57 |
+
|
58 |
+
# Function to fetch reviews
|
59 |
+
def _require_field(html, field):
    """Return the value of the first ``"<field>": "<value>"`` pair in *html*."""
    match = re.search(rf'"{field}":\s*"([^"]+)"', html, re.IGNORECASE)
    if match is None:
        raise ValueError(f'"{field}" not found in page source')
    return match.group(1)


def get_reviews(movie_url, review_type):
    """Collect review quotes for the title whose page is at *movie_url*.

    The page source is scanned for the ``emsId``, ``titleName``, ``titleType``
    and ``vanity`` fields (plus ``tvSeason`` when the title type is ``"Tv"``).
    The title's reviews page on rottentomatoes.com is then fetched to find the
    poster image, and the ``napi`` reviews endpoint is paged through until it
    reports no further page or at least 100 quotes have been gathered.

    Side effect: sets the module-level ``show_info`` to
    ``[title, poster_link]``.

    Args:
        movie_url: http(s) URL of the title page.
        review_type: Path segment for the reviews endpoint (callers in this
            file pass ``'user'`` or ``'all'``).

    Returns:
        A list of review quote strings.

    Raises:
        ValueError: if *movie_url* is not an http(s) URL or a required field
            is missing from the page source.
    """
    from urllib.parse import quote, urlsplit

    global show_info

    # movie_url comes from a request parameter; without this check urlopen
    # would also accept file:// and other non-web schemes.
    if urlsplit(movie_url).scheme not in ('http', 'https'):
        raise ValueError('movie_url must be an http(s) URL')

    with urlopen(movie_url) as page:
        html = page.read().decode("utf-8")

    emsId = _require_field(html, 'emsId')
    title = _require_field(html, 'titleName')
    title_type = _require_field(html, 'titleType')
    vanity = _require_field(html, 'vanity')

    is_tv = title_type == "Tv"
    s_no = _require_field(html, 'tvSeason') if is_tv else ""
    season_part = f"{s_no}/" if s_no else ""
    section = "tv" if is_tv else "m"
    poster_page_url = f'https://www.rottentomatoes.com/{section}/{vanity}/{season_part}reviews'

    with urlopen(poster_page_url) as poster_page:
        soup = BeautifulSoup(poster_page, 'html.parser')
    poster_img = soup.find('img', {'data-qa': 'sidebar-poster-img'})
    # Fall back to the site favicon when the page has no poster image.
    poster_link = poster_img.get('src') if poster_img else '/data/favicon.ico'

    endpoint_kind = "season" if is_tv else "movie"
    reviews = []
    after = ""
    hasNextPage = True
    while hasNextPage and len(reviews) < 100:
        # Percent-encode the cursor so characters such as '+', '/' or '='
        # survive the query string unchanged.
        cursor = quote(after, safe='')
        url = f'https://www.rottentomatoes.com/napi/{endpoint_kind}/{emsId}/reviews/{review_type}?after={cursor}'
        with urlopen(url) as response:
            response_object = json.loads(response.read().decode('utf-8'))

        reviews.extend(review['quote'] for review in response_object['reviews'])

        page_info = response_object['pageInfo']
        hasNextPage = page_info['hasNextPage']
        if hasNextPage:
            after = page_info['endCursor']

    show_info = [title, poster_link]
    return reviews
|
96 |
+
|
97 |
+
|
98 |
+
# Function to clean text
|
99 |
+
def clean(input_string):
    """Collapse each run of whitespace to one space and trim both ends."""
    return re.sub(r'\s+', ' ', input_string).strip()
|
102 |
+
|
103 |
+
def analyze_sentiment(reviews):
    """Classify each review with the pre-trained sentiment model.

    Waits for the background loading thread to finish, normalises whitespace
    in every review, vectorises the texts and returns whatever
    ``classifier.predict`` produces for them.
    """
    # Block until the classifier and vectorizer globals have been loaded.
    loading_thread.join()

    normalised = [clean(text) for text in reviews]
    features = vectorizer.transform(normalised)
    return classifier.predict(features)
|
113 |
+
|
114 |
+
# Function to get polarity scores using NLTK
|
115 |
+
def get_polarity_scores(reviews):
    """Return the NLTK VADER polarity scores for each review.

    Each review is whitespace-normalised with ``clean`` before scoring. The
    original computed the cleaned texts but then scored the raw ones, leaving
    the cleaned list unused.

    Returns:
        A list with one ``polarity_scores`` result per review, in input
        order; ``plot_bar`` reads the ``neg``, ``neu``, ``pos`` and
        ``compound`` keys of each entry.
    """
    sia = SentimentIntensityAnalyzer()
    return [sia.polarity_scores(clean(phrase)) for phrase in reviews]
|
123 |
+
|
124 |
+
# Register the bundled font file with Matplotlib; the plotting functions
# below request it by the family name 'HP Simplified'.
custom_font_path = 'data/HPSimplified.ttf'
font_manager.fontManager.addfont(custom_font_path)
|
127 |
+
|
128 |
+
# Function to plot a bar chart
|
129 |
+
def plot_bar(data, title):
    """Render a stacked bar chart of polarity scores as a base64 PNG string.

    Args:
        data: Sequence of dicts with ``neg``, ``neu``, ``pos`` and
            ``compound`` keys (one per review).
        title: Chart title.

    Returns:
        The PNG image, base64-encoded and decoded to a ``str``.
    """
    neg_values = [entry['neg'] for entry in data]
    neu_values = [entry['neu'] for entry in data]
    pos_values = [entry['pos'] for entry in data]
    compound_values = [entry['compound'] for entry in data]

    positions = range(len(data))
    width = 0.7
    label_font = {'fontname': 'HP Simplified', 'fontsize': 30, 'weight': 'bold', 'color': '#fff'}
    title_font = {'fontname': 'HP Simplified', 'fontsize': 40, 'weight': 'bold', 'color': '#fff'}

    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        # Stack negative (red), neutral (gray) and positive (green) per review.
        ax.bar(positions, neg_values, width=width, color='#961e1e', label='Negative')
        ax.bar(positions, neu_values, width=width, bottom=neg_values, color='#999', label='Neutral')
        ax.bar(positions, pos_values, width=width,
               bottom=np.array(neg_values) + np.array(neu_values),
               color='#015501', label='Positive')

        # Use the Axes/Figure objects directly instead of pyplot's implicit
        # "current figure", so the calls always target this figure.
        ax.set_xlabel('Reviews', fontdict=label_font, labelpad=20)
        ax.set_ylabel('Polarity Scores', fontdict=label_font, labelpad=20)
        ax.set_title(title, fontdict=title_font, pad=20)
        for tick_label in ax.get_yticklabels():
            tick_label.set_fontname('HP Simplified')
            tick_label.set_fontsize(24)
            tick_label.set_color("#fff")

        # One bar per review, so individual x tick labels carry no meaning.
        ax.set_xticks([])
        ax.set_facecolor('#000')

        ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1),
                  prop={'family': 'HP Simplified', 'size': 32})

        # Avoid NaN (and a NumPy warning) when there is nothing to average.
        average = float(np.mean(compound_values)) if compound_values else 0.0
        text = 'Average Compound Score'
        avg_text = f'\n{average:.2f}'
        ax.text(0.91, 0.66, text, fontsize=20, fontname='HP Simplified', weight="bold",
                color="white", ha='center', va='center', transform=fig.transFigure)
        ax.text(0.91, 0.64, avg_text, fontsize=36, fontname='HP Simplified', weight="bold",
                color="white", ha='center', va='center', transform=fig.transFigure)

        fig.set_facecolor('#1e1e1e')
        fig.tight_layout()

        output = BytesIO()
        FigureCanvas(fig).print_png(output)
    finally:
        # Close this specific figure, even if rendering failed, so figures
        # do not accumulate in pyplot's registry.
        plt.close(fig)
    return base64.b64encode(output.getvalue()).decode('utf-8')
|
180 |
+
|
181 |
+
def plot_pie(data, title):
    """Render a pie chart of label frequencies as a base64 PNG string.

    Args:
        data: Sequence of class labels; the colour scheme recognises
            ``"Negative"`` and ``"Positive"`` (presumably the classifier's
            outputs) and draws any other label in gray.
        title: Chart title.

    Returns:
        The PNG image, base64-encoded and decoded to a ``str``.
    """
    unique, counts = np.unique(data, return_counts=True)

    # First wedge stays in place, every other wedge is pulled out slightly.
    # Sized from the labels so it always matches the wedge count.
    explode = tuple(0 if index == 0 else 0.1 for index in range(len(unique)))

    # Colour by label rather than by position so an unexpected label cannot
    # leave the colour list empty.
    palette = {"Negative": '#961e1e', "Positive": '#024d0f'}
    colors = [palette.get(str(label), '#999') for label in unique]

    fig, ax = plt.subplots()
    try:
        fig.patch.set_facecolor('#1e1e1e')
        ax.pie(counts, explode=explode, labels=unique, colors=colors,
               autopct='%1.1f%%', shadow=True, startangle=90)

        ax.set_title(title, fontdict={'family': 'HP Simplified', 'color': 'White',
                                      'weight': 'bold', 'size': 28})
        # Restyle both the wedge labels and the percentage texts.
        for text in ax.texts:
            text.set_fontfamily('HP Simplified')
            text.set_fontsize(20)
            text.set_fontweight('bold')
            text.set_color('White')

        # Equal aspect ratio ensures that the pie is drawn as a circle.
        ax.axis('equal')
        fig.tight_layout()

        output = BytesIO()
        FigureCanvas(fig).print_png(output)
    finally:
        # Close this specific figure, even if rendering failed.
        plt.close(fig)
    return base64.b64encode(output.getvalue()).decode('utf-8')
|
217 |
+
|
218 |
+
@app.route('/data/bg.png')
def bg():
    """Serve the background image file."""
    return send_file('data/bg.png')
|
221 |
+
|
222 |
+
@app.route('/data/search-icon.svg')
def search_icon():
    """Serve the search icon SVG file."""
    return send_file('data/search-icon.svg')
|
225 |
+
|
226 |
+
@app.route('/data/favicon.ico')
def favicon():
    """Serve the favicon file (also used as the fallback poster image)."""
    return send_file('data/favicon.ico')
|
229 |
+
|
230 |
+
@app.route('/data/HPSimplified.ttf')
def font():
    """Serve the HP Simplified font file."""
    return send_file('data/HPSimplified.ttf')
|
233 |
+
|
234 |
+
@app.route('/data/search.gif')
def searchload():
    """Serve the search GIF file."""
    return send_file('./data/search.gif')
|
237 |
+
|
238 |
+
@app.route('/')
def home():
    """Render the landing page template."""
    return render_template('index.html')
|
241 |
+
|
242 |
+
@app.route('/search')
def search():
    """Render search results for the ``query`` request parameter.

    Redirects to the home page when no query is given. Otherwise the links
    and any error message from ``search_links`` are passed to
    ``search.html``; the "no links" message is used only when the lookup
    itself reported no error.
    """
    query = request.args.get('query')
    if not query:
        return redirect('/')

    search_data = search_links(query)
    if len(search_data) == 2:
        links, err = search_data
    else:
        # Tolerate a bare list without the [links, error] pair instead of
        # raising IndexError; search_links has used that form to signal an
        # exhausted API quota.
        links, err = [], "Daily API limit reached"

    if not links and not err:
        err = "No links found for your query"
    return render_template('search.html', links=links, Err=err)
|
252 |
+
|
253 |
+
@app.route('/review')
def review():
    """Render sentiment charts for the title given by the ``url`` parameter.

    Redirects to the home page when no URL is supplied. Otherwise user and
    critic reviews are fetched; for each non-empty group a polarity bar chart
    and a prediction pie chart are embedded as base64 ``<img>`` tags, and an
    empty group gets a placeholder message instead. The title and poster are
    read from the ``show_info`` global that ``get_reviews`` sets.
    """
    url = request.args.get('url')
    if not url:
        return redirect('/')

    user = get_reviews(url, 'user')
    critic = get_reviews(url, 'all')

    if len(user) > 0:
        user_scores = get_polarity_scores(user)
        user_labels = analyze_sentiment(user)
        user_bar_png = plot_bar(user_scores, "User Sentiments")
        user_pie_png = plot_pie(user_labels, "User Reviews")
        user_bar_img = f'<img src="data:image/png;base64,{user_bar_png}" alt="User Reviews Bar Plot">'
        user_pie_img = f'<img src="data:image/png;base64,{user_pie_png}" alt="User Reviews Pie Plot">'
    else:
        user_bar_img = "No User Reviews Found"
        user_pie_img = "No User Reviews Found"

    if len(critic) > 0:
        critic_scores = get_polarity_scores(critic)
        critic_labels = analyze_sentiment(critic)
        critic_bar_png = plot_bar(critic_scores, "Critic Sentiments")
        critic_pie_png = plot_pie(critic_labels, "Critic Reviews")
        critic_bar_img = f'<img src="data:image/png;base64,{critic_bar_png}" alt="Critic Reviews Bar Plot">'
        critic_pie_img = f'<img src="data:image/png;base64,{critic_pie_png}" alt="Critic Reviews Pie Plot">'
    else:
        critic_bar_img = "No Critic Reviews Found"
        critic_pie_img = "No Critic Reviews Found"

    return render_template(
        'review.html',
        user_bar=user_bar_img,
        user_pie=user_pie_img,
        critic_bar=critic_bar_img,
        critic_pie=critic_pie_img,
        title=show_info[0],
        url=url,
        poster=show_info[1],
    )
|
288 |
+
|
289 |
+
@app.errorhandler(404)
def not_found_error(error):
    """Render the custom 404 page with a 404 status code."""
    page = render_template('404.html')
    return page, 404
|
292 |
+
|
293 |
+
if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server run
    # arbitrary code, so it must not be on by default for a server bound to
    # every interface. Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=3000, debug=debug_enabled)
|