Krittaprot
committed on
Commit
•
f6aaf96
1
Parent(s):
cdb32f0
Update app.py
Browse files
Remove summarization functionality to speed up the application.
app.py
CHANGED
@@ -9,17 +9,17 @@ from PIL import Image
|
|
9 |
import re
|
10 |
import io
|
11 |
from io import BytesIO
|
|
|
|
|
12 |
|
13 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
14 |
-
text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
|
15 |
|
16 |
def extract_youtube_video_id(url_or_id):
|
17 |
"""
|
18 |
Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
|
19 |
-
|
20 |
Args:
|
21 |
url_or_id (str): A YouTube URL or a video ID.
|
22 |
-
|
23 |
Returns:
|
24 |
str: The extracted YouTube video ID.
|
25 |
"""
|
@@ -55,7 +55,7 @@ def comments_collector(video_link, max_comments = 100):
|
|
55 |
# pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
|
56 |
video_id = extract_youtube_video_id(video_link)
|
57 |
max_comments -= 1
|
58 |
-
|
59 |
try:
|
60 |
#load the first 20 comments
|
61 |
comments = Comments(video_id)
|
@@ -65,7 +65,7 @@ def comments_collector(video_link, max_comments = 100):
|
|
65 |
while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
|
66 |
comments.getNextComments()
|
67 |
print(f'Found all the {len(comments.comments["result"])} comments.')
|
68 |
-
|
69 |
#load all the comments into "comments" variable
|
70 |
comments = comments.comments
|
71 |
|
@@ -134,6 +134,7 @@ def comments_analyzer(comments_df):
|
|
134 |
return None
|
135 |
else:
|
136 |
comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
|
|
|
137 |
|
138 |
data = {}
|
139 |
#Categorize the comments by sentiment and count them
|
@@ -143,11 +144,11 @@ def comments_analyzer(comments_df):
|
|
143 |
data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
|
144 |
|
145 |
#blend all the comments
|
146 |
-
data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
|
147 |
data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
|
148 |
|
149 |
return data
|
150 |
-
|
151 |
def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
|
152 |
# This function generates a word cloud image from a given text and returns it as a PIL image object.
|
153 |
# Args:
|
@@ -196,7 +197,7 @@ def create_sentiment_analysis_chart(data):
|
|
196 |
# Finally, the plot is saved to a BytesIO object and converted to a PIL image.
|
197 |
# Returns:
|
198 |
# PIL.Image: The sentiment analysis bar chart as a PIL image object.
|
199 |
-
|
200 |
# Convert the data to a DataFrame
|
201 |
df = {}
|
202 |
df['num_positive'] = data['num_positive']
|
@@ -236,24 +237,52 @@ def create_sentiment_analysis_chart(data):
|
|
236 |
|
237 |
def process_youtube_comments(youtube_link, max_comments, stop_words):
|
238 |
# Process the YouTube link and generate the word cloud, summary, and sentiment analysis
|
239 |
-
|
|
|
|
|
240 |
# Pull comments from the YouTube Video
|
241 |
comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
# Analyze
|
243 |
analysis_dict = comments_analyzer(comments_df)
|
|
|
|
|
|
|
|
|
244 |
long_text = analysis_dict['blended_comments']
|
245 |
|
|
|
|
|
246 |
# Generate word cloud
|
247 |
word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
|
248 |
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
# Create Sentiment Chart
|
253 |
sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
|
254 |
|
|
|
|
|
|
|
255 |
# Return the generated word cloud image, summary text, and sentiment analysis chart
|
256 |
-
return word_cloud_img,
|
257 |
|
258 |
############################################################################################################################################
|
259 |
# Gradio interface
|
@@ -266,7 +295,7 @@ interface = gr.Interface(
|
|
266 |
],
|
267 |
outputs=[
|
268 |
gr.Image(label="Word Cloud"),
|
269 |
-
gr.Textbox(label="Summary of Comments"),
|
270 |
gr.Image(label="Sentiment Analysis Chart")
|
271 |
],
|
272 |
title="YouTube Comments Analyzer",
|
@@ -275,4 +304,4 @@ interface = gr.Interface(
|
|
275 |
|
276 |
# Run the interface
|
277 |
interface.launch()
|
278 |
-
############################################################################################################################################
|
|
|
9 |
import re
|
10 |
import io
|
11 |
from io import BytesIO
|
12 |
+
import time
|
13 |
+
|
14 |
|
15 |
sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
16 |
+
# text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
|
17 |
|
18 |
def extract_youtube_video_id(url_or_id):
|
19 |
"""
|
20 |
Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
|
|
|
21 |
Args:
|
22 |
url_or_id (str): A YouTube URL or a video ID.
|
|
|
23 |
Returns:
|
24 |
str: The extracted YouTube video ID.
|
25 |
"""
|
|
|
55 |
# pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
|
56 |
video_id = extract_youtube_video_id(video_link)
|
57 |
max_comments -= 1
|
58 |
+
|
59 |
try:
|
60 |
#load the first 20 comments
|
61 |
comments = Comments(video_id)
|
|
|
65 |
while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
|
66 |
comments.getNextComments()
|
67 |
print(f'Found all the {len(comments.comments["result"])} comments.')
|
68 |
+
|
69 |
#load all the comments into "comments" variable
|
70 |
comments = comments.comments
|
71 |
|
|
|
134 |
return None
|
135 |
else:
|
136 |
comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
|
137 |
+
comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
|
138 |
|
139 |
data = {}
|
140 |
#Categorize the comments by sentiment and count them
|
|
|
144 |
data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
|
145 |
|
146 |
#blend all the comments
|
147 |
+
data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
|
148 |
data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
|
149 |
|
150 |
return data
|
151 |
+
|
152 |
def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
|
153 |
# This function generates a word cloud image from a given text and returns it as a PIL image object.
|
154 |
# Args:
|
|
|
197 |
# Finally, the plot is saved to a BytesIO object and converted to a PIL image.
|
198 |
# Returns:
|
199 |
# PIL.Image: The sentiment analysis bar chart as a PIL image object.
|
200 |
+
|
201 |
# Convert the data to a DataFrame
|
202 |
df = {}
|
203 |
df['num_positive'] = data['num_positive']
|
|
|
237 |
|
238 |
def process_youtube_comments(youtube_link, max_comments, stop_words):
|
239 |
# Process the YouTube link and generate the word cloud and sentiment analysis (summarization is disabled)
|
240 |
+
|
241 |
+
start_time = time.time()
|
242 |
+
|
243 |
# Pull comments from the YouTube Video
|
244 |
comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
|
245 |
+
|
246 |
+
end_time = time.time()
|
247 |
+
print(f"Time taken for loading comments: {end_time - start_time} seconds")
|
248 |
+
|
249 |
+
|
250 |
+
start_time = time.time()
|
251 |
+
|
252 |
# Analyze
|
253 |
analysis_dict = comments_analyzer(comments_df)
|
254 |
+
|
255 |
+
end_time = time.time()
|
256 |
+
print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
|
257 |
+
|
258 |
long_text = analysis_dict['blended_comments']
|
259 |
|
260 |
+
start_time = time.time()
|
261 |
+
|
262 |
# Generate word cloud
|
263 |
word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
|
264 |
|
265 |
+
end_time = time.time()
|
266 |
+
print(f"Time taken for generating word clouds: {end_time - start_time} seconds")
|
267 |
+
|
268 |
+
start_time = time.time()
|
269 |
+
|
270 |
+
# # Text Summarization
|
271 |
+
# summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
|
272 |
+
|
273 |
+
end_time = time.time()
|
274 |
+
print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
|
275 |
+
|
276 |
+
start_time = time.time()
|
277 |
|
278 |
# Create Sentiment Chart
|
279 |
sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
|
280 |
|
281 |
+
end_time = time.time()
|
282 |
+
print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
|
283 |
+
|
284 |
# Return the generated word cloud image and sentiment analysis chart
|
285 |
+
return word_cloud_img, sentiment_chart
|
286 |
|
287 |
############################################################################################################################################
|
288 |
# Gradio interface
|
|
|
295 |
],
|
296 |
outputs=[
|
297 |
gr.Image(label="Word Cloud"),
|
298 |
+
# gr.Textbox(label="Summary of Comments"),
|
299 |
gr.Image(label="Sentiment Analysis Chart")
|
300 |
],
|
301 |
title="YouTube Comments Analyzer",
|
|
|
304 |
|
305 |
# Run the interface
|
306 |
interface.launch()
|
307 |
+
############################################################################################################################################
|