Krittaprot committed on
Commit
f6aaf96
1 Parent(s): cdb32f0

Update app.py

Browse files

Remove summarization functionality to speed up the application.

Files changed (1) hide show
  1. app.py +43 -14
app.py CHANGED
@@ -9,17 +9,17 @@ from PIL import Image
9
  import re
10
  import io
11
  from io import BytesIO
 
 
12
 
13
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
14
- text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
15
 
16
  def extract_youtube_video_id(url_or_id):
17
  """
18
  Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
19
-
20
  Args:
21
  url_or_id (str): A YouTube URL or a video ID.
22
-
23
  Returns:
24
  str: The extracted YouTube video ID.
25
  """
@@ -55,7 +55,7 @@ def comments_collector(video_link, max_comments = 100):
55
  # pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
56
  video_id = extract_youtube_video_id(video_link)
57
  max_comments -= 1
58
-
59
  try:
60
  #load the first 20 comments
61
  comments = Comments(video_id)
@@ -65,7 +65,7 @@ def comments_collector(video_link, max_comments = 100):
65
  while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
66
  comments.getNextComments()
67
  print(f'Found all the {len(comments.comments["result"])} comments.')
68
-
69
  #load all the comments into "comments" variable
70
  comments = comments.comments
71
 
@@ -134,6 +134,7 @@ def comments_analyzer(comments_df):
134
  return None
135
  else:
136
  comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
 
137
 
138
  data = {}
139
  #Categorize the comments by sentiment and count them
@@ -143,11 +144,11 @@ def comments_analyzer(comments_df):
143
  data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
144
 
145
  #blend all the comments
146
- data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
147
  data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
148
 
149
  return data
150
-
151
  def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
152
  # This function generates a word cloud image from a given text and returns it as a PIL image object.
153
  # Args:
@@ -196,7 +197,7 @@ def create_sentiment_analysis_chart(data):
196
  # Finally, the plot is saved to a BytesIO object and converted to a PIL image.
197
  # Returns:
198
  # PIL.Image: The sentiment analysis bar chart as a PIL image object.
199
-
200
  # Convert the data to a DataFrame
201
  df = {}
202
  df['num_positive'] = data['num_positive']
@@ -236,24 +237,52 @@ def create_sentiment_analysis_chart(data):
236
 
237
  def process_youtube_comments(youtube_link, max_comments, stop_words):
238
  # Process the YouTube link and generate the word cloud, summary, and sentiment analysis
239
-
 
 
240
  # Pull comments from the YouTube Video
241
  comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
 
 
 
 
 
 
 
242
  # Analyze
243
  analysis_dict = comments_analyzer(comments_df)
 
 
 
 
244
  long_text = analysis_dict['blended_comments']
245
 
 
 
246
  # Generate word cloud
247
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
248
 
249
- # Text Summarization
250
- summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
251
 
252
  # Create Sentiment Chart
253
  sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
254
 
 
 
 
255
  # Return the generated word cloud image, summary text, and sentiment analysis chart
256
- return word_cloud_img, summarized_text, sentiment_chart
257
 
258
  ############################################################################################################################################
259
  # Gradio interface
@@ -266,7 +295,7 @@ interface = gr.Interface(
266
  ],
267
  outputs=[
268
  gr.Image(label="Word Cloud"),
269
- gr.Textbox(label="Summary of Comments"),
270
  gr.Image(label="Sentiment Analysis Chart")
271
  ],
272
  title="YouTube Comments Analyzer",
@@ -275,4 +304,4 @@ interface = gr.Interface(
275
 
276
  # Run the interface
277
  interface.launch()
278
- ############################################################################################################################################
 
9
  import re
10
  import io
11
  from io import BytesIO
12
+ import time
13
+
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
+ # text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
17
 
18
  def extract_youtube_video_id(url_or_id):
19
  """
20
  Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
 
21
  Args:
22
  url_or_id (str): A YouTube URL or a video ID.
 
23
  Returns:
24
  str: The extracted YouTube video ID.
25
  """
 
55
  # pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
56
  video_id = extract_youtube_video_id(video_link)
57
  max_comments -= 1
58
+
59
  try:
60
  #load the first 20 comments
61
  comments = Comments(video_id)
 
65
  while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
66
  comments.getNextComments()
67
  print(f'Found all the {len(comments.comments["result"])} comments.')
68
+
69
  #load all the comments into "comments" variable
70
  comments = comments.comments
71
 
 
134
  return None
135
  else:
136
  comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
137
+ comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
138
 
139
  data = {}
140
  #Categorize the comments by sentiment and count them
 
144
  data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
145
 
146
  #blend all the comments
147
+ data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
148
  data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
149
 
150
  return data
151
+
152
  def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
153
  # This function generates a word cloud image from a given text and returns it as a PIL image object.
154
  # Args:
 
197
  # Finally, the plot is saved to a BytesIO object and converted to a PIL image.
198
  # Returns:
199
  # PIL.Image: The sentiment analysis bar chart as a PIL image object.
200
+
201
  # Convert the data to a DataFrame
202
  df = {}
203
  df['num_positive'] = data['num_positive']
 
237
 
238
  def process_youtube_comments(youtube_link, max_comments, stop_words):
239
  # Process the YouTube link and generate the word cloud, summary, and sentiment analysis
240
+
241
+ start_time = time.time()
242
+
243
  # Pull comments from the YouTube Video
244
  comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
245
+
246
+ end_time = time.time()
247
+ print(f"Time taken for loading comments: {end_time - start_time} seconds")
248
+
249
+
250
+ start_time = time.time()
251
+
252
  # Analyze
253
  analysis_dict = comments_analyzer(comments_df)
254
+
255
+ end_time = time.time()
256
+ print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
257
+
258
  long_text = analysis_dict['blended_comments']
259
 
260
+ start_time = time.time()
261
+
262
  # Generate word cloud
263
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
264
 
265
+ end_time = time.time()
266
+ print(f"Time taken for generating word clouds: {end_time - start_time} seconds")
267
+
268
+ start_time = time.time()
269
+
270
+ # # Text Summarization
271
+ # summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
272
+
273
+ end_time = time.time()
274
+ print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
275
+
276
+ start_time = time.time()
277
 
278
  # Create Sentiment Chart
279
  sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
280
 
281
+ end_time = time.time()
282
+ print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
283
+
284
  # Return the generated word cloud image, summary text, and sentiment analysis chart
285
+ return word_cloud_img, sentiment_chart
286
 
287
  ############################################################################################################################################
288
  # Gradio interface
 
295
  ],
296
  outputs=[
297
  gr.Image(label="Word Cloud"),
298
+ # gr.Textbox(label="Summary of Comments"),
299
  gr.Image(label="Sentiment Analysis Chart")
300
  ],
301
  title="YouTube Comments Analyzer",
 
304
 
305
  # Run the interface
306
  interface.launch()
307
+ ############################################################################################################################################