Krittaprot committed on
Commit
6d3a753
1 Parent(s): 4fdac55
Files changed (1) hide show
  1. app.py +7 -22
app.py CHANGED
@@ -13,8 +13,6 @@ import time
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
- # summarization_task = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
17
-
18
 
19
  def extract_youtube_video_id(url_or_id):
20
  """
@@ -144,7 +142,7 @@ def comments_analyzer(comments_df):
144
  for i in range(0, len(comments_df), batch_size):
145
  batch = comments_df['content'][i:i+batch_size].tolist()
146
  batch_results = sentiment_task(batch)
147
-
148
  # Extracting both sentiment labels and scores
149
  batch_sentiments = [item['label'] for item in batch_results]
150
  batch_scores = [item['score'] for item in batch_results]
@@ -163,32 +161,19 @@ def comments_analyzer(comments_df):
163
  top_comments = filtered_comments.nlargest(top_n, 'score')
164
 
165
  if not top_comments.empty:
166
- return '\n\n'.join(f"{row['content']}" for _, row in top_comments.iterrows())
167
  else:
168
  return f"No {sentiment_type} comments available."
169
 
170
  start_time = time.time()
171
  # Get top positive comments
172
  top_positive_comments = get_top_comments(comments_df, 'positive')
 
173
  # Get top negative comments
174
  top_negative_comments = get_top_comments(comments_df, 'negative')
175
  end_time = time.time()
176
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
177
 
178
- # #Summarize the texts from positive and negative comments
179
- # start_time = time.time()
180
- # if top_positive_comments == "No positive comments available.":
181
- # top_positive_comments_summary = top_positive_comments
182
- # else:
183
- # top_positive_comments_summary = summarization_task(top_positive_comments)[0]['summary_text']
184
-
185
- # if top_negative_comments == "No negative comments available.":
186
- # top_negative_comments_summary = top_negative_comments
187
- # else:
188
- # top_negative_comments_summary = summarization_task(top_negative_comments)[0]['summary_text']
189
- # end_time = time.time()
190
- # print(f"Time taken for summarizing the top n positive/negative comments: {end_time - start_time} seconds")
191
-
192
  data = {}
193
  #Categorize the comments by sentiment and count them
194
  data['total_comments'] = len(comments_df)
@@ -298,14 +283,14 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
298
 
299
  end_time = time.time()
300
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
301
-
302
  # Analyze
303
- analysis_dict, top_positive_comments, top_positive_comments = comments_analyzer(comments_df)
304
 
305
  long_text = analysis_dict['blended_comments']
306
 
307
  start_time = time.time()
308
-
309
  # Generate word cloud
310
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
311
 
@@ -321,7 +306,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
321
  print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
322
 
323
  # Return the generated word cloud image, summary text, and sentiment analysis chart
324
- return word_cloud_img, top_positive_comments_summary, top_negative_comments_summary, sentiment_chart
325
 
326
  ############################################################################################################################################
327
  # Gradio interface
 
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
 
 
16
 
17
  def extract_youtube_video_id(url_or_id):
18
  """
 
142
  for i in range(0, len(comments_df), batch_size):
143
  batch = comments_df['content'][i:i+batch_size].tolist()
144
  batch_results = sentiment_task(batch)
145
+
146
  # Extracting both sentiment labels and scores
147
  batch_sentiments = [item['label'] for item in batch_results]
148
  batch_scores = [item['score'] for item in batch_results]
 
161
  top_comments = filtered_comments.nlargest(top_n, 'score')
162
 
163
  if not top_comments.empty:
164
+ return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
165
  else:
166
  return f"No {sentiment_type} comments available."
167
 
168
  start_time = time.time()
169
  # Get top positive comments
170
  top_positive_comments = get_top_comments(comments_df, 'positive')
171
+
172
  # Get top negative comments
173
  top_negative_comments = get_top_comments(comments_df, 'negative')
174
  end_time = time.time()
175
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  data = {}
178
  #Categorize the comments by sentiment and count them
179
  data['total_comments'] = len(comments_df)
 
283
 
284
  end_time = time.time()
285
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
286
+
287
  # Analyze
288
+ analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
289
 
290
  long_text = analysis_dict['blended_comments']
291
 
292
  start_time = time.time()
293
+
294
  # Generate word cloud
295
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
296
 
 
306
  print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
307
 
308
  # Return the generated word cloud image, summary text, and sentiment analysis chart
309
+ return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
310
 
311
  ############################################################################################################################################
312
  # Gradio interface