Krittaprot committed on
Commit fc73403
1 Parent(s): 379ee2c

Update app.py


Include top 3 positive and negative comments.

Files changed (1)
  1. app.py +38 -10
app.py CHANGED
@@ -120,6 +120,10 @@ def comments_collector(video_link, max_comments = 100):
         print(e)
         return None
 
+def top_comments(comments_df):
+    #Find top 3
+    return top_positive_comments, top_negative_comments
+
 def comments_analyzer(comments_df):
     # This function analyzes the sentiment of comments in a given DataFrame.
     # It requires a DataFrame of comments, typically generated by the comments_collector function.
@@ -132,19 +136,42 @@ def comments_analyzer(comments_df):
     if comments_df is None:
         return None
     else:
-        # comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
-        # comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
 
-        # Example of batch processing
-        batch_size = 20 # You can adjust this size based on your system's capabilities
+        # Example of batch processing with sentiment and confidence
+        batch_size = 20 # Adjust the size based on your system's capabilities
         sentiments = []
+        scores = []
 
         for i in range(0, len(comments_df), batch_size):
             batch = comments_df['content'][i:i+batch_size].tolist()
-            batch_sentiments = [item['label'] for item in sentiment_task(batch)]
+            batch_results = sentiment_task(batch)
+
+            # Extracting both sentiment labels and scores
+            batch_sentiments = [item['label'] for item in batch_results]
+            batch_scores = [item['score'] for item in batch_results]
+
             sentiments.extend(batch_sentiments)
+            scores.extend(batch_scores)
 
         comments_df['sentiment'] = sentiments
+        comments_df['score'] = scores
+
+
+        def get_top_comments(comments, sentiment_type, top_n=3):
+            filtered_comments = comments[comments['sentiment'] == sentiment_type]
+            sorted_comments = filtered_comments.sort_values(by='score', ascending=False)
+            top_comments = sorted_comments.head(top_n)
+
+            if not top_comments.empty:
+                return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
+            else:
+                return f"No {sentiment_type} comments available."
+
+        # Get top positive comments
+        top_positive_comments = get_top_comments(comments_df, 'positive')
+
+        # Get top negative comments
+        top_negative_comments = get_top_comments(comments_df, 'negative')
 
         data = {}
         #Categorize the comments by sentiment and count them
@@ -157,7 +184,7 @@ def comments_analyzer(comments_df):
         data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
         data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
 
-        return data
+        return data, top_positive_comments, top_negative_comments
 
 def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
     # This function generates a word cloud image from a given text and returns it as a PIL image object.
@@ -260,7 +287,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     start_time = time.time()
 
     # Analyze
-    analysis_dict = comments_analyzer(comments_df)
+    analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
 
     end_time = time.time()
     print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
@@ -289,7 +316,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
 
     # Return the generated word cloud image, summary text, and sentiment analysis chart
-    return word_cloud_img, sentiment_chart
+    return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
 
 ############################################################################################################################################
 # Gradio interface
@@ -302,11 +329,12 @@ interface = gr.Interface(
     ],
     outputs=[
         gr.Image(label="Word Cloud"),
-        # gr.Textbox(label="Summary of Comments"),
+        gr.Textbox(label="Top 3 Positive Comments"),
+        gr.Textbox(label="Top 3 Negative Comments"),
         gr.Image(label="Sentiment Analysis Chart")
     ],
     title="YouTube Comments Analyzer",
-    description="Enter a YouTube link to generate a word cloud, summary, and sentiment analysis of the comments."
+    description="Enter a YouTube link to generate a word cloud and sentiment analysis of the comments."
 )
 
 # Run the interface
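
A note on the batching pattern above: the change assumes that calling sentiment_task on a list of strings returns one {'label': ..., 'score': ...} dict per input, which is how a Hugging Face transformers text-classification pipeline behaves. A minimal, self-contained sketch of that pattern follows; the model name is an assumption for illustration (one whose lowercase 'positive'/'negative' labels would match the strings filtered on in get_top_comments), and app.py defines its own sentiment_task outside this diff.

# Sketch only: batched label/score extraction with a transformers sentiment pipeline.
# Assumption: sentiment_task in app.py behaves like this pipeline; the model below is
# illustrative, not necessarily the one the Space actually loads.
from transformers import pipeline

sentiment_task = pipeline("sentiment-analysis",
                          model="cardiffnlp/twitter-roberta-base-sentiment-latest")

comments = ["Loved this video!", "The audio was terrible.", "Great editing as always."]

batch_size = 2
sentiments, scores = [], []
for i in range(0, len(comments), batch_size):
    batch = comments[i:i + batch_size]
    results = sentiment_task(batch)              # one dict per input string
    sentiments.extend(item['label'] for item in results)
    scores.extend(item['score'] for item in results)

print(list(zip(comments, sentiments, scores)))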
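
The working selection logic lives in the nested get_top_comments helper (the module-level top_comments stub added in the first hunk still references names that only exist inside comments_analyzer). It is plain pandas: filter rows by sentiment label, sort by model confidence, keep the top three, and join content with author. A self-contained sketch on toy rows; the data below is invented purely for illustration, since real rows come from comments_collector.

import pandas as pd

# Toy rows for illustration only; the real DataFrame is built by comments_collector.
comments_df = pd.DataFrame({
    'author':    ['@alice', '@bob', '@carol', '@dave'],
    'content':   ['Great video', 'Not a fan of this one', 'Loved the pacing', 'Way too long'],
    'sentiment': ['positive', 'negative', 'positive', 'negative'],
    'score':     [0.98, 0.91, 0.95, 0.77],
})

def get_top_comments(comments, sentiment_type, top_n=3):
    # Filter by label, rank by confidence score, keep the top_n rows.
    top = (comments[comments['sentiment'] == sentiment_type]
           .sort_values(by='score', ascending=False)
           .head(top_n))
    if top.empty:
        return f"No {sentiment_type} comments available."
    return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top.iterrows())

print(get_top_comments(comments_df, 'positive'))
print(get_top_comments(comments_df, 'negative'))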
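
Finally, Gradio maps the returned tuple to the outputs list by position, so the new (word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart) order must match the (Image, Textbox, Textbox, Image) order in the interface. A stripped-down sketch of that wiring; the stub function below is hypothetical and stands in for the real process_youtube_comments.

import gradio as gr

def fake_process(youtube_link):
    # Hypothetical stand-in for process_youtube_comments, illustration only.
    # The tuple order must match the outputs list below.
    return None, "top positive comments...", "top negative comments...", None

interface = gr.Interface(
    fn=fake_process,
    inputs=gr.Textbox(label="YouTube Link"),
    outputs=[
        gr.Image(label="Word Cloud"),
        gr.Textbox(label="Top 3 Positive Comments"),
        gr.Textbox(label="Top 3 Negative Comments"),
        gr.Image(label="Sentiment Analysis Chart"),
    ],
    title="YouTube Comments Analyzer",
    description="Enter a YouTube link to generate a word cloud and sentiment analysis of the comments.",
)
# interface.launch()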