Krittaprot committed on
Commit
9d422fc
1 Parent(s): fc73403

Update app.py

Browse files

Update the sorting mechanism to speed up the algorithm

Files changed (1) hide show
  1. app.py +7 -17
app.py CHANGED
@@ -120,10 +120,6 @@ def comments_collector(video_link, max_comments = 100):
120
  print(e)
121
  return None
122
 
123
- def top_comments(comments_df):
124
- #Find top 3
125
- return top_positive_comments, top_negative_comments
126
-
127
  def comments_analyzer(comments_df):
128
  # This function analyzes the sentiment of comments in a given DataFrame.
129
  # It requires a DataFrame of comments, typically generated by the comments_collector function.
@@ -137,6 +133,7 @@ def comments_analyzer(comments_df):
137
  return None
138
  else:
139
 
 
140
  # Example of batch processing with sentiment and confidence
141
  batch_size = 20 # Adjust the size based on your system's capabilities
142
  sentiments = []
@@ -156,22 +153,26 @@ def comments_analyzer(comments_df):
156
  comments_df['sentiment'] = sentiments
157
  comments_df['score'] = scores
158
 
 
 
159
 
160
  def get_top_comments(comments, sentiment_type, top_n=3):
161
  filtered_comments = comments[comments['sentiment'] == sentiment_type]
162
- sorted_comments = filtered_comments.sort_values(by='score', ascending=False)
163
- top_comments = sorted_comments.head(top_n)
164
 
165
  if not top_comments.empty:
166
  return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
167
  else:
168
  return f"No {sentiment_type} comments available."
169
 
 
170
  # Get top positive comments
171
  top_positive_comments = get_top_comments(comments_df, 'positive')
172
 
173
  # Get top negative comments
174
  top_negative_comments = get_top_comments(comments_df, 'negative')
 
 
175
 
176
  data = {}
177
  #Categorize the comments by sentiment and count them
@@ -282,16 +283,10 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
282
 
283
  end_time = time.time()
284
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
285
-
286
-
287
- start_time = time.time()
288
 
289
  # Analyze
290
  analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
291
 
292
- end_time = time.time()
293
- print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
294
-
295
  long_text = analysis_dict['blended_comments']
296
 
297
  start_time = time.time()
@@ -304,11 +299,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
304
 
305
  start_time = time.time()
306
 
307
- end_time = time.time()
308
- print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
309
-
310
- start_time = time.time()
311
-
312
  # Create Sentiment Chart
313
  sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
314
 
 
120
  print(e)
121
  return None
122
 
 
 
 
 
123
  def comments_analyzer(comments_df):
124
  # This function analyzes the sentiment of comments in a given DataFrame.
125
  # It requires a DataFrame of comments, typically generated by the comments_collector function.
 
133
  return None
134
  else:
135
 
136
+ start_time = time.time()
137
  # Example of batch processing with sentiment and confidence
138
  batch_size = 20 # Adjust the size based on your system's capabilities
139
  sentiments = []
 
153
  comments_df['sentiment'] = sentiments
154
  comments_df['score'] = scores
155
 
156
+ end_time = time.time()
157
+ print(f"Time taken for batch sentiment analysis: {end_time - start_time} seconds")
158
 
159
  def get_top_comments(comments, sentiment_type, top_n=3):
160
  filtered_comments = comments[comments['sentiment'] == sentiment_type]
161
+ top_comments = filtered_comments.nlargest(top_n, 'score')
 
162
 
163
  if not top_comments.empty:
164
  return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
165
  else:
166
  return f"No {sentiment_type} comments available."
167
 
168
+ start_time = time.time()
169
  # Get top positive comments
170
  top_positive_comments = get_top_comments(comments_df, 'positive')
171
 
172
  # Get top negative comments
173
  top_negative_comments = get_top_comments(comments_df, 'negative')
174
+ end_time = time.time()
175
+ print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
176
 
177
  data = {}
178
  #Categorize the comments by sentiment and count them
 
283
 
284
  end_time = time.time()
285
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
 
 
 
286
 
287
  # Analyze
288
  analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
289
 
 
 
 
290
  long_text = analysis_dict['blended_comments']
291
 
292
  start_time = time.time()
 
299
 
300
  start_time = time.time()
301
 
 
 
 
 
 
302
  # Create Sentiment Chart
303
  sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
304