Spaces:

Krittaprot
/

YT-comments-analyzer-demo

Sleeping

App Files Community

Krittaprot committed on Dec 13, 2023

Commit

9d422fc

•

1 Parent(s): fc73403

Update app.py

Browse files

Update the sorting mechanism to speed up the algorithm

Files changed (1) hide show

app.py +7 -17

app.py CHANGED Viewed

@@ -120,10 +120,6 @@ def comments_collector(video_link, max_comments = 100):
     print(e)
     return None
-def top_comments(comments_df):
-  #Find top 3
-  return top_positive_comments, top_negative_comments
 def comments_analyzer(comments_df):
   # This function analyzes the sentiment of comments in a given DataFrame.
   # It requires a DataFrame of comments, typically generated by the comments_collector function.
@@ -137,6 +133,7 @@ def comments_analyzer(comments_df):
     return None
   else:
     # Example of batch processing with sentiment and confidence
     batch_size = 20  # Adjust the size based on your system's capabilities
     sentiments = []
@@ -156,22 +153,26 @@ def comments_analyzer(comments_df):
     comments_df['sentiment'] = sentiments
     comments_df['score'] = scores
     def get_top_comments(comments, sentiment_type, top_n=3):
         filtered_comments = comments[comments['sentiment'] == sentiment_type]
-        sorted_comments = filtered_comments.sort_values(by='score', ascending=False)
-        top_comments = sorted_comments.head(top_n)
         if not top_comments.empty:
             return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
         else:
             return f"No {sentiment_type} comments available."
     # Get top positive comments
     top_positive_comments = get_top_comments(comments_df, 'positive')
     # Get top negative comments
     top_negative_comments = get_top_comments(comments_df, 'negative')
     data = {}
     #Categorize the comments by sentiment and count them
@@ -282,16 +283,10 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     end_time = time.time()
     print(f"Time taken for loading comments: {end_time - start_time} seconds")
-    start_time = time.time()
     # Analyze
     analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
-    end_time = time.time()
-    print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
     long_text = analysis_dict['blended_comments']
     start_time = time.time()
@@ -304,11 +299,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     start_time = time.time()
-    end_time = time.time()
-    print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
-    start_time = time.time()
     # Create Sentiment Chart
     sentiment_chart = create_sentiment_analysis_chart(analysis_dict)

     print(e)
     return None
 def comments_analyzer(comments_df):
   # This function analyzes the sentiment of comments in a given DataFrame.
   # It requires a DataFrame of comments, typically generated by the comments_collector function.
     return None
   else:
+    start_time = time.time()
     # Example of batch processing with sentiment and confidence
     batch_size = 20  # Adjust the size based on your system's capabilities
     sentiments = []
     comments_df['sentiment'] = sentiments
     comments_df['score'] = scores
+    end_time = time.time()
+    print(f"Time taken for batch sentiment analysis: {end_time - start_time} seconds")
     def get_top_comments(comments, sentiment_type, top_n=3):
         filtered_comments = comments[comments['sentiment'] == sentiment_type]
+        top_comments = filtered_comments.nlargest(top_n, 'score')
         if not top_comments.empty:
             return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
         else:
             return f"No {sentiment_type} comments available."
+    start_time = time.time()
     # Get top positive comments
     top_positive_comments = get_top_comments(comments_df, 'positive')
     # Get top negative comments
     top_negative_comments = get_top_comments(comments_df, 'negative')
+    end_time = time.time()
+    print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
     data = {}
     #Categorize the comments by sentiment and count them
     end_time = time.time()
     print(f"Time taken for loading comments: {end_time - start_time} seconds")
     # Analyze
     analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
     long_text = analysis_dict['blended_comments']
     start_time = time.time()
     start_time = time.time()
     # Create Sentiment Chart
     sentiment_chart = create_sentiment_analysis_chart(analysis_dict)