Krittaprot committed on
Commit
6d3a753
1 Parent(s): 4fdac55
Files changed (1) hide show
  1. app.py +7 -22
app.py CHANGED
@@ -13,8 +13,6 @@ import time
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
- # summarization_task = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
17
-
18
 
19
  def extract_youtube_video_id(url_or_id):
20
  """
@@ -144,7 +142,7 @@ def comments_analyzer(comments_df):
144
  for i in range(0, len(comments_df), batch_size):
145
  batch = comments_df['content'][i:i+batch_size].tolist()
146
  batch_results = sentiment_task(batch)
147
-
148
  # Extracting both sentiment labels and scores
149
  batch_sentiments = [item['label'] for item in batch_results]
150
  batch_scores = [item['score'] for item in batch_results]
@@ -163,32 +161,19 @@ def comments_analyzer(comments_df):
163
  top_comments = filtered_comments.nlargest(top_n, 'score')
164
 
165
  if not top_comments.empty:
166
- return '\n\n'.join(f"{row['content']}" for _, row in top_comments.iterrows())
167
  else:
168
  return f"No {sentiment_type} comments available."
169
 
170
  start_time = time.time()
171
  # Get top positive comments
172
  top_positive_comments = get_top_comments(comments_df, 'positive')
 
173
  # Get top negative comments
174
  top_negative_comments = get_top_comments(comments_df, 'negative')
175
  end_time = time.time()
176
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
177
 
178
- # #Summarize the texts from positive and negative comments
179
- # start_time = time.time()
180
- # if top_positive_comments == "No positive comments available.":
181
- # top_positive_comments_summary = top_positive_comments
182
- # else:
183
- # top_positive_comments_summary = summarization_task(top_positive_comments)[0]['summary_text']
184
-
185
- # if top_negative_comments == "No negative comments available.":
186
- # top_negative_comments_summary = top_negative_comments
187
- # else:
188
- # top_negative_comments_summary = summarization_task(top_negative_comments)[0]['summary_text']
189
- # end_time = time.time()
190
- # print(f"Time taken for summarizing the top n positive/negative comments: {end_time - start_time} seconds")
191
-
192
  data = {}
193
  #Categorize the comments by sentiment and count them
194
  data['total_comments'] = len(comments_df)
@@ -298,14 +283,14 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
298
 
299
  end_time = time.time()
300
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
301
-
302
  # Analyze
303
- analysis_dict, top_positive_comments, top_positive_comments = comments_analyzer(comments_df)
304
 
305
  long_text = analysis_dict['blended_comments']
306
 
307
  start_time = time.time()
308
-
309
  # Generate word cloud
310
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
311
 
@@ -321,7 +306,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
321
  print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
322
 
323
  # Return the generated word cloud image, summary text, and sentiment analysis chart
324
- return word_cloud_img, top_positive_comments_summary, top_negative_comments_summary, sentiment_chart
325
 
326
  ############################################################################################################################################
327
  # Gradio interface
 
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
 
 
16
 
17
  def extract_youtube_video_id(url_or_id):
18
  """
 
142
  for i in range(0, len(comments_df), batch_size):
143
  batch = comments_df['content'][i:i+batch_size].tolist()
144
  batch_results = sentiment_task(batch)
145
+
146
  # Extracting both sentiment labels and scores
147
  batch_sentiments = [item['label'] for item in batch_results]
148
  batch_scores = [item['score'] for item in batch_results]
 
161
  top_comments = filtered_comments.nlargest(top_n, 'score')
162
 
163
  if not top_comments.empty:
164
+ return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
165
  else:
166
  return f"No {sentiment_type} comments available."
167
 
168
  start_time = time.time()
169
  # Get top positive comments
170
  top_positive_comments = get_top_comments(comments_df, 'positive')
171
+
172
  # Get top negative comments
173
  top_negative_comments = get_top_comments(comments_df, 'negative')
174
  end_time = time.time()
175
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  data = {}
178
  #Categorize the comments by sentiment and count them
179
  data['total_comments'] = len(comments_df)
 
283
 
284
  end_time = time.time()
285
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
286
+
287
  # Analyze
288
+ analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
289
 
290
  long_text = analysis_dict['blended_comments']
291
 
292
  start_time = time.time()
293
+
294
  # Generate word cloud
295
  word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
296
 
 
306
  print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
307
 
308
  # Return the generated word cloud image, summary text, and sentiment analysis chart
309
+ return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
310
 
311
  ############################################################################################################################################
312
  # Gradio interface