Spaces:

Krittaprot
/

YT-comments-analyzer-demo

Sleeping

App Files Files Community

Krittaprot committed on Dec 13, 2023

Commit

fc73403

•

1 Parent(s): 379ee2c

Update app.py

Browse files

Include top 3 positive and negative comments.

Files changed (1) hide show

app.py +38 -10

app.py CHANGED Viewed

@@ -120,6 +120,10 @@ def comments_collector(video_link, max_comments = 100):
     print(e)
     return None
 def comments_analyzer(comments_df):
   # This function analyzes the sentiment of comments in a given DataFrame.
   # It requires a DataFrame of comments, typically generated by the comments_collector function.
@@ -132,19 +136,42 @@ def comments_analyzer(comments_df):
   if comments_df is None:
     return None
   else:
-    # comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
-    # comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
-    # Example of batch processing
-    batch_size = 20  # You can adjust this size based on your system's capabilities
     sentiments = []
     for i in range(0, len(comments_df), batch_size):
         batch = comments_df['content'][i:i+batch_size].tolist()
-        batch_sentiments = [item['label'] for item in sentiment_task(batch)]
         sentiments.extend(batch_sentiments)
     comments_df['sentiment'] = sentiments
     data = {}
     #Categorize the comments by sentiment and count them
@@ -157,7 +184,7 @@ def comments_analyzer(comments_df):
     data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
     data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
-    return data
 def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
   # This function generates a word cloud image from a given text and returns it as a PIL image object.
@@ -260,7 +287,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     start_time = time.time()
     # Analyze
-    analysis_dict = comments_analyzer(comments_df)
     end_time = time.time()
     print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
@@ -289,7 +316,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
     # Return the generated word cloud image, summary text, and sentiment analysis chart
-    return word_cloud_img, sentiment_chart
 ############################################################################################################################################
 # Gradio interface
@@ -302,11 +329,12 @@ interface = gr.Interface(
     ],
     outputs=[
         gr.Image(label="Word Cloud"),
-        # gr.Textbox(label="Summary of Comments"),
         gr.Image(label="Sentiment Analysis Chart")
     ],
     title="YouTube Comments Analyzer",
-    description="Enter a YouTube link to generate a word cloud, summary, and sentiment analysis of the comments."
 )
 # Run the interface

     print(e)
     return None
+def top_comments(comments_df):
+  #Find top 3
+  return top_positive_comments, top_negative_comments
 def comments_analyzer(comments_df):
   # This function analyzes the sentiment of comments in a given DataFrame.
   # It requires a DataFrame of comments, typically generated by the comments_collector function.
   if comments_df is None:
     return None
   else:
+    # Example of batch processing with sentiment and confidence
+    batch_size = 20  # Adjust the size based on your system's capabilities
     sentiments = []
+    scores = []
     for i in range(0, len(comments_df), batch_size):
         batch = comments_df['content'][i:i+batch_size].tolist()
+        batch_results = sentiment_task(batch)
+        # Extracting both sentiment labels and scores
+        batch_sentiments = [item['label'] for item in batch_results]
+        batch_scores = [item['score'] for item in batch_results]
         sentiments.extend(batch_sentiments)
+        scores.extend(batch_scores)
     comments_df['sentiment'] = sentiments
+    comments_df['score'] = scores
+    def get_top_comments(comments, sentiment_type, top_n=3):
+        filtered_comments = comments[comments['sentiment'] == sentiment_type]
+        sorted_comments = filtered_comments.sort_values(by='score', ascending=False)
+        top_comments = sorted_comments.head(top_n)
+        if not top_comments.empty:
+            return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
+        else:
+            return f"No {sentiment_type} comments available."
+    # Get top positive comments
+    top_positive_comments = get_top_comments(comments_df, 'positive')
+    # Get top negative comments
+    top_negative_comments = get_top_comments(comments_df, 'negative')
     data = {}
     #Categorize the comments by sentiment and count them
     data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
     data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
+  return data, top_positive_comments, top_negative_comments
 def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
   # This function generates a word cloud image from a given text and returns it as a PIL image object.
     start_time = time.time()
     # Analyze
+    analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
     end_time = time.time()
     print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
     print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
     # Return the generated word cloud image, top positive and negative comments, and sentiment analysis chart
+    return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
 ############################################################################################################################################
 # Gradio interface
     ],
     outputs=[
         gr.Image(label="Word Cloud"),
+        gr.Textbox(label="Top 3 Positive Comments"),
+        gr.Textbox(label="Top 3 Negative Comments"),
         gr.Image(label="Sentiment Analysis Chart")
     ],
     title="YouTube Comments Analyzer",
+    description="Enter a YouTube link to generate a word cloud and sentiment analysis of the comments."
 )
 # Run the interface