Spaces:

Krittaprot
/

YT-comments-analyzer-demo

Sleeping

App Files Files Community

Krittaprot committed on Dec 13, 2023

Commit

f6aaf96

1 Parent(s): cdb32f0

Update app.py

Browse files

Remove summarization functionality to speed up the application.

Files changed (1) hide show

app.py +43 -14

app.py CHANGED Viewed

@@ -9,17 +9,17 @@ from PIL import Image
 import re
 import io
 from io import BytesIO
 sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
-text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
 def extract_youtube_video_id(url_or_id):
     """
     Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
     Args:
     url_or_id (str): A YouTube URL or a video ID.
     Returns:
     str: The extracted YouTube video ID.
     """
@@ -55,7 +55,7 @@ def comments_collector(video_link, max_comments = 100):
   #   pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
   video_id = extract_youtube_video_id(video_link)
   max_comments -= 1
   try:
     #load the first 20 comments
     comments = Comments(video_id)
@@ -65,7 +65,7 @@ def comments_collector(video_link, max_comments = 100):
     while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
       comments.getNextComments()
     print(f'Found all the {len(comments.comments["result"])} comments.')
     #load all the comments into "comments" variable
     comments = comments.comments
@@ -134,6 +134,7 @@ def comments_analyzer(comments_df):
     return None
   else:
     comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
     data = {}
     #Categorize the comments by sentiment and count them
@@ -143,11 +144,11 @@ def comments_analyzer(comments_df):
     data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
     #blend all the comments
-    data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
     data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
     return data
 def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
   # This function generates a word cloud image from a given text and returns it as a PIL image object.
   # Args:
@@ -196,7 +197,7 @@ def create_sentiment_analysis_chart(data):
   # Finally, the plot is saved to a BytesIO object and converted to a PIL image.
   # Returns:
   #   PIL.Image: The sentiment analysis bar chart as a PIL image object.
   # Convert the data to a DataFrame
   df = {}
   df['num_positive'] = data['num_positive']
@@ -236,24 +237,52 @@ def create_sentiment_analysis_chart(data):
 def process_youtube_comments(youtube_link, max_comments, stop_words):
     # Process the YouTube link and generate the word cloud, summary, and sentiment analysis
     # Pull comments from the YouTube Video
     comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
     # Analyze
     analysis_dict = comments_analyzer(comments_df)
     long_text = analysis_dict['blended_comments']
     # Generate word cloud
     word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
-    # Text Summarization
-    summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
     # Create Sentiment Chart
     sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
     # Return the generated word cloud image, summary text, and sentiment analysis chart
-    return word_cloud_img, summarized_text, sentiment_chart
 ############################################################################################################################################
 # Gradio interface
@@ -266,7 +295,7 @@ interface = gr.Interface(
     ],
     outputs=[
         gr.Image(label="Word Cloud"),
-        gr.Textbox(label="Summary of Comments"),
         gr.Image(label="Sentiment Analysis Chart")
     ],
     title="YouTube Comments Analyzer",
@@ -275,4 +304,4 @@ interface = gr.Interface(
 # Run the interface
 interface.launch()
-############################################################################################################################################

 import re
 import io
 from io import BytesIO
+import time
 sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
+# text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
 def extract_youtube_video_id(url_or_id):
     """
     Extracts the YouTube video ID from a given URL or returns the ID if a direct ID is provided.
     Args:
     url_or_id (str): A YouTube URL or a video ID.
     Returns:
     str: The extracted YouTube video ID.
     """
   #   pandas.DataFrame: A DataFrame containing the comments, or None in case of an exception.
   video_id = extract_youtube_video_id(video_link)
   max_comments -= 1
   try:
     #load the first 20 comments
     comments = Comments(video_id)
     while comments.hasMoreComments and (len(comments.comments["result"]) <= max_comments):
       comments.getNextComments()
     print(f'Found all the {len(comments.comments["result"])} comments.')
     #load all the comments into "comments" variable
     comments = comments.comments
     return None
   else:
     comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
+    comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
     data = {}
     #Categorize the comments by sentiment and count them
     data['num_negative'] = comments_df['sentiment'].value_counts().get('negative', 0)
     #blend all the comments
+    data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
     data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
     return data
 def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
   # This function generates a word cloud image from a given text and returns it as a PIL image object.
   # Args:
   # Finally, the plot is saved to a BytesIO object and converted to a PIL image.
   # Returns:
   #   PIL.Image: The sentiment analysis bar chart as a PIL image object.
   # Convert the data to a DataFrame
   df = {}
   df['num_positive'] = data['num_positive']
 def process_youtube_comments(youtube_link, max_comments, stop_words):
     # Process the YouTube link and generate the word cloud, summary, and sentiment analysis
+    start_time = time.time()
     # Pull comments from the YouTube Video
     comments_df = comments_collector(video_link=youtube_link, max_comments=max_comments)
+    end_time = time.time()
+    print(f"Time taken for loading comments: {end_time - start_time} seconds")
+    start_time = time.time()
     # Analyze
     analysis_dict = comments_analyzer(comments_df)
+    end_time = time.time()
+    print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
     long_text = analysis_dict['blended_comments']
+    start_time = time.time()
     # Generate word cloud
     word_cloud_img = generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps'])
+    end_time = time.time()
+    print(f"Time taken for generating word clouds: {end_time - start_time} seconds")
+    start_time = time.time()
+    # # Text Summarization
+    # summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
+    end_time = time.time()
+    print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
+    start_time = time.time()
     # Create Sentiment Chart
     sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
+    end_time = time.time()
+    print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
     # Return the generated word cloud image, summary text, and sentiment analysis chart
+    return word_cloud_img, sentiment_chart
 ############################################################################################################################################
 # Gradio interface
     ],
     outputs=[
         gr.Image(label="Word Cloud"),
+        # gr.Textbox(label="Summary of Comments"),
         gr.Image(label="Sentiment Analysis Chart")
     ],
     title="YouTube Comments Analyzer",
 # Run the interface
 interface.launch()
+############################################################################################################################################