Spaces:

Krittaprot
/

YT-comments-analyzer-demo

Sleeping

Krittaprot committed on Dec 13, 2023

Commit

379ee2c

•

1 Parent(s): f6aaf96

Update app.py

Update sentiment analysis processing to be performed in batches; this should speed things up.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,7 +13,6 @@ import time
 sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
-# text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
 def extract_youtube_video_id(url_or_id):
     """
@@ -133,8 +132,19 @@ def comments_analyzer(comments_df):
   if comments_df is None:
     return None
   else:
-    comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
-    comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
     data = {}
     #Categorize the comments by sentiment and count them
@@ -267,9 +277,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
     start_time = time.time()
-    # # Text Summarization
-    # summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
     end_time = time.time()
     print(f"Time taken for summarizing comments: {end_time - start_time} seconds")

 sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
 def extract_youtube_video_id(url_or_id):
     """
   if comments_df is None:
     return None
   else:
+    # comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
+    # comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
+    # Example of batch processing
+    batch_size = 20  # You can adjust this size based on your system's capabilities
+    sentiments = []
+    for i in range(0, len(comments_df), batch_size):
+        batch = comments_df['content'][i:i+batch_size].tolist()
+        batch_sentiments = [item['label'] for item in sentiment_task(batch)]
+        sentiments.extend(batch_sentiments)
+    comments_df['sentiment'] = sentiments
     data = {}
     #Categorize the comments by sentiment and count them
     start_time = time.time()
     end_time = time.time()
     print(f"Time taken for summarizing comments: {end_time - start_time} seconds")