Krittaprot committed on
Commit
379ee2c
1 Parent(s): f6aaf96

Update app.py

Browse files

Update sentiment analysis to process comments in batches, which should speed things up.

Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -13,7 +13,6 @@ import time
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
- # text_summarization_task = pipeline("summarization", model="facebook/bart-large-cnn")
17
 
18
  def extract_youtube_video_id(url_or_id):
19
  """
@@ -133,8 +132,19 @@ def comments_analyzer(comments_df):
133
  if comments_df is None:
134
  return None
135
  else:
136
- comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
137
- comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  data = {}
140
  #Categorize the comments by sentiment and count them
@@ -267,9 +277,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
267
 
268
  start_time = time.time()
269
 
270
- # # Text Summarization
271
- # summarized_text = text_summarization_task(long_text, min_length=100, max_length=200, truncation=True)[0]['summary_text']
272
-
273
  end_time = time.time()
274
  print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
275
 
 
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
 
16
 
17
  def extract_youtube_video_id(url_or_id):
18
  """
 
132
  if comments_df is None:
133
  return None
134
  else:
135
+ # comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
136
+ # comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
137
+
138
+ # Example of batch processing
139
+ batch_size = 20 # You can adjust this size based on your system's capabilities
140
+ sentiments = []
141
+
142
+ for i in range(0, len(comments_df), batch_size):
143
+ batch = comments_df['content'][i:i+batch_size].tolist()
144
+ batch_sentiments = [item['label'] for item in sentiment_task(batch)]
145
+ sentiments.extend(batch_sentiments)
146
+
147
+ comments_df['sentiment'] = sentiments
148
 
149
  data = {}
150
  #Categorize the comments by sentiment and count them
 
277
 
278
  start_time = time.time()
279
 
 
 
 
280
  end_time = time.time()
281
  print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
282