Krittaprot committed on
Commit
4fdac55
•
1 Parent(s): 84cdcf9

Remove Summarization

Browse files

Remove summarization function as it takes too long to load.
The Top 5 Positive and Negative Comments remain.

Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -13,7 +13,7 @@ import time
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
- summarization_task = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
17
 
18
 
19
  def extract_youtube_video_id(url_or_id):
@@ -175,19 +175,19 @@ def comments_analyzer(comments_df):
175
  end_time = time.time()
176
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
177
 
178
- #Summarize the texts from positive and negative comments
179
- start_time = time.time()
180
- if top_positive_comments == "No positive comments available.":
181
- top_positive_comments_summary = top_positive_comments
182
- else:
183
- top_positive_comments_summary = summarization_task(top_positive_comments)[0]['summary_text']
184
 
185
- if top_negative_comments == "No negative comments available.":
186
- top_negative_comments_summary = top_negative_comments
187
- else:
188
- top_negative_comments_summary = summarization_task(top_negative_comments)[0]['summary_text']
189
- end_time = time.time()
190
- print(f"Time taken for summarizing the top n positive/negative comments: {end_time - start_time} seconds")
191
 
192
  data = {}
193
  #Categorize the comments by sentiment and count them
@@ -200,7 +200,7 @@ def comments_analyzer(comments_df):
200
  data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
201
  data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
202
 
203
- return data, top_positive_comments_summary, top_negative_comments_summary
204
 
205
  def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
206
  # This function generates a word cloud image from a given text and returns it as a PIL image object.
@@ -300,7 +300,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
300
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
301
 
302
  # Analyze
303
- analysis_dict, top_positive_comments_summary, top_negative_comments_summary = comments_analyzer(comments_df)
304
 
305
  long_text = analysis_dict['blended_comments']
306
 
@@ -334,8 +334,8 @@ interface = gr.Interface(
334
  ],
335
  outputs=[
336
  gr.Image(label="Word Cloud ☁️"),
337
- gr.Textbox(label="Summary of the Top 5 Positive Comments 👍🏻"),
338
- gr.Textbox(label="Summary of the Top 5 Negative Comments 👎🏻"),
339
  gr.Image(label="Sentiment Analysis Chart 📊")
340
  ],
341
  title="YouTube Comments Analyzer 📈",
 
13
 
14
 
15
  sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
16
+ # summarization_task = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
17
 
18
 
19
  def extract_youtube_video_id(url_or_id):
 
175
  end_time = time.time()
176
  print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
177
 
178
+ # #Summarize the texts from positive and negative comments
179
+ # start_time = time.time()
180
+ # if top_positive_comments == "No positive comments available.":
181
+ # top_positive_comments_summary = top_positive_comments
182
+ # else:
183
+ # top_positive_comments_summary = summarization_task(top_positive_comments)[0]['summary_text']
184
 
185
+ # if top_negative_comments == "No negative comments available.":
186
+ # top_negative_comments_summary = top_negative_comments
187
+ # else:
188
+ # top_negative_comments_summary = summarization_task(top_negative_comments)[0]['summary_text']
189
+ # end_time = time.time()
190
+ # print(f"Time taken for summarizing the top n positive/negative comments: {end_time - start_time} seconds")
191
 
192
  data = {}
193
  #Categorize the comments by sentiment and count them
 
200
  data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
201
  data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
202
 
203
+ return data, top_positive_comments, top_negative_comments
204
 
205
  def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
206
  # This function generates a word cloud image from a given text and returns it as a PIL image object.
 
300
  print(f"Time taken for loading comments: {end_time - start_time} seconds")
301
 
302
  # Analyze
303
+ analysis_dict, top_positive_comments, top_positive_comments = comments_analyzer(comments_df)
304
 
305
  long_text = analysis_dict['blended_comments']
306
 
 
334
  ],
335
  outputs=[
336
  gr.Image(label="Word Cloud ☁️"),
337
+ gr.Textbox(label="Top 5 Positive Comments 👍🏻"),
338
+ gr.Textbox(label="Top 5 Negative Comments 👎🏻"),
339
  gr.Image(label="Sentiment Analysis Chart 📊")
340
  ],
341
  title="YouTube Comments Analyzer πŸ“ˆ",