Commit
•
fc73403
1
Parent(s):
379ee2c
Update app.py
Browse files
Include top 3 positive and negative comments.
app.py
CHANGED
@@ -120,6 +120,10 @@ def comments_collector(video_link, max_comments = 100):
|
|
120 |
print(e)
|
121 |
return None
|
122 |
|
|
|
|
|
|
|
|
|
123 |
def comments_analyzer(comments_df):
|
124 |
# This function analyzes the sentiment of comments in a given DataFrame.
|
125 |
# It requires a DataFrame of comments, typically generated by the comments_collector function.
|
@@ -132,19 +136,42 @@ def comments_analyzer(comments_df):
|
|
132 |
if comments_df is None:
|
133 |
return None
|
134 |
else:
|
135 |
-
# comments_df['sentiment'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['label'])
|
136 |
-
# comments_df['score'] = comments_df['content'].apply(lambda x: sentiment_task(x)[0]['score'])
|
137 |
|
138 |
-
# Example of batch processing
|
139 |
-
batch_size = 20 #
|
140 |
sentiments = []
|
|
|
141 |
|
142 |
for i in range(0, len(comments_df), batch_size):
|
143 |
batch = comments_df['content'][i:i+batch_size].tolist()
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
145 |
sentiments.extend(batch_sentiments)
|
|
|
146 |
|
147 |
comments_df['sentiment'] = sentiments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
data = {}
|
150 |
#Categorize the comments by sentiment and count them
|
@@ -157,7 +184,7 @@ def comments_analyzer(comments_df):
|
|
157 |
data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
|
158 |
data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
|
159 |
|
160 |
-
|
161 |
|
162 |
def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
|
163 |
# This function generates a word cloud image from a given text and returns it as a PIL image object.
|
@@ -260,7 +287,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
260 |
start_time = time.time()
|
261 |
|
262 |
# Analyze
|
263 |
-
analysis_dict = comments_analyzer(comments_df)
|
264 |
|
265 |
end_time = time.time()
|
266 |
print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
|
@@ -289,7 +316,7 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
289 |
print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
|
290 |
|
291 |
# Return the generated word cloud image, summary text, and sentiment analysis chart
|
292 |
-
return word_cloud_img, sentiment_chart
|
293 |
|
294 |
############################################################################################################################################
|
295 |
# Gradio interface
|
@@ -302,11 +329,12 @@ interface = gr.Interface(
|
|
302 |
],
|
303 |
outputs=[
|
304 |
gr.Image(label="Word Cloud"),
|
305 |
-
|
|
|
306 |
gr.Image(label="Sentiment Analysis Chart")
|
307 |
],
|
308 |
title="YouTube Comments Analyzer",
|
309 |
-
description="Enter a YouTube link to generate a word cloud
|
310 |
)
|
311 |
|
312 |
# Run the interface
|
|
|
120 |
print(e)
|
121 |
return None
|
122 |
|
123 |
+
def top_comments(comments_df):
|
124 |
+
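# TODO: find the top 3 positive and top 3 negative comments (stub: the names returned below are not defined in this function yet)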
|
125 |
+
return top_positive_comments, top_negative_comments
|
126 |
+
|
127 |
def comments_analyzer(comments_df):
|
128 |
# This function analyzes the sentiment of comments in a given DataFrame.
|
129 |
# It requires a DataFrame of comments, typically generated by the comments_collector function.
|
|
|
136 |
if comments_df is None:
|
137 |
return None
|
138 |
else:
|
|
|
|
|
139 |
|
140 |
+
# Example of batch processing with sentiment and confidence
|
141 |
+
batch_size = 20 # Adjust the size based on your system's capabilities
|
142 |
sentiments = []
|
143 |
+
scores = []
|
144 |
|
145 |
for i in range(0, len(comments_df), batch_size):
|
146 |
batch = comments_df['content'][i:i+batch_size].tolist()
|
147 |
+
batch_results = sentiment_task(batch)
|
148 |
+
|
149 |
+
# Extracting both sentiment labels and scores
|
150 |
+
batch_sentiments = [item['label'] for item in batch_results]
|
151 |
+
batch_scores = [item['score'] for item in batch_results]
|
152 |
+
|
153 |
sentiments.extend(batch_sentiments)
|
154 |
+
scores.extend(batch_scores)
|
155 |
|
156 |
comments_df['sentiment'] = sentiments
|
157 |
+
comments_df['score'] = scores
|
158 |
+
|
159 |
+
|
160 |
+
def get_top_comments(comments, sentiment_type, top_n=3):
|
161 |
+
filtered_comments = comments[comments['sentiment'] == sentiment_type]
|
162 |
+
sorted_comments = filtered_comments.sort_values(by='score', ascending=False)
|
163 |
+
top_comments = sorted_comments.head(top_n)
|
164 |
+
|
165 |
+
if not top_comments.empty:
|
166 |
+
return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
|
167 |
+
else:
|
168 |
+
return f"No {sentiment_type} comments available."
|
169 |
+
|
170 |
+
# Get top positive comments
|
171 |
+
top_positive_comments = get_top_comments(comments_df, 'positive')
|
172 |
+
|
173 |
+
# Get top negative comments
|
174 |
+
top_negative_comments = get_top_comments(comments_df, 'negative')
|
175 |
|
176 |
data = {}
|
177 |
#Categorize the comments by sentiment and count them
|
|
|
184 |
data['blended_comments'] = comments_df['content'].str.cat(sep=' ')
|
185 |
data['pct_positive'] = 100 * round(data['num_positive']/data['total_comments'], 2)
|
186 |
|
187 |
+
return data, top_positive_comments, top_negative_comments
|
188 |
|
189 |
def generate_wordcloud(long_text, additional_stopwords=['Timestamps', 'timestamps']):
|
190 |
# This function generates a word cloud image from a given text and returns it as a PIL image object.
|
|
|
287 |
start_time = time.time()
|
288 |
|
289 |
# Analyze
|
290 |
+
analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
|
291 |
|
292 |
end_time = time.time()
|
293 |
print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
|
|
|
316 |
print(f"Time taken for creating sentiment chart: {end_time - start_time} seconds")
|
317 |
|
318 |
# Return the generated word cloud image, top positive and negative comments, and sentiment analysis chart
|
319 |
+
return word_cloud_img, top_positive_comments, top_negative_comments, sentiment_chart
|
320 |
|
321 |
############################################################################################################################################
|
322 |
# Gradio interface
|
|
|
329 |
],
|
330 |
outputs=[
|
331 |
gr.Image(label="Word Cloud"),
|
332 |
+
gr.Textbox(label="Top 3 Positive Comments"),
|
333 |
+
gr.Textbox(label="Top 3 Negative Comments"),
|
334 |
gr.Image(label="Sentiment Analysis Chart")
|
335 |
],
|
336 |
title="YouTube Comments Analyzer",
|
337 |
+
description="Enter a YouTube link to generate a word cloud and sentiment analysis of the comments."
|
338 |
)
|
339 |
|
340 |
# Run the interface
|