Commit
•
9d422fc
1
Parent(s):
fc73403
Update app.py
Browse files
Update the sorting mechanism to speed up the algorithm
app.py
CHANGED
@@ -120,10 +120,6 @@ def comments_collector(video_link, max_comments = 100):
|
|
120 |
print(e)
|
121 |
return None
|
122 |
|
123 |
-
def top_comments(comments_df):
|
124 |
-
#Find top 3
|
125 |
-
return top_positive_comments, top_negative_comments
|
126 |
-
|
127 |
def comments_analyzer(comments_df):
|
128 |
# This function analyzes the sentiment of comments in a given DataFrame.
|
129 |
# It requires a DataFrame of comments, typically generated by the comments_collector function.
|
@@ -137,6 +133,7 @@ def comments_analyzer(comments_df):
|
|
137 |
return None
|
138 |
else:
|
139 |
|
|
|
140 |
# Example of batch processing with sentiment and confidence
|
141 |
batch_size = 20 # Adjust the size based on your system's capabilities
|
142 |
sentiments = []
|
@@ -156,22 +153,26 @@ def comments_analyzer(comments_df):
|
|
156 |
comments_df['sentiment'] = sentiments
|
157 |
comments_df['score'] = scores
|
158 |
|
|
|
|
|
159 |
|
160 |
def get_top_comments(comments, sentiment_type, top_n=3):
|
161 |
filtered_comments = comments[comments['sentiment'] == sentiment_type]
|
162 |
-
|
163 |
-
top_comments = sorted_comments.head(top_n)
|
164 |
|
165 |
if not top_comments.empty:
|
166 |
return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
|
167 |
else:
|
168 |
return f"No {sentiment_type} comments available."
|
169 |
|
|
|
170 |
# Get top positive comments
|
171 |
top_positive_comments = get_top_comments(comments_df, 'positive')
|
172 |
|
173 |
# Get top negative comments
|
174 |
top_negative_comments = get_top_comments(comments_df, 'negative')
|
|
|
|
|
175 |
|
176 |
data = {}
|
177 |
#Categorize the comments by sentiment and count them
|
@@ -282,16 +283,10 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
282 |
|
283 |
end_time = time.time()
|
284 |
print(f"Time taken for loading comments: {end_time - start_time} seconds")
|
285 |
-
|
286 |
-
|
287 |
-
start_time = time.time()
|
288 |
|
289 |
# Analyze
|
290 |
analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
|
291 |
|
292 |
-
end_time = time.time()
|
293 |
-
print(f"Time taken for sentiment analysis: {end_time - start_time} seconds")
|
294 |
-
|
295 |
long_text = analysis_dict['blended_comments']
|
296 |
|
297 |
start_time = time.time()
|
@@ -304,11 +299,6 @@ def process_youtube_comments(youtube_link, max_comments, stop_words):
|
|
304 |
|
305 |
start_time = time.time()
|
306 |
|
307 |
-
end_time = time.time()
|
308 |
-
print(f"Time taken for summarizing comments: {end_time - start_time} seconds")
|
309 |
-
|
310 |
-
start_time = time.time()
|
311 |
-
|
312 |
# Create Sentiment Chart
|
313 |
sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
|
314 |
|
|
|
120 |
print(e)
|
121 |
return None
|
122 |
|
|
|
|
|
|
|
|
|
123 |
def comments_analyzer(comments_df):
|
124 |
# This function analyzes the sentiment of comments in a given DataFrame.
|
125 |
# It requires a DataFrame of comments, typically generated by the comments_collector function.
|
|
|
133 |
return None
|
134 |
else:
|
135 |
|
136 |
+
start_time = time.time()
|
137 |
# Example of batch processing with sentiment and confidence
|
138 |
batch_size = 20 # Adjust the size based on your system's capabilities
|
139 |
sentiments = []
|
|
|
153 |
comments_df['sentiment'] = sentiments
|
154 |
comments_df['score'] = scores
|
155 |
|
156 |
+
end_time = time.time()
|
157 |
+
print(f"Time taken for batch sentiment analysis: {end_time - start_time} seconds")
|
158 |
|
159 |
def get_top_comments(comments, sentiment_type, top_n=3):
|
160 |
filtered_comments = comments[comments['sentiment'] == sentiment_type]
|
161 |
+
top_comments = filtered_comments.nlargest(top_n, 'score')
|
|
|
162 |
|
163 |
if not top_comments.empty:
|
164 |
return '\n\n'.join(f"{row['content']} - {row['author']}" for _, row in top_comments.iterrows())
|
165 |
else:
|
166 |
return f"No {sentiment_type} comments available."
|
167 |
|
168 |
+
start_time = time.time()
|
169 |
# Get top positive comments
|
170 |
top_positive_comments = get_top_comments(comments_df, 'positive')
|
171 |
|
172 |
# Get top negative comments
|
173 |
top_negative_comments = get_top_comments(comments_df, 'negative')
|
174 |
+
end_time = time.time()
|
175 |
+
print(f"Time taken for finding top n positive/negative comments: {end_time - start_time} seconds")
|
176 |
|
177 |
data = {}
|
178 |
#Categorize the comments by sentiment and count them
|
|
|
283 |
|
284 |
end_time = time.time()
|
285 |
print(f"Time taken for loading comments: {end_time - start_time} seconds")
|
|
|
|
|
|
|
286 |
|
287 |
# Analyze
|
288 |
analysis_dict, top_positive_comments, top_negative_comments = comments_analyzer(comments_df)
|
289 |
|
|
|
|
|
|
|
290 |
long_text = analysis_dict['blended_comments']
|
291 |
|
292 |
start_time = time.time()
|
|
|
299 |
|
300 |
start_time = time.time()
|
301 |
|
|
|
|
|
|
|
|
|
|
|
302 |
# Create Sentiment Chart
|
303 |
sentiment_chart = create_sentiment_analysis_chart(analysis_dict)
|
304 |
|