Spaces:
Runtime error
Runtime error
polished main pipeline
Browse files- textclassifier/TextClassifier.py +23 -14
textclassifier/TextClassifier.py
CHANGED
@@ -109,7 +109,7 @@ class TextClassifier:
|
|
109 |
"""
|
110 |
Analyzes the sentiment of a text using OpenAI.
|
111 |
:param text: string of the tweet text.
|
112 |
-
:param sentiment:
|
113 |
:return:
|
114 |
"""
|
115 |
# assert 1 == 2, "Måste fixa prompt innan denna metod körs"
|
@@ -275,28 +275,37 @@ class TextClassifier:
|
|
275 |
"""
|
276 |
Classifies the topics/sentiments of a user's tweets.
|
277 |
#We presume that all tweets inside the twitterdata.csv file are already classified.
|
278 |
-
:return:
|
279 |
"""
|
280 |
-
#
|
281 |
-
# columns=['id', 'tweet', 'date', 'user_id', 'username', 'urls', 'nlikes', 'nreplies', 'nretweets'])
|
282 |
if os.path.exists(filename):
|
283 |
already_classified_df = pd.read_csv(filename)
|
|
|
284 |
# Create a temporary df holding the rows of self.df whose id is already present in already_classified_df
|
285 |
temp_df = self.df[self.df['id'].isin(already_classified_df['id'])]
|
286 |
# Remove rows from self.df whose id is already present in already_classified_df
|
287 |
self.df = self.df[~self.df['id'].isin(already_classified_df['id'])]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
else:
|
289 |
print("No csv file found. Continuing without removing already classified tweets.")
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
return 0
|
300 |
|
301 |
|
302 |
def __repr__(self):
|
|
|
109 |
"""
|
110 |
Analyzes the sentiment of a text using OpenAI.
|
111 |
:param text: string of the tweet text.
|
112 |
+
:param sentiment: string of the sentiment.
|
113 |
:return:
|
114 |
"""
|
115 |
# assert 1 == 2, "Måste fixa prompt innan denna metod körs"
|
|
|
275 |
"""
|
276 |
Classifies the topics/sentiments of a user's tweets.
|
277 |
#We presume that all tweets inside the twitterdata.csv file are already classified.
|
278 |
+
:return: None
|
279 |
"""
|
280 |
+
# If the csv file exists, classify only the tweets not already in it; otherwise classify all tweets and create the file
|
|
|
281 |
if os.path.exists(filename):
|
282 |
already_classified_df = pd.read_csv(filename)
|
283 |
+
print("Already classified tweets: {}".format(already_classified_df.shape[0]))
|
284 |
# Create a temporary df holding the rows of self.df whose id is already present in already_classified_df
|
285 |
temp_df = self.df[self.df['id'].isin(already_classified_df['id'])]
|
286 |
# Remove rows from self.df whose id is already present in already_classified_df
|
287 |
self.df = self.df[~self.df['id'].isin(already_classified_df['id'])]
|
288 |
+
print("Classifying topic of {} tweets...".format(self.df.shape[0]))
|
289 |
+
self.df = self.classify_topics_of_tweets()
|
290 |
+
print("Classifying sentiment of {} tweets...".format(self.df.shape[0]))
|
291 |
+
self.df = self.classify_sentiment_of_tweets()
|
292 |
+
print("Writing to csv...")
|
293 |
+
self.df_to_csv(filename)
|
294 |
+
# Concatenate temp_df and self.df
|
295 |
+
self.df = pd.concat([temp_df, self.df], ignore_index=True)
|
296 |
+
print("Appended {}.".format(filename))
|
297 |
+
return None
|
298 |
else:
|
299 |
print("No csv file found. Continuing without removing already classified tweets.")
|
300 |
+
print("Classifying topics...")
|
301 |
+
self.df = self.classify_topics_of_tweets()
|
302 |
+
print("Classifying sentiments...")
|
303 |
+
self.df = self.classify_sentiment_of_tweets()
|
304 |
+
# self.df = self.analyze_sentiment_of_tweets()
|
305 |
+
print("Writing to csv file...")
|
306 |
+
self.df_to_csv(filename)
|
307 |
+
print("Created {}.".format(filename))
|
308 |
+
return None
|
|
|
309 |
|
310 |
|
311 |
def __repr__(self):
|