Demea9000 committed on
Commit
f8f979f
1 Parent(s): 974febc

polished main pipeline

Browse files
Files changed (1) hide show
  1. textclassifier/TextClassifier.py +23 -14
textclassifier/TextClassifier.py CHANGED
@@ -109,7 +109,7 @@ class TextClassifier:
109
  """
110
  Analyzes the sentiment of a text using OpenAI.
111
  :param text: string of the tweet text.
112
- :param sentiment:
113
  :return:
114
  """
115
  # assert 1 == 2, "Måste fixa prompt innan denna metod körs"
@@ -275,28 +275,37 @@ class TextClassifier:
275
  """
276
  Classifies the topics/sentiments of a user's tweets.
277
  #We presume that all tweets inside the twitterdata.csv file are already classified.
278
- :return: Pandas dataframe
279
  """
280
- # temp_df = pd.DataFrame(
281
- # columns=['id', 'tweet', 'date', 'user_id', 'username', 'urls', 'nlikes', 'nreplies', 'nretweets'])
282
  if os.path.exists(filename):
283
  already_classified_df = pd.read_csv(filename)
 
284
  # Create a temporary df holding the rows of self.df whose ids already appear in already_classified_df
285
  temp_df = self.df[self.df['id'].isin(already_classified_df['id'])]
286
  # Remove rows from self.df whose ids already appear in already_classified_df
287
  self.df = self.df[~self.df['id'].isin(already_classified_df['id'])]
 
 
 
 
 
 
 
 
 
 
288
  else:
289
  print("No csv file found. Continuing without removing already classified tweets.")
290
-
291
- self.df = self.classify_topics_of_tweets()
292
- self.df = self.classify_sentiment_of_tweets()
293
- # self.df = self.analyze_sentiment_of_tweets()
294
-
295
- self.df_to_csv(filename)
296
- # Concatenate temp_df and self.df
297
- self.df = pd.concat([temp_df, self.df], ignore_index=True)
298
- print("Done.")
299
- return 0
300
 
301
 
302
  def __repr__(self):
 
109
  """
110
  Analyzes the sentiment of a text using OpenAI.
111
  :param text: string of the tweet text.
112
+ :param sentiment: string of the sentiment.
113
  :return:
114
  """
115
  # assert 1 == 2, "Måste fixa prompt innan denna metod körs"
 
275
  """
276
  Classifies the topics/sentiments of a user's tweets.
277
  #We presume that all tweets inside the twitterdata.csv file are already classified.
278
+ :return: None
279
  """
280
+ # Check if file exists, if not, create it
 
281
  if os.path.exists(filename):
282
  already_classified_df = pd.read_csv(filename)
283
+ print("Already classified tweets: {}".format(already_classified_df.shape[0]))
284
  # Create a temporary df holding the rows of self.df whose ids already appear in already_classified_df
285
  temp_df = self.df[self.df['id'].isin(already_classified_df['id'])]
286
  # Remove rows from self.df whose ids already appear in already_classified_df
287
  self.df = self.df[~self.df['id'].isin(already_classified_df['id'])]
288
+ print("Classifying topic of {} tweets...".format(self.df.shape[0]))
289
+ self.df = self.classify_topics_of_tweets()
290
+ print("Classifying sentiment of {} tweets...".format(self.df.shape[0]))
291
+ self.df = self.classify_sentiment_of_tweets()
292
+ print("Writing to csv...")
293
+ self.df_to_csv(filename)
294
+ # Concatenate temp_df and self.df
295
+ self.df = pd.concat([temp_df, self.df], ignore_index=True)
296
+ print("Appended {}.".format(filename))
297
+ return None
298
  else:
299
  print("No csv file found. Continuing without removing already classified tweets.")
300
+ print("Classifying topics...")
301
+ self.df = self.classify_topics_of_tweets()
302
+ print("Classifying sentiments...")
303
+ self.df = self.classify_sentiment_of_tweets()
304
+ # self.df = self.analyze_sentiment_of_tweets()
305
+ print("Writing to csv file...")
306
+ self.df_to_csv(filename)
307
+ print("Created {}.".format(filename))
308
+ return None
 
309
 
310
 
311
  def __repr__(self):