Demea9000 committed on
Commit
5f7371a
1 Parent(s): 5214b07

made TextClassifier divide topics into columns

Browse files
data/twitterdata.csv CHANGED
The diff for this file is too large to render. See raw diff
 
textclassifier/TextClassifier.py CHANGED
@@ -221,8 +221,9 @@ class TextClassifier:
221
  Classifies the topics of a user's tweets.
222
  """
223
  df_topic = self.df.copy()
224
- df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
225
  self.df = df_topic
 
226
  return self.df
227
 
228
  @staticmethod
@@ -275,11 +276,11 @@ class TextClassifier:
275
  :return: None
276
  """
277
  df_topic = self.df.copy()
278
- df_topic['topic_temp'] = df_topic['topic'].apply(lambda x: f.separate_string(x))
279
- df_topic_split = pd.DataFrame(df_topic['topic_temp'].tolist(),
280
  columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
281
  self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
282
- self.df.drop(['topic_temp'], axis=1, inplace=True)
283
 
284
  def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
285
  """
@@ -345,6 +346,6 @@ class TextClassifier:
345
 
346
 
347
  if __name__ == "__main__":
348
- text_classifier = TextClassifier(from_date="2020-01-01", to_date="2020-01-31", user_name='jimmieakesson', num_tweets=20)
349
  text_classifier.run_main_pipeline()
350
 
 
221
  Classifies the topics of a user's tweets.
222
  """
223
  df_topic = self.df.copy()
224
+ df_topic['topics'] = df_topic['tweet'].apply(self.classify_topic)
225
  self.df = df_topic
226
+ self.split_topics_into_columns()
227
  return self.df
228
 
229
  @staticmethod
 
276
  :return: None
277
  """
278
  df_topic = self.df.copy()
279
+ df_topic['topics_temp'] = df_topic['topics'].apply(lambda x: f.separate_string(x))
280
+ df_topic_split = pd.DataFrame(df_topic['topics_temp'].tolist(),
281
  columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
282
  self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
283
+ self.df.drop(['topics_temp'], axis=1, inplace=True)
284
 
285
  def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
286
  """
 
346
 
347
 
348
  if __name__ == "__main__":
349
+ text_classifier = TextClassifier(from_date="2020-05-01", to_date="2020-05-31", user_name='jimmieakesson', num_tweets=20)
350
  text_classifier.run_main_pipeline()
351