Spaces:

politweet-sh
/

politweet

Runtime error

Demea9000 committed on Jul 20, 2022

Commit

5f7371a

•

1 Parent(s): 5214b07

made TextClassifier divide topics into columns

Files changed (2) hide show

data/twitterdata.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

textclassifier/TextClassifier.py CHANGED Viewed

@@ -221,8 +221,9 @@ class TextClassifier:
         Classifies the topics of a user's tweets.
         """
         df_topic = self.df.copy()
-        df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
         self.df = df_topic
         return self.df
     @staticmethod
@@ -275,11 +276,11 @@ class TextClassifier:
         :return: None
         """
         df_topic = self.df.copy()
-        df_topic['topic_temp'] = df_topic['topic'].apply(lambda x: f.separate_string(x))
-        df_topic_split = pd.DataFrame(df_topic['topic_temp'].tolist(),
                                       columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
         self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
-        self.df.drop(['topic_temp'], axis=1, inplace=True)
     def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
         """
@@ -345,6 +346,6 @@ class TextClassifier:
 if __name__ == "__main__":
-    text_classifier = TextClassifier(from_date="2020-01-01", to_date="2020-01-31", user_name='jimmieakesson', num_tweets=20)
     text_classifier.run_main_pipeline()

         Classifies the topics of a user's tweets.
         """
         df_topic = self.df.copy()
+        df_topic['topics'] = df_topic['tweet'].apply(self.classify_topic)
         self.df = df_topic
+        self.split_topics_into_columns()
         return self.df
     @staticmethod
         :return: None
         """
         df_topic = self.df.copy()
+        df_topic['topics_temp'] = df_topic['topics'].apply(lambda x: f.separate_string(x))
+        df_topic_split = pd.DataFrame(df_topic['topics_temp'].tolist(),
                                       columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
         self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
+        self.df.drop(['topics_temp'], axis=1, inplace=True)
     def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
         """
 if __name__ == "__main__":
+    text_classifier = TextClassifier(from_date="2020-05-01", to_date="2020-05-31", user_name='jimmieakesson', num_tweets=20)
     text_classifier.run_main_pipeline()