Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Nisse00 committed on Jul 14, 2022

Commit

fe688af

•

1 Parent(s): 456b287

Bugfixing

Browse files

Files changed (4) hide show

.idea/csv-plugin.xml +16 -0
.idea/misc.xml +1 -1
.idea/politweet.iml +1 -0
textclassifier/TextClassifier.py +38 -18

.idea/csv-plugin.xml ADDED Viewed

	@@ -0,0 +1,16 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CsvFileAttributes">
+    <option name="attributeMap">
+      <map>
+        <entry key="/data/twitterdata.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
 </project>

.idea/politweet.iml CHANGED Viewed

@@ -3,6 +3,7 @@
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />

   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
+      <excludeFolder url="file://$MODULE_DIR$/env" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />

textclassifier/TextClassifier.py CHANGED Viewed

@@ -5,6 +5,7 @@ from twitterscraper import TwitterScraper
 from datetime import date
 import os
 from dotenv import find_dotenv, load_dotenv
 # Set one directory up into ROOT_PATH
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -51,7 +52,6 @@ class TextClassifier:
         # save the dataframe to a csv file
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         """
@@ -219,7 +219,7 @@ class TextClassifier:
         return df_topic
     @staticmethod
-    def cleanup_topic_results(prediction_dict, text):
         new_item = text.replace("\n", " ")
         new_item = new_item.replace("  ", " ")
         return new_item
@@ -262,6 +262,35 @@ class TextClassifier:
         self.df = df
         self.df_to_csv(filename)
     def __repr__(self):
         """
         Gives a string that describes which user is classified
@@ -269,20 +298,11 @@ class TextClassifier:
         """
         return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
 if __name__ == "__main__":
-    tc = TextClassifier(from_date="2022-01-01", to_date="2022-05-31", user_name='jimmieakesson', num_tweets=20)
-    tc.remove_duplicates_from_csv()
-#     import pandas as pd
-#     from datetime import datetime
-#     import os
-#     # show all columns
-#     pd.set_option('display.max_columns', None)
-#
-#     tc = TextClassifier(from_date="2019-01-01", to_date="2019-05-31", user_name='jimmieakesson', num_tweets=20)
-#     tc.classify_sentiment_of_tweets()
-#     # df = tc.analyze_sentiment_of_tweets()
-#     # print(df)
-#     df = tc.classify_topics_of_tweets()
-#     print(df)
-#     # save to csv in a folder under politweet with timestamp in name
-#     df.to_csv(f"{datetime.now().strftime('%Y-%m-%d %H-%M-%S')}_tweets.csv")

 from datetime import date
 import os
 from dotenv import find_dotenv, load_dotenv
+import pandas as pd
 # Set one directory up into ROOT_PATH
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         # save the dataframe to a csv file
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         """
         return df_topic
     @staticmethod
+    def cleanup_topic_results(text):
         new_item = text.replace("\n", " ")
         new_item = new_item.replace("  ", " ")
         return new_item
         self.df = df
         self.df_to_csv(filename)
+    def get_tweet_by_id(self, id, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
+        """
+        Returns tweet by id.
+        :param id: id of tweet
+        :return: tweet
+        """
+    def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
+        """
+        Classifies the topics/sentiments of a user's tweets.
+        #We presume that all tweets inside the twitterdata.csv file are already classified.
+        :return: Pandas dataframe
+        """
+        temp_df = pd.DataFrame(
+            columns=['id', 'tweet', 'date', 'user_id', 'username', 'urls', 'nlikes', 'nreplies', 'nretweets'])
+        already_classified_df = pd.read_csv(filename)
+        for index, row in self.df.iterrows():
+            if row['id'] in already_classified_df['id'].values:
+                temp_df = temp_df.append(self.get_tweet_by_id(row['id']))
+                self.df = self.df.drop(index)
+        self.df = self.classify_topics_of_tweets()
+        self.df = self.classify_sentiment_of_tweets()
+        # self.df = self.analyze_sentiment_of_tweets()
+        self.df_to_csv(filename)
+        self.df = self.df.append(temp_df)
+        return self.df
     def __repr__(self):
         """
         Gives a string that describes which user is classified
         """
         return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
 if __name__ == "__main__":
+    tc = TextClassifier(from_date="2020-01-01", to_date="2020-12-31", user_name='jimmieakesson', num_tweets=20)
+    tc.test()