Nisse00 committed on
Commit
fe688af
1 Parent(s): 456b287
.idea/csv-plugin.xml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="CsvFileAttributes">
4
+ <option name="attributeMap">
5
+ <map>
6
+ <entry key="/data/twitterdata.csv">
7
+ <value>
8
+ <Attribute>
9
+ <option name="separator" value="," />
10
+ </Attribute>
11
+ </value>
12
+ </entry>
13
+ </map>
14
+ </option>
15
+ </component>
16
+ </project>
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
.idea/politweet.iml CHANGED
@@ -3,6 +3,7 @@
3
  <component name="NewModuleRootManager">
4
  <content url="file://$MODULE_DIR$">
5
  <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
 
6
  </content>
7
  <orderEntry type="inheritedJdk" />
8
  <orderEntry type="sourceFolder" forTests="false" />
3
  <component name="NewModuleRootManager">
4
  <content url="file://$MODULE_DIR$">
5
  <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
6
+ <excludeFolder url="file://$MODULE_DIR$/env" />
7
  </content>
8
  <orderEntry type="inheritedJdk" />
9
  <orderEntry type="sourceFolder" forTests="false" />
textclassifier/TextClassifier.py CHANGED
@@ -5,6 +5,7 @@ from twitterscraper import TwitterScraper
5
  from datetime import date
6
  import os
7
  from dotenv import find_dotenv, load_dotenv
 
8
 
9
  # Set one directory up into ROOT_PATH
10
  ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -51,7 +52,6 @@ class TextClassifier:
51
 
52
  # save the dataframe to a csv file
53
 
54
-
55
  @staticmethod
56
  def cleanup_sentiment_results(classification_unclean):
57
  """
@@ -219,7 +219,7 @@ class TextClassifier:
219
  return df_topic
220
 
221
  @staticmethod
222
- def cleanup_topic_results(prediction_dict, text):
223
  new_item = text.replace("\n", " ")
224
  new_item = new_item.replace(" ", " ")
225
  return new_item
@@ -262,6 +262,35 @@ class TextClassifier:
262
  self.df = df
263
  self.df_to_csv(filename)
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  def __repr__(self):
266
  """
267
  Gives a string that describes which user is classified
@@ -269,20 +298,11 @@ class TextClassifier:
269
  """
270
  return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
271
 
 
 
272
  if __name__ == "__main__":
273
- tc = TextClassifier(from_date="2022-01-01", to_date="2022-05-31", user_name='jimmieakesson', num_tweets=20)
274
- tc.remove_duplicates_from_csv()
275
- # import pandas as pd
276
- # from datetime import datetime
277
- # import os
278
- # # show all columns
279
- # pd.set_option('display.max_columns', None)
280
- #
281
- # tc = TextClassifier(from_date="2019-01-01", to_date="2019-05-31", user_name='jimmieakesson', num_tweets=20)
282
- # tc.classify_sentiment_of_tweets()
283
- # # df = tc.analyze_sentiment_of_tweets()
284
- # # print(df)
285
- # df = tc.classify_topics_of_tweets()
286
- # print(df)
287
- # # save to csv in a folder under politweet with timestamp in name
288
- # df.to_csv(f"{datetime.now().strftime('%Y-%m-%d %H-%M-%S')}_tweets.csv")
5
  from datetime import date
6
  import os
7
  from dotenv import find_dotenv, load_dotenv
8
+ import pandas as pd
9
 
10
  # Set one directory up into ROOT_PATH
11
  ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
52
 
53
  # save the dataframe to a csv file
54
 
 
55
  @staticmethod
56
  def cleanup_sentiment_results(classification_unclean):
57
  """
219
  return df_topic
220
 
221
  @staticmethod
222
+ def cleanup_topic_results(text):
223
  new_item = text.replace("\n", " ")
224
  new_item = new_item.replace(" ", " ")
225
  return new_item
262
  self.df = df
263
  self.df_to_csv(filename)
264
 
265
+ def get_tweet_by_id(self, id, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
266
+ """
267
+ Returns tweet by id.
268
+ :param id: id of tweet
269
+ :return: tweet
270
+ """
271
+
272
+ def run_main_pipeline(self, filename="{}/data/twitterdata.csv".format(ROOT_PATH)):
273
+ """
274
+ Classifies the topics/sentiments of a user's tweets.
275
+ #We presume that all tweets inside the twitterdata.csv file are already classified.
276
+ :return: Pandas dataframe
277
+ """
278
+ temp_df = pd.DataFrame(
279
+ columns=['id', 'tweet', 'date', 'user_id', 'username', 'urls', 'nlikes', 'nreplies', 'nretweets'])
280
+ already_classified_df = pd.read_csv(filename)
281
+
282
+ for index, row in self.df.iterrows():
283
+ if row['id'] in already_classified_df['id'].values:
284
+ temp_df = temp_df.append(self.get_tweet_by_id(row['id']))
285
+ self.df = self.df.drop(index)
286
+
287
+ self.df = self.classify_topics_of_tweets()
288
+ self.df = self.classify_sentiment_of_tweets()
289
+ # self.df = self.analyze_sentiment_of_tweets()
290
+ self.df_to_csv(filename)
291
+ self.df = self.df.append(temp_df)
292
+ return self.df
293
+
294
  def __repr__(self):
295
  """
296
  Gives a string that describes which user is classified
298
  """
299
  return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
300
 
301
+
302
+
303
  if __name__ == "__main__":
304
+ tc = TextClassifier(from_date="2020-01-01", to_date="2020-12-31", user_name='jimmieakesson', num_tweets=20)
305
+ tc.test()
306
+
307
+
308
+