Spaces:

politweet-sh
/

politweet

Runtime error

App Files Community

Demea9000 committed on Jul 18, 2022

Commit

ae34e1d

•

1 Parent(s): e65e077

fixed top TODOs in TwitterScraper

Browse files

Files changed (2) hide show

textclassifier/TextClassifier.py +1 -7
twitterscraper/TwitterScraper.py +18 -6

textclassifier/TextClassifier.py CHANGED Viewed

@@ -29,21 +29,15 @@ class TextClassifier:
         :param to_date: string of the format 'YYYY-MM-DD'.
         :param num_tweets: integer value of the maximum number of tweets to be scraped.
         """
-        # Make sure to_date is later than from_date
-        assert from_date < to_date, "from_date must be earlier than to_date"
-        # Make sure the dates are in the correct format
-        assert re.match(r'^\d{4}-\d{2}-\d{2}$', from_date) is not None, "from_date must be in the format YYYY-MM-DD"
         # Make sure user_name is not empty
         assert user_name is not None, "user_name cannot be empty"
-        # Make sure num_tweets is a positive integer
-        assert 0 < num_tweets <= 20, "num_tweets must be a positive integer and at most 20"
         self.model_name = model_name
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.user_name = user_name
-        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
         # Assure that scrape_by_user actually gets num_tweets
         # add timer in time-loop and stop after 10 seconds
         start_time = time.time()

         :param to_date: string of the format 'YYYY-MM-DD'.
         :param num_tweets: integer value of the maximum number of tweets to be scraped.
         """
         # Make sure user_name is not empty
         assert user_name is not None, "user_name cannot be empty"
+        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
         self.model_name = model_name
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.user_name = user_name
         # Assure that scrape_by_user actually gets num_tweets
         # add timer in time-loop and stop after 10 seconds
         start_time = time.time()

twitterscraper/TwitterScraper.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import pandas as pd
 import twint
 from datetime import date
 class TwitterScraper(object):
@@ -13,10 +14,21 @@ class TwitterScraper(object):
     """
     def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
-        # TODO: add a check to make sure that the dates are in the correct format.
-        #  TODO: add a check to make sure that the number of tweets is a positive number.
-        #  TODO: add a check to make sure that the number of tweets
-        #   is not greater than the number of tweets in the date range.
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
@@ -74,6 +86,7 @@ class TwitterScraper(object):
         return tweets_info
     def __get_tweets__from_twint__(self):
         """ __get_tweets_from_twint__
         tweet info is a dataframe with fallowing columns
             Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
@@ -99,7 +112,7 @@ class TwitterScraper(object):
             print("No tweet containing the word \"" + self.conf.Search + "\" could be found!")
         else:
             tweet_and_replies_inf = tweet_and_replies_inf[
-                     ["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
         return tweet_and_replies_inf
     # def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"):  # If the type of ite date input
@@ -111,7 +124,6 @@ class TwitterScraper(object):
     def __repr__(self):
         return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
                                                                                 self.num_tweets)
 if __name__ == "__main__":

 import pandas as pd
 import twint
 from datetime import date
+import re
 class TwitterScraper(object):
     """
     def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
+        """
+        This method initializes the TwitterScraper class. It takes the user as input and collects the user's tweets
+        from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it collects the number of
+        tweets 'num_tweets' from today.
+        :param from_date: str (format: YYYY-MM-DD)
+        :param to_date: str (format: YYYY-MM-DD)
+        :param num_tweets: int (number of tweets to be scraped)
+        """
+        # Make sure the dates are in the correct format
+        assert re.match(r'^\d{4}-\d{2}-\d{2}$', from_date) is not None, "from_date must be in the format YYYY-MM-DD"
+        # Make sure to_date is later than from_date
+        assert from_date < to_date, "from_date must be earlier than to_date"
+        # Make sure num_tweets is a positive integer
+        assert 0 < num_tweets <= 20, "num_tweets must be a positive integer and at most 20"
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         return tweets_info
     def __get_tweets__from_twint__(self):
+        # TODO: fix documentation
         """ __get_tweets_from_twint__
         tweet info is a dataframe with fallowing columns
             Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
             print("No tweet containing the word \"" + self.conf.Search + "\" could be found!")
         else:
             tweet_and_replies_inf = tweet_and_replies_inf[
+                ["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
         return tweet_and_replies_inf
     # def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"):  # If the type of ite date input
     def __repr__(self):
         return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
                                                                                 self.num_tweets)
 if __name__ == "__main__":