Demea9000 committed on
Commit
ae34e1d
1 Parent(s): e65e077

fixed top TODOs in TwitterScraper

Browse files
textclassifier/TextClassifier.py CHANGED
@@ -29,21 +29,15 @@ class TextClassifier:
29
  :param to_date: string of the format 'YYYY-MM-DD'.
30
  :param num_tweets: integer value of the maximum number of tweets to be scraped.
31
  """
32
- # Make sure to_date is later than from_date
33
- assert from_date < to_date, "from_date must be earlier than to_date"
34
- # Make sure the dates are in the correct format
35
- assert re.match(r'^\d{4}-\d{2}-\d{2}$', from_date) is not None, "from_date must be in the format YYYY-MM-DD"
36
  # Make sure user_name is not empty
37
  assert user_name is not None, "user_name cannot be empty"
38
- # Make sure num_tweets is a positive integer
39
- assert 0 < num_tweets <= 20, "num_tweets must be a positive integer and at most 20"
40
 
 
41
  self.model_name = model_name
42
  self.from_date = from_date
43
  self.to_date = to_date
44
  self.num_tweets = num_tweets
45
  self.user_name = user_name
46
- self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
47
  # Assure that scrape_by_user actually gets num_tweets
48
  # add timer in time-loop and stop after 10 seconds
49
  start_time = time.time()
 
29
  :param to_date: string of the format 'YYYY-MM-DD'.
30
  :param num_tweets: integer value of the maximum number of tweets to be scraped.
31
  """
 
 
 
 
32
  # Make sure user_name is not empty
33
  assert user_name is not None, "user_name cannot be empty"
 
 
34
 
35
+ self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
36
  self.model_name = model_name
37
  self.from_date = from_date
38
  self.to_date = to_date
39
  self.num_tweets = num_tweets
40
  self.user_name = user_name
 
41
  # Assure that scrape_by_user actually gets num_tweets
42
  # add timer in time-loop and stop after 10 seconds
43
  start_time = time.time()
twitterscraper/TwitterScraper.py CHANGED
@@ -1,6 +1,7 @@
1
  import pandas as pd
2
  import twint
3
  from datetime import date
 
4
 
5
 
6
  class TwitterScraper(object):
@@ -13,10 +14,21 @@ class TwitterScraper(object):
13
  """
14
 
15
  def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
16
- # TODO: add a check to make sure that the dates are in the correct format.
17
- # TODO: add a check to make sure that the number of tweets is a positive number.
18
- # TODO: add a check to make sure that the number of tweets
19
- # is not greater than the number of tweets in the date range.
 
 
 
 
 
 
 
 
 
 
 
20
  self.from_date = from_date
21
  self.to_date = to_date
22
  self.num_tweets = num_tweets
@@ -74,6 +86,7 @@ class TwitterScraper(object):
74
  return tweets_info
75
 
76
  def __get_tweets__from_twint__(self):
 
77
  """ __get_tweets_from_twint__
78
  tweet info is a dataframe with following columns
79
  Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
@@ -99,7 +112,7 @@ class TwitterScraper(object):
99
  print("No tweet containing the word \"" + self.conf.Search + "\" could be found!")
100
  else:
101
  tweet_and_replies_inf = tweet_and_replies_inf[
102
- ["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
103
  return tweet_and_replies_inf
104
 
105
  # def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"): # If the type of the date input
@@ -111,7 +124,6 @@ class TwitterScraper(object):
111
  def __repr__(self):
112
  return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
113
  self.num_tweets)
114
-
115
 
116
 
117
  if __name__ == "__main__":
 
1
  import pandas as pd
2
  import twint
3
  from datetime import date
4
+ import re
5
 
6
 
7
  class TwitterScraper(object):
 
14
  """
15
 
16
  def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
17
+ """
18
+ This method initializes the TwitterScraper class. It takes the user as input and collects the user's tweets
19
+ from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it collects the number of
20
+ tweets 'num_tweets' from today.
21
+ :param from_date: str (format: YYYY-MM-DD)
22
+ :param to_date: str (format: YYYY-MM-DD)
23
+ :param num_tweets: int (number of tweets to be scraped)
24
+ """
25
+ # Make sure the dates are in the correct format
26
+ assert re.match(r'^\d{4}-\d{2}-\d{2}$', from_date) is not None, "from_date must be in the format YYYY-MM-DD"
27
+ # Make sure to_date is later than from_date
28
+ assert from_date < to_date, "from_date must be earlier than to_date"
29
+ # Make sure num_tweets is a positive integer
30
+ assert 0 < num_tweets <= 20, "num_tweets must be a positive integer and at most 20"
31
+
32
  self.from_date = from_date
33
  self.to_date = to_date
34
  self.num_tweets = num_tweets
 
86
  return tweets_info
87
 
88
  def __get_tweets__from_twint__(self):
89
+ # TODO: fix documentation
90
  """ __get_tweets_from_twint__
91
  tweet info is a dataframe with following columns
92
  Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
 
112
  print("No tweet containing the word \"" + self.conf.Search + "\" could be found!")
113
  else:
114
  tweet_and_replies_inf = tweet_and_replies_inf[
115
+ ["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
116
  return tweet_and_replies_inf
117
 
118
  # def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"): # If the type of the date input
 
124
  def __repr__(self):
125
  return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
126
  self.num_tweets)
 
127
 
128
 
129
  if __name__ == "__main__":