Demea9000 committed on
Commit
da9055d
1 Parent(s): d0ae49e

added TODOs and twitter scraping by many users

Browse files
Files changed (1) hide show
  1. twitter-scraper/TwitterScraper.py +18 -0
twitter-scraper/TwitterScraper.py CHANGED
@@ -11,6 +11,10 @@ class TwitterScraper(object):
11
  output: dict
12
  """
13
  def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
 
 
 
 
14
  self.from_date = from_date
15
  self.to_date = to_date
16
  self.num_tweets = num_tweets
@@ -22,6 +26,19 @@ class TwitterScraper(object):
22
  # user_names.
23
  return self.__get_tweets__from_twint__()
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def scrape_by_string(self, _string: str):
26
  """This method uses twint to extract tweets based on string.
27
  all extracted tweets have the specified word in _string parameter in it.
@@ -35,6 +52,7 @@ class TwitterScraper(object):
35
  self.conf.Search = _string
36
  return self.__get_tweets__from_twint__()
37
 
 
38
  def get_only_tweets(self, tweet_and_replies_info):
39
  tweet_and_replies = tweet_and_replies_info["tweet"]
40
  """
 
11
  output: dict
12
  """
13
  def __init__(self, from_date="2022-07-01", to_date=str(date.today()), num_tweets=20):
14
+ # TODO: add a check to make sure that the dates are in the correct format.
15
+ # TODO: add a check to make sure that the number of tweets is a positive number.
16
+ # TODO: add a check to make sure that the number of tweets
17
+ # is not greater than the number of tweets in the date range.
18
  self.from_date = from_date
19
  self.to_date = to_date
20
  self.num_tweets = num_tweets
 
26
  # user_names.
27
  return self.__get_tweets__from_twint__()
28
 
29
+ def scrape_by_several_users(self, _users: list):
30
+ """
31
+ This method uses twint to extract tweets based on username. It takes a list of users as input.
32
+
33
+ :param _users: list of users
34
+ :return: dataframe
35
+ """
36
+ # TODO: test this method
37
+ self.conf.Search = "from:@" + _users[0]
38
+ for user in _users[1:]:
39
+ self.conf.Search += " OR from:@" + user
40
+ return self.__get_tweets__from_twint__()
41
+
42
  def scrape_by_string(self, _string: str):
43
  """This method uses twint to extract tweets based on string.
44
  all extracted tweets have the specified word in _string parameter in it.
 
52
  self.conf.Search = _string
53
  return self.__get_tweets__from_twint__()
54
 
55
+ # TODO: make method static
56
  def get_only_tweets(self, tweet_and_replies_info):
57
  tweet_and_replies = tweet_and_replies_info["tweet"]
58
  """