"""Tests for the DataFrame returned by ``TwitterScraper.scrape_by_user``."""
from datetime import datetime
import unittest

import pandas as pd
import regex as re

from TwitterScraper import TwitterScraper


class MyTestCase(unittest.TestCase):
    """Sanity checks on the tweets scraped for a single user.

    The scrape is performed once in ``setUpClass`` and the resulting frame is
    stored on the class as ``_df_uni``; every test only reads from it.
    """

    # Scrape parameters. They live on the class so that ``setUpClass`` (which
    # performs the scrape) and the individual tests (which verify the result)
    # can never drift apart.
    num_tweets = 40
    from_date = "2022-05-01"
    to_date = "2022-07-31"
    user = 'jimmieakesson'
    user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson',
                 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
    # Not used by any test yet; presumably the expected number of columns in
    # the scraped frame -- TODO confirm and add a test for it.
    nbr_of_cols = 9

    @classmethod
    def setUpClass(cls):
        """Scrape the tweets of ``cls.user`` once for the whole test class."""
        print('Super set up')
        sc = TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                            num_tweets=cls.num_tweets)
        cls._df_uni = sc.scrape_by_user(cls.user)

    def setUp(self):
        """Announce the start of each test; parameters are class attributes."""
        print('set up')

    # Checks that the returned datatype is pandas DataFrame
    def test_correct_type(self):
        """The scraper must return a pandas DataFrame."""
        print('Checking type...')
        self.assertIsInstance(self._df_uni, pd.DataFrame)

    # Checks that we get the correct number of tweets
    # NOTE: for now it only checks that we don't oversample
    def test_correct_nbr_tweets(self):
        """The scraper must not return more tweets than were requested."""
        print('Checking number of tweets...')
        # Receiving exactly ``num_tweets`` rows is the expected outcome, so
        # the bound has to be inclusive.
        self.assertLessEqual(self._df_uni.shape[0], self.num_tweets)

    # Checks that all dates are between the start date and the end date
    def test_dates(self):
        """Every tweet date must lie within ``[from_date, to_date]``."""
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        d_end = datetime.fromisoformat(self.to_date)
        # NOTE(review): assumes the ``date`` column holds naive ISO-format
        # strings; timezone-aware values would make the comparison raise
        # TypeError -- confirm against the scraper's output.
        for date in self._df_uni.date:
            d = datetime.fromisoformat(date)
            self.assertTrue(
                d_start <= d <= d_end,
                msg=f'{date} is outside {self.from_date}..{self.to_date}')

    # Checks that all tweets are from the correct user
    def test_user(self):
        """Every row must belong to the user that was scraped."""
        print('Checking user...')
        unexpected = set(self._df_uni.username) - {self.user}
        self.assertEqual(unexpected, set())

    # Checks that all user_ids are correct
    def test_user_id(self):
        """All rows must share one single ``user_id``."""
        print('Checking user ids...')
        # ``nunique`` does not depend on the index labels, so this also works
        # for an empty frame or a frame whose index does not start at 0.
        self.assertLessEqual(self._df_uni['user_id'].nunique(dropna=False), 1)

    # Checks if there are tweets that have been sampled several times
    def test_no_doubles(self):
        """No tweet id may occur more than once."""
        print('Checking doubles...')
        self.assertEqual(len(set(self._df_uni.id)), self._df_uni.shape[0])

    # Checks that we have no None entries
    def test_none(self):
        """The frame must not contain any null cell."""
        print('Checking Nones...')
        # Iterating a DataFrame yields its column labels, not its cells, so
        # the boolean mask has to be reduced over its values instead.
        self.assertFalse(self._df_uni.isnull().values.any())

    def test_no_url_tweets(self):
        """No tweet may consist of nothing but a single https link."""
        print('Checking url tweets...')
        for tweet in self._df_uni.tweet:
            words = tweet.split()
            only_url = len(words) == 1 and words[0].startswith('https')
            self.assertFalse(only_url, msg=f'URL-only tweet: {words}')


if __name__ == '__main__':
    unittest.main()