import re
import sys
import unittest
from datetime import datetime
from pathlib import Path

import pandas as pd

from twitterscraper import TwitterScraper as ts

# NOTE(review): this path tweak runs *after* the ``twitterscraper`` import
# above (same order as the original file), so it cannot influence that import.
# Confirm whether it is still needed or should be moved above the import.
sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")


class MyTestCase(unittest.TestCase):
    """Integration tests for the three scrape entry points of TwitterScraper.

    ``setUpClass`` scrapes once and stores the results on the class:

    * ``_df_uni``       -- result of ``scrape_by_user(user)``
    * ``_df_poly``      -- result of ``scrape_by_several_users(user_list)``
    * ``_df_by_string`` -- result of ``scrape_by_string(search_string)``

    Every test then inspects those shared frames, so the scraping itself is
    performed only once per test run.
    """

    # Scrape parameters shared by setUpClass and the individual tests.
    num_tweets = 40
    from_date = "2022-05-01"
    to_date = "2022-07-31"
    user = 'jimmieakesson'
    user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson',
                 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
    search_string = 'miljö'

    @classmethod
    def setUpClass(cls):
        """Run each scrape once and cache the resulting frames on the class."""
        print('Super set up')
        sc1 = ts.TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                                num_tweets=cls.num_tweets)
        sc2 = ts.TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                                num_tweets=cls.num_tweets)
        sc3 = ts.TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                                num_tweets=cls.num_tweets)
        cls._df_uni = sc1.scrape_by_user(cls.user)
        cls._df_poly = sc2.scrape_by_several_users(cls.user_list)
        cls._df_by_string = sc3.scrape_by_string(cls.search_string)

    def setUp(self):
        """Per-test hook; the scrape parameters live on the class itself."""
        print('set up')

    def _frames(self):
        """Return ``(label, frame)`` pairs for the three scraped results."""
        return (
            ('single user', self._df_uni),
            ('several users', self._df_poly),
            ('search string', self._df_by_string),
        )

    @staticmethod
    def _is_only_url(tweet):
        """Return True if *tweet* is a single token starting with 'https'."""
        words = tweet.split()
        return len(words) == 1 and words[0].startswith('https')

    def test_correct_type(self):
        """
        Checks that the returned datatype is pandas DataFrame
        :return:
        """
        print('Checking type...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                self.assertIsInstance(df, pd.DataFrame)

    def test_correct_nbr_tweets(self):
        """
        Checks that we get the correct number of tweets.
        NOTE: for now it only checks that we don't over-sample.
        TODO: Check that we get the correct number of tweets.
        :return:
        """
        print('Checking number of tweets...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                self.assertLessEqual(df.shape[0], self.num_tweets)

    def test_dates(self):
        """
        Checks that all dates are between the start date and the end date
        :return:
        """
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        d_end = datetime.fromisoformat(self.to_date)
        for label, df in self._frames():
            with self.subTest(frame=label):
                out_of_range = [
                    date for date in df['date']
                    if not d_start <= datetime.fromisoformat(date) <= d_end
                ]
                self.assertEqual(out_of_range, [])

    def test_user(self):
        """
        Checks that all tweets are from the correct user
        :return:
        """
        print('Checking user...')
        wrong_users = [name for name in self._df_uni['username']
                       if name != self.user]
        self.assertEqual(wrong_users, [])

    def test_user_id(self):
        """
        Checks that all user_ids are correct, i.e. the single-user frame
        contains at most one distinct user_id.
        :return:
        """
        print('Checking user ids...')
        # nunique avoids the label-based ``[0]`` lookup, which raises when the
        # frame is empty or its index does not start at 0.
        distinct_ids = self._df_uni['user_id'].nunique(dropna=False)
        self.assertLessEqual(distinct_ids, 1)

    def test_no_doubles(self):
        """
        Checks that there are no tweets that have been sampled several times
        :return:
        """
        print('Checking doubles...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                duplicated_ids = df.loc[df['id'].duplicated(), 'id'].tolist()
                self.assertEqual(duplicated_ids, [])

    def test_none(self):
        """
        Checks that there are no None entries
        :return:
        """
        print('Checking Nones...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                # Iterating a DataFrame yields column labels, not cell values,
                # so the null mask has to be reduced explicitly.
                null_counts = df.isnull().sum()
                columns_with_nulls = null_counts[null_counts > 0].to_dict()
                self.assertEqual(columns_with_nulls, {})

    def test_no_url_tweets(self):
        """
        Checks that no tweet consists of nothing but a single https link
        :return:
        """
        print('Checking url tweets...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                url_only = [tweet for tweet in df['tweet']
                            if self._is_only_url(tweet)]
                self.assertEqual(url_only, [])

    def test_many_users(self):
        """
        Checks that the several-users scrape only returns requested users
        :return:
        """
        unexpected = sorted(set(self._df_poly['username'])
                            - set(self.user_list))
        self.assertEqual(unexpected, [])

    def test_many_user_ids(self):
        """
        Checks that each username in the several-users scrape maps to exactly
        one user_id
        :return:
        """
        ids_by_user = {}
        # zip walks the rows positionally, independent of the frame's index.
        for username, user_id in zip(self._df_poly['username'],
                                     self._df_poly['user_id']):
            ids_by_user.setdefault(username, set()).add(user_id)
        conflicting = {name: ids for name, ids in ids_by_user.items()
                       if len(ids) > 1}
        self.assertEqual(conflicting, {})

    def test_string_search(self):
        """
        Checks that every tweet from the string search contains the search
        string, either as given or with ö/ä written as ø/æ.
        TODO: Check this
        :return:
        """
        alt_spelling = re.sub('ä', 'æ', re.sub('ö', 'ø', self.search_string))
        accepted = (self.search_string, alt_spelling)
        unmatched = [
            tweet for tweet in self._df_by_string['tweet']
            if not any(term in tweet.lower() for term in accepted)
        ]
        self.assertEqual(unmatched, [])


if __name__ == '__main__':
    unittest.main()