olofbengtsson committed on
Commit
4795530
·
1 Parent(s): 1f96fb0

Adding unittests

Browse files
Files changed (1) hide show
  1. twitterscraper/scraper_test.py +106 -0
twitterscraper/scraper_test.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ import pandas as pd
4
+ from datetime import datetime
5
+ import regex as re
6
+
7
+ from TwitterScraper import TwitterScraper
8
+
9
class MyTestCase(unittest.TestCase):
    """Checks on the DataFrame returned by ``TwitterScraper.scrape_by_user``.

    The scrape is performed once in ``setUpClass`` and the result is shared
    by every test through ``cls._df_uni``. The scrape parameters are class
    attributes so that ``setUpClass`` and the individual tests read the same
    values.
    """

    # Scrape parameters, shared by setUpClass and the tests.
    num_tweets = 40
    from_date = "2022-05-01"
    to_date = "2022-07-31"
    user = 'jimmieakesson'
    user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson',
                 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']

    @classmethod
    def setUpClass(cls):
        """Scrape once for ``cls.user`` and cache the result for all tests."""
        print('Super set up')
        sc = TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                            num_tweets=cls.num_tweets)
        cls._df_uni = sc.scrape_by_user(cls.user)

    def setUp(self):
        """Announce the start of a test; parameters live on the class."""
        print('set up')

    def test_correct_type(self):
        """The scrape result is a pandas DataFrame."""
        print('Checking type...')
        self.assertIsInstance(self._df_uni, pd.DataFrame)

    def test_correct_nbr_tweets(self):
        """The scrape does not return more rows than requested.

        For now this only guards against oversampling, so receiving exactly
        ``num_tweets`` rows is accepted.
        """
        print('Checking number of tweets...')
        self.assertLessEqual(self._df_uni.shape[0], self.num_tweets)

    def test_dates(self):
        """Every value in the ``date`` column lies in [from_date, to_date]."""
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        # NOTE(review): a date-only ISO string parses to midnight, so values
        # later on the ``to_date`` day itself count as out of range -- confirm
        # that this matches the scraper's end-date semantics.
        d_end = datetime.fromisoformat(self.to_date)
        out_of_range = [
            date for date in self._df_uni.date
            if not d_start <= datetime.fromisoformat(date) <= d_end
        ]
        # Comparing against [] makes the offending values show up on failure.
        self.assertEqual(out_of_range, [])

    def test_user(self):
        """Every value in the ``username`` column equals the requested user."""
        print('Checking user...')
        wrong_users = [name for name in self._df_uni.username
                       if name != self.user]
        self.assertEqual(wrong_users, [])

    def test_user_id(self):
        """All values in the ``user_id`` column are identical."""
        print('Checking user ids...')
        # A plain list gives positional access, so this works for an empty
        # frame and for any index labelling (``['user_id'][0]`` is a label
        # lookup and raises in both of those cases).
        user_ids = self._df_uni['user_id'].tolist()
        mismatched = [uid for uid in user_ids if uid != user_ids[0]]
        self.assertEqual(mismatched, [])

    def test_no_doubles(self):
        """No value of the ``id`` column occurs more than once."""
        print('Checking doubles...')
        self.assertEqual(len(set(self._df_uni.id)), self._df_uni.shape[0])

    def test_none(self):
        """No cell of the DataFrame is null."""
        print('Checking Nones...')
        # Iterating a DataFrame yields its column labels, not its cells, so
        # the null mask has to be reduced explicitly.
        self.assertFalse(self._df_uni.isnull().values.any())

    def test_no_url_tweets(self):
        """No value in the ``tweet`` column consists solely of one https URL."""
        print('Checking url tweets...')
        url_only = [
            tweet for tweet in self._df_uni.tweet
            if len(tweet.split()) == 1 and tweet.split()[0].startswith('https')
        ]
        self.assertEqual(url_only, [])
101
+
102
+
103
+
104
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
106
+