Oresti Theodoridis committed on
Commit
e65e077
2 Parent(s): 3bb0b34 ba446a6

Merge pull request #62 from Demea9000/fix_unit_tests

Browse files
Files changed (3) hide show
  1. .idea/misc.xml +1 -1
  2. .idea/politweet.iml +1 -1
  3. tests/scraper_test.py +45 -18
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
.idea/politweet.iml CHANGED
@@ -6,7 +6,7 @@
6
  <excludeFolder url="file://$MODULE_DIR$/env" />
7
  <excludeFolder url="file://$MODULE_DIR$/venv" />
8
  </content>
9
- <orderEntry type="inheritedJdk" />
10
  <orderEntry type="sourceFolder" forTests="false" />
11
  </component>
12
  <component name="PyNamespacePackagesService">
 
6
  <excludeFolder url="file://$MODULE_DIR$/env" />
7
  <excludeFolder url="file://$MODULE_DIR$/venv" />
8
  </content>
9
+ <orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
10
  <orderEntry type="sourceFolder" forTests="false" />
11
  </component>
12
  <component name="PyNamespacePackagesService">
tests/scraper_test.py CHANGED
@@ -5,9 +5,10 @@ import pandas as pd
5
  from datetime import datetime
6
  import sys
7
  from pathlib import Path
 
 
8
  sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
9
 
10
- from TwitterScraper import TwitterScraper # Detta är inget problem, den hittar filen
11
 
12
  class MyTestCase(unittest.TestCase):
13
 
@@ -18,10 +19,11 @@ class MyTestCase(unittest.TestCase):
18
  from_date = "2022-05-01"
19
  to_date = "2022-07-31"
20
  user = 'jimmieakesson'
21
- user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
22
- sc1 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
23
- sc2 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
24
- sc3 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
 
25
  search_string = 'miljö'
26
  cls._df_uni = sc1.scrape_by_user(user)
27
  cls._df_poly = sc2.scrape_by_several_users(user_list)
@@ -35,29 +37,40 @@ class MyTestCase(unittest.TestCase):
35
  self.from_date = "2022-05-01"
36
  self.to_date = "2022-07-31"
37
  self.user = 'jimmieakesson'
38
- self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
39
- 'dadgostarnooshi']
40
- self.search_string = 'miljö'
 
41
  # self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
42
  nbr_of_cols = 9
43
 
44
- # Checks that the returned datatype is pandas DataFrame
45
  def test_correct_type(self):
 
 
 
 
46
  print('Checking type...')
47
  self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
48
  self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
49
  self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
50
 
51
- # Checks that we get the correct number of tweets
52
- # OBS, FOR NOW IT ONLY CHECKS THAT WE DON'T OVER SAMPLE
53
  def test_correct_nbr_tweets(self):
 
 
 
 
 
 
54
  print('Checking number of tweets...')
55
  self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
56
  self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
57
  self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
58
 
59
- # Checks that all dates are between the start date and the end date
60
  def test_dates(self):
 
 
 
 
61
  print('Checking dates...')
62
  d_start = datetime.fromisoformat(self.from_date)
63
  d_end = datetime.fromisoformat(self.to_date)
@@ -84,8 +97,11 @@ class MyTestCase(unittest.TestCase):
84
  self.assertTrue(correct_date_poly)
85
  self.assertTrue(correct_date_by_string)
86
 
87
- # Checks that all tweets are from the correct user
88
  def test_user(self):
 
 
 
 
89
  print('Checking user...')
90
  same_user = True
91
  for username in self._df_uni.username:
@@ -95,8 +111,11 @@ class MyTestCase(unittest.TestCase):
95
 
96
  self.assertTrue(same_user)
97
 
98
- # Checks that all user_ids are correct
99
  def test_user_id(self):
 
 
 
 
100
  print('Checking user ids...')
101
  same_user = True
102
  first_id = self._df_uni['user_id'][0]
@@ -106,8 +125,11 @@ class MyTestCase(unittest.TestCase):
106
  break
107
  self.assertTrue(same_user)
108
 
109
- # Checks if there are tweets that have been sampled several times
110
  def test_no_doubles(self):
 
 
 
 
111
  print('Checking doubles...')
112
  id_set_uni = set(self._df_uni.id)
113
  id_set_poly = set(self._df_poly.id)
@@ -116,8 +138,11 @@ class MyTestCase(unittest.TestCase):
116
  self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
117
  self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
118
 
119
- # Checks that we have no None entries
120
  def test_none(self):
 
 
 
 
121
  print('Checking Nones...')
122
  self.assertFalse(any(b == True for b in self._df_uni.isnull()))
123
  self.assertFalse(any(b == True for b in self._df_poly.isnull()))
@@ -167,6 +192,10 @@ class MyTestCase(unittest.TestCase):
167
  self.assertTrue(correct_ids)
168
 
169
  def test_string_search(self):
 
 
 
 
170
  correct_search = True
171
  search = re.sub('ö', 'ø', self.search_string)
172
  search = re.sub('ä', 'æ', search)
@@ -177,7 +206,5 @@ class MyTestCase(unittest.TestCase):
177
  self.assertTrue(correct_search)
178
 
179
 
180
-
181
  if __name__ == '__main__':
182
  unittest.main()
183
-
 
5
  from datetime import datetime
6
  import sys
7
  from pathlib import Path
8
+ from twitterscraper import TwitterScraper as ts
9
+
10
  sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
11
 
 
12
 
13
  class MyTestCase(unittest.TestCase):
14
 
 
19
  from_date = "2022-05-01"
20
  to_date = "2022-07-31"
21
  user = 'jimmieakesson'
22
+ user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
23
+ 'dadgostarnooshi']
24
+ sc1 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
25
+ sc2 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
26
+ sc3 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
27
  search_string = 'miljö'
28
  cls._df_uni = sc1.scrape_by_user(user)
29
  cls._df_poly = sc2.scrape_by_several_users(user_list)
 
37
  self.from_date = "2022-05-01"
38
  self.to_date = "2022-07-31"
39
  self.user = 'jimmieakesson'
40
+ self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi',
41
+ 'SwedishPM',
42
+ 'dadgostarnooshi']
43
+ self.search_string = 'miljö'
44
  # self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
45
  nbr_of_cols = 9
46
 
 
47
  def test_correct_type(self):
48
+ """
49
+ Checks that the returned datatype is pandas DataFrame
50
+ :return:
51
+ """
52
  print('Checking type...')
53
  self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
54
  self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
55
  self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
56
 
 
 
57
  def test_correct_nbr_tweets(self):
58
+ """
59
+ Checks that we get the correct number of tweets.
60
+ OBS FOR NOW IT ONLY CHECKS THAT WE DON'T OVER SAMPLE
61
+ TODO: Check that we get the correct number of tweets.
62
+ :return:
63
+ """
64
  print('Checking number of tweets...')
65
  self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
66
  self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
67
  self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
68
 
 
69
  def test_dates(self):
70
+ """
71
+ Checks that all dates are between the start date and the end date
72
+ :return:
73
+ """
74
  print('Checking dates...')
75
  d_start = datetime.fromisoformat(self.from_date)
76
  d_end = datetime.fromisoformat(self.to_date)
 
97
  self.assertTrue(correct_date_poly)
98
  self.assertTrue(correct_date_by_string)
99
 
 
100
  def test_user(self):
101
+ """
102
+ Checks that all tweets are from the correct user
103
+ :return:
104
+ """
105
  print('Checking user...')
106
  same_user = True
107
  for username in self._df_uni.username:
 
111
 
112
  self.assertTrue(same_user)
113
 
 
114
  def test_user_id(self):
115
+ """
116
+ Checks that all user_ids are correct
117
+ :return:
118
+ """
119
  print('Checking user ids...')
120
  same_user = True
121
  first_id = self._df_uni['user_id'][0]
 
125
  break
126
  self.assertTrue(same_user)
127
 
 
128
  def test_no_doubles(self):
129
+ """
130
+ Checks that there are no tweets that have been sampled several times
131
+ :return:
132
+ """
133
  print('Checking doubles...')
134
  id_set_uni = set(self._df_uni.id)
135
  id_set_poly = set(self._df_poly.id)
 
138
  self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
139
  self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
140
 
 
141
  def test_none(self):
142
+ """
143
+ Checks that there are no None entries
144
+ :return:
145
+ """
146
  print('Checking Nones...')
147
  self.assertFalse(any(b == True for b in self._df_uni.isnull()))
148
  self.assertFalse(any(b == True for b in self._df_poly.isnull()))
 
192
  self.assertTrue(correct_ids)
193
 
194
  def test_string_search(self):
195
+ """
196
+ TODO: Check this
197
+ :return:
198
+ """
199
  correct_search = True
200
  search = re.sub('ö', 'ø', self.search_string)
201
  search = re.sub('ä', 'æ', search)
 
206
  self.assertTrue(correct_search)
207
 
208
 
 
209
  if __name__ == '__main__':
210
  unittest.main()