Spaces:
Runtime error
Runtime error
cleaned up unittests
Browse files
- .idea/misc.xml +1 -1
- .idea/politweet.iml +1 -1
- tests/scraper_test.py +45 -18
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
.idea/politweet.iml
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
8 |
</content>
|
9 |
-
<orderEntry type="
|
10 |
<orderEntry type="sourceFolder" forTests="false" />
|
11 |
</component>
|
12 |
<component name="PyNamespacePackagesService">
|
|
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
8 |
</content>
|
9 |
+
<orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
|
10 |
<orderEntry type="sourceFolder" forTests="false" />
|
11 |
</component>
|
12 |
<component name="PyNamespacePackagesService">
|
tests/scraper_test.py
CHANGED
@@ -5,9 +5,10 @@ import pandas as pd
|
|
5 |
from datetime import datetime
|
6 |
import sys
|
7 |
from pathlib import Path
|
|
|
|
|
8 |
sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
|
9 |
|
10 |
-
from TwitterScraper import TwitterScraper # This is not a problem; it finds the file
|
11 |
|
12 |
class MyTestCase(unittest.TestCase):
|
13 |
|
@@ -18,10 +19,11 @@ class MyTestCase(unittest.TestCase):
|
|
18 |
from_date = "2022-05-01"
|
19 |
to_date = "2022-07-31"
|
20 |
user = 'jimmieakesson'
|
21 |
-
user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
25 |
search_string = 'miljö'
|
26 |
cls._df_uni = sc1.scrape_by_user(user)
|
27 |
cls._df_poly = sc2.scrape_by_several_users(user_list)
|
@@ -35,29 +37,40 @@ class MyTestCase(unittest.TestCase):
|
|
35 |
self.from_date = "2022-05-01"
|
36 |
self.to_date = "2022-07-31"
|
37 |
self.user = 'jimmieakesson'
|
38 |
-
self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi',
|
39 |
-
|
40 |
-
|
|
|
41 |
# self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
|
42 |
nbr_of_cols = 9
|
43 |
|
44 |
-
# Checks that the returned datatype is pandas DataFrame
|
45 |
def test_correct_type(self):
|
|
|
|
|
|
|
|
|
46 |
print('Checking type...')
|
47 |
self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
|
48 |
self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
|
49 |
self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
|
50 |
|
51 |
-
# Checks that we get the correct number of tweets
|
52 |
-
# NOTE: FOR NOW IT ONLY CHECKS THAT WE DON'T OVERSAMPLE
|
53 |
def test_correct_nbr_tweets(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
print('Checking number of tweets...')
|
55 |
self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
|
56 |
self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
|
57 |
self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
|
58 |
|
59 |
-
# Checks that all dates are between the start date and the end date
|
60 |
def test_dates(self):
|
|
|
|
|
|
|
|
|
61 |
print('Checking dates...')
|
62 |
d_start = datetime.fromisoformat(self.from_date)
|
63 |
d_end = datetime.fromisoformat(self.to_date)
|
@@ -84,8 +97,11 @@ class MyTestCase(unittest.TestCase):
|
|
84 |
self.assertTrue(correct_date_poly)
|
85 |
self.assertTrue(correct_date_by_string)
|
86 |
|
87 |
-
# Checks that all tweets are from the correct user
|
88 |
def test_user(self):
|
|
|
|
|
|
|
|
|
89 |
print('Checking user...')
|
90 |
same_user = True
|
91 |
for username in self._df_uni.username:
|
@@ -95,8 +111,11 @@ class MyTestCase(unittest.TestCase):
|
|
95 |
|
96 |
self.assertTrue(same_user)
|
97 |
|
98 |
-
# Checks that all user_ids are correct
|
99 |
def test_user_id(self):
|
|
|
|
|
|
|
|
|
100 |
print('Checking user ids...')
|
101 |
same_user = True
|
102 |
first_id = self._df_uni['user_id'][0]
|
@@ -106,8 +125,11 @@ class MyTestCase(unittest.TestCase):
|
|
106 |
break
|
107 |
self.assertTrue(same_user)
|
108 |
|
109 |
-
# Checks if there are tweets that have been sampled several times
|
110 |
def test_no_doubles(self):
|
|
|
|
|
|
|
|
|
111 |
print('Checking doubles...')
|
112 |
id_set_uni = set(self._df_uni.id)
|
113 |
id_set_poly = set(self._df_poly.id)
|
@@ -116,8 +138,11 @@ class MyTestCase(unittest.TestCase):
|
|
116 |
self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
|
117 |
self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
|
118 |
|
119 |
-
# Checks that we have no None entries
|
120 |
def test_none(self):
|
|
|
|
|
|
|
|
|
121 |
print('Checking Nones...')
|
122 |
self.assertFalse(any(b == True for b in self._df_uni.isnull()))
|
123 |
self.assertFalse(any(b == True for b in self._df_poly.isnull()))
|
@@ -167,6 +192,10 @@ class MyTestCase(unittest.TestCase):
|
|
167 |
self.assertTrue(correct_ids)
|
168 |
|
169 |
def test_string_search(self):
|
|
|
|
|
|
|
|
|
170 |
correct_search = True
|
171 |
search = re.sub('ö', 'ø', self.search_string)
|
172 |
search = re.sub('ä', 'æ', search)
|
@@ -177,7 +206,5 @@ class MyTestCase(unittest.TestCase):
|
|
177 |
self.assertTrue(correct_search)
|
178 |
|
179 |
|
180 |
-
|
181 |
if __name__ == '__main__':
|
182 |
unittest.main()
|
183 |
-
|
|
|
5 |
from datetime import datetime
|
6 |
import sys
|
7 |
from pathlib import Path
|
8 |
+
from twitterscraper import TwitterScraper as ts
|
9 |
+
|
10 |
sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
|
11 |
|
|
|
12 |
|
13 |
class MyTestCase(unittest.TestCase):
|
14 |
|
|
|
19 |
from_date = "2022-05-01"
|
20 |
to_date = "2022-07-31"
|
21 |
user = 'jimmieakesson'
|
22 |
+
user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
|
23 |
+
'dadgostarnooshi']
|
24 |
+
sc1 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
|
25 |
+
sc2 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
|
26 |
+
sc3 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
|
27 |
search_string = 'miljö'
|
28 |
cls._df_uni = sc1.scrape_by_user(user)
|
29 |
cls._df_poly = sc2.scrape_by_several_users(user_list)
|
|
|
37 |
self.from_date = "2022-05-01"
|
38 |
self.to_date = "2022-07-31"
|
39 |
self.user = 'jimmieakesson'
|
40 |
+
self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi',
|
41 |
+
'SwedishPM',
|
42 |
+
'dadgostarnooshi']
|
43 |
+
self.search_string = 'miljö'
|
44 |
# self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
|
45 |
nbr_of_cols = 9
|
46 |
|
|
|
47 |
def test_correct_type(self):
|
48 |
+
"""
|
49 |
+
Checks that the returned datatype is pandas DataFrame
|
50 |
+
:return:
|
51 |
+
"""
|
52 |
print('Checking type...')
|
53 |
self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
|
54 |
self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
|
55 |
self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
|
56 |
|
|
|
|
|
57 |
def test_correct_nbr_tweets(self):
|
58 |
+
"""
|
59 |
+
Checks that we get the correct number of tweets.
|
60 |
+
NOTE: FOR NOW IT ONLY CHECKS THAT WE DON'T OVERSAMPLE
|
61 |
+
TODO: Check that we get the correct number of tweets.
|
62 |
+
:return:
|
63 |
+
"""
|
64 |
print('Checking number of tweets...')
|
65 |
self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
|
66 |
self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
|
67 |
self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
|
68 |
|
|
|
69 |
def test_dates(self):
|
70 |
+
"""
|
71 |
+
Checks that all dates are between the start date and the end date
|
72 |
+
:return:
|
73 |
+
"""
|
74 |
print('Checking dates...')
|
75 |
d_start = datetime.fromisoformat(self.from_date)
|
76 |
d_end = datetime.fromisoformat(self.to_date)
|
|
|
97 |
self.assertTrue(correct_date_poly)
|
98 |
self.assertTrue(correct_date_by_string)
|
99 |
|
|
|
100 |
def test_user(self):
|
101 |
+
"""
|
102 |
+
Checks that all tweets are from the correct user
|
103 |
+
:return:
|
104 |
+
"""
|
105 |
print('Checking user...')
|
106 |
same_user = True
|
107 |
for username in self._df_uni.username:
|
|
|
111 |
|
112 |
self.assertTrue(same_user)
|
113 |
|
|
|
114 |
def test_user_id(self):
|
115 |
+
"""
|
116 |
+
Checks that all user_ids are correct
|
117 |
+
:return:
|
118 |
+
"""
|
119 |
print('Checking user ids...')
|
120 |
same_user = True
|
121 |
first_id = self._df_uni['user_id'][0]
|
|
|
125 |
break
|
126 |
self.assertTrue(same_user)
|
127 |
|
|
|
128 |
def test_no_doubles(self):
|
129 |
+
"""
|
130 |
+
Checks that there are no tweets that have been sampled several times
|
131 |
+
:return:
|
132 |
+
"""
|
133 |
print('Checking doubles...')
|
134 |
id_set_uni = set(self._df_uni.id)
|
135 |
id_set_poly = set(self._df_poly.id)
|
|
|
138 |
self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
|
139 |
self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
|
140 |
|
|
|
141 |
def test_none(self):
|
142 |
+
"""
|
143 |
+
Checks that there are no None entries
|
144 |
+
:return:
|
145 |
+
"""
|
146 |
print('Checking Nones...')
|
147 |
self.assertFalse(any(b == True for b in self._df_uni.isnull()))
|
148 |
self.assertFalse(any(b == True for b in self._df_poly.isnull()))
|
|
|
192 |
self.assertTrue(correct_ids)
|
193 |
|
194 |
def test_string_search(self):
|
195 |
+
"""
|
196 |
+
TODO: Check this
|
197 |
+
:return:
|
198 |
+
"""
|
199 |
correct_search = True
|
200 |
search = re.sub('ö', 'ø', self.search_string)
|
201 |
search = re.sub('ä', 'æ', search)
|
|
|
206 |
self.assertTrue(correct_search)
|
207 |
|
208 |
|
|
|
209 |
if __name__ == '__main__':
|
210 |
unittest.main()
|
|