Spaces:
Runtime error
Runtime error
File size: 7,105 Bytes
58a2a29 4795530 f8ad876 4795530 f8ad876 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 58a2a29 4795530 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
import re
import unittest
import pandas as pd
from datetime import datetime
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
from TwitterScraper import TwitterScraper # Detta är inget problem, den hittar filen
class MyTestCase(unittest.TestCase):
    """Integration tests for the DataFrames returned by ``TwitterScraper``.

    Three scrapes are executed once in ``setUpClass`` (single user, several
    users, free-text search) and the resulting frames are shared by every
    test method through the class attributes ``_df_uni``, ``_df_poly`` and
    ``_df_by_string``.
    """

    # Scrape parameters. They are class attributes so that the fixture in
    # ``setUpClass`` and the assertions in the tests read the same values.
    num_tweets = 40
    from_date = "2022-05-01"
    to_date = "2022-07-31"
    user = 'jimmieakesson'
    user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund',
                 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
    search_string = 'miljö'

    @classmethod
    def setUpClass(cls):
        """Run the three scrapes once and cache the returned frames on the class."""
        print('Super set up')
        sc1 = TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                             num_tweets=cls.num_tweets)
        sc2 = TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                             num_tweets=cls.num_tweets)
        sc3 = TwitterScraper(from_date=cls.from_date, to_date=cls.to_date,
                             num_tweets=cls.num_tweets)
        cls._df_uni = sc1.scrape_by_user(cls.user)
        cls._df_poly = sc2.scrape_by_several_users(cls.user_list)
        cls._df_by_string = sc3.scrape_by_string(cls.search_string)

    def setUp(self):
        """Per-test hook; the scrape parameters are available as class attributes."""
        print('set up')

    def _frames(self):
        """Return ``(label, frame)`` pairs for the three scraped results."""
        return (
            ('uni', self._df_uni),
            ('poly', self._df_poly),
            ('by_string', self._df_by_string),
        )

    def test_correct_type(self):
        """Every scrape method returns a pandas DataFrame."""
        print('Checking type...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                self.assertIsInstance(df, pd.DataFrame)

    def test_correct_nbr_tweets(self):
        """No frame holds more rows than ``num_tweets``.

        NOTE: for now this only checks that we do not over-sample.
        """
        print('Checking number of tweets...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                self.assertLessEqual(df.shape[0], self.num_tweets)

    def test_dates(self):
        """Every ``date`` value lies between ``from_date`` and ``to_date`` (inclusive).

        Values are parsed with ``datetime.fromisoformat``, so the column is
        expected to hold ISO-8601 strings.
        """
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        d_end = datetime.fromisoformat(self.to_date)
        for label, df in self._frames():
            with self.subTest(frame=label):
                for date in df['date']:
                    parsed = datetime.fromisoformat(date)
                    self.assertTrue(
                        d_start <= parsed <= d_end,
                        f'{date} is outside [{self.from_date}, {self.to_date}]',
                    )

    def test_user(self):
        """All rows of the single-user scrape carry the requested username."""
        print('Checking user...')
        for username in self._df_uni['username']:
            self.assertEqual(username, self.user)

    def test_user_id(self):
        """All rows of the single-user scrape share one ``user_id``."""
        print('Checking user ids...')
        # Materialise the column so the first element is taken by position,
        # not by index label; an empty frame simply has nothing to compare.
        user_ids = list(self._df_uni['user_id'])
        for user_id in user_ids:
            self.assertEqual(user_id, user_ids[0])

    def test_no_doubles(self):
        """No tweet id occurs more than once within a frame."""
        print('Checking doubles...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                self.assertEqual(len(set(df['id'])), df.shape[0])

    def test_none(self):
        """No frame contains a missing (None/NaN) cell."""
        print('Checking Nones...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                # Iterating a DataFrame yields its column labels, so the mask
                # has to be reduced explicitly to look at the cell values.
                self.assertFalse(df.isnull().values.any())

    def test_no_url_tweets(self):
        """No tweet consists solely of a single ``https`` link."""
        print('Checking url tweets...')
        for label, df in self._frames():
            with self.subTest(frame=label):
                for tweet in df['tweet']:
                    words = tweet.split()
                    only_url = len(words) == 1 and words[0].startswith('https')
                    self.assertFalse(only_url, f'tweet is only a URL: {words}')

    def test_many_users(self):
        """Every row of the multi-user scrape belongs to a requested user."""
        for username in self._df_poly['username']:
            self.assertIn(username, self.user_list)

    def test_many_user_ids(self):
        """Within the multi-user scrape each username maps to a single ``user_id``."""
        id_by_username = {}
        usernames = self._df_poly['username']
        user_ids = self._df_poly['user_id']
        for username, user_id in zip(usernames, user_ids):
            # The first id seen for a username becomes the reference value.
            expected = id_by_username.setdefault(username, user_id)
            self.assertEqual(user_id, expected)

    def test_string_search(self):
        """Every tweet of the text search contains the search string.

        A spelling with ``ö``/``ä`` replaced by ``ø``/``æ`` is accepted as well
        (presumably to allow Norwegian/Danish hits -- confirm with the scraper).
        """
        term = self.search_string.lower()
        variant = term.replace('ö', 'ø').replace('ä', 'æ')
        accepted = (term, variant)
        for tweet in self._df_by_string['tweet']:
            lowered = tweet.lower()
            self.assertTrue(
                any(candidate in lowered for candidate in accepted),
                f'none of {accepted} found in tweet: {tweet!r}',
            )
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|