# NOTE: "Spaces: Runtime error" was page-header residue captured together with
# this file; it is not part of the test module.
import re
import sys
import unittest
from datetime import datetime
from pathlib import Path

import pandas as pd

# Extend the import path *before* importing the scraper, otherwise the
# insertion cannot influence the import below.
sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")

from twitterscraper import TwitterScraper as ts  # noqa: E402
class MyTestCase(unittest.TestCase):
    """Tests for the three scraping entry points of ``ts.TwitterScraper``.

    ``setUpClass`` performs three scrapes once for the whole class (single
    user, several users, free-text search) and stores the results as
    ``_df_uni``, ``_df_poly`` and ``_df_by_string``.  Every test method then
    inspects those frames.
    """

    # Scrape parameters shared by the class-level fixture and the tests.
    num_tweets = 40
    from_date = "2022-05-01"
    to_date = "2022-07-31"
    user = 'jimmieakesson'
    user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson',
                 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
    search_string = 'miljö'

    @classmethod
    def setUpClass(cls):
        """Run the three scrapes once and cache the results on the class.

        unittest invokes this hook on the class without arguments, so it must
        be a classmethod.
        """
        print('Super set up')
        scraper_kwargs = {
            'from_date': cls.from_date,
            'to_date': cls.to_date,
            'num_tweets': cls.num_tweets,
        }
        # One scraper per query, all constructed before any scrape runs.
        sc1 = ts.TwitterScraper(**scraper_kwargs)
        sc2 = ts.TwitterScraper(**scraper_kwargs)
        sc3 = ts.TwitterScraper(**scraper_kwargs)
        cls._df_uni = sc1.scrape_by_user(cls.user)
        cls._df_poly = sc2.scrape_by_several_users(cls.user_list)
        cls._df_by_string = sc3.scrape_by_string(cls.search_string)

    def setUp(self):
        """Per-test hook; the scrape parameters live on the class."""
        print('set up')

    def _frames(self):
        """Return ``(label, frame)`` pairs for the three scraped frames."""
        return (
            ('uni', self._df_uni),
            ('poly', self._df_poly),
            ('by_string', self._df_by_string),
        )

    @staticmethod
    def _is_url_only(tweet):
        """Return True when *tweet* is a single token starting with 'https'."""
        words = tweet.split()
        return len(words) == 1 and words[0].startswith('https')

    def test_correct_type(self):
        """
        Checks that the returned datatype is pandas DataFrame
        :return:
        """
        print('Checking type...')
        for label, frame in self._frames():
            with self.subTest(frame=label):
                self.assertIsInstance(frame, pd.DataFrame)

    def test_correct_nbr_tweets(self):
        """
        Checks that we get the correct number of tweets.
        OBS FOR NOW IT ONLY CHECKS THAT WE DON'T OVER SAMPLE
        TODO: Check that we get the correct number of tweets.
        :return:
        """
        print('Checking number of tweets...')
        for label, frame in self._frames():
            with self.subTest(frame=label):
                self.assertLessEqual(frame.shape[0], self.num_tweets)

    def test_dates(self):
        """
        Checks that all dates are between the start date and the end date
        :return:
        """
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        d_end = datetime.fromisoformat(self.to_date)
        for label, frame in self._frames():
            with self.subTest(frame=label):
                out_of_range = [
                    date for date in frame['date']
                    if not d_start <= datetime.fromisoformat(date) <= d_end
                ]
                self.assertEqual(out_of_range, [])

    def test_user(self):
        """
        Checks that all tweets are from the correct user
        :return:
        """
        print('Checking user...')
        wrong_users = [
            name for name in self._df_uni['username'] if name != self.user
        ]
        self.assertEqual(wrong_users, [])

    def test_user_id(self):
        """
        Checks that all user_ids are correct
        :return:
        """
        print('Checking user ids...')
        # A set avoids the label lookup ``[0]``, which raises on an empty
        # frame or on a frame whose index does not contain the label 0.
        distinct_ids = set(self._df_uni['user_id'])
        self.assertLessEqual(len(distinct_ids), 1)

    def test_no_doubles(self):
        """
        Checks that there are no tweets that have been sampled several times
        :return:
        """
        print('Checking doubles...')
        for label, frame in self._frames():
            with self.subTest(frame=label):
                self.assertEqual(len(set(frame['id'])), frame.shape[0])

    def test_none(self):
        """
        Checks that there are no None entries
        :return:
        """
        print('Checking Nones...')
        for label, frame in self._frames():
            with self.subTest(frame=label):
                # Iterating a DataFrame yields column labels, not cell
                # values, so the null mask has to be reduced explicitly.
                self.assertFalse(frame.isnull().values.any())

    def test_no_url_tweets(self):
        """
        Checks that no tweet consists of a single URL only
        :return:
        """
        print('Checking url tweets...')
        for label, frame in self._frames():
            with self.subTest(frame=label):
                url_only = [
                    tweet for tweet in frame['tweet']
                    if self._is_url_only(tweet)
                ]
                self.assertEqual(url_only, [])

    def test_many_users(self):
        """
        Checks that the multi-user scrape only contains requested users
        :return:
        """
        unexpected = [
            name for name in self._df_poly['username']
            if name not in self.user_list
        ]
        self.assertEqual(unexpected, [])

    def test_many_user_ids(self):
        """
        Checks that every username in the multi-user scrape maps to one user_id
        :return:
        """
        id_by_user = {}
        # zip over the columns instead of ``frame[col][i]``: positional
        # pairing works regardless of the frame's index labels.
        pairs = zip(self._df_poly['username'], self._df_poly['user_id'])
        for username, user_id in pairs:
            expected_id = id_by_user.setdefault(username, user_id)
            self.assertEqual(user_id, expected_id)

    def test_string_search(self):
        """
        Checks that every tweet from the string search contains the search
        string, also accepting the variant with 'ö' -> 'ø' and 'ä' -> 'æ'.
        TODO: Check this
        :return:
        """
        alternative = self.search_string.replace('ö', 'ø').replace('ä', 'æ')
        # Tweets are lower-cased before matching, so lower-case the needles too.
        needles = [self.search_string.lower(), alternative.lower()]
        unmatched = [
            tweet for tweet in self._df_by_string['tweet']
            if not any(needle in tweet.lower() for needle in needles)
        ]
        self.assertEqual(unmatched, [])
if __name__ == '__main__':
    # Run the test suite when this file is executed directly as a script.
    unittest.main()