Spaces:
Runtime error
Runtime error
Added test folder with some tests for the scraper and a file to start writing in for the classifier
f8ad876
import sys
import unittest
from datetime import datetime
from pathlib import Path

import pandas as pd

# Make the sibling "twitterscraper" directory importable. The path is resolved
# first so the parents[1] lookup also works when __file__ is a bare relative
# filename (which would otherwise raise IndexError).
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "twitterscraper"))

# Not a problem even if an IDE flags it: the module is found at runtime
# through the sys.path entry added above.
from TwitterScraper import TwitterScraper  # noqa: E402
class MyTestCase(unittest.TestCase):
    """Sanity checks on the DataFrame returned by ``scrape_by_user``.

    A single scrape is performed for the whole class in ``setUpClass`` and
    stored in ``cls._df_uni``; every test inspects that shared frame.
    """

    # Scrape parameters, shared by setUpClass (which performs the scrape) and
    # setUp (which exposes them to the tests) so the two cannot drift apart.
    NUM_TWEETS = 40
    FROM_DATE = "2022-05-01"
    TO_DATE = "2022-07-31"
    USER = 'jimmieakesson'
    USER_LIST = ('jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson',
                 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi')

    @classmethod
    def setUpClass(cls):
        """Scrape once and cache the result for all tests in the class.

        Must be a classmethod: unittest invokes it as ``cls.setUpClass()``.
        """
        print('Super set up')
        sc = TwitterScraper(from_date=cls.FROM_DATE, to_date=cls.TO_DATE,
                            num_tweets=cls.NUM_TWEETS)
        cls._df_uni = sc.scrape_by_user(cls.USER)

    def setUp(self):
        """Expose the scrape parameters as instance attributes for the tests."""
        print('set up')
        self.num_tweets = self.NUM_TWEETS
        self.from_date = self.FROM_DATE
        self.to_date = self.TO_DATE
        self.user = self.USER
        self.user_list = list(self.USER_LIST)

    # Checks that the returned datatype is pandas DataFrame
    def test_correct_type(self):
        print('Checking type...')
        self.assertIsInstance(self._df_uni, pd.DataFrame)

    # Checks that we get the correct number of tweets
    # NOTE: for now it only checks that we don't oversample, so receiving
    # exactly num_tweets rows is accepted.
    def test_correct_nbr_tweets(self):
        print('Checking number of tweets...')
        self.assertLessEqual(self._df_uni.shape[0], self.num_tweets)

    # Checks that all dates are between the start date and the end date
    def test_dates(self):
        print('Checking dates...')
        d_start = datetime.fromisoformat(self.from_date)
        d_end = datetime.fromisoformat(self.to_date)
        # Assumes the "date" column holds ISO-format strings -- TODO confirm
        # against TwitterScraper's output.
        out_of_range = [
            date for date in self._df_uni['date']
            if not d_start <= datetime.fromisoformat(date) <= d_end
        ]
        self.assertEqual(out_of_range, [],
                         'tweets dated outside the scraped interval')

    # Checks that all tweets are from the correct user
    def test_user(self):
        print('Checking user...')
        wrong_users = [name for name in self._df_uni['username']
                       if name != self.user]
        self.assertEqual(wrong_users, [], 'tweets from unexpected users')

    # Checks that all user_ids are correct, i.e. every row has the same id
    def test_user_id(self):
        print('Checking user ids...')
        # nunique avoids a label-based [0] lookup, which fails when the index
        # does not contain the label 0 or the frame is empty.
        distinct_ids = self._df_uni['user_id'].nunique(dropna=False)
        self.assertLessEqual(distinct_ids, 1)

    # Checks if there are tweets that have been sampled several times
    def test_no_doubles(self):
        print('Checking doubles...')
        tweet_ids = self._df_uni['id']
        self.assertEqual(len(set(tweet_ids)), self._df_uni.shape[0])

    # Checks that we have no None entries
    def test_none(self):
        print('Checking Nones...')
        # Iterating a DataFrame yields its column labels, not its cells, so
        # the null mask has to be reduced over its values instead.
        self.assertFalse(self._df_uni.isnull().values.any())

    # Checks that no tweet consists of nothing but a single URL
    def test_no_url_tweets(self):
        print('Checking url tweets...')
        url_only = []
        for tweet in self._df_uni['tweet']:
            words = tweet.split()
            if len(words) == 1 and words[0].startswith('https'):
                url_only.append(tweet)
        self.assertEqual(url_only, [], 'tweets containing only a URL')
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()