Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

olofbengtsson committed on Jul 13, 2022

Commit

4795530

1 Parent(s): 1f96fb0

Adding unittests

Browse files

Files changed (1) hide show

twitterscraper/scraper_test.py +106 -0

twitterscraper/scraper_test.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import unittest
+import pandas as pd
+from datetime import datetime
+import regex as re
+from TwitterScraper import TwitterScraper
+class MyTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        print('Super set up')
+        num_tweets = 40
+        from_date = "2022-05-01"
+        to_date = "2022-07-31"
+        user = 'jimmieakesson'
+        user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
+        sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
+        cls._df_uni = sc.scrape_by_user(user)
+        nbr_of_cols = 9
+    def setUp(self):
+        print('set up')
+        self.num_tweets = 40
+        self.from_date = "2022-05-01"
+        self.to_date = "2022-07-31"
+        self.user = 'jimmieakesson'
+        self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
+                     'dadgostarnooshi']
+        # self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
+        nbr_of_cols = 9
+    # Checks that the returned datatype is pandas DataFrame
+    def test_correct_type(self):
+        print('Checking type...')
+        self.assertEqual(type(self._df_uni), type(pd.DataFrame()))  # add assertion here
+    # Checks that we get the correct number of tweets
+    # OBS, FOR NOW IT ONLY CHECKS THAT WE DON'T OVERSAMPLE
+    def test_correct_nbr_tweets(self):
+        print('Checking number of tweets...')
+        self.assertTrue(self._df_uni.shape[0] < self.num_tweets)
+    # Checks that all dates are between the start date and the end date
+    def test_dates(self):
+        print('Checking dates...')
+        d_start = datetime.fromisoformat(self.from_date)
+        d_end = datetime.fromisoformat(self.to_date)
+        correct_date = True
+        for date in self._df_uni.date:
+            d = datetime.fromisoformat(date)
+            if not (d >= d_start and d <= d_end):
+                correct_date = False
+                break
+        self.assertTrue(correct_date)
+    # Checks that all tweets are from the correct user
+    def test_user(self):
+        print('Checking user...')
+        same_user = True
+        for username in self._df_uni.username:
+            if not username == self.user:
+                same_user = False
+                break
+        self.assertTrue(same_user)
+    # Checks that all user_ids are correct
+    def test_user_id(self):
+        print('Checking user ids...')
+        same_user = True
+        first_id = self._df_uni['user_id'][0]
+        for user_id in self._df_uni.user_id:
+            if not user_id == first_id:
+                same_user = False
+                break
+        self.assertTrue(same_user)
+    # Checks if there are tweets that have been sampled several times
+    def test_no_doubles(self):
+        print('Checking doubles...')
+        id_set = set(self._df_uni.id)
+        self.assertTrue(len(id_set) == self._df_uni.shape[0])
+    # Checks that we have no None entries
+    def test_none(self):
+        print('Checking Nones...')
+        self.assertFalse(any(b == True for b in self._df_uni.isnull()))
+    def test_no_url_tweets(self):
+        print('Checking url tweets...')
+        only_url = False
+        for tweet in self._df_uni.tweet:
+            if len(tweet.split()) == 1 and tweet.split()[0].startswith('https'):
+                print(tweet.split())
+                only_url = True
+                break
+        self.assertFalse(only_url)
+if __name__ == '__main__':  # true only when this file is executed as a script
+    unittest.main()  # hand control to unittest's command-line runner