Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Demea9000 committed on Jul 18, 2022

Commit

ba446a6

1 Parent(s): 3bb0b34

cleaned up unittests

Browse files

Files changed (3) hide show

.idea/misc.xml +1 -1
.idea/politweet.iml +1 -1
tests/scraper_test.py +45 -18

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
 </project>

.idea/politweet.iml CHANGED Viewed

@@ -6,7 +6,7 @@
       <excludeFolder url="file://$MODULE_DIR$/env" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
-    <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">

       <excludeFolder url="file://$MODULE_DIR$/env" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
+    <orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">

tests/scraper_test.py CHANGED Viewed

@@ -5,9 +5,10 @@ import pandas as pd
 from datetime import datetime
 import sys
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
-from TwitterScraper import TwitterScraper # Detta är inget problem, den hittar filen
 class MyTestCase(unittest.TestCase):
@@ -18,10 +19,11 @@ class MyTestCase(unittest.TestCase):
         from_date = "2022-05-01"
         to_date = "2022-07-31"
         user = 'jimmieakesson'
-        user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM', 'dadgostarnooshi']
-        sc1 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
-        sc2 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
-        sc3 = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
         search_string = 'miljö'
         cls._df_uni = sc1.scrape_by_user(user)
         cls._df_poly = sc2.scrape_by_several_users(user_list)
@@ -35,29 +37,40 @@ class MyTestCase(unittest.TestCase):
         self.from_date = "2022-05-01"
         self.to_date = "2022-07-31"
         self.user = 'jimmieakesson'
-        self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
-                     'dadgostarnooshi']
-        self.search_string  = 'miljö'
         # self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
         nbr_of_cols = 9
-    # Checks that the returned datatype is pandas DataFrame
     def test_correct_type(self):
         print('Checking type...')
         self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
         self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
         self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
-    # Checks that we get the correct number of tweets
-    # OBS, FOR NOW IT ONLY CHECKS THAT WE DON'T OVER SAMPLE
     def test_correct_nbr_tweets(self):
         print('Checking number of tweets...')
         self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
         self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
         self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
-    # Checks that all dates are between the start date and the end date
     def test_dates(self):
         print('Checking dates...')
         d_start = datetime.fromisoformat(self.from_date)
         d_end = datetime.fromisoformat(self.to_date)
@@ -84,8 +97,11 @@ class MyTestCase(unittest.TestCase):
         self.assertTrue(correct_date_poly)
         self.assertTrue(correct_date_by_string)
-    # Checks that all tweets are from the correct user
     def test_user(self):
         print('Checking user...')
         same_user = True
         for username in self._df_uni.username:
@@ -95,8 +111,11 @@ class MyTestCase(unittest.TestCase):
         self.assertTrue(same_user)
-    # Checks that all user_ids are correct
     def test_user_id(self):
         print('Checking user ids...')
         same_user = True
         first_id = self._df_uni['user_id'][0]
@@ -106,8 +125,11 @@ class MyTestCase(unittest.TestCase):
                 break
         self.assertTrue(same_user)
-    # Checks if there are tweets that have been sampled several times
     def test_no_doubles(self):
         print('Checking doubles...')
         id_set_uni = set(self._df_uni.id)
         id_set_poly = set(self._df_poly.id)
@@ -116,8 +138,11 @@ class MyTestCase(unittest.TestCase):
         self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
         self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
-    # Checks that we have no None entries
     def test_none(self):
         print('Checking Nones...')
         self.assertFalse(any(b == True for b in self._df_uni.isnull()))
         self.assertFalse(any(b == True for b in self._df_poly.isnull()))
@@ -167,6 +192,10 @@ class MyTestCase(unittest.TestCase):
         self.assertTrue(correct_ids)
     def test_string_search(self):
         correct_search = True
         search = re.sub('ö', 'ø', self.search_string)
         search = re.sub('ä', 'æ', search)
@@ -177,7 +206,5 @@ class MyTestCase(unittest.TestCase):
         self.assertTrue(correct_search)
 if __name__ == '__main__':
     unittest.main()

 from datetime import datetime
 import sys
 from pathlib import Path
+from twitterscraper import TwitterScraper as ts
 sys.path.insert(0, str(Path(__file__).parents[1]) + "/twitterscraper")
 class MyTestCase(unittest.TestCase):
         from_date = "2022-05-01"
         to_date = "2022-07-31"
         user = 'jimmieakesson'
+        user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
+                     'dadgostarnooshi']
+        sc1 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
+        sc2 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
+        sc3 = ts.TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
         search_string = 'miljö'
         cls._df_uni = sc1.scrape_by_user(user)
         cls._df_poly = sc2.scrape_by_several_users(user_list)
         self.from_date = "2022-05-01"
         self.to_date = "2022-07-31"
         self.user = 'jimmieakesson'
+        self.user_list = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi',
+                          'SwedishPM',
+                          'dadgostarnooshi']
+        self.search_string = 'miljö'
         # self.sc = TwitterScraper(from_date=from_date, to_date=to_date, num_tweets=num_tweets)
         nbr_of_cols = 9
     def test_correct_type(self):
+        """
+        Checks that the returned datatype is pandas DataFrame
+        :return:
+        """
         print('Checking type...')
         self.assertEqual(type(self._df_uni), type(pd.DataFrame()))
         self.assertEqual(type(self._df_poly), type(pd.DataFrame()))
         self.assertEqual(type(self._df_by_string), type(pd.DataFrame()))
     def test_correct_nbr_tweets(self):
+        """
+        Checks that we get the correct number of tweets.
+        OBS FOR NOW IT ONLY CHECKS THAT WE DON'T OVER SAMPLE
+        TODO: Check that we get the correct number of tweets.
+        :return:
+        """
         print('Checking number of tweets...')
         self.assertTrue(self._df_uni.shape[0] <= self.num_tweets)
         self.assertTrue(self._df_poly.shape[0] <= self.num_tweets)
         self.assertTrue(self._df_by_string.shape[0] <= self.num_tweets)
     def test_dates(self):
+        """
+        Checks that all dates are between the start date and the end date
+        :return:
+        """
         print('Checking dates...')
         d_start = datetime.fromisoformat(self.from_date)
         d_end = datetime.fromisoformat(self.to_date)
         self.assertTrue(correct_date_poly)
         self.assertTrue(correct_date_by_string)
     def test_user(self):
+        """
+        Checks that all tweets are from the correct user
+        :return:
+        """
         print('Checking user...')
         same_user = True
         for username in self._df_uni.username:
         self.assertTrue(same_user)
     def test_user_id(self):
+        """
+        Checks that all user_ids are correct
+        :return:
+        """
         print('Checking user ids...')
         same_user = True
         first_id = self._df_uni['user_id'][0]
                 break
         self.assertTrue(same_user)
     def test_no_doubles(self):
+        """
+        Checks that there are no tweets that have been sampled several times
+        :return:
+        """
         print('Checking doubles...')
         id_set_uni = set(self._df_uni.id)
         id_set_poly = set(self._df_poly.id)
         self.assertTrue(len(id_set_poly) == self._df_poly.shape[0])
         self.assertTrue(len(id_set_by_string) == self._df_by_string.shape[0])
     def test_none(self):
+        """
+        Checks that there are no None entries
+        :return:
+        """
         print('Checking Nones...')
         self.assertFalse(any(b == True for b in self._df_uni.isnull()))
         self.assertFalse(any(b == True for b in self._df_poly.isnull()))
         self.assertTrue(correct_ids)
     def test_string_search(self):
+        """
+        TODO: Check this
+        :return:
+        """
         correct_search = True
         search = re.sub('ö', 'ø', self.search_string)
         search = re.sub('ä', 'æ', search)
         self.assertTrue(correct_search)
 if __name__ == '__main__':
     unittest.main()