Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Mosa commited on Jul 8, 2022

Commit

c5c6036

•

1 Parent(s): 39fded5

I fixed mentioned issues.

Browse files

Files changed (4) hide show

.idea/misc.xml +1 -1
.idea/politweet.iml +1 -0
twitter-scraper/scrape.py +67 -54
twitter-scraper/twitter_scraper.ipynb +392 -153

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
 </project>

.idea/politweet.iml CHANGED Viewed

@@ -3,6 +3,7 @@
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />

   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />

twitter-scraper/scrape.py CHANGED Viewed

@@ -1,51 +1,58 @@
-from tkinter import EXCEPTION
 import twint
 from datetime import date
-"""
-This class is a twitter scraper called TwitterScraper. It takes the user as input and collects the user's tweets
-from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it collects the number of tweets 'num_tweets' from today.
-It outputs a dictionary with the tweet unique id and some other information.
-input: user, from_date, to_date, num_tweets
-output: dict
-"""
-class scraper:
-    def __init__(self, from_date="2006-07-01", to_date=str(date.today()), num_tweets=20):
-        #self.user = user
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.conf = twint.Config()
-    def scrape_by_user(self,_user):
-        ##using twint to extract tweets
-        self.conf.Search = "from:@" + _user   # If the search string is a username.
-        return self.__get_tweets_from_twint__()
-    def scrape_by_string(self,_string:str):
-        self.conf.Search = _string
-        return self.__get_tweets_from_twint__()
-    def scrape_by_user_and_string(self,_user:str,_string:str):
         self.conf.Username = _user
-        self.conf.Search = _string
-        return self.__get_tweets_from_twint__()
-    def __get_only_tweets(tweet_and_replies):
-        #This functions input arg is a data frame with tweets and removes all tweets with  starting with \"@\" which is indicator of a reply or retweet.
-        tweet=tweet_and_replies["tweet"]
-        indx_replies=[]
-        for i in range(len(tweet)):
-            if tweet[i].startswith("@"):
                 indx_replies.append(i)
-        only_tweets=tweet_and_replies.drop(labels=indx_replies,axis=0)
-            # drop removes the columns which its index specified by indx_replies...   axis=0  if we want to delete rows, and inplace changes the same data_frame without creating a new.
-        #print(len(tweet_and_replies['tweet']), " of them are Tweets")
-        return only_tweets
-    def __get_tweets_from_twint__(self):
-        ''' __get_tweets_from_twint__
         tweet info is a dataframe with fallowing columns
             Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
             'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
@@ -53,25 +60,31 @@ class scraper:
             'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
             'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
             'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
-            'trans_dest']
-        we just pick the relevant ones.
         c is a twint.Config() object
-        '''
-        self.conf.Pandas = True
-        self.conf.Count = True
-        self.conf.Limit = self.num_tweets
         self.conf.Since = self.from_date
         self.conf.Until = self.to_date
-        self.conf.Hide_output = True
-        twint.run.Search(self.conf)
-        tweet_info =twint.output.panda.Tweets_df
-        tweet_info = tweet_info[["id","tweet","date","user_id","username","urls" ,'nlikes', 'nreplies', 'nretweets']]
-        df = scraper.__get_only_tweets(tweet_info)
-        return df
-    def __check_date_type(d1,d2):
-            if (type(d1) or type(d2)) is not type("str"):  # If the type of ite date input isent string it generates exception
-                print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
-                raise EXCEPTION("Incorrect date type Exception!")
-            elif (len(d1.split("-")) or len(d2.split("-")))<2:
-                print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
-                raise EXCEPTION("Incorrect date type Exception!")

 import twint
 from datetime import date
+class TwitterScraper(object):
+    """
+    This class is a Twitter scraper built on twint. It collects tweets posted
+    from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, they default to "2006-07-01" and the current date; 'num_tweets' is passed to twint as the tweet limit.
+    The scrape methods output a pandas DataFrame with the tweet unique id and some other information.
+    input: from_date, to_date, num_tweets
+    output: pandas DataFrame
+    """
+    def __init__(self, from_date="2006-07-01", to_date=str(date.today()), num_tweets=20):
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.conf = twint.Config()
+    def scrape_by_user(self, _user):
+        """This method uses twint to extract tweets  based on username"""
+        self.conf.Search = "from:@" + _user  # if the search configuration is given in this format, it searches
+        # by username.
+        return self.__get_tweets__from_twint__()
+    def scrape_by_string(self, _string: str):
+        """This method uses twint to extract tweets based on string.
+        _string is used as the twint search query, so the extracted tweets are the ones matching it.
+        """
+        self.conf.Search = _string  # this tells twint configuration to search for string
+        return self.__get_tweets__from_twint__()
+    def scrape_by_user_and_string(self, _user: str, _string: str):
+        """This method uses twint to extract tweets based on string and username"""
         self.conf.Username = _user
+        self.conf.Search = _string
+        return self.__get_tweets__from_twint__()
+    def get_only_tweets(self, tweet_and_replies_info):
+        tweet_and_replies = tweet_and_replies_info["tweet"]
+        """
+        This function's input arg is a data frame (the output from the scrape methods) and it removes
+         all tweets starting with \"@\", which is an indicator of a reply or retweet.
+        """
+        indx_replies = []
+        for i in range(len(tweet_and_replies)):
+            if tweet_and_replies[i].startswith("@"):
                 indx_replies.append(i)
+        tweets_info = tweet_and_replies_info.drop(labels=indx_replies, axis=0)
+        # drop removes the rows whose index labels are listed in
+        # indx_replies. axis=0 is used because we want to delete rows.
+        #print(len(tweets['tweet']), " of them are Tweets")
+        return tweets_info
+    def __get_tweets__from_twint__(self):
+        """ __get_tweets_from_twint__
         tweet info is a dataframe with fallowing columns
             Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
             'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
             'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
             'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
             'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
+            'trans_dest']
+        we just pick the relevant ones.
         c is a twint.Config() object
+        we also configure twint output.
+        """
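+        self.conf.Pandas = True  # keep the results in twint's pandas output (read below from Tweets_df)
+        self.conf.Count = True  # presumably reports the number of collected tweets when done; confirm against twint docs
+        self.conf.Limit = self.num_tweets  # specifies how many tweets should be scraped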
         self.conf.Since = self.from_date
         self.conf.Until = self.to_date
+        self.conf.Hide_output = True  # Hides the output. If set to False it will print tweets in the terminal window.
+        twint.run.Search(self.conf)
+        tweet_and_replies_inf = twint.output.panda.Tweets_df  # the scraped tweets, as a pandas dataframe.
+        tweet_and_replies_inf = tweet_and_replies_inf[
+            ["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
+        return tweet_and_replies_inf
+    # def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"):  # If the type of ite date input
+    # is not string it generates exception print("[!] Please make sure the date is a string in this format
+    # \"yyyy-mm-dd\" ") raise EXCEPTION("Incorrect date type Exception!") elif (len(d1.split("-")) or len(d2.split(
+    # "-")))<2: print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ") raise EXCEPTION(
+    # "Incorrect date type Exception!")
+if __name__ == "__main__":
+     sc = TwitterScraper(num_tweets=10)
+     dc = sc.scrape_by_string("jimmieakesson")
+     print(dc.head())
+     print(type(dc))

twitter-scraper/twitter_scraper.ipynb CHANGED Viewed

@@ -54,7 +54,7 @@
    "outputs": [],
    "source": [
     "import scrape\n",
-    "sc= scrape.scraper( from_date=\"2006-07-01\", to_date= \"2022-06-22\",num_tweets=100)\n"
    ]
   },
   {
@@ -67,7 +67,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[+] Finished: Successfully collected 100 Tweets.\n"
      ]
     },
     {
@@ -105,61 +105,197 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>1539394015560359944</td>\n",
-       "      <td>wAllah comme si on avais pas d’autre choses j’...</td>\n",
-       "      <td>2022-06-22 01:45:08</td>\n",
-       "      <td>1202681666487115776</td>\n",
-       "      <td>svwssen</td>\n",
        "      <td>[]</td>\n",
-       "      <td>3</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>1539387277960433664</td>\n",
-       "      <td>Şev baş temaşevanen heja  https://t.co/aqw5vNPLFr</td>\n",
-       "      <td>2022-06-22 01:18:22</td>\n",
-       "      <td>743954955220979713</td>\n",
-       "      <td>coolnikoff</td>\n",
-       "      <td>[https://youtu.be/n_vYzgRBFUI]</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>1539386040313851904</td>\n",
-       "      <td>Heja!＝頑張れ！</td>\n",
-       "      <td>2022-06-22 01:13:27</td>\n",
-       "      <td>176860217</td>\n",
-       "      <td>swedish_bot</td>\n",
        "      <td>[]</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>1539379141597925377</td>\n",
-       "      <td>skończyłam po 15h naukę na dziś ❤️😋 wrócę po u...</td>\n",
-       "      <td>2022-06-22 00:46:02</td>\n",
-       "      <td>840668853948559360</td>\n",
-       "      <td>kiniazimmer</td>\n",
        "      <td>[]</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>1539377784707026945</td>\n",
-       "      <td>je suis en train de siroter mon candy up frche...</td>\n",
-       "      <td>2022-06-22 00:40:38</td>\n",
-       "      <td>980874157998137345</td>\n",
-       "      <td>__ajal</td>\n",
        "      <td>[]</td>\n",
-       "      <td>3</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -169,40 +305,40 @@
       ],
       "text/plain": [
        "                     id                                              tweet  \\\n",
-       "0   1539394015560359944  wAllah comme si on avais pas d’autre choses j’...   \n",
-       "5   1539387277960433664  Şev baş temaşevanen heja  https://t.co/aqw5vNPLFr   \n",
-       "7   1539386040313851904                                         Heja!＝頑張れ！   \n",
-       "9   1539379141597925377  skończyłam po 15h naukę na dziś ❤️😋 wrócę po u...   \n",
-       "10  1539377784707026945  je suis en train de siroter mon candy up frche...   \n",
        "\n",
-       "                   date              user_id     username  \\\n",
-       "0   2022-06-22 01:45:08  1202681666487115776      svwssen   \n",
-       "5   2022-06-22 01:18:22   743954955220979713   coolnikoff   \n",
-       "7   2022-06-22 01:13:27            176860217  swedish_bot   \n",
-       "9   2022-06-22 00:46:02   840668853948559360  kiniazimmer   \n",
-       "10  2022-06-22 00:40:38   980874157998137345       __ajal   \n",
        "\n",
-       "                              urls  nlikes  nreplies  nretweets  \n",
-       "0                               []       3         0          0  \n",
-       "5   [https://youtu.be/n_vYzgRBFUI]       0         0          0  \n",
-       "7                               []       0         0          0  \n",
-       "9                               []       0         0          0  \n",
-       "10                              []       3         0          0  "
       ]
      },
-     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "d=sc.scrape_by_string(\"heja\")\n",
-    "d.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
    "id": "a7912a91",
    "metadata": {},
    "outputs": [
@@ -210,7 +346,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[+] Finished: Successfully collected 100 Tweets.\n"
      ]
     },
     {
@@ -247,31 +383,55 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>1537770920621879297</td>\n",
        "      <td>Man kan ha synpunkter på en sådan lösning, men...</td>\n",
        "      <td>2022-06-17 14:15:32</td>\n",
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
-       "      <td>692</td>\n",
        "      <td>17</td>\n",
        "      <td>41</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
        "      <td>1537770809225273344</td>\n",
        "      <td>Är det ont om plats på anstalterna så får man ...</td>\n",
        "      <td>2022-06-17 14:15:05</td>\n",
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
-       "      <td>809</td>\n",
        "      <td>26</td>\n",
        "      <td>57</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
        "      <td>1537770713368735744</td>\n",
        "      <td>Döms man för brott, särskilt våldsbrott, ska m...</td>\n",
        "      <td>2022-06-17 14:14:43</td>\n",
@@ -282,86 +442,64 @@
        "      <td>26</td>\n",
        "      <td>86</td>\n",
        "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1537770657823576066</td>\n",
-       "      <td>Platsbrist? Jaha, vad spelar det för roll?   D...</td>\n",
-       "      <td>2022-06-17 14:14:29</td>\n",
-       "      <td>95972673</td>\n",
-       "      <td>jimmieakesson</td>\n",
-       "      <td>[https://sverigesradio.se/artikel/domda-kvinno...</td>\n",
-       "      <td>1152</td>\n",
-       "      <td>85</td>\n",
-       "      <td>132</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>1534230353094885383</td>\n",
-       "      <td>Det är ytterst beklagligt att Magdalena Anders...</td>\n",
-       "      <td>2022-06-07 19:46:35</td>\n",
-       "      <td>95972673</td>\n",
-       "      <td>jimmieakesson</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>6121</td>\n",
-       "      <td>546</td>\n",
-       "      <td>557</td>\n",
-       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "                    id                                              tweet  \\\n",
-       "1  1537770920621879297  Man kan ha synpunkter på en sådan lösning, men...   \n",
-       "2  1537770809225273344  Är det ont om plats på anstalterna så får man ...   \n",
-       "3  1537770713368735744  Döms man för brott, särskilt våldsbrott, ska m...   \n",
-       "4  1537770657823576066  Platsbrist? Jaha, vad spelar det för roll?   D...   \n",
-       "5  1534230353094885383  Det är ytterst beklagligt att Magdalena Anders...   \n",
-       "\n",
-       "                  date   user_id       username  \\\n",
-       "1  2022-06-17 14:15:32  95972673  jimmieakesson   \n",
-       "2  2022-06-17 14:15:05  95972673  jimmieakesson   \n",
-       "3  2022-06-17 14:14:43  95972673  jimmieakesson   \n",
-       "4  2022-06-17 14:14:29  95972673  jimmieakesson   \n",
-       "5  2022-06-07 19:46:35  95972673  jimmieakesson   \n",
        "\n",
-       "                                                urls  nlikes  nreplies  \\\n",
-       "1                                                 []     692        17   \n",
-       "2                                                 []     809        26   \n",
-       "3                                                 []    1020        26   \n",
-       "4  [https://sverigesradio.se/artikel/domda-kvinno...    1152        85   \n",
-       "5                                                 []    6121       546   \n",
        "\n",
        "   nretweets  \n",
-       "1         41  \n",
-       "2         57  \n",
-       "3         86  \n",
-       "4        132  \n",
-       "5        557  "
       ]
      },
-     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df=sc.scrape_by_user(\"jimmieakesson\")\n",
     "df.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "id": "7db69757",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[!] No more data! Scraping will stop now.\n",
-      "found 0 deleted tweets in this search.\n",
       "[+] Finished: Successfully collected 16 Tweets from @jimmieakesson.\n"
      ]
     },
@@ -418,7 +556,7 @@
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
-       "      <td>626</td>\n",
        "      <td>9</td>\n",
        "      <td>68</td>\n",
        "    </tr>\n",
@@ -430,7 +568,7 @@
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
-       "      <td>2459</td>\n",
        "      <td>199</td>\n",
        "      <td>336</td>\n",
        "    </tr>\n",
@@ -472,8 +610,8 @@
        "\n",
        "                  date   user_id       username urls  nlikes  nreplies  \\\n",
        "0  2021-02-20 11:07:50  95972673  jimmieakesson   []    1277        22   \n",
-       "1  2021-02-20 11:06:58  95972673  jimmieakesson   []     626         9   \n",
-       "2  2021-02-20 11:06:45  95972673  jimmieakesson   []    2459       199   \n",
        "3  2021-02-19 14:00:01  95972673  jimmieakesson   []    1334        55   \n",
        "4  2021-02-18 15:31:53  95972673  jimmieakesson   []    3044       268   \n",
        "\n",
@@ -485,57 +623,158 @@
        "4        404  "
       ]
      },
-     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df=sc.scrape_by_user_and_string(\"jimmieakesson\",\"invandring\")\n",
-    "df.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "48d50b46",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Utan massiv, asylrelaterad invandring från främmande länder, varifrån dessa kulturfenomen härstammar, hade förekomsten i Sverige varit ytterst begränsad (för att inte säga obefintlig).  Jag har, tydligt och utan floskler, stått upp för denna hållning under hela mitt politiska liv \n",
-      " __________________________________________________________\n",
-      "Många vänsterliberaler tycks ha reagerat på detta med \"invandring som innebär social, ekonomisk och kulturell belastning\" och då särskilt hakat upp sig på \"kulturell\". Låt mig därför ge några exempel: \n",
-      " __________________________________________________________\n",
-      "Jag förstår — uppriktigt — inte den närmast hysteriska reaktion som mitt uttalande igår om invandring gett upphov till. Jag måste ha sagt samma sak minst tusen gånger, och det var länge sedan det skapade någon vidare debatt.  https://t.co/zDLdpu6HSU \n",
-      " __________________________________________________________\n",
-      "Invandring av hundratusentals människor från främmande kulturer med helt andra grundläggande normer och värderingar än de som byggt vårt svenska samhälle blir — ställt utom allt tvivel — en belastning ekonomiskt, socialt OCH kulturellt. \n",
-      " __________________________________________________________\n",
-      "Vårt land behöver ett totalstopp för all asyl- och anhöriginvandring, inklusive kvotflyktingar.  Sverige behöver ett totalstopp, för all invandring som utgör en social, kulturell eller ekonomisk belastning och ett moratorium för mottagande av kvotflyktingar. \n",
-      " __________________________________________________________\n",
-      "En afghanamnesti urholkar både tryggheten och välfärden.   Vänsterliberalerna och socialdemokraterna prioriterar alltid invandring före välfärd och trygghet.   Därför måste de bytas ut.  #svpol  https://t.co/l0wiRrbs12 \n",
-      " __________________________________________________________\n",
-      "Det spelar ingen roll att Stefan Löfven och Socialdemokraterna talar om att Sverige ska ha en reglerad invandring på ”EU:s miniminivå”. Det är bara tomma ord, en dålig bluff.    https://t.co/sYBGoVK4ev \n",
-      " __________________________________________________________\n",
-      "- Nu är man på väg att ännu en gång upprepa samma haveri. Man har uppenbarligen inte lärt sig nånting.  - Sverige behöver avsevärt mindre asylrelaterad invandring, inte mer. Där är både verkligheten och opinionen tydlig. \n",
-      " __________________________________________________________\n",
-      "Hans desperata försök att förminska, att flytta över skulden till allt annat än invandring, går som en röd tråd i hans argumentation. Den genomsyrar hela hans tankevärld. \n",
-      " __________________________________________________________\n",
-      "Det senaste decenniet har Sveriges befolkning ökat med nära en miljon människor. Nu kan dock 80 % av tillväxten tillskrivas invandring, varav den absolut största delen från länder som är historiskt, kulturellt och värderingsmässigt avlägsna från Sverige.  https://t.co/ZJd2zA41m4 \n",
-      " __________________________________________________________\n",
-      "M, KD och V har kommit överens om arbetskraftsinvandringen. Förslagen verkar rimliga, men tyvärr lyckas man inte komma överens om skarpa förslag som stoppar fortsatt invandring av okvalificerad arbetskraft.  https://t.co/oXqxhQDP5R \n",
-      " __________________________________________________________\n",
-      "Invandring av högkvalificerad arbetskraft är bra för Sverige, men vi behöver inte fler städare och diskplockare från andra länder. \n",
-      " __________________________________________________________\n",
-      "5. SD:s syn på arbetsinvandring är, till skillnad från din, praktisk och pragmatisk. Om det uppstår tillfällig arbetsbrist kan luckor behöva fyllas med kompetens utifrån. Behovet avgör. Arbetsinvandring är dock något helt annan än asylrelaterad invandring. \n",
-      " __________________________________________________________\n",
-      "Nej, vi släpper inga krav om invandring. Däremot är jag öppen för samtal om annat om S/M är rädda för att prata om massinvandringen. \n",
-      " __________________________________________________________\n"
-     ]
     }
    ],
    "source": [
     "tweets= df[\"tweet\"]\n",
     "for tweet in tweets:\n",

    "outputs": [],
    "source": [
     "import scrape\n",
+    "sc= scrape.TwitterScraper(num_tweets=10)\n"
    ]
   },
   {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[+] Finished: Successfully collected 20 Tweets.\n"
      ]
     },
     {
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
+       "      <td>1545194541006950400</td>\n",
+       "      <td>kim sever benim gibi sevmeyecekler bıraktığın ...</td>\n",
+       "      <td>2022-07-08 01:54:21</td>\n",
+       "      <td>1396065566117466113</td>\n",
+       "      <td>heja4r</td>\n",
        "      <td>[]</td>\n",
+       "      <td>1</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1545192735354806274</td>\n",
+       "      <td>Kelimeler,albayım,bazı anlamalara gelmiyor..</td>\n",
+       "      <td>2022-07-08 01:47:11</td>\n",
+       "      <td>1481604485118140425</td>\n",
+       "      <td>Theguapo6</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1545190168533008385</td>\n",
+       "      <td>@shikan213 ptdr ? y’a aucune racisme à quel mo...</td>\n",
+       "      <td>2022-07-08 01:36:59</td>\n",
+       "      <td>1476042813741617155</td>\n",
+       "      <td>srndz213__</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1545190106910171136</td>\n",
+       "      <td>@guzzeida Men gud du har presterat så mkt bätt...</td>\n",
+       "      <td>2022-07-08 01:36:44</td>\n",
+       "      <td>34343541</td>\n",
+       "      <td>lisaxamanda</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1545190096042860544</td>\n",
+       "      <td>Heja, heja, heja Slovensko</td>\n",
+       "      <td>2022-07-08 01:36:41</td>\n",
+       "      <td>3158344237</td>\n",
+       "      <td>ian_10_19</td>\n",
+       "      <td>[]</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    id                                              tweet  \\\n",
+       "0  1545194541006950400  kim sever benim gibi sevmeyecekler bıraktığın ...   \n",
+       "1  1545192735354806274       Kelimeler,albayım,bazı anlamalara gelmiyor..   \n",
+       "2  1545190168533008385  @shikan213 ptdr ? y’a aucune racisme à quel mo...   \n",
+       "3  1545190106910171136  @guzzeida Men gud du har presterat så mkt bätt...   \n",
+       "4  1545190096042860544                         Heja, heja, heja Slovensko   \n",
+       "\n",
+       "                  date              user_id     username urls  nlikes  \\\n",
+       "0  2022-07-08 01:54:21  1396065566117466113       heja4r   []       1   \n",
+       "1  2022-07-08 01:47:11  1481604485118140425    Theguapo6   []       1   \n",
+       "2  2022-07-08 01:36:59  1476042813741617155   srndz213__   []       0   \n",
+       "3  2022-07-08 01:36:44             34343541  lisaxamanda   []       1   \n",
+       "4  2022-07-08 01:36:41           3158344237    ian_10_19   []       0   \n",
+       "\n",
+       "   nreplies  nretweets  \n",
+       "0         0          0  \n",
+       "1         0          0  \n",
+       "2         1          0  \n",
+       "3         0          0  \n",
+       "4         0          0  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "string_tr_info=sc.scrape_by_string(\"heja\")\n",
+    "string_tr_info.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "902170ad",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tweet</th>\n",
+       "      <th>date</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>username</th>\n",
+       "      <th>urls</th>\n",
+       "      <th>nlikes</th>\n",
+       "      <th>nreplies</th>\n",
+       "      <th>nretweets</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
        "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1545194541006950400</td>\n",
+       "      <td>kim sever benim gibi sevmeyecekler bıraktığın ...</td>\n",
+       "      <td>2022-07-08 01:54:21</td>\n",
+       "      <td>1396065566117466113</td>\n",
+       "      <td>heja4r</td>\n",
        "      <td>[]</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1545192735354806274</td>\n",
+       "      <td>Kelimeler,albayım,bazı anlamalara gelmiyor..</td>\n",
+       "      <td>2022-07-08 01:47:11</td>\n",
+       "      <td>1481604485118140425</td>\n",
+       "      <td>Theguapo6</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1545190096042860544</td>\n",
+       "      <td>Heja, heja, heja Slovensko</td>\n",
+       "      <td>2022-07-08 01:36:41</td>\n",
+       "      <td>3158344237</td>\n",
+       "      <td>ian_10_19</td>\n",
        "      <td>[]</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>1545189783747436545</td>\n",
+       "      <td>Beni sorarsan dardayım..</td>\n",
+       "      <td>2022-07-08 01:35:27</td>\n",
+       "      <td>1481604485118140425</td>\n",
+       "      <td>Theguapo6</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>1545186234623991813</td>\n",
+       "      <td>Heja strandhäll.  Vilket jävla block mongo</td>\n",
+       "      <td>2022-07-08 01:21:21</td>\n",
+       "      <td>1160537136250195968</td>\n",
+       "      <td>Siggydunn</td>\n",
        "      <td>[]</td>\n",
+       "      <td>0</td>\n",
        "      <td>0</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
       ],
       "text/plain": [
        "                     id                                              tweet  \\\n",
+       "0   1545194541006950400  kim sever benim gibi sevmeyecekler bıraktığın ...   \n",
+       "1   1545192735354806274       Kelimeler,albayım,bazı anlamalara gelmiyor..   \n",
+       "4   1545190096042860544                         Heja, heja, heja Slovensko   \n",
+       "6   1545189783747436545                           Beni sorarsan dardayım..   \n",
+       "12  1545186234623991813         Heja strandhäll.  Vilket jävla block mongo   \n",
        "\n",
+       "                   date              user_id   username urls  nlikes  \\\n",
+       "0   2022-07-08 01:54:21  1396065566117466113     heja4r   []       1   \n",
+       "1   2022-07-08 01:47:11  1481604485118140425  Theguapo6   []       1   \n",
+       "4   2022-07-08 01:36:41           3158344237  ian_10_19   []       0   \n",
+       "6   2022-07-08 01:35:27  1481604485118140425  Theguapo6   []       2   \n",
+       "12  2022-07-08 01:21:21  1160537136250195968  Siggydunn   []       0   \n",
        "\n",
+       "    nreplies  nretweets  \n",
+       "0          0          0  \n",
+       "1          0          0  \n",
+       "4          0          0  \n",
+       "6          0          0  \n",
+       "12         0          0  "
       ]
      },
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "string_t_info=sc.get_only_tweets(tr_info)\n",
+    "string_t_info.head()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "a7912a91",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[+] Finished: Successfully collected 20 Tweets.\n"
      ]
     },
     {
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1544748873767424001</td>\n",
+       "      <td>Fruktansvärt att nås av beskedet att kvinnan s...</td>\n",
+       "      <td>2022-07-06 20:23:26</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>3397</td>\n",
+       "      <td>167</td>\n",
+       "      <td>140</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>1</th>\n",
+       "      <td>1538948369611210764</td>\n",
+       "      <td>@annieloof Nej, jag håller med. Tänk mer som M...</td>\n",
+       "      <td>2022-06-20 20:14:18</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1513</td>\n",
+       "      <td>89</td>\n",
+       "      <td>115</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
        "      <td>1537770920621879297</td>\n",
        "      <td>Man kan ha synpunkter på en sådan lösning, men...</td>\n",
        "      <td>2022-06-17 14:15:32</td>\n",
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
+       "      <td>694</td>\n",
        "      <td>17</td>\n",
        "      <td>41</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>3</th>\n",
        "      <td>1537770809225273344</td>\n",
        "      <td>Är det ont om plats på anstalterna så får man ...</td>\n",
        "      <td>2022-06-17 14:15:05</td>\n",
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
+       "      <td>810</td>\n",
        "      <td>26</td>\n",
        "      <td>57</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>4</th>\n",
        "      <td>1537770713368735744</td>\n",
        "      <td>Döms man för brott, särskilt våldsbrott, ska m...</td>\n",
        "      <td>2022-06-17 14:14:43</td>\n",
        "      <td>26</td>\n",
        "      <td>86</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "                    id                                              tweet  \\\n",
+       "0  1544748873767424001  Fruktansvärt att nås av beskedet att kvinnan s...   \n",
+       "1  1538948369611210764  @annieloof Nej, jag håller med. Tänk mer som M...   \n",
+       "2  1537770920621879297  Man kan ha synpunkter på en sådan lösning, men...   \n",
+       "3  1537770809225273344  Är det ont om plats på anstalterna så får man ...   \n",
+       "4  1537770713368735744  Döms man för brott, särskilt våldsbrott, ska m...   \n",
        "\n",
+       "                  date   user_id       username urls  nlikes  nreplies  \\\n",
+       "0  2022-07-06 20:23:26  95972673  jimmieakesson   []    3397       167   \n",
+       "1  2022-06-20 20:14:18  95972673  jimmieakesson   []    1513        89   \n",
+       "2  2022-06-17 14:15:32  95972673  jimmieakesson   []     694        17   \n",
+       "3  2022-06-17 14:15:05  95972673  jimmieakesson   []     810        26   \n",
+       "4  2022-06-17 14:14:43  95972673  jimmieakesson   []    1020        26   \n",
        "\n",
        "   nretweets  \n",
+       "0        140  \n",
+       "1        115  \n",
+       "2         41  \n",
+       "3         57  \n",
+       "4         86  "
       ]
      },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "user__tr_info=sc.scrape_by_user(\"jimmieakesson\")\n",
     "df.head()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "7db69757",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "user__t_info=sc.get_only_tweets(tr_info)\n",
+    "user__t_info.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9d6b1bdf",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "[+] Finished: Successfully collected 16 Tweets from @jimmieakesson.\n"
      ]
     },
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
+       "      <td>625</td>\n",
        "      <td>9</td>\n",
        "      <td>68</td>\n",
        "    </tr>\n",
        "      <td>95972673</td>\n",
        "      <td>jimmieakesson</td>\n",
        "      <td>[]</td>\n",
+       "      <td>2458</td>\n",
        "      <td>199</td>\n",
        "      <td>336</td>\n",
        "    </tr>\n",
        "\n",
        "                  date   user_id       username urls  nlikes  nreplies  \\\n",
        "0  2021-02-20 11:07:50  95972673  jimmieakesson   []    1277        22   \n",
+       "1  2021-02-20 11:06:58  95972673  jimmieakesson   []     625         9   \n",
+       "2  2021-02-20 11:06:45  95972673  jimmieakesson   []    2458       199   \n",
        "3  2021-02-19 14:00:01  95972673  jimmieakesson   []    1334        55   \n",
        "4  2021-02-18 15:31:53  95972673  jimmieakesson   []    3044       268   \n",
        "\n",
        "4        404  "
       ]
      },
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "user__string_tr_info=sc.scrape_by_user_and_string(\"jimmieakesson\",\"invandring\")\n",
+    "user__string_tr_info.head()\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
+   "id": "a1aede79",
    "metadata": {},
    "outputs": [
     {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tweet</th>\n",
+       "      <th>date</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>username</th>\n",
+       "      <th>urls</th>\n",
+       "      <th>nlikes</th>\n",
+       "      <th>nreplies</th>\n",
+       "      <th>nretweets</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1363067834260201475</td>\n",
+       "      <td>Utan massiv, asylrelaterad invandring från frä...</td>\n",
+       "      <td>2021-02-20 11:07:50</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1277</td>\n",
+       "      <td>22</td>\n",
+       "      <td>105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1363067613660778496</td>\n",
+       "      <td>Många vänsterliberaler tycks ha reagerat på de...</td>\n",
+       "      <td>2021-02-20 11:06:58</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>625</td>\n",
+       "      <td>9</td>\n",
+       "      <td>68</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1363067558409158656</td>\n",
+       "      <td>Jag förstår — uppriktigt — inte den närmast hy...</td>\n",
+       "      <td>2021-02-20 11:06:45</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>2458</td>\n",
+       "      <td>199</td>\n",
+       "      <td>336</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1362748777552113670</td>\n",
+       "      <td>Invandring av hundratusentals människor från f...</td>\n",
+       "      <td>2021-02-19 14:00:01</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1334</td>\n",
+       "      <td>55</td>\n",
+       "      <td>101</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1362409505557012490</td>\n",
+       "      <td>Vårt land behöver ett totalstopp för all asyl-...</td>\n",
+       "      <td>2021-02-18 15:31:53</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>jimmieakesson</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>3044</td>\n",
+       "      <td>268</td>\n",
+       "      <td>404</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    id                                              tweet  \\\n",
+       "0  1363067834260201475  Utan massiv, asylrelaterad invandring från frä...   \n",
+       "1  1363067613660778496  Många vänsterliberaler tycks ha reagerat på de...   \n",
+       "2  1363067558409158656  Jag förstår — uppriktigt — inte den närmast hy...   \n",
+       "3  1362748777552113670  Invandring av hundratusentals människor från f...   \n",
+       "4  1362409505557012490  Vårt land behöver ett totalstopp för all asyl-...   \n",
+       "\n",
+       "                  date   user_id       username urls  nlikes  nreplies  \\\n",
+       "0  2021-02-20 11:07:50  95972673  jimmieakesson   []    1277        22   \n",
+       "1  2021-02-20 11:06:58  95972673  jimmieakesson   []     625         9   \n",
+       "2  2021-02-20 11:06:45  95972673  jimmieakesson   []    2458       199   \n",
+       "3  2021-02-19 14:00:01  95972673  jimmieakesson   []    1334        55   \n",
+       "4  2021-02-18 15:31:53  95972673  jimmieakesson   []    3044       268   \n",
+       "\n",
+       "   nretweets  \n",
+       "0        105  \n",
+       "1         68  \n",
+       "2        336  \n",
+       "3        101  \n",
+       "4        404  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
+   "source": [
+    "user__string_t_info = sc.get_only_tweets(user__string_tr_info)\n",
+    "user__string_t_info.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "48d50b46",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "tweets= df[\"tweet\"]\n",
     "for tweet in tweets:\n",