Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Mosa committed on Jul 7, 2022

Commit

d94f83a

•

1 Parent(s): 35c6ca6

Fixed_the_comments

Browse files

Files changed (9) hide show

twitter-scraper/scrape.py +65 -91
twitter-scraper/twitter_scraper.ipynb +387 -140
twitter_scraper/twint_master/elasticsearch/dashboard.json +0 -18
twitter_scraper/twint_master/elasticsearch/index-follow.json +0 -15
twitter_scraper/twint_master/elasticsearch/index-tweets.json +0 -48
twitter_scraper/twint_master/elasticsearch/index-user.json +0 -33
twitter_scraper/twint_master/elasticsearch/visualizations.json +0 -100
twitter_scraper/twint_master/extracted-tweets.txt +0 -5
twitter_scraper/twint_master/requirements.txt +0 -13

twitter-scraper/scrape.py CHANGED Viewed

@@ -2,102 +2,76 @@
 from tkinter import EXCEPTION
 import twint
 from datetime import date
-import pandas as pd
-import sys
-import io
-import time
 class scraper:
-    def get_tweets(search_str, from_date="2006-07-01", to_date=str(date.today()), num_tweets=10, u_or_s='s',
-                   acceptable_range=10):
-        if (type(from_date) or type("str")) is not type("str"):
-            print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
-            raise EXCEPTION("Incorrect date type Exception!")
-        time_out = time.time() + 2 * 60
-        _dict = {}
-        c = twint.Config()
-        if u_or_s.lower() == "u":
-            c.Search = "from:@" + search_str  # topic
-        else:
-            c.Search = search_str  # topic
-        c.Pandas = True
-        num_tweets_and_replies = num_tweets
-        c.Count = True
-        #for j in range(1, 5):
-        c.Limit = num_tweets_and_replies
-        c.Since = from_date
-        c.Until = to_date
-        c.Hide_output = True
-        old_stdout = sys.stdout
-        new_stdout = io.StringIO()
-        sys.stdout = new_stdout
-        twint.run.Search(c)
-        output = new_stdout.getvalue()
-        sys.stdout = old_stdout
-        print(output[0:-2])
-        tweet_info =twint.output.panda.Tweets_df
         indx_replies=[]
-        tweet=tweet_info['tweet']
         for i in range(len(tweet)):
             if tweet[i].startswith("@"):
                 indx_replies.append(i)
-        tweet_info.drop(indx_replies,axis=0, inplace =True)
-        print(len(tweet_info['tweet']), " of them are Tweets")
-        #df.drop([5,6], axis=0, inplace=True)
-        return tweet_info
-            # try:
-            #     _keys = tweet_info[["id","tweet","date","user_id","urls" ,'nlikes', 'nreplies', 'nretweets']]
-            #     # tweet infor is a dataframe with fallowing columns
-            #     # Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
-            #     # 'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
-            #     # 'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
-            #     # 'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
-            #     # 'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
-            #     # 'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
-            #     # 'trans_dest'],
-            #     # dtype='object')
-            #     for i in range(len( _keys)):
-            #         if _keys[i] in _dict.keys() or tweet_info["tweet"][i].startswith("@"):
-            #             pass
-            #         else:
-            #             _dict[int(_keys[i])] = {"tweet": tweet_info["tweet"][i],
-            #                                     "date": tweet_info["date"][i],
-            #                                     "nlikes": tweet_info["nlikes"][i],
-            #                                     "nreplies": tweet_info["nreplies"][i],
-            #                                     "nretweets": tweet_info["nretweets"][i], "topic": ""}
-            #             if len(list(_dict.keys())) == num_tweets:
-            #                 break
-            # except:
-            #     pass
-            # print(len(list(_dict.keys())), " of them are Tweets")
-            # if (num_tweets - len(list(_dict.keys()))) < acceptable_range:
-            #     return _dict
-            # if len(list(_dict.keys())) < num_tweets:
-            #     num_tweets_and_replies = num_tweets_and_replies + 100 * 3 ** j
-            # else:
-            #     break
-            # if time_out < time.time():
-            #     break
-            # if output.startswith("[!] No more data!"):
-            #     break
-        #return _dict
-    def string_search_user_tweets(user_name, search_str, from_date="2006-07-01", to_date=str(date.today()),
-        num_tweets=10):
-        c = twint.Config()
-        c.Username = user_name
-        c.Search = search_str  # topic
-        c.Pandas = True
-        num_tweets_and_replies = num_tweets
-        c.Count = True
-        c.Limit = num_tweets_and_replies
-        c.Since = from_date
-        c.Until = to_date
-        c.Hide_output = True
-        twint.run.Search(c)
-        return twint.output.panda.Tweets_df

 from tkinter import EXCEPTION
 import twint
 from datetime import date
+"""
+This class is a Twitter scraper (class name: scraper). It collects tweets by user, by search string, or by both,
+from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it requests 'num_tweets' tweets between 2006-07-01 and today.
+It outputs a pandas DataFrame with each tweet's unique id and some other information (replies are filtered out).
+input: from_date, to_date, num_tweets (constructor); user and/or search string (scrape methods)
+output: pandas DataFrame
+"""
 class scraper:
+    def __init__(self, from_date="2006-07-01", to_date=str(date.today()), num_tweets=20):
+        #self.user = user
+        self.from_date = from_date
+        self.to_date = to_date
+        self.num_tweets = num_tweets
+        self.conf = twint.Config()
+    def scrape_by_user(self,_user):
+        ##using twint to extract tweets
+        self.conf.Search = "from:@" + _user   # If the search string is a username.
+        return self.__get_tweets_from_twint__()
+    def scrape_by_string(self,_string:str):
+        self.conf.Search = _string
+        return self.__get_tweets_from_twint__()
+    def scrape_by_user_and_string(self,_user:str,_string:str):
+        self.conf.Username = _user
+        self.conf.Search = _string
+        return self.__get_tweets_from_twint__()
+    def __get_only_tweets(tweet_and_replies):
+        #This function's input arg is a data frame of tweets; it removes all tweets starting with \"@\", which is an indicator of a reply or retweet.
+        tweet=tweet_and_replies["tweet"]
         indx_replies=[]
         for i in range(len(tweet)):
             if tweet[i].startswith("@"):
                 indx_replies.append(i)
+        only_tweets=tweet_and_replies.drop(labels=indx_replies,axis=0)
+            # drop removes the rows whose index labels are listed in indx_replies (axis=0 means rows are deleted); since inplace is not used, it returns a new data frame and leaves the original unchanged.
+        #print(len(tweet_and_replies['tweet']), " of them are Tweets")
+        return only_tweets
+    def __get_tweets_from_twint__(self):
+        ''' __get_tweets_from_twint__
+        tweet info is a dataframe with the following columns
+            Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
+            'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
+            'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
+            'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
+            'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
+            'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
+            'trans_dest']
+        we just pick the relevant ones.
+        self.conf is a twint.Config() object
+        '''
+        self.conf.Pandas = True
+        self.conf.Count = True
+        self.conf.Limit = self.num_tweets
+        self.conf.Since = self.from_date
+        self.conf.Until = self.to_date
+        self.conf.Hide_output = True
+        twint.run.Search(self.conf)
+        tweet_info =twint.output.panda.Tweets_df
+        tweet_info = tweet_info[["id","tweet","date","user_id","urls" ,'nlikes', 'nreplies', 'nretweets']]
+        df = scraper.__get_only_tweets(tweet_info)
+        return df
+    def __check_date_type(d1,d2):
+            if (type(d1) or type(d2)) is not type("str"):  # If the type of the date input isn't a string, it raises an exception
+                print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
+                raise EXCEPTION("Incorrect date type Exception!")
+            elif (len(d1.split("-")) or len(d2.split("-")))<2:
+                print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
+                raise EXCEPTION("Incorrect date type Exception!")

twitter-scraper/twitter_scraper.ipynb CHANGED Viewed

@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "c9021300",
    "metadata": {
     "scrolled": true
@@ -31,209 +31,456 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "1413ab2b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# import asyncio\n",
-    "# import os\n",
-    "# loop = asyncio.get_event_loop()\n",
-    "# loop.is_running()\n",
-    "# import twint\n",
-    "# import nest_asyncio\n",
-    "# nest_asyncio.apply()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "d38514f3",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import scrape\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a7912a91",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from_date=\"2022-6-10 10:30:22\"\n",
-    "to_date= \"2022-6-30\"\n",
-    "num_tweets = 20\n",
-    "_data=scrape.scraper.get_tweets(\"jimmieakesson\",u_or_s=\"u\",from_date=221232,to_date=2313)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "48d50b46",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tweets= _data.keys()\n",
-    "for i in tweets:\n",
-    "    _data[i][\"tweet\"]\n",
-    "    print(_data[i][\"tweet\"], \"\\n\", \"__________________________________________________________\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "72cabcb5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from_date=\"2022-6-10 10:30:22\"\n",
-    "to_date= \"2022-6-30\"\n",
-    "num_tweets = 20\n",
-    "_data=scrape.scraper.string_search_user_tweets(\"jimmieakesson\",\"invandring\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "549e4fb3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tweets= _data[\"tweet\"]\n",
-    "for i in tweets:\n",
-    "    print(i, \"\\n\", \"__________________________________________________________\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "733dd44a",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Defaulting to user installation because normal site-packages is not writeable\n",
-      "Requirement already satisfied: snscrape in /home/oxygen/.local/lib/python3.10/site-packages (0.3.4)\n",
-      "Requirement already satisfied: beautifulsoup4 in /home/oxygen/.local/lib/python3.10/site-packages (from snscrape) (4.11.1)\n",
-      "Requirement already satisfied: requests[socks] in /usr/lib/python3/dist-packages (from snscrape) (2.25.1)\n",
-      "Requirement already satisfied: lxml in /usr/lib/python3/dist-packages (from snscrape) (4.8.0)\n",
-      "Requirement already satisfied: soupsieve>1.2 in /home/oxygen/.local/lib/python3.10/site-packages (from beautifulsoup4->snscrape) (2.3.2.post1)\n",
-      "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /home/oxygen/.local/lib/python3.10/site-packages (from requests[socks]->snscrape) (1.7.1)\n"
      ]
     }
    ],
    "source": [
-    "#%pip install -q snscrape==0.3.4\n",
-    "!pip3 install snscrape\n",
-    "#!pip3 install git+https://github.com/JustAnotherArchivist/snscrape.git"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "id": "0d16422c",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
      ]
     }
    ],
    "source": [
-    "%pip install -q snscrape==0.3.4\n",
-    "from datetime import date\n",
-    "import os\n",
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "def get_tweets(search_term, from_date, to_date=date.today(), num_tweets=100,u_or_s='s'):\n",
-    "  if u_or_s.lower() =='u':\n",
-    "    extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-user '{search_term} until:{to_date}' > extracted-tweets.txt\" \n",
-    "  else:\n",
-    "    extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-search '{search_term} until:{to_date}' > extracted-tweets.txt\"\n",
-    "  \n",
-    "  os.system(extracted_tweets)\n",
-    "  if os.stat(\"extracted-tweets.txt\").st_size == 0:\n",
-    "    print('No Tweets found')\n",
-    "  else:\n",
-    "    df = pd.read_csv('extracted-tweets.txt', names=['content'])\n",
-    "  data_list=[]\n",
-    "  for row in df['content'].iteritems():\n",
-    "    temp= str(row[0])+str(row[1])\n",
-    "    temp= temp.replace(\"\\'\",\"\")\n",
-    "    data_list.append(temp)\n",
-    "  return data_list\n",
-    "\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 12,
-   "id": "8e2adb35",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "No Tweets found\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/home/oxygen/.local/bin/snscrape\", line 8, in <module>\n",
-      "    sys.exit(main())\n",
-      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 224, in main\n",
-      "    args = parse_args()\n",
-      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 159, in parse_args\n",
-      "    import snscrape.modules\n",
-      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 15, in <module>\n",
-      "    _import_modules()\n",
-      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 12, in _import_modules\n",
-      "    module = importlib.import_module(moduleName)\n",
-      "  File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n",
-      "    return _bootstrap._gcd_import(name[level:], package, level)\n",
-      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/instagram.py\", line 12, in <module>\n",
-      "    class InstagramPost(typing.NamedTuple, snscrape.base.Item):\n",
-      "  File \"/usr/lib/python3.10/typing.py\", line 2329, in _namedtuple_mro_entries\n",
-      "    raise TypeError(\"Multiple inheritance with NamedTuple is not supported\")\n",
-      "TypeError: Multiple inheritance with NamedTuple is not supported\n"
      ]
     },
     {
-     "ename": "UnboundLocalError",
-     "evalue": "local variable 'df' referenced before assignment",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mUnboundLocalError\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m/tmp/ipykernel_26511/1892081786.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mget_tweets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"jimmieakesson\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfrom_date\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0;34m\"2022-06-01\"\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mnum_tweets\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu_or_s\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"u\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/tmp/ipykernel_26511/275462205.py\u001b[0m in \u001b[0;36mget_tweets\u001b[0;34m(search_term, from_date, to_date, num_tweets, u_or_s)\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'extracted-tweets.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     18\u001b[0m   \u001b[0mdata_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m   \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     20\u001b[0m     \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\'\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'df' referenced before assignment"
-     ]
     }
    ],
    "source": [
-    "d= get_tweets(\"jimmieakesson\",from_date= \"2022-06-01\" ,num_tweets =5, u_or_s=\"u\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a2c837f4",
    "metadata": {},
    "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

   },
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "c9021300",
    "metadata": {
     "scrolled": true
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "1413ab2b",
    "metadata": {},
    "outputs": [],
    "source": [
+    "import asyncio\n",
+    "import os\n",
+    "loop = asyncio.get_event_loop()\n",
+    "loop.is_running()\n",
+    "import twint\n",
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "d38514f3",
    "metadata": {},
    "outputs": [],
    "source": [
+    "import scrape\n",
+    "sc= scrape.scraper( from_date=\"2006-07-01\", to_date= \"2022-06-22\",num_tweets=100)\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
+   "id": "d37e5cbf",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[+] Finished: Successfully collected 100 Tweets.\n"
      ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tweet</th>\n",
+       "      <th>date</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>urls</th>\n",
+       "      <th>nlikes</th>\n",
+       "      <th>nreplies</th>\n",
+       "      <th>nretweets</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1539394015560359944</td>\n",
+       "      <td>wAllah comme si on avais pas d’autre choses j’...</td>\n",
+       "      <td>2022-06-22 01:45:08</td>\n",
+       "      <td>1202681666487115776</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1539387277960433664</td>\n",
+       "      <td>Şev baş temaşevanen heja  https://t.co/aqw5vNPLFr</td>\n",
+       "      <td>2022-06-22 01:18:22</td>\n",
+       "      <td>743954955220979713</td>\n",
+       "      <td>[https://youtu.be/n_vYzgRBFUI]</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>1539386040313851904</td>\n",
+       "      <td>Heja!＝頑張れ！</td>\n",
+       "      <td>2022-06-22 01:13:27</td>\n",
+       "      <td>176860217</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>1539379141597925377</td>\n",
+       "      <td>skończyłam po 15h naukę na dziś ❤️😋 wrócę po u...</td>\n",
+       "      <td>2022-06-22 00:46:02</td>\n",
+       "      <td>840668853948559360</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>1539377784707026945</td>\n",
+       "      <td>je suis en train de siroter mon candy up frche...</td>\n",
+       "      <td>2022-06-22 00:40:38</td>\n",
+       "      <td>980874157998137345</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     id                                              tweet  \\\n",
+       "0   1539394015560359944  wAllah comme si on avais pas d’autre choses j’...   \n",
+       "5   1539387277960433664  Şev baş temaşevanen heja  https://t.co/aqw5vNPLFr   \n",
+       "7   1539386040313851904                                         Heja!＝頑張れ！   \n",
+       "9   1539379141597925377  skończyłam po 15h naukę na dziś ❤️😋 wrócę po u...   \n",
+       "10  1539377784707026945  je suis en train de siroter mon candy up frche...   \n",
+       "\n",
+       "                   date              user_id                            urls  \\\n",
+       "0   2022-06-22 01:45:08  1202681666487115776                              []   \n",
+       "5   2022-06-22 01:18:22   743954955220979713  [https://youtu.be/n_vYzgRBFUI]   \n",
+       "7   2022-06-22 01:13:27            176860217                              []   \n",
+       "9   2022-06-22 00:46:02   840668853948559360                              []   \n",
+       "10  2022-06-22 00:40:38   980874157998137345                              []   \n",
+       "\n",
+       "    nlikes  nreplies  nretweets  \n",
+       "0        3         0          0  \n",
+       "5        0         0          0  \n",
+       "7        0         0          0  \n",
+       "9        0         0          0  \n",
+       "10       3         0          0  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
+    "d=sc.scrape_by_string(\"heja\")\n",
+    "d.head()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
+   "id": "a7912a91",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[+] Finished: Successfully collected 100 Tweets.\n"
      ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tweet</th>\n",
+       "      <th>date</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>urls</th>\n",
+       "      <th>nlikes</th>\n",
+       "      <th>nreplies</th>\n",
+       "      <th>nretweets</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1537770920621879297</td>\n",
+       "      <td>Man kan ha synpunkter på en sådan lösning, men...</td>\n",
+       "      <td>2022-06-17 14:15:32</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>692</td>\n",
+       "      <td>17</td>\n",
+       "      <td>41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1537770809225273344</td>\n",
+       "      <td>Är det ont om plats på anstalterna så får man ...</td>\n",
+       "      <td>2022-06-17 14:15:05</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>809</td>\n",
+       "      <td>26</td>\n",
+       "      <td>57</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1537770713368735744</td>\n",
+       "      <td>Döms man för brott, särskilt våldsbrott, ska m...</td>\n",
+       "      <td>2022-06-17 14:14:43</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1020</td>\n",
+       "      <td>26</td>\n",
+       "      <td>86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1537770657823576066</td>\n",
+       "      <td>Platsbrist? Jaha, vad spelar det för roll?   D...</td>\n",
+       "      <td>2022-06-17 14:14:29</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[https://sverigesradio.se/artikel/domda-kvinno...</td>\n",
+       "      <td>1152</td>\n",
+       "      <td>85</td>\n",
+       "      <td>132</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1534230353094885383</td>\n",
+       "      <td>Det är ytterst beklagligt att Magdalena Anders...</td>\n",
+       "      <td>2022-06-07 19:46:35</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>6121</td>\n",
+       "      <td>546</td>\n",
+       "      <td>557</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    id                                              tweet  \\\n",
+       "1  1537770920621879297  Man kan ha synpunkter på en sådan lösning, men...   \n",
+       "2  1537770809225273344  Är det ont om plats på anstalterna så får man ...   \n",
+       "3  1537770713368735744  Döms man för brott, särskilt våldsbrott, ska m...   \n",
+       "4  1537770657823576066  Platsbrist? Jaha, vad spelar det för roll?   D...   \n",
+       "5  1534230353094885383  Det är ytterst beklagligt att Magdalena Anders...   \n",
+       "\n",
+       "                  date   user_id  \\\n",
+       "1  2022-06-17 14:15:32  95972673   \n",
+       "2  2022-06-17 14:15:05  95972673   \n",
+       "3  2022-06-17 14:14:43  95972673   \n",
+       "4  2022-06-17 14:14:29  95972673   \n",
+       "5  2022-06-07 19:46:35  95972673   \n",
+       "\n",
+       "                                                urls  nlikes  nreplies  \\\n",
+       "1                                                 []     692        17   \n",
+       "2                                                 []     809        26   \n",
+       "3                                                 []    1020        26   \n",
+       "4  [https://sverigesradio.se/artikel/domda-kvinno...    1152        85   \n",
+       "5                                                 []    6121       546   \n",
+       "\n",
+       "   nretweets  \n",
+       "1         41  \n",
+       "2         57  \n",
+       "3         86  \n",
+       "4        132  \n",
+       "5        557  "
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
+    "df=sc.scrape_by_user(\"jimmieakesson\")\n",
+    "df.head()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 12,
+   "id": "7db69757",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[!] No more data! Scraping will stop now.\n",
+      "found 0 deleted tweets in this search.\n",
+      "[+] Finished: Successfully collected 16 Tweets from @jimmieakesson.\n"
      ]
     },
     {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tweet</th>\n",
+       "      <th>date</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>urls</th>\n",
+       "      <th>nlikes</th>\n",
+       "      <th>nreplies</th>\n",
+       "      <th>nretweets</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1363067834260201475</td>\n",
+       "      <td>Utan massiv, asylrelaterad invandring från frä...</td>\n",
+       "      <td>2021-02-20 11:07:50</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1277</td>\n",
+       "      <td>22</td>\n",
+       "      <td>105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1363067613660778496</td>\n",
+       "      <td>Många vänsterliberaler tycks ha reagerat på de...</td>\n",
+       "      <td>2021-02-20 11:06:58</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>626</td>\n",
+       "      <td>9</td>\n",
+       "      <td>68</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1363067558409158656</td>\n",
+       "      <td>Jag förstår — uppriktigt — inte den närmast hy...</td>\n",
+       "      <td>2021-02-20 11:06:45</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>2459</td>\n",
+       "      <td>199</td>\n",
+       "      <td>336</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1362748777552113670</td>\n",
+       "      <td>Invandring av hundratusentals människor från f...</td>\n",
+       "      <td>2021-02-19 14:00:01</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>1334</td>\n",
+       "      <td>55</td>\n",
+       "      <td>101</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1362409505557012490</td>\n",
+       "      <td>Vårt land behöver ett totalstopp för all asyl-...</td>\n",
+       "      <td>2021-02-18 15:31:53</td>\n",
+       "      <td>95972673</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>3044</td>\n",
+       "      <td>268</td>\n",
+       "      <td>404</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    id                                              tweet  \\\n",
+       "0  1363067834260201475  Utan massiv, asylrelaterad invandring från frä...   \n",
+       "1  1363067613660778496  Många vänsterliberaler tycks ha reagerat på de...   \n",
+       "2  1363067558409158656  Jag förstår — uppriktigt — inte den närmast hy...   \n",
+       "3  1362748777552113670  Invandring av hundratusentals människor från f...   \n",
+       "4  1362409505557012490  Vårt land behöver ett totalstopp för all asyl-...   \n",
+       "\n",
+       "                  date   user_id urls  nlikes  nreplies  nretweets  \n",
+       "0  2021-02-20 11:07:50  95972673   []    1277        22        105  \n",
+       "1  2021-02-20 11:06:58  95972673   []     626         9         68  \n",
+       "2  2021-02-20 11:06:45  95972673   []    2459       199        336  \n",
+       "3  2021-02-19 14:00:01  95972673   []    1334        55        101  \n",
+       "4  2021-02-18 15:31:53  95972673   []    3044       268        404  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
+    "df=sc.scrape_by_user_and_string(\"jimmieakesson\",\"invandring\")\n",
+    "df.head()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "48d50b46",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "tweets= _data.keys()\n",
+    "for i in tweets:\n",
+    "    _data[i][\"tweet\"]\n",
+    "    print(_data[i][\"tweet\"], \"\\n\", \"__________________________________________________________\")"
+   ]
   }
  ],
  "metadata": {

twitter_scraper/twint_master/elasticsearch/dashboard.json DELETED Viewed

@@ -1,18 +0,0 @@
-[
-  {
-    "_id": "e6d65380-bfe2-11e8-961a-d371b24d5d1d",
-    "_type": "dashboard",
-    "_source": {
-      "title": "Twint Dashboard",
-      "hits": 0,
-      "description": "",
-      "panelsJSON": "[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":40,\"h\":17,\"i\":\"1\"},\"embeddableConfig\":{},\"id\":\"d47421c0-bfd5-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":40,\"y\":6,\"w\":8,\"h\":11,\"i\":\"2\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"e2b89640-bfd4-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":32,\"w\":20,\"h\":17,\"i\":\"3\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"8a8bb420-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":17,\"w\":33,\"h\":15,\"i\":\"4\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"a8d3ee70-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":40,\"y\":0,\"w\":8,\"h\":6,\"i\":\"6\"},\"embeddableConfig\":{},\"id\":\"37cd72e0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":33,\"y\":17,\"w\":15,\"h\":15,\"i\":\"7\"},\"embeddableConfig\":{},\"id\":\"149ecbc0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"8\",\"gridData\":{\"x\":20,\"y\":32,\"w\":28,\"h\":17,\"i\":\"8\"},\"version\":\"6.3.2\",\"type\":\"visualization\",\"id\":\"b45ec590-c267-11e8-bcd4-3956fe930db7\",\"embeddableConfig\":{}}]",
-      "optionsJSON": "{\"darkTheme\":true,\"hidePanelTitles\":true,\"useMargins\":true}",
-      "version": 1,
-      "timeRestore": false,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[],\"highlightAll\":true,\"version\":true}"
-      }
-    }
-  }
-]

twitter_scraper/twint_master/elasticsearch/index-follow.json DELETED Viewed

@@ -1,15 +0,0 @@
-PUT twintgraph
-{
-  "mappings": {
-    "items": {
-      "properties": {
-        "user": {"type": "keyword"},
-        "follow": {"type": "keyword"},
-        "essid": {"type": "keyword"}
-      }
-    }
-  },
-  "settings": {
-    "number_of_shards": 1
-  }
-}

twitter_scraper/twint_master/elasticsearch/index-tweets.json DELETED Viewed

@@ -1,48 +0,0 @@
-PUT twinttweets
-{
-  "mappings": {
-    "items": {
-      "properties": {
-          "id": {"type": "long"},
-          "conversation_id": {"type": "long"},
-          "created_at": {"type": "long"},
-          "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
-          "timezone": {"type": "keyword"},
-          "place": {"type": "keyword"},
-          "location": {"type": "keyword"},
-          "tweet": {"type": "text"},
-          "hashtags": {"type": "keyword"},
-          "cashtags": {"type": "keyword"},
-          "user_id": {"type": "long"},
-          "user_id_str": {"type": "keyword"},
-          "username": {"type": "keyword"},
-          "name": {"type": "text"},
-          "profile_image_url": {"type": "text"},
-          "day": {"type": "integer"},
-          "hour": {"type": "integer"},
-          "link": {"type": "text"},
-          "retweet": {"type": "text"},
-          "essid": {"type": "keyword"},
-          "nlikes": {"type": "integer"},
-          "nreplies": {"type": "integer"},
-          "nretweets": {"type": "integer"},
-          "quote_url": {"type": "text"},
-          "video": {"type": "integer"},
-          "thumbnail": {"type": "text"},
-          "search": {"type": "text"},
-          "near":  {"type": "text"},
-          "geo_near": {"type": "geo_point"},
-          "geo_tweet": {"type": "geo_point"},
-          "photos": {"type": "text"},
-          "mentions": {"type": "text"},
-          "translation": {"type": "text"},
-          "trans_src": {"type": "keyword"},
-          "trans_dev": {"type": "keyword"},
-      }
-    }
-  }
-  ,
-  "settings": {
-    "number_of_shards": 1
-  }
-}

twitter_scraper/twint_master/elasticsearch/index-user.json DELETED Viewed

@@ -1,33 +0,0 @@
-PUT twintuser
-{
-  "mappings": {
-    "items": {
-      "properties": {
-        "id": {"type": "keyword"},
-        "name": {"type": "keyword"},
-        "username": {"type": "keyword"},
-        "bio": {"type": "text"},
-        "location": {"type": "keyword"},
-        "url": {"type": "text"},
-        "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
-        "join_date": {"type": "date", "format": "yyyy-MM-dd"},
-        "join_time": {"type": "date", "format": "HH:mm:ss"},
-        "tweets": {"type": "integer"},
-        "following": {"type": "integer"},
-        "followers": {"type": "integer"},
-        "likes": {"type": "integer"},
-        "media": {"type": "integer"},
-        "private": {"type": "integer"},
-        "verified": {"type": "integer"},
-        "avatar": {"type": "text"},
-        "background_image": {"type": "text"},
-        "session": {"type": "keyword"},
-        "geo_user": {"type": "geo_point"}
-      }
-    }
-  }
-  ,
-  "settings": {
-    "number_of_shards": 1
-  }
-}

twitter_scraper/twint_master/elasticsearch/visualizations.json DELETED Viewed

@@ -1,100 +0,0 @@
-[
-  {
-    "_id": "d47421c0-bfd5-11e8-8858-bbc566841533",
-    "_type": "visualization",
-    "_source": {
-      "title": "Activity [twinttweets]",
-      "visState": "{\"title\":\"Activity [twinttweets]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"area\",\"mode\":\"stacked\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"cardinal\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":true},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Days\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"User ids\"}}]}",
-      "uiStateJSON": "{}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "e2b89640-bfd4-11e8-8858-bbc566841533",
-    "_type": "visualization",
-    "_source": {
-      "title": "Activity - pie [twinttweets]",
-      "visState": "{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"user_id\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"size\":5},\"schema\":\"segment\",\"type\":\"terms\"}],\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":true,\"labels\":{\"last_level\":true,\"show\":false,\"truncate\":100,\"values\":true},\"legendPosition\":\"right\",\"type\":\"pie\"},\"title\":\"Activity - pie [twinttweets]\",\"type\":\"pie\"}",
-      "uiStateJSON": "{}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "37cd72e0-bfe4-11e8-961a-d371b24d5d1d",
-    "_type": "visualization",
-    "_source": {
-      "title": "Tweets Count [twinttweet]",
-      "visState": "{\"title\":\"Tweets Count [twinttweet]\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":33}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}}]}",
-      "uiStateJSON": "{}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "149ecbc0-bfe4-11e8-961a-d371b24d5d1d",
-    "_type": "visualization",
-    "_source": {
-      "title": "Word Cloud [twinttweets]",
-      "visState": "{\"title\":\"Word Cloud [twinttweets]\",\"type\":\"tagcloud\",\"params\":{\"scale\":\"linear\",\"orientation\":\"single\",\"minFontSize\":10,\"maxFontSize\":50,\"showLabel\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"username\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}]}",
-      "uiStateJSON": "{}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "a8d3ee70-bfd9-11e8-8858-bbc566841533",
-    "_type": "visualization",
-    "_source": {
-      "title": "Day-activity [twinttweet]",
-      "visState": "{\"title\":\"Day-activity [twinttweet]\",\"type\":\"histogram\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"position\":\"bottom\",\"scale\":{\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{},\"type\":\"category\"}],\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-3\"},\"legendPosition\":\"right\",\"orderBucketsBySum\":false,\"seriesParams\":[{\"data\":{\"id\":\"1\",\"label\":\"Tweets\"},\"drawLinesBetweenPoints\":true,\"mode\":\"normal\",\"show\":\"true\",\"showCircles\":true,\"type\":\"histogram\",\"valueAxis\":\"ValueAxis-3\"}],\"times\":[],\"type\":\"histogram\",\"valueAxes\":[{\"id\":\"ValueAxis-3\",\"labels\":{\"filter\":false,\"rotate\":0,\"show\":true,\"truncate\":100},\"name\":\"LeftAxis-1\",\"position\":\"left\",\"scale\":{\"mode\":\"normal\",\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{\"text\":\"Tweets\"},\"type\":\"value\"}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{\"min\":0,\"max\":23}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"asc\",\"orderBy\":\"_term\",\"customLabel\":\"\"}}]}",
-      "uiStateJSON": "{\"vis\":{\"legendOpen\":true}}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "8a8bb420-bfd9-11e8-8858-bbc566841533",
-    "_type": "visualization",
-    "_source": {
-      "title": "Week-activity [twinttweet]",
-      "visState": "{\"title\":\"Week-activity [twinttweet]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"histogram\",\"mode\":\"normal\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{},\"customLabel\":\"Days of the week\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"\"}}]}",
-      "uiStateJSON": "{}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
-      }
-    }
-  },
-  {
-    "_id": "b45ec590-c267-11e8-bcd4-3956fe930db7",
-    "_type": "visualization",
-    "_source": {
-      "title": "Heat-map [twinttweets]",
-      "visState": "{\"title\":\"Heat-map [twinttweets]\",\"type\":\"heatmap\",\"params\":{\"type\":\"heatmap\",\"addTooltip\":true,\"addLegend\":true,\"enableHover\":true,\"legendPosition\":\"right\",\"times\":[],\"colorsNumber\":10,\"colorSchema\":\"Reds\",\"setColorRange\":false,\"colorsRange\":[{\"from\":0,\"to\":10},{\"from\":10,\"to\":100},{\"from\":100,\"to\":200},{\"from\":200,\"to\":500},{\"from\":500,\"to\":1000},{\"from\":1000,\"to\":2000},{\"from\":2000,\"to\":3000},{\"from\":3000,\"to\":4000},{\"from\":4000,\"to\":5000},{\"from\":7000,\"to\":null}],\"invertColors\":false,\"percentageMode\":false,\"valueAxes\":[{\"show\":false,\"id\":\"ValueAxis-1\",\"type\":\"value\",\"scale\":{\"type\":\"linear\",\"defaultYExtents\":true},\"labels\":{\"show\":false,\"rotate\":270,\"overwriteColor\":false,\"color\":\"#555\"}}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"group\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{\"min\":0,\"max\":2}}}]}",
-      "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"3 - 592\":\"rgb(255,245,240)\",\"592 - 1.180\":\"rgb(254,228,216)\",\"1.180 - 1.769\":\"rgb(253,202,181)\",\"1.769 - 2.357\":\"rgb(252,171,142)\",\"2.357 - 2.945\":\"rgb(252,138,106)\",\"2.945 - 3.534\":\"rgb(251,106,74)\",\"3.534 - 4.122\":\"rgb(241,68,50)\",\"4.122 - 4.711\":\"rgb(217,38,35)\",\"4.711 - 5.299\":\"rgb(188,20,26)\",\"5.299 - 5.887\":\"rgb(152,12,19)\"},\"colors\":{\"3 - 592\":\"#FCEACA\",\"592 - 1.180\":\"#F9E2D2\",\"1.180 - 1.769\":\"#F9BA8F\"}}}",
-      "description": "",
-      "version": 1,
-      "kibanaSavedObjectMeta": {
-        "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"filter\":[],\"query\":{\"language\":\"lucene\",\"query\":\"\"}}"
-      }
-    }
-  }
-]

twitter_scraper/twint_master/extracted-tweets.txt DELETED Viewed

@@ -1,5 +0,0 @@
-'@annieloof Nej, jag håller med. Tänk mer som Mathias Andersson (SD). https://t.co/gSqQDz5N8z'
-'Man kan ha synpunkter på en sådan lösning, men den är naturligtvis att föredra framför frigående våldsverkare som fortsätter misshandla sina offer i väntan på fängelse.'
-'Är det ont om plats på anstalterna så får man sänka standarden rejält för att få rum med fler interner per kvadratmeter.'
-'Döms man för brott, särskilt våldsbrott, ska man vara inlåst från det att domen faller tills straffet är avtjänat. Allt annat är vansinne.'
-'Platsbrist? Jaha, vad spelar det för roll? \n\nDet gör mig förbannad och bestört att lösningen på problemet med överfulla fängelser verkar vara att dömda våldsbrottslingar får röra sig fritt i samhället istället för att sitta inlåsta. \n\nhttps://t.co/QDi9rM3kMC'

twitter_scraper/twint_master/requirements.txt DELETED Viewed

@@ -1,13 +0,0 @@
-aiohttp
-aiodns
-beautifulsoup4
-cchardet
-dataclasses
-elasticsearch
-pysocks
-pandas>=0.23.0
-aiohttp_socks<=0.4.1
-schedule
-geopy
-fake-useragent
-googletransx