Spaces:

politweet-sh
/

politweet

Runtime error

App Files Community

Mosa committed on Jul 6, 2022

Commit

a2b888f

•

1 Parent(s): 2981ede

here are my changes

Browse files

Files changed (1) hide show

twitter-scraper/twint-master/twitter_scraper.ipynb +155 -71

twitter-scraper/twint-master/twitter_scraper.ipynb CHANGED Viewed

@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "id": "c9021300",
    "metadata": {
     "scrolled": true
@@ -18,9 +18,7 @@
    "outputs": [],
    "source": [
     "%%capture \n",
-    "!pip3 install Twint \n",
-    "#!pip install asyncio\n",
-    "\n"
    ]
   },
   {
@@ -33,123 +31,209 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "id": "1413ab2b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import asyncio\n",
-    "import os\n",
-    "loop = asyncio.get_event_loop()\n",
-    "loop.is_running()\n",
-    "import twint\n",
-    "import nest_asyncio\n",
-    "nest_asyncio.apply()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
-   "id": "193ee41e",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'get_tweets' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m/tmp/ipykernel_17223/2414687227.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mto_date\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0;34m\"2022-6-30\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mnum_tweets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0m_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_tweets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"jimmieakesson\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfrom_date\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mto_date\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnum_tweets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu_or_s\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"u\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'get_tweets' is not defined"
-     ]
-    }
-   ],
    "source": [
     "from_date=\"2022-6-10 10:30:22\"\n",
     "to_date= \"2022-6-30\"\n",
     "num_tweets = 20\n",
-    "_data=get_tweets(\"jimmieakesson\",from_date, to_date,num_tweets, u_or_s=\"u\")\n",
-    "\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1276f8a4",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "9"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "tweets= list(_data.keys())\n",
-    "len(list(_data.keys()))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c68d6f75",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "'/home/oxygen/politweet/twitter-scraper/twint-master'"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
-    "pwd"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "d38514f3",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "import scrape\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
-   "id": "a7912a91",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "from_date=\"2022-6-10 10:30:22\"\n",
-    "to_date= \"2022-6-30\"\n",
-    "num_tweets = 20\n",
-    "_data=scrape.scraper.get_tweets(\"jimmieakesson\",u_or_s=\"u\")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e3fe4402",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "tweets= df[\"tweet\"]\n",
-    "for i in tweets:\n",
-    "    print(i, \"\\n\", \"__________________________________________________________\")"
-   ]
   }
  ],
  "metadata": {

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "c9021300",
    "metadata": {
     "scrolled": true
    "outputs": [],
    "source": [
     "%%capture \n",
+    "!pip3 install Twint \n"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "1413ab2b",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# import asyncio\n",
+    "# import os\n",
+    "# loop = asyncio.get_event_loop()\n",
+    "# loop.is_running()\n",
+    "# import twint\n",
+    "# import nest_asyncio\n",
+    "# nest_asyncio.apply()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "d38514f3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scrape\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7912a91",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from_date=\"2022-6-10 10:30:22\"\n",
     "to_date= \"2022-6-30\"\n",
     "num_tweets = 20\n",
+    "_data=scrape.scraper.get_tweets(\"jimmieakesson\",u_or_s=\"u\",from_date=221232,to_date=2313)\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "48d50b46",
    "metadata": {},
+   "outputs": [],
    "source": [
+    "tweets= _data.keys()\n",
+    "for i in tweets:\n",
+    "    _data[i][\"tweet\"]\n",
+    "    print(_data[i][\"tweet\"], \"\\n\", \"__________________________________________________________\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72cabcb5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from_date=\"2022-6-10 10:30:22\"\n",
+    "to_date= \"2022-6-30\"\n",
+    "num_tweets = 20\n",
+    "_data=scrape.scraper.string_search_user_tweets(\"jimmieakesson\",\"invandring\")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "549e4fb3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tweets= _data[\"tweet\"]\n",
+    "for i in tweets:\n",
+    "    print(i, \"\\n\", \"__________________________________________________________\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "733dd44a",
    "metadata": {},
    "outputs": [
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Defaulting to user installation because normal site-packages is not writeable\n",
+      "Requirement already satisfied: snscrape in /home/oxygen/.local/lib/python3.10/site-packages (0.3.4)\n",
+      "Requirement already satisfied: beautifulsoup4 in /home/oxygen/.local/lib/python3.10/site-packages (from snscrape) (4.11.1)\n",
+      "Requirement already satisfied: requests[socks] in /usr/lib/python3/dist-packages (from snscrape) (2.25.1)\n",
+      "Requirement already satisfied: lxml in /usr/lib/python3/dist-packages (from snscrape) (4.8.0)\n",
+      "Requirement already satisfied: soupsieve>1.2 in /home/oxygen/.local/lib/python3.10/site-packages (from beautifulsoup4->snscrape) (2.3.2.post1)\n",
+      "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /home/oxygen/.local/lib/python3.10/site-packages (from requests[socks]->snscrape) (1.7.1)\n"
+     ]
     }
    ],
    "source": [
+    "#%pip install -q snscrape==0.3.4\n",
+    "!pip3 install snscrape\n",
+    "#!pip3 install git+https://github.com/JustAnotherArchivist/snscrape.git"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
+   "id": "0d16422c",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
+    "%pip install -q snscrape==0.3.4\n",
+    "from datetime import date\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def get_tweets(search_term, from_date, to_date=date.today(), num_tweets=100,u_or_s='s'):\n",
+    "  if u_or_s.lower() =='u':\n",
+    "    extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-user '{search_term} until:{to_date}' > extracted-tweets.txt\" \n",
+    "  else:\n",
+    "    extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-search '{search_term} until:{to_date}' > extracted-tweets.txt\"\n",
+    "  \n",
+    "  os.system(extracted_tweets)\n",
+    "  if os.stat(\"extracted-tweets.txt\").st_size == 0:\n",
+    "    print('No Tweets found')\n",
+    "  else:\n",
+    "    df = pd.read_csv('extracted-tweets.txt', names=['content'])\n",
+    "  data_list=[]\n",
+    "  for row in df['content'].iteritems():\n",
+    "    temp= str(row[0])+str(row[1])\n",
+    "    temp= temp.replace(\"\\'\",\"\")\n",
+    "    data_list.append(temp)\n",
+    "  return data_list\n",
+    "\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
+   "id": "8e2adb35",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No Tweets found\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/home/oxygen/.local/bin/snscrape\", line 8, in <module>\n",
+      "    sys.exit(main())\n",
+      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 224, in main\n",
+      "    args = parse_args()\n",
+      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 159, in parse_args\n",
+      "    import snscrape.modules\n",
+      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 15, in <module>\n",
+      "    _import_modules()\n",
+      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 12, in _import_modules\n",
+      "    module = importlib.import_module(moduleName)\n",
+      "  File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n",
+      "    return _bootstrap._gcd_import(name[level:], package, level)\n",
+      "  File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/instagram.py\", line 12, in <module>\n",
+      "    class InstagramPost(typing.NamedTuple, snscrape.base.Item):\n",
+      "  File \"/usr/lib/python3.10/typing.py\", line 2329, in _namedtuple_mro_entries\n",
+      "    raise TypeError(\"Multiple inheritance with NamedTuple is not supported\")\n",
+      "TypeError: Multiple inheritance with NamedTuple is not supported\n"
+     ]
+    },
+    {
+     "ename": "UnboundLocalError",
+     "evalue": "local variable 'df' referenced before assignment",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mUnboundLocalError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_26511/1892081786.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mget_tweets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"jimmieakesson\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfrom_date\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0;34m\"2022-06-01\"\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mnum_tweets\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu_or_s\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"u\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/tmp/ipykernel_26511/275462205.py\u001b[0m in \u001b[0;36mget_tweets\u001b[0;34m(search_term, from_date, to_date, num_tweets, u_or_s)\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'extracted-tweets.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     18\u001b[0m   \u001b[0mdata_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m   \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     20\u001b[0m     \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\'\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'df' referenced before assignment"
+     ]
+    }
+   ],
    "source": [
+    "d= get_tweets(\"jimmieakesson\",from_date= \"2022-06-01\" ,num_tweets =5, u_or_s=\"u\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "a2c837f4",
    "metadata": {},
    "outputs": [],
+   "source": []
   }
  ],
  "metadata": {