Spaces:
Runtime error
Runtime error
Merge pull request #30 from Demea9000/22-redo-twitter-scraper
Browse files- .idea/misc.xml +1 -1
- .idea/politweet.iml +1 -0
- requirements.txt +28 -0
- twitter-scraper/TwitterScraper.py +17 -0
- twitter-scraper/scrape.py +91 -0
- twitter-scraper/twint-master/.github/FUNDING.yml +0 -3
- twitter-scraper/twint-master/.github/ISSUE_TEMPLATE.md +0 -20
- twitter-scraper/twint-master/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md +0 -17
- twitter-scraper/twint-master/.gitignore +0 -115
- twitter-scraper/twint-master/.travis.yml +0 -23
- twitter-scraper/twint-master/Dockerfile +0 -10
- twitter-scraper/twint-master/LICENSE +0 -21
- twitter-scraper/twint-master/MANIFEST.in +0 -1
- twitter-scraper/twint-master/README.md +0 -272
- twitter-scraper/twint-master/Untitled.ipynb +0 -282
- twitter-scraper/twint-master/automate.py +0 -65
- twitter-scraper/twint-master/elasticsearch/README.md +0 -5
- twitter-scraper/twint-master/scrape.py +0 -102
- twitter-scraper/twint-master/scrape__init__.py +0 -14
- twitter-scraper/twint-master/setup.py +0 -65
- twitter-scraper/twint-master/test.py +0 -92
- twitter-scraper/twint-master/twint/__init__.py +0 -32
- twitter-scraper/twint-master/twint/__version__.py +0 -3
- twitter-scraper/twint-master/twint/cli.py +0 -342
- twitter-scraper/twint-master/twint/config.py +0 -87
- twitter-scraper/twint-master/twint/datelock.py +0 -44
- twitter-scraper/twint-master/twint/feed.py +0 -145
- twitter-scraper/twint-master/twint/format.py +0 -91
- twitter-scraper/twint-master/twint/get.py +0 -298
- twitter-scraper/twint-master/twint/output.py +0 -241
- twitter-scraper/twint-master/twint/run.py +0 -412
- twitter-scraper/twint-master/twint/storage/__init__.py +0 -0
- twitter-scraper/twint-master/twint/storage/db.py +0 -297
- twitter-scraper/twint-master/twint/storage/elasticsearch.py +0 -364
- twitter-scraper/twint-master/twint/storage/panda.py +0 -196
- twitter-scraper/twint-master/twint/storage/write.py +0 -77
- twitter-scraper/twint-master/twint/storage/write_meta.py +0 -151
- twitter-scraper/twint-master/twint/token.py +0 -94
- twitter-scraper/twint-master/twint/tweet.py +0 -166
- twitter-scraper/twint-master/twint/url.py +0 -195
- twitter-scraper/twint-master/twint/user.py +0 -52
- twitter-scraper/twint-master/twint/verbose.py +0 -18
- twitter-scraper/twint-master/twitter_scraper.ipynb +0 -265
- twitter-scraper/twitter_scraper.ipynb +819 -0
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
.idea/politweet.iml
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
<component name="NewModuleRootManager">
|
4 |
<content url="file://$MODULE_DIR$">
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
|
|
6 |
</content>
|
7 |
<orderEntry type="inheritedJdk" />
|
8 |
<orderEntry type="sourceFolder" forTests="false" />
|
|
|
3 |
<component name="NewModuleRootManager">
|
4 |
<content url="file://$MODULE_DIR$">
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
6 |
+
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
7 |
</content>
|
8 |
<orderEntry type="inheritedJdk" />
|
9 |
<orderEntry type="sourceFolder" forTests="false" />
|
requirements.txt
CHANGED
@@ -1,23 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
certifi==2022.6.15
|
|
|
2 |
charset-normalizer==2.1.0
|
3 |
cycler==0.11.0
|
|
|
|
|
|
|
4 |
et-xmlfile==1.1.0
|
|
|
5 |
fonttools==4.34.0
|
|
|
|
|
|
|
|
|
6 |
idna==3.3
|
7 |
kiwisolver==1.4.3
|
8 |
matplotlib==3.5.2
|
|
|
9 |
numpy==1.23.0
|
|
|
10 |
openai==0.20.0
|
11 |
openpyxl==3.0.10
|
12 |
packaging==21.3
|
13 |
pandas==1.4.3
|
14 |
pandas-stubs==1.4.3.220704
|
15 |
Pillow==9.2.0
|
|
|
|
|
16 |
pyparsing==3.0.9
|
|
|
17 |
python-dateutil==2.8.2
|
|
|
18 |
pytz==2022.1
|
19 |
regex==2022.6.2
|
20 |
requests==2.28.1
|
|
|
|
|
21 |
six==1.16.0
|
|
|
22 |
tqdm==4.64.0
|
|
|
23 |
urllib3==1.26.9
|
|
|
|
1 |
+
aiodns==3.0.0
|
2 |
+
aiohttp==3.8.1
|
3 |
+
aiohttp-socks==0.7.1
|
4 |
+
aiosignal==1.2.0
|
5 |
+
async-timeout==4.0.2
|
6 |
+
attrs==21.4.0
|
7 |
+
beautifulsoup4==4.11.1
|
8 |
+
cchardet==2.1.7
|
9 |
certifi==2022.6.15
|
10 |
+
cffi==1.15.1
|
11 |
charset-normalizer==2.1.0
|
12 |
cycler==0.11.0
|
13 |
+
dataclasses==0.6
|
14 |
+
elastic-transport==8.1.2
|
15 |
+
elasticsearch==8.3.1
|
16 |
et-xmlfile==1.1.0
|
17 |
+
fake-useragent==0.1.11
|
18 |
fonttools==4.34.0
|
19 |
+
frozenlist==1.3.0
|
20 |
+
geographiclib==1.52
|
21 |
+
geopy==2.2.0
|
22 |
+
googletransx==2.4.2
|
23 |
idna==3.3
|
24 |
kiwisolver==1.4.3
|
25 |
matplotlib==3.5.2
|
26 |
+
multidict==6.0.2
|
27 |
numpy==1.23.0
|
28 |
+
oauthlib==3.2.0
|
29 |
openai==0.20.0
|
30 |
openpyxl==3.0.10
|
31 |
packaging==21.3
|
32 |
pandas==1.4.3
|
33 |
pandas-stubs==1.4.3.220704
|
34 |
Pillow==9.2.0
|
35 |
+
pycares==4.2.1
|
36 |
+
pycparser==2.21
|
37 |
pyparsing==3.0.9
|
38 |
+
PySocks==1.7.1
|
39 |
python-dateutil==2.8.2
|
40 |
+
python-socks==2.0.3
|
41 |
pytz==2022.1
|
42 |
regex==2022.6.2
|
43 |
requests==2.28.1
|
44 |
+
requests-oauthlib==1.3.1
|
45 |
+
schedule==1.1.0
|
46 |
six==1.16.0
|
47 |
+
soupsieve==2.3.2.post1
|
48 |
tqdm==4.64.0
|
49 |
+
-e git+https://github.com/twintproject/twint.git@e7c8a0c764f6879188e5c21e25fb6f1f856a7221#egg=twint
|
50 |
urllib3==1.26.9
|
51 |
+
yarl==1.7.2
|
twitter-scraper/TwitterScraper.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import twint
|
2 |
+
import datetime
|
3 |
+
|
4 |
+
c = twint.Config()
|
5 |
+
|
6 |
+
c.Search = ['Taylor Swift'] # topic
|
7 |
+
c.Limit = 500 # number of Tweets to scrape
|
8 |
+
c.Store_csv = True # store tweets in a csv file
|
9 |
+
c.Output = "taylor_swift_tweets.csv" # path to csv file
|
10 |
+
|
11 |
+
twint.run.Search(c)
|
12 |
+
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
df = pd.read_csv('taylor_swift_tweets.csv')
|
16 |
+
|
17 |
+
print(df.head())
|
twitter-scraper/scrape.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import twint
|
2 |
+
from datetime import date
|
3 |
+
|
4 |
+
|
5 |
+
class TwitterScraper(object):
|
6 |
+
"""
|
7 |
+
This class is a twitter TwitterScraper called TwitterScraper. It takes the user as input and collects the user's tweets
|
8 |
+
from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it collects the number of tweets 'num_tweets' from today.
|
9 |
+
It outputs a dictionary with the tweet unique id and some other information.
|
10 |
+
input: user, from_date, to_date, num_tweets
|
11 |
+
output: dict
|
12 |
+
"""
|
13 |
+
def __init__(self, from_date="2006-07-01", to_date=str(date.today()), num_tweets=20):
|
14 |
+
self.from_date = from_date
|
15 |
+
self.to_date = to_date
|
16 |
+
self.num_tweets = num_tweets
|
17 |
+
self.conf = twint.Config()
|
18 |
+
|
19 |
+
def scrape_by_user(self, _user):
|
20 |
+
"""This method uses twint to extract tweets based on username"""
|
21 |
+
self.conf.Search = "from:@" + _user # is the search configuration is given in this format it searches after
|
22 |
+
# user_names.
|
23 |
+
return self.__get_tweets__from_twint__()
|
24 |
+
|
25 |
+
def scrape_by_string(self, _string: str):
|
26 |
+
"""This method uses twint to extract tweets based on string.
|
27 |
+
all extracted tweets have the specified word in _string parameter in it.
|
28 |
+
"""
|
29 |
+
self.conf.Search = _string # this tells twint configuration to search for string
|
30 |
+
return self.__get_tweets__from_twint__()
|
31 |
+
|
32 |
+
def scrape_by_user_and_string(self, _user: str, _string: str):
|
33 |
+
"""This method uses twint to extract tweets brased on string and username"""
|
34 |
+
self.conf.Username = _user
|
35 |
+
self.conf.Search = _string
|
36 |
+
return self.__get_tweets__from_twint__()
|
37 |
+
|
38 |
+
def get_only_tweets(self, tweet_and_replies_info):
|
39 |
+
tweet_and_replies = tweet_and_replies_info["tweet"]
|
40 |
+
"""
|
41 |
+
This functions input arg is a data frame (the output from scrape methords ) and removes...
|
42 |
+
all tweets starting with \"@\" which is indicator of a reply or retweet.
|
43 |
+
"""
|
44 |
+
indx_replies = []
|
45 |
+
for i in range(len(tweet_and_replies)):
|
46 |
+
if tweet_and_replies[i].startswith("@"):
|
47 |
+
indx_replies.append(i)
|
48 |
+
|
49 |
+
tweets_info = tweet_and_replies_info.drop(labels=indx_replies, axis=0)
|
50 |
+
# drop removes the columns which its index specified by
|
51 |
+
# indx_replies. axis=0 if we want to delete rows.
|
52 |
+
#print(len(tweets['tweet']), " of them are Tweets")
|
53 |
+
return tweets_info
|
54 |
+
|
55 |
+
def __get_tweets__from_twint__(self):
|
56 |
+
""" __get_tweets_from_twint__
|
57 |
+
tweet info is a dataframe with fallowing columns
|
58 |
+
Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
|
59 |
+
'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
|
60 |
+
'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
|
61 |
+
'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
|
62 |
+
'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
|
63 |
+
'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
|
64 |
+
'trans_dest']
|
65 |
+
we just pick the relevant ones.
|
66 |
+
c is a twint.Config() object
|
67 |
+
we also configure twint output.
|
68 |
+
"""
|
69 |
+
self.conf.Pandas = True #
|
70 |
+
self.conf.Count = True #
|
71 |
+
self.conf.Limit = self.num_tweets # specifies how many tweet should be scraped
|
72 |
+
self.conf.Since = self.from_date
|
73 |
+
self.conf.Until = self.to_date
|
74 |
+
self.conf.Hide_output = True # Hides the output. If set to False it will prints tweets in the terminal window.
|
75 |
+
twint.run.Search(self.conf)
|
76 |
+
tweet_and_replies_inf = twint.output.panda.Tweets_df # here we say that output souldwe dataframe.
|
77 |
+
tweet_and_replies_inf = tweet_and_replies_inf[
|
78 |
+
["id", "tweet", "date", "user_id", "username", "urls", 'nlikes', 'nreplies', 'nretweets']]
|
79 |
+
return tweet_and_replies_inf
|
80 |
+
# def __check_date_type(d1,d2): if (type(d1) or type(d2)) is not type("str"): # If the type of ite date input
|
81 |
+
# is not string it generates exception print("[!] Please make sure the date is a string in this format
|
82 |
+
# \"yyyy-mm-dd\" ") raise EXCEPTION("Incorrect date type Exception!") elif (len(d1.split("-")) or len(d2.split(
|
83 |
+
# "-")))<2: print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ") raise EXCEPTION(
|
84 |
+
# "Incorrect date type Exception!")
|
85 |
+
if __name__ == "__main__":
|
86 |
+
sc = TwitterScraper(num_tweets=1002)
|
87 |
+
dc = sc.scrape_by_string("jimmieakesson")
|
88 |
+
print(dc.head())
|
89 |
+
print(dc.shape)
|
90 |
+
|
91 |
+
|
twitter-scraper/twint-master/.github/FUNDING.yml
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
# These are supported funding model platforms
|
2 |
-
patreon: twintproject
|
3 |
-
custom: paypal.me/noneprivacy
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/.github/ISSUE_TEMPLATE.md
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
# Issue Template
|
2 |
-
Please use this template!
|
3 |
-
|
4 |
-
## Initial Check
|
5 |
-
> If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks.
|
6 |
-
|
7 |
-
>Make sure you've checked the following:
|
8 |
-
|
9 |
-
- [] Python version is 3.6 or later;
|
10 |
-
- [] Updated Twint with `pip3 install --user --upgrade -e git+https://github.com/minamotorin/twint.git@origin/master#egg=twint`;
|
11 |
-
- [] I have searched the issues and there are no duplicates of this issue/question/request (please link to related issues of twintproject/twint for reference).
|
12 |
-
|
13 |
-
## Command Ran
|
14 |
-
>Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue.
|
15 |
-
|
16 |
-
## Description of Issue
|
17 |
-
>Please use **as much detail as possible.**
|
18 |
-
|
19 |
-
## Environment Details
|
20 |
-
>Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
### Initial Check
|
2 |
-
> If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks.
|
3 |
-
|
4 |
-
>Make sure you've checked the following:
|
5 |
-
|
6 |
-
- [] Python version is 3.6;
|
7 |
-
- [] Using the latest version of Twint;
|
8 |
-
- [] Updated Twint with `pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint`;
|
9 |
-
|
10 |
-
### Command Ran
|
11 |
-
>Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue.
|
12 |
-
|
13 |
-
### Description of Issue
|
14 |
-
>Please use **as much detail as possible.**
|
15 |
-
|
16 |
-
### Environment Details
|
17 |
-
>Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/.gitignore
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
# Byte-compiled / optimized / DLL files
|
2 |
-
__pycache__/
|
3 |
-
*.py[cod]
|
4 |
-
*$py.class
|
5 |
-
tweets.db
|
6 |
-
# C extensions
|
7 |
-
*.so
|
8 |
-
|
9 |
-
config.ini
|
10 |
-
twint/storage/mysql.py
|
11 |
-
|
12 |
-
# Node Dependency directories
|
13 |
-
node_modules/
|
14 |
-
jspm_packages/
|
15 |
-
tests/
|
16 |
-
# Distribution / packaging
|
17 |
-
.Python
|
18 |
-
env/
|
19 |
-
build/
|
20 |
-
develop-eggs/
|
21 |
-
dist/
|
22 |
-
downloads/
|
23 |
-
eggs/
|
24 |
-
.eggs/
|
25 |
-
lib/
|
26 |
-
lib64/
|
27 |
-
parts/
|
28 |
-
sdist/
|
29 |
-
var/
|
30 |
-
wheels/
|
31 |
-
*.egg-info/
|
32 |
-
.installed.cfg
|
33 |
-
*.egg
|
34 |
-
|
35 |
-
# PyInstaller
|
36 |
-
# Usually these files are written by a python script from a template
|
37 |
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
38 |
-
*.manifest
|
39 |
-
*.spec
|
40 |
-
|
41 |
-
# Installer logs
|
42 |
-
pip-log.txt
|
43 |
-
pip-delete-this-directory.txt
|
44 |
-
|
45 |
-
# Unit test / coverage reports
|
46 |
-
htmlcov/
|
47 |
-
.tox/
|
48 |
-
.coverage
|
49 |
-
.coverage.*
|
50 |
-
.cache
|
51 |
-
nosetests.xml
|
52 |
-
coverage.xml
|
53 |
-
*.cover
|
54 |
-
.hypothesis/
|
55 |
-
|
56 |
-
# Translations
|
57 |
-
*.mo
|
58 |
-
*.pot
|
59 |
-
|
60 |
-
# Django stuff:
|
61 |
-
*.log
|
62 |
-
local_settings.py
|
63 |
-
|
64 |
-
# Flask stuff:
|
65 |
-
instance/
|
66 |
-
.webassets-cache
|
67 |
-
|
68 |
-
# Scrapy stuff:
|
69 |
-
.scrapy
|
70 |
-
|
71 |
-
# Sphinx documentation
|
72 |
-
docs/_build/
|
73 |
-
|
74 |
-
# PyBuilder
|
75 |
-
target/
|
76 |
-
|
77 |
-
# Jupyter Notebook
|
78 |
-
.ipynb_checkpoints
|
79 |
-
|
80 |
-
# pyenv
|
81 |
-
.python-version
|
82 |
-
|
83 |
-
# celery beat schedule file
|
84 |
-
celerybeat-schedule
|
85 |
-
|
86 |
-
# SageMath parsed files
|
87 |
-
*.sage.py
|
88 |
-
|
89 |
-
# dotenv
|
90 |
-
.env
|
91 |
-
|
92 |
-
# virtualenv
|
93 |
-
.venv
|
94 |
-
venv/
|
95 |
-
ENV/
|
96 |
-
|
97 |
-
# Spyder project settings
|
98 |
-
.spyderproject
|
99 |
-
.spyproject
|
100 |
-
|
101 |
-
# Rope project settings
|
102 |
-
.ropeproject
|
103 |
-
|
104 |
-
# mkdocs documentation
|
105 |
-
/site
|
106 |
-
|
107 |
-
# mypy
|
108 |
-
.mypy_cache/
|
109 |
-
|
110 |
-
# output
|
111 |
-
*.csv
|
112 |
-
*.json
|
113 |
-
*.txt
|
114 |
-
|
115 |
-
test_twint.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/.travis.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
dist: bionic
|
2 |
-
language: python
|
3 |
-
python:
|
4 |
-
- "3.6"
|
5 |
-
- "3.7"
|
6 |
-
- "3.8"
|
7 |
-
- "nightly"
|
8 |
-
matrix:
|
9 |
-
allow_failures:
|
10 |
-
- python: "nightly"
|
11 |
-
- python: "3.8"
|
12 |
-
install:
|
13 |
-
- pip install -r requirements.txt
|
14 |
-
script:
|
15 |
-
- python test.py
|
16 |
-
deploy:
|
17 |
-
provider: pypi
|
18 |
-
user: "codyzacharias"
|
19 |
-
password:
|
20 |
-
secure: sWWvx50F7KJBtf8z2njc+Q31WIAHiQs4zKEiGD4/7xrshw55H5z+WnqZ9VIP83qm9yKefoRKp7WnaJeXZ3ulZSLn64ue45lqFozWMyGvelRPOKvZi9XPMqBA7+qllR/GseTHSGC3G5EGxac6UEI3irYe3mZXxfjpxNOXVti8rJ2xX8TiJM0AVKRrdDiAstOhMMkXkB7fYXMQALwEp8UoW/UbjbeqsKueXydjStaESNP/QzRFZ3/tuNu+3HMz/olniLUhUWcF/xDbJVpXuaRMUalgqe+BTbDdtUVt/s/GKtpg5GAzJyhQphiCM/huihedUIKSoI+6A8PTzuxrLhB5BMi9pcllED02v7w1enpu5L2l5cRDgQJSOpkxkA5Eese8nxKOOq0KzwDQa3JByrRor8R4yz+p5s4u2r0Rs2A9fkjQYwd/uWBSEIRF4K9WZoniiikahwXq070DMRgV7HbovKSjo5NK5F8j+psrtqPF+OHN2aVfWxbGnezrOOkmzuTHhWZVj3pPSpQU1WFWHo9fPo4I6YstR4q6XjNNjrpY3ojSlv0ThMbUem7zhHTRkRsSA2SpPfqw5E3Jf7vaiQb4M5zkBVqxuq4tXb14GJ26tGD8tel8u8b+ccpkAE9xf+QavP8UHz4PbBhqgFX5TbV/H++cdsICyoZnT35yiaDOELM=
|
21 |
-
on:
|
22 |
-
tags: true
|
23 |
-
python: "3.7"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/Dockerfile
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
FROM python:3.6-buster
|
2 |
-
LABEL maintainer="codyzacharias@pm.me"
|
3 |
-
|
4 |
-
WORKDIR /root
|
5 |
-
|
6 |
-
RUN git clone --depth=1 https://github.com/twintproject/twint.git && \
|
7 |
-
cd /root/twint && \
|
8 |
-
pip3 install . -r requirements.txt
|
9 |
-
|
10 |
-
CMD /bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/LICENSE
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
MIT License
|
2 |
-
|
3 |
-
Copyright (c) 2018 Cody Zacharias
|
4 |
-
|
5 |
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
-
of this software and associated documentation files (the "Software"), to deal
|
7 |
-
in the Software without restriction, including without limitation the rights
|
8 |
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
-
copies of the Software, and to permit persons to whom the Software is
|
10 |
-
furnished to do so, subject to the following conditions:
|
11 |
-
|
12 |
-
The above copyright notice and this permission notice shall be included in all
|
13 |
-
copies or substantial portions of the Software.
|
14 |
-
|
15 |
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
-
SOFTWARE.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/MANIFEST.in
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
include README.md LICENSE
|
|
|
|
twitter-scraper/twint-master/README.md
DELETED
@@ -1,272 +0,0 @@
|
|
1 |
-
20220207.0
|
2 |
-
|
3 |
-
# About this fork
|
4 |
-
|
5 |
-
[This repository](https://github.com/minamotorin/twint) is the fork of [https://github.com/twintproject/twint](https://github.com/twintproject/twint) and for myself.
|
6 |
-
|
7 |
-
Modified by [minamotorin](https://github.com/minamotorin).
|
8 |
-
|
9 |
-
## Updates from twintproject/twint
|
10 |
-
|
11 |
-
### twint.token.RefreshTokenException: Could not find the Guest token in HTML
|
12 |
-
|
13 |
-
This problem doesn't happen recently.
|
14 |
-
|
15 |
-
#### Related
|
16 |
-
|
17 |
-
- [twintproject/twint#1320](https://github.com/twintproject/twint/issues/1320)
|
18 |
-
- [twintproject/twint#1322](https://github.com/twintproject/twint/pull/1322)
|
19 |
-
- [twintproject/twint#1328](https://github.com/twintproject/twint/pull/1328)
|
20 |
-
- [twintproject/twint#1061](https://github.com/twintproject/twint/issues/1061)
|
21 |
-
- [twintproject/twint#1114](https://github.com/twintproject/twint/issues/1114)
|
22 |
-
|
23 |
-
### json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
|
24 |
-
|
25 |
-
The fix is **not complete**.
|
26 |
-
`twint.run.Profile` will work but `twint.run.db` will not.
|
27 |
-
This means [`test.py`](./test.py) causes an error.
|
28 |
-
|
29 |
-
I think this is because the fields of the result table are not exactly the same as the traditional ones.
|
30 |
-
|
31 |
-
#### Related
|
32 |
-
|
33 |
-
- [twintproject/twint#1335](https://github.com/twintproject/twint/issues/1335)
|
34 |
-
|
35 |
-
### [-] TWINT requires Python version 3.6+.
|
36 |
-
|
37 |
-
#### Related
|
38 |
-
|
39 |
-
- [twintproject/twint#1344](https://github.com/twintproject/twint/issues/1344)
|
40 |
-
- [twintproject/twint#1345](https://github.com/twintproject/twint/pull/1345)
|
41 |
-
- [twintproject/twint#1344](https://github.com/twintproject/twint/issues/1346)
|
42 |
-
- [twintproject/twint#1309](https://github.com/twintproject/twint/pull/1309)
|
43 |
-
- [twintproject/twint#1313](https://github.com/twintproject/twint/issues/1313)
|
44 |
-
|
45 |
-
## References
|
46 |
-
|
47 |
-
- [snscrape](https://github.com/JustAnotherArchivist/snscrape)
|
48 |
-
- [gallery-dl](https://github.com/mikf/gallery-dl)
|
49 |
-
|
50 |
-
## License
|
51 |
-
|
52 |
-
This repository is also under the [MIT License](https://opensource.org/licenses/mit-license.php).
|
53 |
-
|
54 |
-
---
|
55 |
-
|
56 |
-
# TWINT - Twitter Intelligence Tool
|
57 |
-
![2](https://i.imgur.com/iaH3s7z.png)
|
58 |
-
![3](https://i.imgur.com/hVeCrqL.png)
|
59 |
-
|
60 |
-
[![PyPI](https://img.shields.io/pypi/v/twint.svg)](https://pypi.org/project/twint/) [![Build Status](https://travis-ci.org/twintproject/twint.svg?branch=master)](https://travis-ci.org/twintproject/twint) [![Python 3.6|3.7|3.8](https://img.shields.io/badge/Python-3.6%2F3.7%2F3.8-blue.svg)](https://www.python.org/download/releases/3.0/) [![GitHub license](https://img.shields.io/github/license/haccer/tweep.svg)](https://github.com/haccer/tweep/blob/master/LICENSE) [![Downloads](https://pepy.tech/badge/twint)](https://pepy.tech/project/twint) [![Downloads](https://pepy.tech/badge/twint/week)](https://pepy.tech/project/twint/week) [![Patreon](https://img.shields.io/endpoint.svg?url=https:%2F%2Fshieldsio-patreon.herokuapp.com%2Ftwintproject)](https://www.patreon.com/twintproject) ![](https://img.shields.io/twitter/follow/noneprivacy.svg?label=Follow&style=social)
|
61 |
-
|
62 |
-
>No authentication. No API. No limits.
|
63 |
-
|
64 |
-
Twint is an advanced Twitter scraping tool written in Python that allows for scraping Tweets from Twitter profiles **without** using Twitter's API.
|
65 |
-
|
66 |
-
Twint utilizes Twitter's search operators to let you scrape Tweets from specific users, scrape Tweets relating to certain topics, hashtags & trends, or sort out *sensitive* information from Tweets like e-mail and phone numbers. I find this very useful, and you can get really creative with it too.
|
67 |
-
|
68 |
-
Twint also makes special queries to Twitter allowing you to also scrape a Twitter user's followers, Tweets a user has liked, and who they follow **without** any authentication, API, Selenium, or browser emulation.
|
69 |
-
|
70 |
-
## tl;dr Benefits
|
71 |
-
Some of the benefits of using Twint vs Twitter API:
|
72 |
-
- Can fetch almost __all__ Tweets (Twitter API limits to last 3200 Tweets only);
|
73 |
-
- Fast initial setup;
|
74 |
-
- Can be used anonymously and without Twitter sign up;
|
75 |
-
- **No rate limitations**.
|
76 |
-
|
77 |
-
## Limits imposed by Twitter
|
78 |
-
Twitter limits scrolls while browsing the user timeline. This means that with `.Profile` or with `.Favorites` you will be able to get ~3200 tweets.
|
79 |
-
|
80 |
-
## Requirements
|
81 |
-
- Python 3.6;
|
82 |
-
- aiohttp;
|
83 |
-
- aiodns;
|
84 |
-
- beautifulsoup4;
|
85 |
-
- cchardet;
|
86 |
-
- dataclasses
|
87 |
-
- elasticsearch;
|
88 |
-
- pysocks;
|
89 |
-
- pandas (>=0.23.0);
|
90 |
-
- aiohttp_socks;
|
91 |
-
- schedule;
|
92 |
-
- geopy;
|
93 |
-
- fake-useragent;
|
94 |
-
- py-googletransx.
|
95 |
-
|
96 |
-
## Installing
|
97 |
-
|
98 |
-
**Git:**
|
99 |
-
```bash
|
100 |
-
git clone --depth=1 https://github.com/twintproject/twint.git
|
101 |
-
cd twint
|
102 |
-
pip3 install . -r requirements.txt
|
103 |
-
```
|
104 |
-
|
105 |
-
**Pip:**
|
106 |
-
```bash
|
107 |
-
pip3 install twint
|
108 |
-
```
|
109 |
-
|
110 |
-
or
|
111 |
-
|
112 |
-
```bash
|
113 |
-
pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint
|
114 |
-
```
|
115 |
-
|
116 |
-
**Pipenv**:
|
117 |
-
```bash
|
118 |
-
pipenv install git+https://github.com/twintproject/twint.git#egg=twint
|
119 |
-
```
|
120 |
-
|
121 |
-
### March 2, 2021 Update
|
122 |
-
|
123 |
-
**Added**: Dockerfile
|
124 |
-
|
125 |
-
Noticed a lot of people are having issues installing (including me). Please use the Dockerfile temporarily while I look into them.
|
126 |
-
|
127 |
-
## CLI Basic Examples and Combos
|
128 |
-
A few simple examples to help you understand the basics:
|
129 |
-
|
130 |
-
- `twint -u username` - Scrape all the Tweets of a *user* (doesn't include **retweets** but includes **replies**).
|
131 |
-
- `twint -u username -s pineapple` - Scrape all Tweets from the *user*'s timeline containing _pineapple_.
|
132 |
-
- `twint -s pineapple` - Collect every Tweet containing *pineapple* from everyone's Tweets.
|
133 |
-
- `twint -u username --year 2014` - Collect Tweets that were tweeted **before** 2014.
|
134 |
-
- `twint -u username --since "2015-12-20 20:30:15"` - Collect Tweets that were tweeted since 2015-12-20 20:30:15.
|
135 |
-
- `twint -u username --since 2015-12-20` - Collect Tweets that were tweeted since 2015-12-20 00:00:00.
|
136 |
-
- `twint -u username -o file.txt` - Scrape Tweets and save to file.txt.
|
137 |
-
- `twint -u username -o file.csv --csv` - Scrape Tweets and save as a csv file.
|
138 |
-
- `twint -u username --email --phone` - Show Tweets that might have phone numbers or email addresses.
|
139 |
-
- `twint -s "Donald Trump" --verified` - Display Tweets by verified users that Tweeted about Donald Trump.
|
140 |
-
- `twint -g="48.880048,2.385939,1km" -o file.csv --csv` - Scrape Tweets from a radius of 1km around a place in Paris and export them to a csv file.
|
141 |
-
- `twint -u username -es localhost:9200` - Output Tweets to Elasticsearch
|
142 |
-
- `twint -u username -o file.json --json` - Scrape Tweets and save as a json file.
|
143 |
-
- `twint -u username --database tweets.db` - Save Tweets to a SQLite database.
|
144 |
-
- `twint -u username --followers` - Scrape a Twitter user's followers.
|
145 |
-
- `twint -u username --following` - Scrape who a Twitter user follows.
|
146 |
-
- `twint -u username --favorites` - Collect all the Tweets a user has favorited (gathers ~3200 tweet).
|
147 |
-
- `twint -u username --following --user-full` - Collect full user information a person follows
|
148 |
-
- `twint -u username --timeline` - Use an effective method to gather Tweets from a user's profile (Gathers ~3200 Tweets, including **retweets** & **replies**).
|
149 |
-
- `twint -u username --retweets` - Use a quick method to gather the last 900 Tweets (that includes retweets) from a user's profile.
|
150 |
-
- `twint -u username --resume resume_file.txt` - Resume a search starting from the last saved scroll-id.
|
151 |
-
|
152 |
-
More detail about the commands and options are located in the [wiki](https://github.com/twintproject/twint/wiki/Commands)
|
153 |
-
|
154 |
-
## Module Example
|
155 |
-
|
156 |
-
Twint can now be used as a module and supports custom formatting. **More details are located in the [wiki](https://github.com/twintproject/twint/wiki/Module)**
|
157 |
-
|
158 |
-
```python
|
159 |
-
import twint
|
160 |
-
|
161 |
-
# Configure
|
162 |
-
c = twint.Config()
|
163 |
-
c.Username = "realDonaldTrump"
|
164 |
-
c.Search = "great"
|
165 |
-
|
166 |
-
# Run
|
167 |
-
twint.run.Search(c)
|
168 |
-
```
|
169 |
-
> Output
|
170 |
-
|
171 |
-
`955511208597184512 2018-01-22 18:43:19 GMT <now> pineapples are the best fruit`
|
172 |
-
|
173 |
-
```python
|
174 |
-
import twint
|
175 |
-
|
176 |
-
c = twint.Config()
|
177 |
-
|
178 |
-
c.Username = "noneprivacy"
|
179 |
-
c.Custom["tweet"] = ["id"]
|
180 |
-
c.Custom["user"] = ["bio"]
|
181 |
-
c.Limit = 10
|
182 |
-
c.Store_csv = True
|
183 |
-
c.Output = "none"
|
184 |
-
|
185 |
-
twint.run.Search(c)
|
186 |
-
```
|
187 |
-
|
188 |
-
## Storing Options
|
189 |
-
- Write to file;
|
190 |
-
- CSV;
|
191 |
-
- JSON;
|
192 |
-
- SQLite;
|
193 |
-
- Elasticsearch.
|
194 |
-
|
195 |
-
## Elasticsearch Setup
|
196 |
-
|
197 |
-
Details on setting up Elasticsearch with Twint is located in the [wiki](https://github.com/twintproject/twint/wiki/Elasticsearch).
|
198 |
-
|
199 |
-
## Graph Visualization
|
200 |
-
![graph](https://i.imgur.com/EEJqB8n.png)
|
201 |
-
|
202 |
-
[Graph](https://github.com/twintproject/twint/wiki/Graph) details are also located in the [wiki](https://github.com/twintproject/twint/wiki/Graph).
|
203 |
-
|
204 |
-
We are developing a Twint Desktop App.
|
205 |
-
|
206 |
-
![4](https://i.imgur.com/DzcfIgL.png)
|
207 |
-
|
208 |
-
## FAQ
|
209 |
-
> I tried scraping tweets from a user, I know that they exist but I'm not getting them
|
210 |
-
|
211 |
-
Twitter can shadow-ban accounts, which means that their tweets will not be available via search. To solve this, pass `--profile-full` if you are using Twint via CLI or, if are using Twint as module, add `config.Profile_full = True`. Please note that this process will be quite slow.
|
212 |
-
## More Examples
|
213 |
-
|
214 |
-
#### Followers/Following
|
215 |
-
|
216 |
-
> To get only follower usernames/following usernames
|
217 |
-
|
218 |
-
`twint -u username --followers`
|
219 |
-
|
220 |
-
`twint -u username --following`
|
221 |
-
|
222 |
-
> To get user info of followers/following users
|
223 |
-
|
224 |
-
`twint -u username --followers --user-full`
|
225 |
-
|
226 |
-
`twint -u username --following --user-full`
|
227 |
-
|
228 |
-
#### userlist
|
229 |
-
|
230 |
-
> To get only user info of user
|
231 |
-
|
232 |
-
`twint -u username --user-full`
|
233 |
-
|
234 |
-
> To get user info of users from a userlist
|
235 |
-
|
236 |
-
`twint --userlist inputlist --user-full`
|
237 |
-
|
238 |
-
|
239 |
-
#### tweet translation (experimental)
|
240 |
-
|
241 |
-
> To get 100 english tweets and translate them to italian
|
242 |
-
|
243 |
-
`twint -u noneprivacy --csv --output none.csv --lang en --translate --translate-dest it --limit 100`
|
244 |
-
|
245 |
-
or
|
246 |
-
|
247 |
-
```python
|
248 |
-
import twint
|
249 |
-
|
250 |
-
c = twint.Config()
|
251 |
-
c.Username = "noneprivacy"
|
252 |
-
c.Limit = 100
|
253 |
-
c.Store_csv = True
|
254 |
-
c.Output = "none.csv"
|
255 |
-
c.Lang = "en"
|
256 |
-
c.Translate = True
|
257 |
-
c.TranslateDest = "it"
|
258 |
-
twint.run.Search(c)
|
259 |
-
```
|
260 |
-
|
261 |
-
Notes:
|
262 |
-
- [Google translate has some quotas](https://cloud.google.com/translate/quotas)
|
263 |
-
|
264 |
-
## Featured Blog Posts:
|
265 |
-
- [How to use Twint as an OSINT tool](https://pielco11.ovh/posts/twint-osint/)
|
266 |
-
- [Basic tutorial made by Null Byte](https://null-byte.wonderhowto.com/how-to/mine-twitter-for-targeted-information-with-twint-0193853/)
|
267 |
-
- [Analyzing Tweets with NLP in minutes with Spark, Optimus and Twint](https://towardsdatascience.com/analyzing-tweets-with-nlp-in-minutes-with-spark-optimus-and-twint-a0c96084995f)
|
268 |
-
- [Loading tweets into Kafka and Neo4j](https://markhneedham.com/blog/2019/05/29/loading-tweets-twint-kafka-neo4j/)
|
269 |
-
|
270 |
-
## Contact
|
271 |
-
|
272 |
-
If you have any question, want to join in discussions, or need extra help, you are welcome to join our Twint focused channel at [OSINT team](https://osint.team)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/Untitled.ipynb
DELETED
@@ -1,282 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 67,
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"text= \"\\n\\n0. Brottslighet, 1. Miljö, 2. Skola, 3. Sjukvård, 4. Militär, 5. Invandring, 6. Integration \""
|
10 |
-
]
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"cell_type": "code",
|
14 |
-
"execution_count": 17,
|
15 |
-
"metadata": {},
|
16 |
-
"outputs": [
|
17 |
-
{
|
18 |
-
"name": "stdout",
|
19 |
-
"output_type": "stream",
|
20 |
-
"text": [
|
21 |
-
"WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.\n",
|
22 |
-
"Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n",
|
23 |
-
"To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n",
|
24 |
-
"Requirement already satisfied: regex in /home/oxygen/snap/jupyter/common/lib/python3.7/site-packages (2022.6.2)\n"
|
25 |
-
]
|
26 |
-
}
|
27 |
-
],
|
28 |
-
"source": [
|
29 |
-
"!pip install regex\n"
|
30 |
-
]
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"cell_type": "code",
|
34 |
-
"execution_count": 15,
|
35 |
-
"metadata": {},
|
36 |
-
"outputs": [
|
37 |
-
{
|
38 |
-
"data": {
|
39 |
-
"text/plain": [
|
40 |
-
"['0']"
|
41 |
-
]
|
42 |
-
},
|
43 |
-
"execution_count": 15,
|
44 |
-
"metadata": {},
|
45 |
-
"output_type": "execute_result"
|
46 |
-
}
|
47 |
-
],
|
48 |
-
"source": [
|
49 |
-
"re.findall(\"[0-9]+\", tl[0])"
|
50 |
-
]
|
51 |
-
},
|
52 |
-
{
|
53 |
-
"cell_type": "code",
|
54 |
-
"execution_count": 48,
|
55 |
-
"metadata": {},
|
56 |
-
"outputs": [
|
57 |
-
{
|
58 |
-
"data": {
|
59 |
-
"text/plain": [
|
60 |
-
"'0. Äldrefrågor'"
|
61 |
-
]
|
62 |
-
},
|
63 |
-
"execution_count": 48,
|
64 |
-
"metadata": {},
|
65 |
-
"output_type": "execute_result"
|
66 |
-
}
|
67 |
-
],
|
68 |
-
"source": [
|
69 |
-
"tl[0]"
|
70 |
-
]
|
71 |
-
},
|
72 |
-
{
|
73 |
-
"cell_type": "code",
|
74 |
-
"execution_count": 49,
|
75 |
-
"metadata": {},
|
76 |
-
"outputs": [
|
77 |
-
{
|
78 |
-
"data": {
|
79 |
-
"text/plain": [
|
80 |
-
"['0', ' Äldrefrågor']"
|
81 |
-
]
|
82 |
-
},
|
83 |
-
"execution_count": 49,
|
84 |
-
"metadata": {},
|
85 |
-
"output_type": "execute_result"
|
86 |
-
}
|
87 |
-
],
|
88 |
-
"source": [
|
89 |
-
"f=tl[0].split('.')\n",
|
90 |
-
"\n",
|
91 |
-
"f#int(f[0])"
|
92 |
-
]
|
93 |
-
},
|
94 |
-
{
|
95 |
-
"cell_type": "code",
|
96 |
-
"execution_count": 29,
|
97 |
-
"metadata": {},
|
98 |
-
"outputs": [
|
99 |
-
{
|
100 |
-
"ename": "NameError",
|
101 |
-
"evalue": "name 'str_topics_to_dict' is not defined",
|
102 |
-
"output_type": "error",
|
103 |
-
"traceback": [
|
104 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
105 |
-
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
106 |
-
"\u001b[0;32m<ipython-input-29-b05d9860dbcf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstr_topics_to_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
107 |
-
"\u001b[0;31mNameError\u001b[0m: name 'str_topics_to_dict' is not defined"
|
108 |
-
]
|
109 |
-
}
|
110 |
-
],
|
111 |
-
"source": []
|
112 |
-
},
|
113 |
-
{
|
114 |
-
"cell_type": "code",
|
115 |
-
"execution_count": 65,
|
116 |
-
"metadata": {},
|
117 |
-
"outputs": [],
|
118 |
-
"source": [
|
119 |
-
"\n",
|
120 |
-
"def str_topics_to_dict(topics):\n",
|
121 |
-
" topic_list=topics.split(\",\")\n",
|
122 |
-
" ind_topic_dict={}\n",
|
123 |
-
" for i inrange(len(topic_list)): \n",
|
124 |
-
" index_topic_list=\n",
|
125 |
-
" ind=index_topic_list[0]\n",
|
126 |
-
" just_topic=index_topic_list[1][1:]\n",
|
127 |
-
" ind_topic_dict[int(ind)]=just_topic\n",
|
128 |
-
" return ind_topic_dict"
|
129 |
-
]
|
130 |
-
},
|
131 |
-
{
|
132 |
-
"cell_type": "code",
|
133 |
-
"execution_count": 68,
|
134 |
-
"metadata": {},
|
135 |
-
"outputs": [
|
136 |
-
{
|
137 |
-
"data": {
|
138 |
-
"text/plain": [
|
139 |
-
"{0: 'Brottslighet',\n",
|
140 |
-
" 1: 'Miljö',\n",
|
141 |
-
" 2: 'Skola',\n",
|
142 |
-
" 3: 'Sjukvård',\n",
|
143 |
-
" 4: 'Militär',\n",
|
144 |
-
" 5: 'Invandring',\n",
|
145 |
-
" 6: 'Integration '}"
|
146 |
-
]
|
147 |
-
},
|
148 |
-
"execution_count": 68,
|
149 |
-
"metadata": {},
|
150 |
-
"output_type": "execute_result"
|
151 |
-
}
|
152 |
-
],
|
153 |
-
"source": [
|
154 |
-
"str_topics_to_dict(text)"
|
155 |
-
]
|
156 |
-
},
|
157 |
-
{
|
158 |
-
"cell_type": "code",
|
159 |
-
"execution_count": 109,
|
160 |
-
"metadata": {},
|
161 |
-
"outputs": [
|
162 |
-
{
|
163 |
-
"data": {
|
164 |
-
"text/plain": [
|
165 |
-
"' Brottslighet, Miljö, Skola, Sjukvård, Militär stöd, Invandring, Integration '"
|
166 |
-
]
|
167 |
-
},
|
168 |
-
"execution_count": 109,
|
169 |
-
"metadata": {},
|
170 |
-
"output_type": "execute_result"
|
171 |
-
}
|
172 |
-
],
|
173 |
-
"source": [
|
174 |
-
"\n",
|
175 |
-
"text=\"\\n\\n0. Brottslighet, 1. Miljö, 2. Skola, 3. Sjukvård, 4. Militär stöd, 5. Invandring, 6. Integration \"\n",
|
176 |
-
"text=re.sub(r\"(\\n+)\",\" \",text)\n",
|
177 |
-
"text=re.sub(\"(\\.)|\\d+\",\"\",text )\n",
|
178 |
-
"text"
|
179 |
-
]
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"cell_type": "code",
|
183 |
-
"execution_count": 100,
|
184 |
-
"metadata": {},
|
185 |
-
"outputs": [
|
186 |
-
{
|
187 |
-
"data": {
|
188 |
-
"text/plain": [
|
189 |
-
"[' Brottslighet',\n",
|
190 |
-
" ' Miljö',\n",
|
191 |
-
" ' Skola',\n",
|
192 |
-
" ' Sjukvård',\n",
|
193 |
-
" ' Militär stöd',\n",
|
194 |
-
" ' Invandring',\n",
|
195 |
-
" ' Integration ']"
|
196 |
-
]
|
197 |
-
},
|
198 |
-
"execution_count": 100,
|
199 |
-
"metadata": {},
|
200 |
-
"output_type": "execute_result"
|
201 |
-
}
|
202 |
-
],
|
203 |
-
"source": [
|
204 |
-
"text.split(\",\")"
|
205 |
-
]
|
206 |
-
},
|
207 |
-
{
|
208 |
-
"cell_type": "code",
|
209 |
-
"execution_count": 116,
|
210 |
-
"metadata": {},
|
211 |
-
"outputs": [],
|
212 |
-
"source": [
|
213 |
-
"import regex as re \n",
|
214 |
-
"def str_topics_to_dict(topics):\n",
|
215 |
-
" text=re.sub(r\"(\\n+)\",\" \",topics)\n",
|
216 |
-
" text=re.sub(\"(\\.)|\\d+\",\"\",topics )\n",
|
217 |
-
" topics=re.sub(r\"(\\n+)|(\\.)|\\d+\",\"\",topics)\n",
|
218 |
-
" topic_list=topics.split(\",\")\n",
|
219 |
-
" ind_topic_dict={}\n",
|
220 |
-
" for i in range(len(topic_list)): \n",
|
221 |
-
" ind=i\n",
|
222 |
-
" just_topic=topic_list[i]\n",
|
223 |
-
" ind_topic_dict[ind]=just_topic\n",
|
224 |
-
" return ind_topic_dict"
|
225 |
-
]
|
226 |
-
},
|
227 |
-
{
|
228 |
-
"cell_type": "code",
|
229 |
-
"execution_count": 117,
|
230 |
-
"metadata": {},
|
231 |
-
"outputs": [
|
232 |
-
{
|
233 |
-
"data": {
|
234 |
-
"text/plain": [
|
235 |
-
"{0: ' Brottslighet',\n",
|
236 |
-
" 1: ' Miljö',\n",
|
237 |
-
" 2: ' Skola',\n",
|
238 |
-
" 3: ' Sjukvård',\n",
|
239 |
-
" 4: ' Militär stöd',\n",
|
240 |
-
" 5: ' Invandring',\n",
|
241 |
-
" 6: ' Integration '}"
|
242 |
-
]
|
243 |
-
},
|
244 |
-
"execution_count": 117,
|
245 |
-
"metadata": {},
|
246 |
-
"output_type": "execute_result"
|
247 |
-
}
|
248 |
-
],
|
249 |
-
"source": [
|
250 |
-
"str_topics_to_dict(text)"
|
251 |
-
]
|
252 |
-
},
|
253 |
-
{
|
254 |
-
"cell_type": "code",
|
255 |
-
"execution_count": null,
|
256 |
-
"metadata": {},
|
257 |
-
"outputs": [],
|
258 |
-
"source": []
|
259 |
-
}
|
260 |
-
],
|
261 |
-
"metadata": {
|
262 |
-
"kernelspec": {
|
263 |
-
"display_name": "Python 3",
|
264 |
-
"language": "python",
|
265 |
-
"name": "python3"
|
266 |
-
},
|
267 |
-
"language_info": {
|
268 |
-
"codemirror_mode": {
|
269 |
-
"name": "ipython",
|
270 |
-
"version": 3
|
271 |
-
},
|
272 |
-
"file_extension": ".py",
|
273 |
-
"mimetype": "text/x-python",
|
274 |
-
"name": "python",
|
275 |
-
"nbconvert_exporter": "python",
|
276 |
-
"pygments_lexer": "ipython3",
|
277 |
-
"version": "3.7.3"
|
278 |
-
}
|
279 |
-
},
|
280 |
-
"nbformat": 4,
|
281 |
-
"nbformat_minor": 2
|
282 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/automate.py
DELETED
@@ -1,65 +0,0 @@
|
|
1 |
-
import twint
|
2 |
-
import schedule
|
3 |
-
import time
|
4 |
-
|
5 |
-
# you can change the name of each "job" after "def" if you'd like.
|
6 |
-
def jobone():
|
7 |
-
print ("Fetching Tweets")
|
8 |
-
c = twint.Config()
|
9 |
-
# choose username (optional)
|
10 |
-
c.Username = "insert username here"
|
11 |
-
# choose search term (optional)
|
12 |
-
c.Search = "insert search term here"
|
13 |
-
# choose beginning time (narrow results)
|
14 |
-
c.Since = "2018-01-01"
|
15 |
-
# set limit on total tweets
|
16 |
-
c.Limit = 1000
|
17 |
-
# no idea, but makes the csv format properly
|
18 |
-
c.Store_csv = True
|
19 |
-
# format of the csv
|
20 |
-
c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"]
|
21 |
-
# change the name of the csv file
|
22 |
-
c.Output = "filename.csv"
|
23 |
-
twint.run.Search(c)
|
24 |
-
|
25 |
-
def jobtwo():
|
26 |
-
print ("Fetching Tweets")
|
27 |
-
c = twint.Config()
|
28 |
-
# choose username (optional)
|
29 |
-
c.Username = "insert username here"
|
30 |
-
# choose search term (optional)
|
31 |
-
c.Search = "insert search term here"
|
32 |
-
# choose beginning time (narrow results)
|
33 |
-
c.Since = "2018-01-01"
|
34 |
-
# set limit on total tweets
|
35 |
-
c.Limit = 1000
|
36 |
-
# no idea, but makes the csv format properly
|
37 |
-
c.Store_csv = True
|
38 |
-
# format of the csv
|
39 |
-
c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"]
|
40 |
-
# change the name of the csv file
|
41 |
-
c.Output = "filename2.csv"
|
42 |
-
twint.run.Search(c)
|
43 |
-
|
44 |
-
# run once when you start the program
|
45 |
-
|
46 |
-
jobone()
|
47 |
-
jobtwo()
|
48 |
-
|
49 |
-
# run every minute(s), hour, day at, day of the week, day of the week and time. Use "#" to block out which ones you don't want to use. Remove it to active. Also, replace "jobone" and "jobtwo" with your new function names (if applicable)
|
50 |
-
|
51 |
-
# schedule.every(1).minutes.do(jobone)
|
52 |
-
schedule.every().hour.do(jobone)
|
53 |
-
# schedule.every().day.at("10:30").do(jobone)
|
54 |
-
# schedule.every().monday.do(jobone)
|
55 |
-
# schedule.every().wednesday.at("13:15").do(jobone)
|
56 |
-
|
57 |
-
# schedule.every(1).minutes.do(jobtwo)
|
58 |
-
schedule.every().hour.do(jobtwo)
|
59 |
-
# schedule.every().day.at("10:30").do(jobtwo)
|
60 |
-
# schedule.every().monday.do(jobtwo)
|
61 |
-
# schedule.every().wednesday.at("13:15").do(jobtwo)
|
62 |
-
|
63 |
-
while True:
|
64 |
-
schedule.run_pending()
|
65 |
-
time.sleep(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/elasticsearch/README.md
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
# Elasticsearch How-To
|
2 |
-
|
3 |
-
![dashboard](https://i.imgur.com/BEbtdo5.png)
|
4 |
-
|
5 |
-
Please read the Wiki [here](https://github.com/twintproject/twint/wiki/Elasticsearch)
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/scrape.py
DELETED
@@ -1,102 +0,0 @@
|
|
1 |
-
import sys
|
2 |
-
import io
|
3 |
-
import time
|
4 |
-
import asyncio
|
5 |
-
import os
|
6 |
-
from tkinter import EXCEPTION
|
7 |
-
from numpy import not_equal
|
8 |
-
loop = asyncio.get_event_loop()
|
9 |
-
loop.is_running()
|
10 |
-
import twint
|
11 |
-
import nest_asyncio
|
12 |
-
nest_asyncio.apply()
|
13 |
-
from datetime import date
|
14 |
-
class scraper:
|
15 |
-
def get_tweets(search_str, from_date="2006-07-01", to_date=str(date.today()), num_tweets=10,u_or_s='s', acceptable_range=10):
|
16 |
-
|
17 |
-
if (type(from_date) or type("str")) is not type("str"):
|
18 |
-
print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
|
19 |
-
raise EXCEPTION("Incorrect date type Exception!")
|
20 |
-
|
21 |
-
time_out= time.time()+2*60
|
22 |
-
_dict={}
|
23 |
-
c=twint.Config()
|
24 |
-
if u_or_s.lower() =="u":
|
25 |
-
c.Search = "from:@"+search_str # topic
|
26 |
-
else:
|
27 |
-
c.Search = search_str # topic
|
28 |
-
c.Pandas = True
|
29 |
-
num_tweets_and_replies=num_tweets
|
30 |
-
c.Count=True
|
31 |
-
for j in range(1,5):
|
32 |
-
c.Limit = num_tweets_and_replies
|
33 |
-
c.Since = from_date
|
34 |
-
c.Until = to_date
|
35 |
-
c.Hide_output =True
|
36 |
-
old_stdout = sys.stdout
|
37 |
-
new_stdout = io.StringIO()
|
38 |
-
sys.stdout = new_stdout
|
39 |
-
twint.run.Search(c)
|
40 |
-
output = new_stdout.getvalue()
|
41 |
-
sys.stdout = old_stdout
|
42 |
-
print(output[0:-2])
|
43 |
-
tweet_info=twint.output.panda.Tweets_df
|
44 |
-
|
45 |
-
t_count=0
|
46 |
-
try:
|
47 |
-
_keys=tweet_info["id"]
|
48 |
-
#tweet infor is a dataframe with fallowing columns
|
49 |
-
'''Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
|
50 |
-
'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
|
51 |
-
'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
|
52 |
-
'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
|
53 |
-
'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
|
54 |
-
'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
|
55 |
-
'trans_dest'],
|
56 |
-
dtype='object')'''
|
57 |
-
|
58 |
-
for i in range (len(_keys)):
|
59 |
-
if _keys[i] in _dict.keys() or tweet_info["tweet"][i].startswith("@"):
|
60 |
-
pass
|
61 |
-
else:
|
62 |
-
_dict[int(_keys[i])] = {"tweet": tweet_info["tweet"][i],
|
63 |
-
"date" :tweet_info["date"][i],
|
64 |
-
"nlikes": tweet_info["nlikes"][i],
|
65 |
-
"nreplies":tweet_info["nreplies"][i] ,
|
66 |
-
"nretweets": tweet_info["nretweets"][i],"topic":""}
|
67 |
-
if len(list(_dict.keys()))==num_tweets:
|
68 |
-
break
|
69 |
-
except:
|
70 |
-
pass
|
71 |
-
print(len(list(_dict.keys())), " of them are Tweets")
|
72 |
-
if (num_tweets-len(list(_dict.keys())))< acceptable_range:
|
73 |
-
return _dict
|
74 |
-
if len(list(_dict.keys())) < num_tweets:
|
75 |
-
num_tweets_and_replies= num_tweets_and_replies+100*3**j
|
76 |
-
else:
|
77 |
-
break
|
78 |
-
if time_out <time.time():
|
79 |
-
break
|
80 |
-
if output.startswith("[!] No more data!"):
|
81 |
-
break
|
82 |
-
return _dict
|
83 |
-
|
84 |
-
def string_search_user_tweets(user_name,search_str ,from_date="2006-07-01", to_date=str(date.today()), num_tweets=10):
|
85 |
-
c=twint.Config()
|
86 |
-
c.Username =user_name
|
87 |
-
c.Search = search_str # topic
|
88 |
-
c.Pandas = True
|
89 |
-
num_tweets_and_replies=num_tweets
|
90 |
-
c.Count=True
|
91 |
-
c.Limit = num_tweets_and_replies
|
92 |
-
c.Since = from_date
|
93 |
-
c.Until = to_date
|
94 |
-
c.Hide_output =True
|
95 |
-
twint.run.Search(c)
|
96 |
-
return twint.output.panda.Tweets_df
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/scrape__init__.py
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
def scraper_libs():
|
2 |
-
import sys
|
3 |
-
import io
|
4 |
-
import time
|
5 |
-
import asyncio
|
6 |
-
import os
|
7 |
-
from tkinter import EXCEPTION
|
8 |
-
from numpy import not_equal
|
9 |
-
loop = asyncio.get_event_loop()
|
10 |
-
loop.is_running()
|
11 |
-
import twint
|
12 |
-
import nest_asyncio
|
13 |
-
nest_asyncio.apply()
|
14 |
-
from datetime import date
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/setup.py
DELETED
@@ -1,65 +0,0 @@
|
|
1 |
-
#!/usr/bin/python3
|
2 |
-
from setuptools import setup
|
3 |
-
import io
|
4 |
-
import os
|
5 |
-
|
6 |
-
# Package meta-data
|
7 |
-
NAME = 'twint'
|
8 |
-
DESCRIPTION = 'An advanced Twitter scraping & OSINT tool.'
|
9 |
-
URL = 'https://github.com/twintproject/twint'
|
10 |
-
EMAIL = 'codyzacharias@pm.me'
|
11 |
-
AUTHOR = 'Cody Zacharias'
|
12 |
-
REQUIRES_PYTHON = '>=3.6.0'
|
13 |
-
VERSION = None
|
14 |
-
|
15 |
-
# Packages required
|
16 |
-
REQUIRED = [
|
17 |
-
'aiohttp', 'aiodns', 'beautifulsoup4', 'cchardet', 'dataclasses',
|
18 |
-
'elasticsearch', 'pysocks', 'pandas', 'aiohttp_socks',
|
19 |
-
'schedule', 'geopy', 'fake-useragent', 'googletransx'
|
20 |
-
]
|
21 |
-
|
22 |
-
here = os.path.abspath(os.path.dirname(__file__))
|
23 |
-
|
24 |
-
with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
|
25 |
-
long_description = '\n' + f.read()
|
26 |
-
|
27 |
-
# Load the package's __version__.py
|
28 |
-
about = {}
|
29 |
-
if not VERSION:
|
30 |
-
with open(os.path.join(here, NAME, '__version__.py')) as f:
|
31 |
-
exec(f.read(), about)
|
32 |
-
else:
|
33 |
-
about['__version__'] = VERSION
|
34 |
-
|
35 |
-
setup(
|
36 |
-
name=NAME,
|
37 |
-
version=about['__version__'],
|
38 |
-
description=DESCRIPTION,
|
39 |
-
long_description=long_description,
|
40 |
-
long_description_content_type="text/markdown",
|
41 |
-
author=AUTHOR,
|
42 |
-
author_email=EMAIL,
|
43 |
-
python_requires=REQUIRES_PYTHON,
|
44 |
-
url=URL,
|
45 |
-
packages=['twint', 'twint.storage'],
|
46 |
-
entry_points={
|
47 |
-
'console_scripts': [
|
48 |
-
'twint = twint.cli:run_as_command',
|
49 |
-
],
|
50 |
-
},
|
51 |
-
install_requires=REQUIRED,
|
52 |
-
dependency_links=[
|
53 |
-
'git+https://github.com/x0rzkov/py-googletrans#egg=googletrans'
|
54 |
-
],
|
55 |
-
license='MIT',
|
56 |
-
classifiers=[
|
57 |
-
'License :: OSI Approved :: MIT License',
|
58 |
-
'Programming Language :: Python',
|
59 |
-
'Programming Language :: Python :: 3',
|
60 |
-
'Programming Language :: Python :: 3.6',
|
61 |
-
'Programming Language :: Python :: 3.7',
|
62 |
-
'Programming Language :: Python :: 3.8',
|
63 |
-
'Programming Language :: Python :: Implementation :: CPython',
|
64 |
-
],
|
65 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/test.py
DELETED
@@ -1,92 +0,0 @@
|
|
1 |
-
import twint
|
2 |
-
import os
|
3 |
-
|
4 |
-
'''
|
5 |
-
Test.py - Testing TWINT to make sure everything works.
|
6 |
-
'''
|
7 |
-
|
8 |
-
|
9 |
-
def test_reg(c, run):
|
10 |
-
print("[+] Beginning vanilla test in {}".format(str(run)))
|
11 |
-
run(c)
|
12 |
-
|
13 |
-
|
14 |
-
def test_db(c, run):
|
15 |
-
print("[+] Beginning DB test in {}".format(str(run)))
|
16 |
-
c.Database = "test_twint.db"
|
17 |
-
run(c)
|
18 |
-
|
19 |
-
|
20 |
-
def custom(c, run, _type):
|
21 |
-
print("[+] Beginning custom {} test in {}".format(_type, str(run)))
|
22 |
-
c.Custom['tweet'] = ["id", "username"]
|
23 |
-
c.Custom['user'] = ["id", "username"]
|
24 |
-
run(c)
|
25 |
-
|
26 |
-
|
27 |
-
def test_json(c, run):
|
28 |
-
c.Store_json = True
|
29 |
-
c.Output = "test_twint.json"
|
30 |
-
custom(c, run, "JSON")
|
31 |
-
print("[+] Beginning JSON test in {}".format(str(run)))
|
32 |
-
run(c)
|
33 |
-
|
34 |
-
|
35 |
-
def test_csv(c, run):
|
36 |
-
c.Store_csv = True
|
37 |
-
c.Output = "test_twint.csv"
|
38 |
-
custom(c, run, "CSV")
|
39 |
-
print("[+] Beginning CSV test in {}".format(str(run)))
|
40 |
-
run(c)
|
41 |
-
|
42 |
-
|
43 |
-
def main():
|
44 |
-
c = twint.Config()
|
45 |
-
c.Username = "verified"
|
46 |
-
c.Limit = 20
|
47 |
-
c.Store_object = True
|
48 |
-
|
49 |
-
# Separate objects are necessary.
|
50 |
-
|
51 |
-
f = twint.Config()
|
52 |
-
f.Username = "verified"
|
53 |
-
f.Limit = 20
|
54 |
-
f.Store_object = True
|
55 |
-
f.User_full = True
|
56 |
-
|
57 |
-
runs = [
|
58 |
-
twint.run.Profile, # this doesn't
|
59 |
-
twint.run.Search, # this works
|
60 |
-
twint.run.Following,
|
61 |
-
twint.run.Followers,
|
62 |
-
twint.run.Favorites,
|
63 |
-
]
|
64 |
-
|
65 |
-
tests = [test_reg, test_json, test_csv, test_db]
|
66 |
-
|
67 |
-
# Something breaks if we don't split these up
|
68 |
-
|
69 |
-
for run in runs[:3]:
|
70 |
-
if run == twint.run.Search:
|
71 |
-
c.Since = "2012-1-1 20:30:22"
|
72 |
-
c.Until = "2017-1-1"
|
73 |
-
else:
|
74 |
-
c.Since = ""
|
75 |
-
c.Until = ""
|
76 |
-
|
77 |
-
for test in tests:
|
78 |
-
test(c, run)
|
79 |
-
|
80 |
-
for run in runs[3:]:
|
81 |
-
for test in tests:
|
82 |
-
test(f, run)
|
83 |
-
|
84 |
-
files = ["test_twint.db", "test_twint.json", "test_twint.csv"]
|
85 |
-
for _file in files:
|
86 |
-
os.remove(_file)
|
87 |
-
|
88 |
-
print("[+] Testing complete!")
|
89 |
-
|
90 |
-
|
91 |
-
if __name__ == '__main__':
|
92 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/__init__.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
'''
|
2 |
-
TWINT - Twitter Intelligence Tool (formerly known as Tweep).
|
3 |
-
|
4 |
-
See wiki on Github for in-depth details.
|
5 |
-
https://github.com/twintproject/twint/wiki
|
6 |
-
|
7 |
-
Licensed under MIT License
|
8 |
-
Copyright (c) 2018 Cody Zacharias
|
9 |
-
'''
|
10 |
-
import logging, os
|
11 |
-
|
12 |
-
from .config import Config
|
13 |
-
from .__version__ import __version__
|
14 |
-
from . import run
|
15 |
-
|
16 |
-
_levels = {
|
17 |
-
'info': logging.INFO,
|
18 |
-
'debug': logging.DEBUG
|
19 |
-
}
|
20 |
-
|
21 |
-
_level = os.getenv('TWINT_DEBUG', 'info')
|
22 |
-
_logLevel = _levels[_level]
|
23 |
-
|
24 |
-
if _level == "debug":
|
25 |
-
logger = logging.getLogger()
|
26 |
-
_output_fn = 'twint.log'
|
27 |
-
logger.setLevel(_logLevel)
|
28 |
-
formatter = logging.Formatter('%(levelname)s:%(asctime)s:%(name)s:%(message)s')
|
29 |
-
fileHandler = logging.FileHandler(_output_fn)
|
30 |
-
fileHandler.setLevel(_logLevel)
|
31 |
-
fileHandler.setFormatter(formatter)
|
32 |
-
logger.addHandler(fileHandler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/__version__.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
VERSION = (2, 1, 21)
|
2 |
-
|
3 |
-
__version__ = '.'.join(map(str, VERSION))
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/cli.py
DELETED
@@ -1,342 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
'''
|
3 |
-
Twint.py - Twitter Intelligence Tool (formerly known as Tweep).
|
4 |
-
|
5 |
-
See wiki on Github for in-depth details.
|
6 |
-
https://github.com/twintproject/twint/wiki
|
7 |
-
|
8 |
-
Licensed under MIT License
|
9 |
-
Copyright (c) 2018 The Twint Project
|
10 |
-
'''
|
11 |
-
import sys
|
12 |
-
import os
|
13 |
-
import argparse
|
14 |
-
|
15 |
-
from . import run
|
16 |
-
from . import config
|
17 |
-
from . import storage
|
18 |
-
|
19 |
-
|
20 |
-
def error(_error, message):
|
21 |
-
""" Print errors to stdout
|
22 |
-
"""
|
23 |
-
print("[-] {}: {}".format(_error, message))
|
24 |
-
sys.exit(0)
|
25 |
-
|
26 |
-
|
27 |
-
def check(args):
|
28 |
-
""" Error checking
|
29 |
-
"""
|
30 |
-
if args.username is not None or args.userlist or args.members_list:
|
31 |
-
if args.verified:
|
32 |
-
error("Contradicting Args",
|
33 |
-
"Please use --verified in combination with -s.")
|
34 |
-
if args.userid:
|
35 |
-
error("Contradicting Args",
|
36 |
-
"--userid and -u cannot be used together.")
|
37 |
-
if args.all:
|
38 |
-
error("Contradicting Args",
|
39 |
-
"--all and -u cannot be used together.")
|
40 |
-
elif args.search and args.timeline:
|
41 |
-
error("Contradicting Args",
|
42 |
-
"--s and --tl cannot be used together.")
|
43 |
-
elif args.timeline and not args.username:
|
44 |
-
error("Error", "-tl cannot be used without -u.")
|
45 |
-
elif args.search is None:
|
46 |
-
if args.custom_query is not None:
|
47 |
-
pass
|
48 |
-
elif (args.geo or args.near) is None and not (args.all or args.userid):
|
49 |
-
error("Error", "Please use at least -u, -s, -g or --near.")
|
50 |
-
elif args.all and args.userid:
|
51 |
-
error("Contradicting Args",
|
52 |
-
"--all and --userid cannot be used together")
|
53 |
-
if args.output is None:
|
54 |
-
if args.csv:
|
55 |
-
error("Error", "Please specify an output file (Example: -o file.csv).")
|
56 |
-
elif args.json:
|
57 |
-
error("Error", "Please specify an output file (Example: -o file.json).")
|
58 |
-
if args.backoff_exponent <= 0:
|
59 |
-
error("Error", "Please specifiy a positive value for backoff_exponent")
|
60 |
-
if args.min_wait_time < 0:
|
61 |
-
error("Error", "Please specifiy a non negative value for min_wait_time")
|
62 |
-
|
63 |
-
|
64 |
-
def loadUserList(ul, _type):
|
65 |
-
""" Concatenate users
|
66 |
-
"""
|
67 |
-
if os.path.exists(os.path.abspath(ul)):
|
68 |
-
userlist = open(os.path.abspath(ul), "r").read().splitlines()
|
69 |
-
else:
|
70 |
-
userlist = ul.split(",")
|
71 |
-
if _type == "search":
|
72 |
-
un = ""
|
73 |
-
for user in userlist:
|
74 |
-
un += "%20OR%20from%3A" + user
|
75 |
-
return un[15:]
|
76 |
-
return userlist
|
77 |
-
|
78 |
-
|
79 |
-
def initialize(args):
|
80 |
-
""" Set default values for config from args
|
81 |
-
"""
|
82 |
-
c = config.Config()
|
83 |
-
c.Username = args.username
|
84 |
-
c.User_id = args.userid
|
85 |
-
c.Search = args.search
|
86 |
-
c.Geo = args.geo
|
87 |
-
c.Location = args.location
|
88 |
-
c.Near = args.near
|
89 |
-
c.Lang = args.lang
|
90 |
-
c.Output = args.output
|
91 |
-
c.Elasticsearch = args.elasticsearch
|
92 |
-
c.Year = args.year
|
93 |
-
c.Since = args.since
|
94 |
-
c.Until = args.until
|
95 |
-
c.Email = args.email
|
96 |
-
c.Phone = args.phone
|
97 |
-
c.Verified = args.verified
|
98 |
-
c.Store_csv = args.csv
|
99 |
-
c.Tabs = args.tabs
|
100 |
-
c.Store_json = args.json
|
101 |
-
c.Show_hashtags = args.hashtags
|
102 |
-
c.Show_cashtags = args.cashtags
|
103 |
-
c.Limit = args.limit
|
104 |
-
c.Count = args.count
|
105 |
-
c.Stats = args.stats
|
106 |
-
c.Database = args.database
|
107 |
-
c.To = args.to
|
108 |
-
c.All = args.all
|
109 |
-
c.Essid = args.essid
|
110 |
-
c.Format = args.format
|
111 |
-
c.User_full = args.user_full
|
112 |
-
# c.Profile_full = args.profile_full
|
113 |
-
c.Pandas_type = args.pandas_type
|
114 |
-
c.Index_tweets = args.index_tweets
|
115 |
-
c.Index_follow = args.index_follow
|
116 |
-
c.Index_users = args.index_users
|
117 |
-
c.Debug = args.debug
|
118 |
-
c.Resume = args.resume
|
119 |
-
c.Images = args.images
|
120 |
-
c.Videos = args.videos
|
121 |
-
c.Media = args.media
|
122 |
-
c.Replies = args.replies
|
123 |
-
c.Pandas_clean = args.pandas_clean
|
124 |
-
c.Proxy_host = args.proxy_host
|
125 |
-
c.Proxy_port = args.proxy_port
|
126 |
-
c.Proxy_type = args.proxy_type
|
127 |
-
c.Tor_control_port = args.tor_control_port
|
128 |
-
c.Tor_control_password = args.tor_control_password
|
129 |
-
c.Retweets = args.retweets
|
130 |
-
c.Custom_query = args.custom_query
|
131 |
-
c.Popular_tweets = args.popular_tweets
|
132 |
-
c.Skip_certs = args.skip_certs
|
133 |
-
c.Hide_output = args.hide_output
|
134 |
-
c.Native_retweets = args.native_retweets
|
135 |
-
c.Min_likes = args.min_likes
|
136 |
-
c.Min_retweets = args.min_retweets
|
137 |
-
c.Min_replies = args.min_replies
|
138 |
-
c.Links = args.links
|
139 |
-
c.Source = args.source
|
140 |
-
c.Members_list = args.members_list
|
141 |
-
c.Filter_retweets = args.filter_retweets
|
142 |
-
c.Translate = args.translate
|
143 |
-
c.TranslateDest = args.translate_dest
|
144 |
-
c.Backoff_exponent = args.backoff_exponent
|
145 |
-
c.Min_wait_time = args.min_wait_time
|
146 |
-
return c
|
147 |
-
|
148 |
-
|
149 |
-
def options():
|
150 |
-
""" Parse arguments
|
151 |
-
"""
|
152 |
-
ap = argparse.ArgumentParser(prog="twint",
|
153 |
-
usage="python3 %(prog)s [options]",
|
154 |
-
description="TWINT - An Advanced Twitter Scraping Tool.")
|
155 |
-
ap.add_argument("-u", "--username", help="User's Tweets you want to scrape.")
|
156 |
-
ap.add_argument("-s", "--search", help="Search for Tweets containing this word or phrase.")
|
157 |
-
ap.add_argument("-g", "--geo", help="Search for geocoded Tweets.")
|
158 |
-
ap.add_argument("--near", help="Near a specified city.")
|
159 |
-
ap.add_argument("--location", help="Show user's location (Experimental).", action="store_true")
|
160 |
-
ap.add_argument("-l", "--lang", help="Search for Tweets in a specific language.")
|
161 |
-
ap.add_argument("-o", "--output", help="Save output to a file.")
|
162 |
-
ap.add_argument("-es", "--elasticsearch", help="Index to Elasticsearch.")
|
163 |
-
ap.add_argument("--year", help="Filter Tweets before specified year.")
|
164 |
-
ap.add_argument("--since", help="Filter Tweets sent since date (Example: \"2017-12-27 20:30:15\" or 2017-12-27).",
|
165 |
-
metavar="DATE")
|
166 |
-
ap.add_argument("--until", help="Filter Tweets sent until date (Example: \"2017-12-27 20:30:15\" or 2017-12-27).",
|
167 |
-
metavar="DATE")
|
168 |
-
ap.add_argument("--email", help="Filter Tweets that might have email addresses", action="store_true")
|
169 |
-
ap.add_argument("--phone", help="Filter Tweets that might have phone numbers", action="store_true")
|
170 |
-
ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).",
|
171 |
-
action="store_true")
|
172 |
-
ap.add_argument("--csv", help="Write as .csv file.", action="store_true")
|
173 |
-
ap.add_argument("--tabs", help="Separate CSV fields with tab characters, not commas.", action="store_true")
|
174 |
-
ap.add_argument("--json", help="Write as .json file", action="store_true")
|
175 |
-
ap.add_argument("--hashtags", help="Output hashtags in seperate column.", action="store_true")
|
176 |
-
ap.add_argument("--cashtags", help="Output cashtags in seperate column.", action="store_true")
|
177 |
-
ap.add_argument("--userid", help="Twitter user id.")
|
178 |
-
ap.add_argument("--limit", help="Number of Tweets to pull (Increments of 20).")
|
179 |
-
ap.add_argument("--count", help="Display number of Tweets scraped at the end of session.",
|
180 |
-
action="store_true")
|
181 |
-
ap.add_argument("--stats", help="Show number of replies, retweets, and likes.",
|
182 |
-
action="store_true")
|
183 |
-
ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 database.")
|
184 |
-
ap.add_argument("--to", help="Search Tweets to a user.", metavar="USERNAME")
|
185 |
-
ap.add_argument("--all", help="Search all Tweets associated with a user.", metavar="USERNAME")
|
186 |
-
ap.add_argument("--followers", help="Scrape a person's followers.", action="store_true")
|
187 |
-
ap.add_argument("--following", help="Scrape a person's follows", action="store_true")
|
188 |
-
ap.add_argument("--favorites", help="Scrape Tweets a user has liked.", action="store_true")
|
189 |
-
ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.")
|
190 |
-
ap.add_argument("--proxy-host", help="Proxy hostname or IP.")
|
191 |
-
ap.add_argument("--proxy-port", help="The port of the proxy server.")
|
192 |
-
ap.add_argument("--tor-control-port", help="If proxy-host is set to tor, this is the control port", default=9051)
|
193 |
-
ap.add_argument("--tor-control-password",
|
194 |
-
help="If proxy-host is set to tor, this is the password for the control port",
|
195 |
-
default="my_password")
|
196 |
-
ap.add_argument("--essid",
|
197 |
-
help="Elasticsearch Session ID, use this to differentiate scraping sessions.",
|
198 |
-
nargs="?", default="")
|
199 |
-
ap.add_argument("--userlist", help="Userlist from list or file.")
|
200 |
-
ap.add_argument("--retweets",
|
201 |
-
help="Include user's Retweets (Warning: limited).",
|
202 |
-
action="store_true")
|
203 |
-
ap.add_argument("--format", help="Custom output format (See wiki for details).")
|
204 |
-
ap.add_argument("--user-full",
|
205 |
-
help="Collect all user information (Use with followers or following only).",
|
206 |
-
action="store_true")
|
207 |
-
# I am removing this this feature for the time being, because it is no longer required, default method will do this
|
208 |
-
# ap.add_argument("--profile-full",
|
209 |
-
# help="Slow, but effective method of collecting a user's Tweets and RT.",
|
210 |
-
# action="store_true")
|
211 |
-
ap.add_argument(
|
212 |
-
"-tl",
|
213 |
-
"--timeline",
|
214 |
-
help="Collects every tweet from a User's Timeline. (Tweets, RTs & Replies)",
|
215 |
-
action="store_true",
|
216 |
-
)
|
217 |
-
ap.add_argument("--translate",
|
218 |
-
help="Get tweets translated by Google Translate.",
|
219 |
-
action="store_true")
|
220 |
-
ap.add_argument("--translate-dest", help="Translate tweet to language (ISO2).",
|
221 |
-
default="en")
|
222 |
-
ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
|
223 |
-
ap.add_argument("--pandas-type",
|
224 |
-
help="Specify HDF5 or Pickle (HDF5 as default)", nargs="?", default="HDF5")
|
225 |
-
ap.add_argument("-it", "--index-tweets",
|
226 |
-
help="Custom Elasticsearch Index name for Tweets.", nargs="?", default="twinttweets")
|
227 |
-
ap.add_argument("-if", "--index-follow",
|
228 |
-
help="Custom Elasticsearch Index name for Follows.",
|
229 |
-
nargs="?", default="twintgraph")
|
230 |
-
ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.",
|
231 |
-
nargs="?", default="twintuser")
|
232 |
-
ap.add_argument("--debug",
|
233 |
-
help="Store information in debug logs", action="store_true")
|
234 |
-
ap.add_argument("--resume", help="Resume from Tweet ID.", metavar="TWEET_ID")
|
235 |
-
ap.add_argument("--videos", help="Display only Tweets with videos.", action="store_true")
|
236 |
-
ap.add_argument("--images", help="Display only Tweets with images.", action="store_true")
|
237 |
-
ap.add_argument("--media",
|
238 |
-
help="Display Tweets with only images or videos.", action="store_true")
|
239 |
-
ap.add_argument("--replies", help="Display replies to a subject.", action="store_true")
|
240 |
-
ap.add_argument("-pc", "--pandas-clean",
|
241 |
-
help="Automatically clean Pandas dataframe at every scrape.")
|
242 |
-
ap.add_argument("-cq", "--custom-query", help="Custom search query.")
|
243 |
-
ap.add_argument("-pt", "--popular-tweets", help="Scrape popular tweets instead of recent ones.",
|
244 |
-
action="store_true")
|
245 |
-
ap.add_argument("-sc", "--skip-certs", help="Skip certs verification, useful for SSC.", action="store_false")
|
246 |
-
ap.add_argument("-ho", "--hide-output", help="Hide output, no tweets will be displayed.", action="store_true")
|
247 |
-
ap.add_argument("-nr", "--native-retweets", help="Filter the results for retweets only.", action="store_true")
|
248 |
-
ap.add_argument("--min-likes", help="Filter the tweets by minimum number of likes.")
|
249 |
-
ap.add_argument("--min-retweets", help="Filter the tweets by minimum number of retweets.")
|
250 |
-
ap.add_argument("--min-replies", help="Filter the tweets by minimum number of replies.")
|
251 |
-
ap.add_argument("--links", help="Include or exclude tweets containing one o more links. If not specified" +
|
252 |
-
" you will get both tweets that might contain links or not.")
|
253 |
-
ap.add_argument("--source", help="Filter the tweets for specific source client.")
|
254 |
-
ap.add_argument("--members-list", help="Filter the tweets sent by users in a given list.")
|
255 |
-
ap.add_argument("-fr", "--filter-retweets", help="Exclude retweets from the results.", action="store_true")
|
256 |
-
ap.add_argument("--backoff-exponent", help="Specify a exponent for the polynomial backoff in case of errors.",
|
257 |
-
type=float, default=3.0)
|
258 |
-
ap.add_argument("--min-wait-time", type=float, default=15,
|
259 |
-
help="specifiy a minimum wait time in case of scraping limit error. This value will be adjusted by twint if the value provided does not satisfy the limits constraints")
|
260 |
-
args = ap.parse_args()
|
261 |
-
|
262 |
-
return args
|
263 |
-
|
264 |
-
|
265 |
-
def main():
|
266 |
-
""" Main
|
267 |
-
"""
|
268 |
-
args = options()
|
269 |
-
check(args)
|
270 |
-
|
271 |
-
if args.pandas_clean:
|
272 |
-
storage.panda.clean()
|
273 |
-
|
274 |
-
c = initialize(args)
|
275 |
-
|
276 |
-
if args.userlist:
|
277 |
-
c.Query = loadUserList(args.userlist, "search")
|
278 |
-
|
279 |
-
if args.pandas_clean:
|
280 |
-
storage.panda.clean()
|
281 |
-
|
282 |
-
if args.favorites:
|
283 |
-
if args.userlist:
|
284 |
-
_userlist = loadUserList(args.userlist, "favorites")
|
285 |
-
for _user in _userlist:
|
286 |
-
args.username = _user
|
287 |
-
c = initialize(args)
|
288 |
-
run.Favorites(c)
|
289 |
-
else:
|
290 |
-
run.Favorites(c)
|
291 |
-
elif args.following:
|
292 |
-
if args.userlist:
|
293 |
-
_userlist = loadUserList(args.userlist, "following")
|
294 |
-
for _user in _userlist:
|
295 |
-
args.username = _user
|
296 |
-
c = initialize(args)
|
297 |
-
run.Following(c)
|
298 |
-
else:
|
299 |
-
run.Following(c)
|
300 |
-
elif args.followers:
|
301 |
-
if args.userlist:
|
302 |
-
_userlist = loadUserList(args.userlist, "followers")
|
303 |
-
for _user in _userlist:
|
304 |
-
args.username = _user
|
305 |
-
c = initialize(args)
|
306 |
-
run.Followers(c)
|
307 |
-
else:
|
308 |
-
run.Followers(c)
|
309 |
-
elif args.retweets: # or args.profile_full:
|
310 |
-
if args.userlist:
|
311 |
-
_userlist = loadUserList(args.userlist, "profile")
|
312 |
-
for _user in _userlist:
|
313 |
-
args.username = _user
|
314 |
-
c = initialize(args)
|
315 |
-
run.Profile(c)
|
316 |
-
else:
|
317 |
-
run.Profile(c)
|
318 |
-
elif args.user_full:
|
319 |
-
if args.userlist:
|
320 |
-
_userlist = loadUserList(args.userlist, "userlist")
|
321 |
-
for _user in _userlist:
|
322 |
-
args.username = _user
|
323 |
-
c = initialize(args)
|
324 |
-
run.Lookup(c)
|
325 |
-
else:
|
326 |
-
run.Lookup(c)
|
327 |
-
elif args.timeline:
|
328 |
-
run.Profile(c)
|
329 |
-
else:
|
330 |
-
run.Search(c)
|
331 |
-
|
332 |
-
|
333 |
-
def run_as_command():
|
334 |
-
if(sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 6)):
|
335 |
-
print("[-] TWINT requires Python version 3.6+.")
|
336 |
-
sys.exit(0)
|
337 |
-
|
338 |
-
main()
|
339 |
-
|
340 |
-
|
341 |
-
if __name__ == '__main__':
|
342 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/config.py
DELETED
@@ -1,87 +0,0 @@
|
|
1 |
-
from dataclasses import dataclass
|
2 |
-
from typing import Optional
|
3 |
-
|
4 |
-
@dataclass
|
5 |
-
class Config:
|
6 |
-
Username: Optional[str] = None
|
7 |
-
User_id: Optional[str] = None
|
8 |
-
Search: Optional[str] = None
|
9 |
-
Lookup: bool = False
|
10 |
-
Geo: str = ""
|
11 |
-
Location: bool = False
|
12 |
-
Near: str = None
|
13 |
-
Lang: Optional[str] = None
|
14 |
-
Output: Optional[str] = None
|
15 |
-
Elasticsearch: object = None
|
16 |
-
Year: Optional[int] = None
|
17 |
-
Since: Optional[str] = None
|
18 |
-
Until: Optional[str] = None
|
19 |
-
Email: Optional[str] = None
|
20 |
-
Phone: Optional[str] = None
|
21 |
-
Verified: bool = False
|
22 |
-
Store_csv: bool = False
|
23 |
-
Store_json: bool = False
|
24 |
-
Custom = {"tweet": None, "user": None, "username": None}
|
25 |
-
Show_hashtags: bool = False
|
26 |
-
Show_cashtags: bool = False
|
27 |
-
Limit: Optional[int] = None
|
28 |
-
Count: Optional[int] = None
|
29 |
-
Stats: bool = False
|
30 |
-
Database: object = None
|
31 |
-
To: str = None
|
32 |
-
All = None
|
33 |
-
Debug: bool = False
|
34 |
-
Format = None
|
35 |
-
Essid: str = ""
|
36 |
-
Profile: bool = False
|
37 |
-
Followers: bool = False
|
38 |
-
Following: bool = False
|
39 |
-
Favorites: bool = False
|
40 |
-
TwitterSearch: bool = False
|
41 |
-
User_full: bool = False
|
42 |
-
# Profile_full: bool = False
|
43 |
-
Store_object: bool = False
|
44 |
-
Store_object_tweets_list: list = None
|
45 |
-
Store_object_users_list: list = None
|
46 |
-
Store_object_follow_list: list = None
|
47 |
-
Pandas_type: type = None
|
48 |
-
Pandas: bool = False
|
49 |
-
Index_tweets: str = "twinttweets"
|
50 |
-
Index_follow: str = "twintgraph"
|
51 |
-
Index_users: str = "twintuser"
|
52 |
-
Retries_count: int = 10
|
53 |
-
Resume: object = None
|
54 |
-
Images: bool = False
|
55 |
-
Videos: bool = False
|
56 |
-
Media: bool = False
|
57 |
-
Replies: bool = False
|
58 |
-
Pandas_clean: bool = True
|
59 |
-
Lowercase: bool = True
|
60 |
-
Pandas_au: bool = True
|
61 |
-
Proxy_host: str = ""
|
62 |
-
Proxy_port: int = 0
|
63 |
-
Proxy_type: object = None
|
64 |
-
Tor_control_port: int = 9051
|
65 |
-
Tor_control_password: str = None
|
66 |
-
Retweets: bool = False
|
67 |
-
Query: str = None
|
68 |
-
Hide_output: bool = False
|
69 |
-
Custom_query: str = ""
|
70 |
-
Popular_tweets: bool = False
|
71 |
-
Skip_certs: bool = False
|
72 |
-
Native_retweets: bool = False
|
73 |
-
Min_likes: int = 0
|
74 |
-
Min_retweets: int = 0
|
75 |
-
Min_replies: int = 0
|
76 |
-
Links: Optional[str] = None
|
77 |
-
Source: Optional[str] = None
|
78 |
-
Members_list: Optional[str] = None
|
79 |
-
Filter_retweets: bool = False
|
80 |
-
Translate: bool = False
|
81 |
-
TranslateSrc: str = "en"
|
82 |
-
TranslateDest: str = "en"
|
83 |
-
Backoff_exponent: float = 3.0
|
84 |
-
Min_wait_time: int = 0
|
85 |
-
Bearer_token: str = None
|
86 |
-
Guest_token: str = None
|
87 |
-
deleted: list = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/datelock.py
DELETED
@@ -1,44 +0,0 @@
|
|
1 |
-
import datetime
|
2 |
-
|
3 |
-
import logging as logme
|
4 |
-
|
5 |
-
from .tweet import utc_to_local
|
6 |
-
|
7 |
-
|
8 |
-
class Datelock:
|
9 |
-
until = None
|
10 |
-
since = None
|
11 |
-
_since_def_user = None
|
12 |
-
|
13 |
-
|
14 |
-
def convertToDateTime(string):
|
15 |
-
dateTimeList = string.split()
|
16 |
-
ListLength = len(dateTimeList)
|
17 |
-
if ListLength == 2:
|
18 |
-
return string
|
19 |
-
if ListLength == 1:
|
20 |
-
return string + " 00:00:00"
|
21 |
-
else:
|
22 |
-
return ""
|
23 |
-
|
24 |
-
|
25 |
-
def Set(Until, Since):
|
26 |
-
logme.debug(__name__+':Set')
|
27 |
-
d = Datelock()
|
28 |
-
|
29 |
-
if Until:
|
30 |
-
d.until = datetime.datetime.strptime(convertToDateTime(Until), "%Y-%m-%d %H:%M:%S")
|
31 |
-
d.until = utc_to_local(d.until)
|
32 |
-
else:
|
33 |
-
d.until = datetime.datetime.today()
|
34 |
-
|
35 |
-
if Since:
|
36 |
-
d.since = datetime.datetime.strptime(convertToDateTime(Since), "%Y-%m-%d %H:%M:%S")
|
37 |
-
d.since = utc_to_local(d.since)
|
38 |
-
d._since_def_user = True
|
39 |
-
else:
|
40 |
-
d.since = datetime.datetime.strptime("2006-03-21 00:00:00", "%Y-%m-%d %H:%M:%S")
|
41 |
-
d.since = utc_to_local(d.since)
|
42 |
-
d._since_def_user = False
|
43 |
-
|
44 |
-
return d
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/feed.py
DELETED
@@ -1,145 +0,0 @@
|
|
1 |
-
import time
|
2 |
-
from datetime import datetime
|
3 |
-
|
4 |
-
from bs4 import BeautifulSoup
|
5 |
-
from re import findall
|
6 |
-
from json import loads
|
7 |
-
|
8 |
-
import logging as logme
|
9 |
-
|
10 |
-
from .tweet import utc_to_local, Tweet_formats
|
11 |
-
|
12 |
-
|
13 |
-
class NoMoreTweetsException(Exception):
|
14 |
-
def __init__(self, msg):
|
15 |
-
super().__init__(msg)
|
16 |
-
|
17 |
-
|
18 |
-
def Follow(response):
|
19 |
-
logme.debug(__name__ + ':Follow')
|
20 |
-
soup = BeautifulSoup(response, "html.parser")
|
21 |
-
follow = soup.find_all("td", "info fifty screenname")
|
22 |
-
cursor = soup.find_all("div", "w-button-more")
|
23 |
-
try:
|
24 |
-
cursor = findall(r'cursor=(.*?)">', str(cursor))[0]
|
25 |
-
except IndexError:
|
26 |
-
logme.critical(__name__ + ':Follow:IndexError')
|
27 |
-
|
28 |
-
return follow, cursor
|
29 |
-
|
30 |
-
|
31 |
-
# TODO: this won't be used by --profile-full anymore. if it isn't used anywhere else, perhaps remove this in future
|
32 |
-
def Mobile(response):
|
33 |
-
logme.debug(__name__ + ':Mobile')
|
34 |
-
soup = BeautifulSoup(response, "html.parser")
|
35 |
-
tweets = soup.find_all("span", "metadata")
|
36 |
-
max_id = soup.find_all("div", "w-button-more")
|
37 |
-
try:
|
38 |
-
max_id = findall(r'max_id=(.*?)">', str(max_id))[0]
|
39 |
-
except Exception as e:
|
40 |
-
logme.critical(__name__ + ':Mobile:' + str(e))
|
41 |
-
|
42 |
-
return tweets, max_id
|
43 |
-
|
44 |
-
|
45 |
-
def MobileFav(response):
|
46 |
-
soup = BeautifulSoup(response, "html.parser")
|
47 |
-
tweets = soup.find_all("table", "tweet")
|
48 |
-
max_id = soup.find_all("div", "w-button-more")
|
49 |
-
try:
|
50 |
-
max_id = findall(r'max_id=(.*?)">', str(max_id))[0]
|
51 |
-
except Exception as e:
|
52 |
-
print(str(e) + " [x] feed.MobileFav")
|
53 |
-
|
54 |
-
return tweets, max_id
|
55 |
-
|
56 |
-
|
57 |
-
def _get_cursor(response):
|
58 |
-
if isinstance(response, dict): # case 1
|
59 |
-
try:
|
60 |
-
next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][
|
61 |
-
'operation']['cursor']['value']
|
62 |
-
except KeyError:
|
63 |
-
# this is needed because after the first request location of cursor is changed
|
64 |
-
next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
|
65 |
-
'cursor']['value']
|
66 |
-
else: # case 2
|
67 |
-
next_cursor = response[-1]['content']['value']
|
68 |
-
return next_cursor
|
69 |
-
|
70 |
-
|
71 |
-
def Json(response):
|
72 |
-
logme.debug(__name__ + ':Json')
|
73 |
-
json_response = loads(response)
|
74 |
-
html = json_response["items_html"]
|
75 |
-
soup = BeautifulSoup(html, "html.parser")
|
76 |
-
feed = soup.find_all("div", "tweet")
|
77 |
-
return feed, json_response["min_position"]
|
78 |
-
|
79 |
-
|
80 |
-
def parse_tweets(config, response):
|
81 |
-
logme.debug(__name__ + ':parse_tweets')
|
82 |
-
response = loads(response)
|
83 |
-
feed = []
|
84 |
-
if 'globalObjects' in response:
|
85 |
-
if len(response['globalObjects']['tweets']) == 0:
|
86 |
-
msg = 'No more data!'
|
87 |
-
raise NoMoreTweetsException(msg)
|
88 |
-
for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
|
89 |
-
# this will handle the cases when the timeline entry is a tweet
|
90 |
-
if (config.TwitterSearch or config.Profile) and (timeline_entry['entryId'].startswith('sq-I-t-') or
|
91 |
-
timeline_entry['entryId'].startswith('tweet-')):
|
92 |
-
if 'tweet' in timeline_entry['content']['item']['content']:
|
93 |
-
_id = timeline_entry['content']['item']['content']['tweet']['id']
|
94 |
-
# skip the ads
|
95 |
-
if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']:
|
96 |
-
continue
|
97 |
-
elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \
|
98 |
-
timeline_entry['content']['item']['content']['tombstone']:
|
99 |
-
_id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id']
|
100 |
-
else:
|
101 |
-
_id = None
|
102 |
-
if _id is None:
|
103 |
-
raise ValueError('Unable to find ID of tweet in timeline.')
|
104 |
-
try:
|
105 |
-
temp_obj = response['globalObjects']['tweets'][_id]
|
106 |
-
except KeyError:
|
107 |
-
logme.info('encountered a deleted tweet with id {}'.format(_id))
|
108 |
-
|
109 |
-
config.deleted.append(_id)
|
110 |
-
continue
|
111 |
-
temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
|
112 |
-
if 'retweeted_status_id_str' in temp_obj:
|
113 |
-
rt_id = temp_obj['retweeted_status_id_str']
|
114 |
-
_dt = response['globalObjects']['tweets'][rt_id]['created_at']
|
115 |
-
_dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
|
116 |
-
_dt = utc_to_local(_dt)
|
117 |
-
_dt = str(_dt.strftime(Tweet_formats['datetime']))
|
118 |
-
temp_obj['retweet_data'] = {
|
119 |
-
'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'],
|
120 |
-
'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'],
|
121 |
-
'retweet_id': rt_id,
|
122 |
-
'retweet_date': _dt,
|
123 |
-
}
|
124 |
-
feed.append(temp_obj)
|
125 |
-
next_cursor = _get_cursor(response) # case 1
|
126 |
-
else:
|
127 |
-
response = response['data']['user']['result']['timeline']
|
128 |
-
entries = response['timeline']['instructions']
|
129 |
-
for e in entries:
|
130 |
-
if e.get('entries'):
|
131 |
-
entries = e['entries']
|
132 |
-
break
|
133 |
-
if len(entries) == 2:
|
134 |
-
msg = 'No more data!'
|
135 |
-
raise NoMoreTweetsException(msg)
|
136 |
-
for timeline_entry in entries:
|
137 |
-
if timeline_entry['content'].get('itemContent'):
|
138 |
-
try:
|
139 |
-
temp_obj = timeline_entry['content']['itemContent']['tweet_results']['result']['legacy']
|
140 |
-
temp_obj['user_data'] = timeline_entry['content']['itemContent']['tweet_results']['result']['core']['user_results']['result']['legacy']
|
141 |
-
feed.append(temp_obj)
|
142 |
-
except KeyError: # doubtful
|
143 |
-
next
|
144 |
-
next_cursor = _get_cursor(entries) # case 2
|
145 |
-
return feed, next_cursor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/format.py
DELETED
@@ -1,91 +0,0 @@
|
|
1 |
-
import logging as logme
|
2 |
-
|
3 |
-
def Tweet(config, t):
|
4 |
-
if config.Format:
|
5 |
-
logme.debug(__name__+':Tweet:Format')
|
6 |
-
output = config.Format.replace("{id}", t.id_str)
|
7 |
-
output = output.replace("{conversation_id}", t.conversation_id)
|
8 |
-
output = output.replace("{date}", t.datestamp)
|
9 |
-
output = output.replace("{time}", t.timestamp)
|
10 |
-
output = output.replace("{user_id}", t.user_id_str)
|
11 |
-
output = output.replace("{username}", t.username)
|
12 |
-
output = output.replace("{name}", t.name)
|
13 |
-
output = output.replace("{place}", t.place)
|
14 |
-
output = output.replace("{timezone}", t.timezone)
|
15 |
-
output = output.replace("{urls}", ",".join(t.urls))
|
16 |
-
output = output.replace("{photos}", ",".join(t.photos))
|
17 |
-
output = output.replace("{video}", str(t.video))
|
18 |
-
output = output.replace("{thumbnail}", t.thumbnail)
|
19 |
-
output = output.replace("{tweet}", t.tweet)
|
20 |
-
output = output.replace("{language}", t.lang)
|
21 |
-
output = output.replace("{hashtags}", ",".join(t.hashtags))
|
22 |
-
output = output.replace("{cashtags}", ",".join(t.cashtags))
|
23 |
-
output = output.replace("{replies}", t.replies_count)
|
24 |
-
output = output.replace("{retweets}", t.retweets_count)
|
25 |
-
output = output.replace("{likes}", t.likes_count)
|
26 |
-
output = output.replace("{link}", t.link)
|
27 |
-
output = output.replace("{is_retweet}", str(t.retweet))
|
28 |
-
output = output.replace("{user_rt_id}", str(t.user_rt_id))
|
29 |
-
output = output.replace("{quote_url}", t.quote_url)
|
30 |
-
output = output.replace("{near}", t.near)
|
31 |
-
output = output.replace("{geo}", t.geo)
|
32 |
-
output = output.replace("{mentions}", ",".join(t.mentions))
|
33 |
-
output = output.replace("{translate}", t.translate)
|
34 |
-
output = output.replace("{trans_src}", t.trans_src)
|
35 |
-
output = output.replace("{trans_dest}", t.trans_dest)
|
36 |
-
else:
|
37 |
-
logme.debug(__name__+':Tweet:notFormat')
|
38 |
-
output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
|
39 |
-
|
40 |
-
# TODO: someone who is familiar with this code, needs to take a look at what this is <also see tweet.py>
|
41 |
-
# if t.retweet:
|
42 |
-
# output += "RT "
|
43 |
-
|
44 |
-
output += f"<{t.username}> {t.tweet}"
|
45 |
-
|
46 |
-
if config.Show_hashtags:
|
47 |
-
hashtags = ",".join(t.hashtags)
|
48 |
-
output += f" {hashtags}"
|
49 |
-
if config.Show_cashtags:
|
50 |
-
cashtags = ",".join(t.cashtags)
|
51 |
-
output += f" {cashtags}"
|
52 |
-
if config.Stats:
|
53 |
-
output += f" | {t.replies_count} replies {t.retweets_count} retweets {t.likes_count} likes"
|
54 |
-
if config.Translate:
|
55 |
-
output += f" {t.translate} {t.trans_src} {t.trans_dest}"
|
56 |
-
return output
|
57 |
-
|
58 |
-
def User(_format, u):
|
59 |
-
if _format:
|
60 |
-
logme.debug(__name__+':User:Format')
|
61 |
-
output = _format.replace("{id}", str(u.id))
|
62 |
-
output = output.replace("{name}", u.name)
|
63 |
-
output = output.replace("{username}", u.username)
|
64 |
-
output = output.replace("{bio}", u.bio)
|
65 |
-
output = output.replace("{location}", u.location)
|
66 |
-
output = output.replace("{url}", u.url)
|
67 |
-
output = output.replace("{join_date}", u.join_date)
|
68 |
-
output = output.replace("{join_time}", u.join_time)
|
69 |
-
output = output.replace("{tweets}", str(u.tweets))
|
70 |
-
output = output.replace("{following}", str(u.following))
|
71 |
-
output = output.replace("{followers}", str(u.followers))
|
72 |
-
output = output.replace("{likes}", str(u.likes))
|
73 |
-
output = output.replace("{media}", str(u.media_count))
|
74 |
-
output = output.replace("{private}", str(u.is_private))
|
75 |
-
output = output.replace("{verified}", str(u.is_verified))
|
76 |
-
output = output.replace("{avatar}", u.avatar)
|
77 |
-
if u.background_image:
|
78 |
-
output = output.replace("{background_image}", u.background_image)
|
79 |
-
else:
|
80 |
-
output = output.replace("{background_image}", "")
|
81 |
-
else:
|
82 |
-
logme.debug(__name__+':User:notFormat')
|
83 |
-
output = f"{u.id} | {u.name} | @{u.username} | Private: "
|
84 |
-
output += f"{u.is_private} | Verified: {u.is_verified} |"
|
85 |
-
output += f" Bio: {u.bio} | Location: {u.location} | Url: "
|
86 |
-
output += f"{u.url} | Joined: {u.join_date} {u.join_time} "
|
87 |
-
output += f"| Tweets: {u.tweets} | Following: {u.following}"
|
88 |
-
output += f" | Followers: {u.followers} | Likes: {u.likes} "
|
89 |
-
output += f"| Media: {u.media_count} | Avatar: {u.avatar}"
|
90 |
-
|
91 |
-
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/get.py
DELETED
@@ -1,298 +0,0 @@
|
|
1 |
-
from async_timeout import timeout
|
2 |
-
from datetime import datetime
|
3 |
-
from bs4 import BeautifulSoup
|
4 |
-
import sys
|
5 |
-
import socket
|
6 |
-
import aiohttp
|
7 |
-
from fake_useragent import UserAgent
|
8 |
-
import asyncio
|
9 |
-
import concurrent.futures
|
10 |
-
import random
|
11 |
-
from json import loads, dumps
|
12 |
-
from aiohttp_socks import ProxyConnector, ProxyType
|
13 |
-
from urllib.parse import quote
|
14 |
-
import time
|
15 |
-
|
16 |
-
from . import url
|
17 |
-
from .output import Tweets, Users
|
18 |
-
from .token import TokenExpiryException
|
19 |
-
|
20 |
-
import logging as logme
|
21 |
-
|
22 |
-
httpproxy = None
|
23 |
-
|
24 |
-
user_agent_list = [
|
25 |
-
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
26 |
-
# ' Chrome/60.0.3112.113 Safari/537.36',
|
27 |
-
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
28 |
-
# ' Chrome/60.0.3112.90 Safari/537.36',
|
29 |
-
# 'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
30 |
-
# ' Chrome/60.0.3112.90 Safari/537.36',
|
31 |
-
# 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
32 |
-
# ' Chrome/60.0.3112.90 Safari/537.36',
|
33 |
-
# 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
34 |
-
# ' Chrome/44.0.2403.157 Safari/537.36',
|
35 |
-
# 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
36 |
-
# ' Chrome/60.0.3112.113 Safari/537.36',
|
37 |
-
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
38 |
-
# ' Chrome/57.0.2987.133 Safari/537.36',
|
39 |
-
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
40 |
-
# ' Chrome/57.0.2987.133 Safari/537.36',
|
41 |
-
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
42 |
-
# ' Chrome/55.0.2883.87 Safari/537.36',
|
43 |
-
# 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
44 |
-
# ' Chrome/55.0.2883.87 Safari/537.36',
|
45 |
-
|
46 |
-
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)',
|
47 |
-
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
48 |
-
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
|
49 |
-
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
|
50 |
-
'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
51 |
-
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
52 |
-
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)',
|
53 |
-
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
54 |
-
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
|
55 |
-
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
|
56 |
-
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
|
57 |
-
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
|
58 |
-
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET '
|
59 |
-
'CLR 3.5.30729)',
|
60 |
-
]
|
61 |
-
|
62 |
-
|
63 |
-
# function to convert python `dict` to json and then encode it to be passed in the url as a parameter
|
64 |
-
# some urls require this format
|
65 |
-
def dict_to_url(dct):
|
66 |
-
return quote(dumps(dct))
|
67 |
-
|
68 |
-
|
69 |
-
def get_connector(config):
|
70 |
-
logme.debug(__name__ + ':get_connector')
|
71 |
-
_connector = None
|
72 |
-
if config.Proxy_host:
|
73 |
-
if config.Proxy_host.lower() == "tor":
|
74 |
-
_connector = ProxyConnector(
|
75 |
-
host='127.0.0.1',
|
76 |
-
port=9050,
|
77 |
-
rdns=True)
|
78 |
-
elif config.Proxy_port and config.Proxy_type:
|
79 |
-
if config.Proxy_type.lower() == "socks5":
|
80 |
-
_type = ProxyType.SOCKS5
|
81 |
-
elif config.Proxy_type.lower() == "socks4":
|
82 |
-
_type = ProxyType.SOCKS4
|
83 |
-
elif config.Proxy_type.lower() == "http":
|
84 |
-
global httpproxy
|
85 |
-
httpproxy = "http://" + config.Proxy_host + ":" + str(config.Proxy_port)
|
86 |
-
return _connector
|
87 |
-
else:
|
88 |
-
logme.critical("get_connector:proxy-type-error")
|
89 |
-
print("Error: Proxy types allowed are: http, socks5 and socks4. No https.")
|
90 |
-
sys.exit(1)
|
91 |
-
_connector = ProxyConnector(
|
92 |
-
proxy_type=_type,
|
93 |
-
host=config.Proxy_host,
|
94 |
-
port=config.Proxy_port,
|
95 |
-
rdns=True)
|
96 |
-
else:
|
97 |
-
logme.critical(__name__ + ':get_connector:proxy-port-type-error')
|
98 |
-
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
|
99 |
-
sys.exit(1)
|
100 |
-
else:
|
101 |
-
if config.Proxy_port or config.Proxy_type:
|
102 |
-
logme.critical(__name__ + ':get_connector:proxy-host-arg-error')
|
103 |
-
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
|
104 |
-
sys.exit(1)
|
105 |
-
|
106 |
-
return _connector
|
107 |
-
|
108 |
-
|
109 |
-
async def RequestUrl(config, init):
|
110 |
-
logme.debug(__name__ + ':RequestUrl')
|
111 |
-
_connector = get_connector(config)
|
112 |
-
_serialQuery = ""
|
113 |
-
params = []
|
114 |
-
_url = ""
|
115 |
-
_headers = [("authorization", config.Bearer_token), ("x-guest-token", config.Guest_token)]
|
116 |
-
|
117 |
-
# TODO : do this later
|
118 |
-
if config.Profile:
|
119 |
-
logme.debug(__name__ + ':RequestUrl:Profile')
|
120 |
-
_url, params, _serialQuery = url.SearchProfile(config, init)
|
121 |
-
elif config.TwitterSearch:
|
122 |
-
logme.debug(__name__ + ':RequestUrl:TwitterSearch')
|
123 |
-
_url, params, _serialQuery = await url.Search(config, init)
|
124 |
-
else:
|
125 |
-
if config.Following:
|
126 |
-
logme.debug(__name__ + ':RequestUrl:Following')
|
127 |
-
_url = await url.Following(config.Username, init)
|
128 |
-
elif config.Followers:
|
129 |
-
logme.debug(__name__ + ':RequestUrl:Followers')
|
130 |
-
_url = await url.Followers(config.Username, init)
|
131 |
-
else:
|
132 |
-
logme.debug(__name__ + ':RequestUrl:Favorites')
|
133 |
-
_url = await url.Favorites(config.Username, init)
|
134 |
-
_serialQuery = _url
|
135 |
-
|
136 |
-
response = await Request(_url, params=params, connector=_connector, headers=_headers)
|
137 |
-
|
138 |
-
if config.Debug:
|
139 |
-
print(_serialQuery, file=open("twint-request_urls.log", "a", encoding="utf-8"))
|
140 |
-
|
141 |
-
return response
|
142 |
-
|
143 |
-
|
144 |
-
def ForceNewTorIdentity(config):
|
145 |
-
logme.debug(__name__ + ':ForceNewTorIdentity')
|
146 |
-
try:
|
147 |
-
tor_c = socket.create_connection(('127.0.0.1', config.Tor_control_port))
|
148 |
-
tor_c.send('AUTHENTICATE "{}"\r\nSIGNAL NEWNYM\r\n'.format(config.Tor_control_password).encode())
|
149 |
-
response = tor_c.recv(1024)
|
150 |
-
if response != b'250 OK\r\n250 OK\r\n':
|
151 |
-
sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
|
152 |
-
logme.critical(__name__ + ':ForceNewTorIdentity:unexpectedResponse')
|
153 |
-
except Exception as e:
|
154 |
-
logme.debug(__name__ + ':ForceNewTorIdentity:errorConnectingTor')
|
155 |
-
sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
|
156 |
-
sys.stderr.write('If you want to rotate Tor ports automatically - enable Tor control port\n')
|
157 |
-
|
158 |
-
|
159 |
-
async def Request(_url, connector=None, params=None, headers=None):
|
160 |
-
logme.debug(__name__ + ':Request:Connector')
|
161 |
-
async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
|
162 |
-
return await Response(session, _url, params)
|
163 |
-
|
164 |
-
|
165 |
-
async def Response(session, _url, params=None):
|
166 |
-
logme.debug(__name__ + ':Response')
|
167 |
-
retries = 5
|
168 |
-
wait = 10 # No basis, maybe work with 0
|
169 |
-
for attempt in range(retries + 1):
|
170 |
-
try:
|
171 |
-
with timeout(120):
|
172 |
-
async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
|
173 |
-
resp = await response.text()
|
174 |
-
if response.status == 429: # 429 implies Too many requests i.e. Rate Limit Exceeded
|
175 |
-
raise TokenExpiryException(loads(resp)['errors'][0]['message'])
|
176 |
-
return resp
|
177 |
-
except aiohttp.client_exceptions.ClientConnectorError as exc:
|
178 |
-
if attempt < retries:
|
179 |
-
retrying = ', retrying'
|
180 |
-
level = logme.WARNING
|
181 |
-
else:
|
182 |
-
retrying = ''
|
183 |
-
level = logme.ERROR
|
184 |
-
logme.log(level, f'Error retrieving {_url}: {exc!r}{retrying}')
|
185 |
-
if attempt < retries:
|
186 |
-
time.sleep(wait)
|
187 |
-
else:
|
188 |
-
logme.fatal(f'{retries + 1} requests to {_url} failed, giving up.')
|
189 |
-
raise TokenExpiryException(f'{exc!r}')
|
190 |
-
|
191 |
-
|
192 |
-
async def RandomUserAgent(wa=None):
|
193 |
-
logme.debug(__name__ + ':RandomUserAgent')
|
194 |
-
try:
|
195 |
-
if wa:
|
196 |
-
return "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
|
197 |
-
return UserAgent(verify_ssl=False, use_cache_server=False).random
|
198 |
-
except:
|
199 |
-
return random.choice(user_agent_list)
|
200 |
-
|
201 |
-
|
202 |
-
async def Username(_id, bearer_token, guest_token):
|
203 |
-
logme.debug(__name__ + ':Username')
|
204 |
-
_dct = {'userId': _id, 'withHighlightedLabel': False}
|
205 |
-
_url = "https://api.twitter.com/graphql/B9FuNQVmyx32rdbIPEZKag/UserByRestId?variables={}".format(dict_to_url(_dct))
|
206 |
-
_headers = {
|
207 |
-
'authorization': bearer_token,
|
208 |
-
'x-guest-token': guest_token,
|
209 |
-
}
|
210 |
-
r = await Request(_url, headers=_headers)
|
211 |
-
j_r = loads(r)
|
212 |
-
username = j_r['data']['user']['legacy']['screen_name']
|
213 |
-
return username
|
214 |
-
|
215 |
-
|
216 |
-
async def Tweet(url, config, conn):
|
217 |
-
logme.debug(__name__ + ':Tweet')
|
218 |
-
try:
|
219 |
-
response = await Request(url)
|
220 |
-
soup = BeautifulSoup(response, "html.parser")
|
221 |
-
tweets = soup.find_all("div", "tweet")
|
222 |
-
await Tweets(tweets, config, conn, url)
|
223 |
-
except Exception as e:
|
224 |
-
logme.critical(__name__ + ':Tweet:' + str(e))
|
225 |
-
|
226 |
-
|
227 |
-
async def User(username, config, conn, user_id=False):
|
228 |
-
logme.debug(__name__ + ':User')
|
229 |
-
_dct = {'screen_name': username, 'withHighlightedLabel': False}
|
230 |
-
_url = 'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}'\
|
231 |
-
.format(dict_to_url(_dct))
|
232 |
-
_headers = {
|
233 |
-
'authorization': config.Bearer_token,
|
234 |
-
'x-guest-token': config.Guest_token,
|
235 |
-
}
|
236 |
-
try:
|
237 |
-
response = await Request(_url, headers=_headers)
|
238 |
-
j_r = loads(response)
|
239 |
-
if user_id:
|
240 |
-
try:
|
241 |
-
_id = j_r['data']['user']['rest_id']
|
242 |
-
return _id
|
243 |
-
except KeyError as e:
|
244 |
-
logme.critical(__name__ + ':User:' + str(e))
|
245 |
-
return
|
246 |
-
await Users(j_r, config, conn)
|
247 |
-
except Exception as e:
|
248 |
-
logme.critical(__name__ + ':User:' + str(e))
|
249 |
-
raise
|
250 |
-
|
251 |
-
|
252 |
-
def Limit(Limit, count):
|
253 |
-
logme.debug(__name__ + ':Limit')
|
254 |
-
if Limit is not None and count >= int(Limit):
|
255 |
-
return True
|
256 |
-
|
257 |
-
|
258 |
-
async def Multi(feed, config, conn):
|
259 |
-
logme.debug(__name__ + ':Multi')
|
260 |
-
count = 0
|
261 |
-
try:
|
262 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
|
263 |
-
loop = asyncio.get_event_loop()
|
264 |
-
futures = []
|
265 |
-
for tweet in feed:
|
266 |
-
count += 1
|
267 |
-
if config.Favorites or config.Profile_full:
|
268 |
-
logme.debug(__name__ + ':Multi:Favorites-profileFull')
|
269 |
-
link = tweet.find("a")["href"]
|
270 |
-
url = f"https://twitter.com{link}&lang=en"
|
271 |
-
elif config.User_full:
|
272 |
-
logme.debug(__name__ + ':Multi:userFull')
|
273 |
-
username = tweet.find("a")["name"]
|
274 |
-
url = f"http://twitter.com/{username}?lang=en"
|
275 |
-
else:
|
276 |
-
logme.debug(__name__ + ':Multi:else-url')
|
277 |
-
link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"]
|
278 |
-
url = f"https://twitter.com{link}?lang=en"
|
279 |
-
|
280 |
-
if config.User_full:
|
281 |
-
logme.debug(__name__ + ':Multi:user-full-Run')
|
282 |
-
futures.append(loop.run_in_executor(executor, await User(url,
|
283 |
-
config, conn)))
|
284 |
-
else:
|
285 |
-
logme.debug(__name__ + ':Multi:notUser-full-Run')
|
286 |
-
futures.append(loop.run_in_executor(executor, await Tweet(url,
|
287 |
-
config, conn)))
|
288 |
-
logme.debug(__name__ + ':Multi:asyncioGather')
|
289 |
-
await asyncio.gather(*futures)
|
290 |
-
except Exception as e:
|
291 |
-
# TODO: fix error not error
|
292 |
-
# print(str(e) + " [x] get.Multi")
|
293 |
-
# will return "'NoneType' object is not callable"
|
294 |
-
# but still works
|
295 |
-
# logme.critical(__name__+':Multi:' + str(e))
|
296 |
-
pass
|
297 |
-
|
298 |
-
return count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/output.py
DELETED
@@ -1,241 +0,0 @@
|
|
1 |
-
from datetime import datetime
|
2 |
-
|
3 |
-
from . import format, get
|
4 |
-
from .tweet import Tweet
|
5 |
-
from .user import User
|
6 |
-
from .storage import db, elasticsearch, write, panda
|
7 |
-
|
8 |
-
import logging as logme
|
9 |
-
|
10 |
-
follows_list = []
|
11 |
-
tweets_list = []
|
12 |
-
users_list = []
|
13 |
-
|
14 |
-
author_list = {''}
|
15 |
-
author_list.pop()
|
16 |
-
|
17 |
-
# used by Pandas
|
18 |
-
_follows_object = {}
|
19 |
-
|
20 |
-
|
21 |
-
def _formatDateTime(datetimestamp):
|
22 |
-
try:
|
23 |
-
return int(datetime.strptime(datetimestamp, "%Y-%m-%d %H:%M:%S").timestamp())
|
24 |
-
except ValueError:
|
25 |
-
return int(datetime.strptime(datetimestamp, "%Y-%m-%d").timestamp())
|
26 |
-
|
27 |
-
|
28 |
-
def _clean_follow_list():
|
29 |
-
logme.debug(__name__ + ':clean_follow_list')
|
30 |
-
global _follows_object
|
31 |
-
_follows_object = {}
|
32 |
-
|
33 |
-
|
34 |
-
def clean_lists():
|
35 |
-
logme.debug(__name__ + ':clean_lists')
|
36 |
-
global follows_list
|
37 |
-
global tweets_list
|
38 |
-
global users_list
|
39 |
-
follows_list = []
|
40 |
-
tweets_list = []
|
41 |
-
users_list = []
|
42 |
-
|
43 |
-
|
44 |
-
def datecheck(datetimestamp, config):
|
45 |
-
logme.debug(__name__ + ':datecheck')
|
46 |
-
if config.Since:
|
47 |
-
logme.debug(__name__ + ':datecheck:SinceTrue')
|
48 |
-
|
49 |
-
d = _formatDateTime(datetimestamp)
|
50 |
-
s = _formatDateTime(config.Since)
|
51 |
-
|
52 |
-
if d < s:
|
53 |
-
return False
|
54 |
-
if config.Until:
|
55 |
-
logme.debug(__name__ + ':datecheck:UntilTrue')
|
56 |
-
|
57 |
-
d = _formatDateTime(datetimestamp)
|
58 |
-
s = _formatDateTime(config.Until)
|
59 |
-
|
60 |
-
if d > s:
|
61 |
-
return False
|
62 |
-
logme.debug(__name__ + ':datecheck:dateRangeFalse')
|
63 |
-
return True
|
64 |
-
|
65 |
-
|
66 |
-
# TODO In this method we need to delete the quoted tweets, because twitter also sends the quoted tweets in the
|
67 |
-
# `tweets` list along with the other tweets
|
68 |
-
def is_tweet(tw):
|
69 |
-
try:
|
70 |
-
tw["data-item-id"]
|
71 |
-
logme.debug(__name__ + ':is_tweet:True')
|
72 |
-
return True
|
73 |
-
except:
|
74 |
-
logme.critical(__name__ + ':is_tweet:False')
|
75 |
-
return False
|
76 |
-
|
77 |
-
|
78 |
-
def _output(obj, output, config, **extra):
|
79 |
-
logme.debug(__name__ + ':_output')
|
80 |
-
if config.Lowercase:
|
81 |
-
if isinstance(obj, str):
|
82 |
-
logme.debug(__name__ + ':_output:Lowercase:username')
|
83 |
-
obj = obj.lower()
|
84 |
-
elif obj.__class__.__name__ == "user":
|
85 |
-
logme.debug(__name__ + ':_output:Lowercase:user')
|
86 |
-
pass
|
87 |
-
elif obj.__class__.__name__ == "tweet":
|
88 |
-
logme.debug(__name__ + ':_output:Lowercase:tweet')
|
89 |
-
obj.username = obj.username.lower()
|
90 |
-
author_list.update({obj.username})
|
91 |
-
for dct in obj.mentions:
|
92 |
-
for key, val in dct.items():
|
93 |
-
dct[key] = val.lower()
|
94 |
-
for i in range(len(obj.hashtags)):
|
95 |
-
obj.hashtags[i] = obj.hashtags[i].lower()
|
96 |
-
for i in range(len(obj.cashtags)):
|
97 |
-
obj.cashtags[i] = obj.cashtags[i].lower()
|
98 |
-
else:
|
99 |
-
logme.info('_output:Lowercase:hiddenTweetFound')
|
100 |
-
print("[x] Hidden tweet found, account suspended due to violation of TOS")
|
101 |
-
return
|
102 |
-
if config.Output != None:
|
103 |
-
if config.Store_csv:
|
104 |
-
try:
|
105 |
-
write.Csv(obj, config)
|
106 |
-
logme.debug(__name__ + ':_output:CSV')
|
107 |
-
except Exception as e:
|
108 |
-
logme.critical(__name__ + ':_output:CSV:Error:' + str(e))
|
109 |
-
print(str(e) + " [x] output._output")
|
110 |
-
elif config.Store_json:
|
111 |
-
write.Json(obj, config)
|
112 |
-
logme.debug(__name__ + ':_output:JSON')
|
113 |
-
else:
|
114 |
-
write.Text(output, config.Output)
|
115 |
-
logme.debug(__name__ + ':_output:Text')
|
116 |
-
|
117 |
-
if config.Elasticsearch:
|
118 |
-
logme.debug(__name__ + ':_output:Elasticsearch')
|
119 |
-
print("", end=".", flush=True)
|
120 |
-
else:
|
121 |
-
if not config.Hide_output:
|
122 |
-
try:
|
123 |
-
print(output.replace('\n', ' '))
|
124 |
-
except UnicodeEncodeError:
|
125 |
-
logme.critical(__name__ + ':_output:UnicodeEncodeError')
|
126 |
-
print("unicode error [x] output._output")
|
127 |
-
|
128 |
-
|
129 |
-
async def checkData(tweet, config, conn):
|
130 |
-
logme.debug(__name__ + ':checkData')
|
131 |
-
tweet = Tweet(tweet, config)
|
132 |
-
if not tweet.datestamp:
|
133 |
-
logme.critical(__name__ + ':checkData:hiddenTweetFound')
|
134 |
-
print("[x] Hidden tweet found, account suspended due to violation of TOS")
|
135 |
-
return
|
136 |
-
if datecheck(tweet.datestamp + " " + tweet.timestamp, config):
|
137 |
-
output = format.Tweet(config, tweet)
|
138 |
-
if config.Database:
|
139 |
-
logme.debug(__name__ + ':checkData:Database')
|
140 |
-
db.tweets(conn, tweet, config)
|
141 |
-
if config.Pandas:
|
142 |
-
logme.debug(__name__ + ':checkData:Pandas')
|
143 |
-
panda.update(tweet, config)
|
144 |
-
if config.Store_object:
|
145 |
-
logme.debug(__name__ + ':checkData:Store_object')
|
146 |
-
if hasattr(config.Store_object_tweets_list, 'append'):
|
147 |
-
config.Store_object_tweets_list.append(tweet)
|
148 |
-
else:
|
149 |
-
tweets_list.append(tweet)
|
150 |
-
if config.Elasticsearch:
|
151 |
-
logme.debug(__name__ + ':checkData:Elasticsearch')
|
152 |
-
elasticsearch.Tweet(tweet, config)
|
153 |
-
_output(tweet, output, config)
|
154 |
-
# else:
|
155 |
-
# logme.critical(__name__+':checkData:copyrightedTweet')
|
156 |
-
|
157 |
-
|
158 |
-
async def Tweets(tweets, config, conn):
|
159 |
-
logme.debug(__name__ + ':Tweets')
|
160 |
-
if config.Favorites or config.Location:
|
161 |
-
logme.debug(__name__ + ':Tweets:fav+full+loc')
|
162 |
-
for tw in tweets:
|
163 |
-
await checkData(tw, config, conn)
|
164 |
-
elif config.TwitterSearch or config.Profile:
|
165 |
-
logme.debug(__name__ + ':Tweets:TwitterSearch')
|
166 |
-
await checkData(tweets, config, conn)
|
167 |
-
else:
|
168 |
-
logme.debug(__name__ + ':Tweets:else')
|
169 |
-
if int(tweets["data-user-id"]) == config.User_id or config.Retweets:
|
170 |
-
await checkData(tweets, config, conn)
|
171 |
-
|
172 |
-
|
173 |
-
async def Users(u, config, conn):
|
174 |
-
logme.debug(__name__ + ':User')
|
175 |
-
global users_list
|
176 |
-
|
177 |
-
user = User(u)
|
178 |
-
output = format.User(config.Format, user)
|
179 |
-
|
180 |
-
if config.Database:
|
181 |
-
logme.debug(__name__ + ':User:Database')
|
182 |
-
db.user(conn, config, user)
|
183 |
-
|
184 |
-
if config.Elasticsearch:
|
185 |
-
logme.debug(__name__ + ':User:Elasticsearch')
|
186 |
-
_save_date = user.join_date
|
187 |
-
_save_time = user.join_time
|
188 |
-
user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0]
|
189 |
-
user.join_time = str(datetime.strptime(user.join_time, "%I:%M %p")).split()[1]
|
190 |
-
elasticsearch.UserProfile(user, config)
|
191 |
-
user.join_date = _save_date
|
192 |
-
user.join_time = _save_time
|
193 |
-
|
194 |
-
if config.Store_object:
|
195 |
-
logme.debug(__name__ + ':User:Store_object')
|
196 |
-
|
197 |
-
if hasattr(config.Store_object_follow_list, 'append'):
|
198 |
-
config.Store_object_follow_list.append(user)
|
199 |
-
elif hasattr(config.Store_object_users_list, 'append'):
|
200 |
-
config.Store_object_users_list.append(user)
|
201 |
-
else:
|
202 |
-
users_list.append(user) # twint.user.user
|
203 |
-
|
204 |
-
if config.Pandas:
|
205 |
-
logme.debug(__name__ + ':User:Pandas+user')
|
206 |
-
panda.update(user, config)
|
207 |
-
|
208 |
-
_output(user, output, config)
|
209 |
-
|
210 |
-
|
211 |
-
async def Username(username, config, conn):
|
212 |
-
logme.debug(__name__ + ':Username')
|
213 |
-
global _follows_object
|
214 |
-
global follows_list
|
215 |
-
follow_var = config.Following * "following" + config.Followers * "followers"
|
216 |
-
|
217 |
-
if config.Database:
|
218 |
-
logme.debug(__name__ + ':Username:Database')
|
219 |
-
db.follow(conn, config.Username, config.Followers, username)
|
220 |
-
|
221 |
-
if config.Elasticsearch:
|
222 |
-
logme.debug(__name__ + ':Username:Elasticsearch')
|
223 |
-
elasticsearch.Follow(username, config)
|
224 |
-
|
225 |
-
if config.Store_object:
|
226 |
-
if hasattr(config.Store_object_follow_list, 'append'):
|
227 |
-
config.Store_object_follow_list.append(username)
|
228 |
-
else:
|
229 |
-
follows_list.append(username) # twint.user.user
|
230 |
-
|
231 |
-
if config.Pandas:
|
232 |
-
logme.debug(__name__ + ':Username:object+pandas')
|
233 |
-
try:
|
234 |
-
_ = _follows_object[config.Username][follow_var]
|
235 |
-
except KeyError:
|
236 |
-
_follows_object.update({config.Username: {follow_var: []}})
|
237 |
-
_follows_object[config.Username][follow_var].append(username)
|
238 |
-
if config.Pandas_au:
|
239 |
-
logme.debug(__name__ + ':Username:object+pandas+au')
|
240 |
-
panda.update(_follows_object[config.Username], config)
|
241 |
-
_output(username, username, config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/run.py
DELETED
@@ -1,412 +0,0 @@
|
|
1 |
-
import sys, os, datetime
|
2 |
-
from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop
|
3 |
-
|
4 |
-
from . import datelock, feed, get, output, verbose, storage
|
5 |
-
from .token import TokenExpiryException
|
6 |
-
from . import token
|
7 |
-
from .storage import db
|
8 |
-
from .feed import NoMoreTweetsException
|
9 |
-
|
10 |
-
import logging as logme
|
11 |
-
|
12 |
-
import time
|
13 |
-
|
14 |
-
bearer = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs' \
|
15 |
-
'%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
16 |
-
|
17 |
-
|
18 |
-
class Twint:
|
19 |
-
def __init__(self, config):
|
20 |
-
logme.debug(__name__ + ':Twint:__init__')
|
21 |
-
if config.Resume is not None and (config.TwitterSearch or config.Followers or config.Following):
|
22 |
-
logme.debug(__name__ + ':Twint:__init__:Resume')
|
23 |
-
self.init = self.get_resume(config.Resume)
|
24 |
-
else:
|
25 |
-
self.init = -1
|
26 |
-
|
27 |
-
config.deleted = []
|
28 |
-
self.feed: list = [-1]
|
29 |
-
self.count = 0
|
30 |
-
self.user_agent = ""
|
31 |
-
self.config = config
|
32 |
-
self.config.Bearer_token = bearer
|
33 |
-
# TODO might have to make some adjustments for it to work with multi-treading
|
34 |
-
# USAGE : to get a new guest token simply do `self.token.refresh()`
|
35 |
-
self.token = token.Token(config)
|
36 |
-
self.token.refresh()
|
37 |
-
self.conn = db.Conn(config.Database)
|
38 |
-
self.d = datelock.Set(self.config.Until, self.config.Since)
|
39 |
-
verbose.Elastic(config.Elasticsearch)
|
40 |
-
|
41 |
-
if self.config.Store_object:
|
42 |
-
logme.debug(__name__ + ':Twint:__init__:clean_follow_list')
|
43 |
-
output._clean_follow_list()
|
44 |
-
|
45 |
-
if self.config.Pandas_clean:
|
46 |
-
logme.debug(__name__ + ':Twint:__init__:pandas_clean')
|
47 |
-
storage.panda.clean()
|
48 |
-
|
49 |
-
def get_resume(self, resumeFile):
|
50 |
-
if not os.path.exists(resumeFile):
|
51 |
-
return '-1'
|
52 |
-
with open(resumeFile, 'r') as rFile:
|
53 |
-
_init = rFile.readlines()[-1].strip('\n')
|
54 |
-
return _init
|
55 |
-
|
56 |
-
async def Feed(self):
|
57 |
-
logme.debug(__name__ + ':Twint:Feed')
|
58 |
-
consecutive_errors_count = 0
|
59 |
-
while True:
|
60 |
-
# this will receive a JSON string, parse it into a `dict` and do the required stuff
|
61 |
-
try:
|
62 |
-
response = await get.RequestUrl(self.config, self.init)
|
63 |
-
except TokenExpiryException as e:
|
64 |
-
logme.debug(__name__ + 'Twint:Feed:' + str(e))
|
65 |
-
self.token.refresh()
|
66 |
-
response = await get.RequestUrl(self.config, self.init)
|
67 |
-
|
68 |
-
if self.config.Debug:
|
69 |
-
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
|
70 |
-
|
71 |
-
self.feed = []
|
72 |
-
try:
|
73 |
-
if self.config.Favorites:
|
74 |
-
self.feed, self.init = feed.MobileFav(response)
|
75 |
-
favorite_err_cnt = 0
|
76 |
-
if len(self.feed) == 0 and len(self.init) == 0:
|
77 |
-
while (len(self.feed) == 0 or len(self.init) == 0) and favorite_err_cnt < 5:
|
78 |
-
self.user_agent = await get.RandomUserAgent(wa=False)
|
79 |
-
response = await get.RequestUrl(self.config, self.init,
|
80 |
-
headers=[("User-Agent", self.user_agent)])
|
81 |
-
self.feed, self.init = feed.MobileFav(response)
|
82 |
-
favorite_err_cnt += 1
|
83 |
-
time.sleep(1)
|
84 |
-
if favorite_err_cnt == 5:
|
85 |
-
print("Favorite page could not be fetched")
|
86 |
-
if not self.count % 40:
|
87 |
-
time.sleep(5)
|
88 |
-
elif self.config.Followers or self.config.Following:
|
89 |
-
self.feed, self.init = feed.Follow(response)
|
90 |
-
if not self.count % 40:
|
91 |
-
time.sleep(5)
|
92 |
-
elif self.config.Profile or self.config.TwitterSearch:
|
93 |
-
try:
|
94 |
-
self.feed, self.init = feed.parse_tweets(self.config, response)
|
95 |
-
except NoMoreTweetsException as e:
|
96 |
-
logme.debug(__name__ + ':Twint:Feed:' + str(e))
|
97 |
-
print('[!] ' + str(e) + ' Scraping will stop now.')
|
98 |
-
print('found {} deleted tweets in this search.'.format(len(self.config.deleted)))
|
99 |
-
break
|
100 |
-
break
|
101 |
-
except TimeoutError as e:
|
102 |
-
if self.config.Proxy_host.lower() == "tor":
|
103 |
-
print("[?] Timed out, changing Tor identity...")
|
104 |
-
if self.config.Tor_control_password is None:
|
105 |
-
logme.critical(__name__ + ':Twint:Feed:tor-password')
|
106 |
-
sys.stderr.write("Error: config.Tor_control_password must be set for proxy auto-rotation!\r\n")
|
107 |
-
sys.stderr.write(
|
108 |
-
"Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors"
|
109 |
-
"-controller-interface-directly\r\n")
|
110 |
-
break
|
111 |
-
else:
|
112 |
-
get.ForceNewTorIdentity(self.config)
|
113 |
-
continue
|
114 |
-
else:
|
115 |
-
logme.critical(__name__ + ':Twint:Feed:' + str(e))
|
116 |
-
print(str(e))
|
117 |
-
break
|
118 |
-
except Exception as e:
|
119 |
-
if self.config.Profile or self.config.Favorites:
|
120 |
-
print("[!] Twitter does not return more data, scrape stops here.")
|
121 |
-
break
|
122 |
-
|
123 |
-
logme.critical(__name__ + ':Twint:Feed:noData' + str(e))
|
124 |
-
# Sometimes Twitter says there is no data. But it's a lie.
|
125 |
-
# raise
|
126 |
-
consecutive_errors_count += 1
|
127 |
-
if consecutive_errors_count < self.config.Retries_count:
|
128 |
-
# skip to the next iteration if wait time does not satisfy limit constraints
|
129 |
-
delay = round(consecutive_errors_count ** self.config.Backoff_exponent, 1)
|
130 |
-
|
131 |
-
# if the delay is less than users set min wait time then replace delay
|
132 |
-
if self.config.Min_wait_time > delay:
|
133 |
-
delay = self.config.Min_wait_time
|
134 |
-
|
135 |
-
sys.stderr.write('sleeping for {} secs\n'.format(delay))
|
136 |
-
time.sleep(delay)
|
137 |
-
self.user_agent = await get.RandomUserAgent(wa=True)
|
138 |
-
continue
|
139 |
-
logme.critical(__name__ + ':Twint:Feed:Tweets_known_error:' + str(e))
|
140 |
-
sys.stderr.write(str(e) + " [x] run.Feed")
|
141 |
-
sys.stderr.write(
|
142 |
-
"[!] if you get this error but you know for sure that more tweets exist, please open an issue and "
|
143 |
-
"we will investigate it!")
|
144 |
-
break
|
145 |
-
if self.config.Resume:
|
146 |
-
print(self.init, file=open(self.config.Resume, "a", encoding="utf-8"))
|
147 |
-
|
148 |
-
async def follow(self):
|
149 |
-
await self.Feed()
|
150 |
-
if self.config.User_full:
|
151 |
-
logme.debug(__name__ + ':Twint:follow:userFull')
|
152 |
-
self.count += await get.Multi(self.feed, self.config, self.conn)
|
153 |
-
else:
|
154 |
-
logme.debug(__name__ + ':Twint:follow:notUserFull')
|
155 |
-
for user in self.feed:
|
156 |
-
self.count += 1
|
157 |
-
username = user.find("a")["name"]
|
158 |
-
await output.Username(username, self.config, self.conn)
|
159 |
-
|
160 |
-
async def favorite(self):
|
161 |
-
logme.debug(__name__ + ':Twint:favorite')
|
162 |
-
await self.Feed()
|
163 |
-
favorited_tweets_list = []
|
164 |
-
for tweet in self.feed:
|
165 |
-
tweet_dict = {}
|
166 |
-
self.count += 1
|
167 |
-
try:
|
168 |
-
tweet_dict['data-item-id'] = tweet.find("div", {"class": "tweet-text"})['data-id']
|
169 |
-
t_url = tweet.find("span", {"class": "metadata"}).find("a")["href"]
|
170 |
-
tweet_dict['data-conversation-id'] = t_url.split('?')[0].split('/')[-1]
|
171 |
-
tweet_dict['username'] = tweet.find("div", {"class": "username"}).text.replace('\n', '').replace(' ',
|
172 |
-
'')
|
173 |
-
tweet_dict['tweet'] = tweet.find("div", {"class": "tweet-text"}).find("div", {"class": "dir-ltr"}).text
|
174 |
-
date_str = tweet.find("td", {"class": "timestamp"}).find("a").text
|
175 |
-
# test_dates = ["1m", "2h", "Jun 21, 2019", "Mar 12", "28 Jun 19"]
|
176 |
-
# date_str = test_dates[3]
|
177 |
-
if len(date_str) <= 3 and (date_str[-1] == "m" or date_str[-1] == "h"): # 25m 1h
|
178 |
-
dateu = str(datetime.date.today())
|
179 |
-
tweet_dict['date'] = dateu
|
180 |
-
elif ',' in date_str: # Aug 21, 2019
|
181 |
-
sp = date_str.replace(',', '').split(' ')
|
182 |
-
date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + sp[2]
|
183 |
-
dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
|
184 |
-
tweet_dict['date'] = dateu
|
185 |
-
elif len(date_str.split(' ')) == 3: # 28 Jun 19
|
186 |
-
sp = date_str.split(' ')
|
187 |
-
if len(sp[2]) == 2:
|
188 |
-
sp[2] = '20' + sp[2]
|
189 |
-
date_str_formatted = sp[0] + ' ' + sp[1] + ' ' + sp[2]
|
190 |
-
dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
|
191 |
-
tweet_dict['date'] = dateu
|
192 |
-
else: # Aug 21
|
193 |
-
sp = date_str.split(' ')
|
194 |
-
date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + str(datetime.date.today().year)
|
195 |
-
dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
|
196 |
-
tweet_dict['date'] = dateu
|
197 |
-
|
198 |
-
favorited_tweets_list.append(tweet_dict)
|
199 |
-
|
200 |
-
except Exception as e:
|
201 |
-
logme.critical(__name__ + ':Twint:favorite:favorite_field_lack')
|
202 |
-
print("shit: ", date_str, " ", str(e))
|
203 |
-
|
204 |
-
try:
|
205 |
-
self.config.favorited_tweets_list += favorited_tweets_list
|
206 |
-
except AttributeError:
|
207 |
-
self.config.favorited_tweets_list = favorited_tweets_list
|
208 |
-
|
209 |
-
async def profile(self):
|
210 |
-
await self.Feed()
|
211 |
-
logme.debug(__name__ + ':Twint:profile')
|
212 |
-
for tweet in self.feed:
|
213 |
-
self.count += 1
|
214 |
-
await output.Tweets(tweet, self.config, self.conn)
|
215 |
-
|
216 |
-
async def tweets(self):
|
217 |
-
await self.Feed()
|
218 |
-
# TODO : need to take care of this later
|
219 |
-
if self.config.Location:
|
220 |
-
logme.debug(__name__ + ':Twint:tweets:location')
|
221 |
-
self.count += await get.Multi(self.feed, self.config, self.conn)
|
222 |
-
else:
|
223 |
-
logme.debug(__name__ + ':Twint:tweets:notLocation')
|
224 |
-
for tweet in self.feed:
|
225 |
-
self.count += 1
|
226 |
-
await output.Tweets(tweet, self.config, self.conn)
|
227 |
-
|
228 |
-
async def main(self, callback=None):
|
229 |
-
|
230 |
-
task = ensure_future(self.run()) # Might be changed to create_task in 3.7+.
|
231 |
-
|
232 |
-
if callback:
|
233 |
-
task.add_done_callback(callback)
|
234 |
-
|
235 |
-
await task
|
236 |
-
|
237 |
-
async def run(self):
    """Main scrape loop: resolve the target account, then page until done.

    Dispatches to follow()/favorite()/profile()/tweets() depending on which
    config mode flag is set, stopping when the feed empties or the configured
    Limit is reached.
    """
    # A different user agent is used for search vs. profile endpoints.
    if self.config.TwitterSearch:
        self.user_agent = await get.RandomUserAgent(wa=True)
    else:
        self.user_agent = await get.RandomUserAgent()

    # Resolve username <-> user id so both are always available downstream.
    if self.config.User_id is not None and self.config.Username is None:
        logme.debug(__name__ + ':Twint:main:user_id')
        self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token,
                                                  self.config.Guest_token)

    if self.config.Username is not None and self.config.User_id is None:
        logme.debug(__name__ + ':Twint:main:username')

        self.config.User_id = await get.User(self.config.Username, self.config, self.conn, True)
        if self.config.User_id is None:
            raise ValueError("Cannot find twitter account with name = " + self.config.Username)

    # TODO : will need to modify it to work with the new endpoints
    if self.config.TwitterSearch and self.config.Since and self.config.Until:
        logme.debug(__name__ + ':Twint:main:search+since+until')
        # Walk the datelock window; Since/Until are re-serialized each pass.
        while self.d.since < self.d.until:
            self.config.Since = datetime.datetime.strftime(self.d.since, "%Y-%m-%d %H:%M:%S")
            self.config.Until = datetime.datetime.strftime(self.d.until, "%Y-%m-%d %H:%M:%S")
            if len(self.feed) > 0:
                await self.tweets()
            else:
                logme.debug(__name__ + ':Twint:main:gettingNewTweets')
                break

            if get.Limit(self.config.Limit, self.count):
                break
    elif self.config.Lookup:
        await self.Lookup()
    else:
        logme.debug(__name__ + ':Twint:main:not-search+since+until')
        while True:
            if len(self.feed) > 0:
                # Exactly one mode flag is expected to be True per session.
                if self.config.Followers or self.config.Following:
                    logme.debug(__name__ + ':Twint:main:follow')
                    await self.follow()
                elif self.config.Favorites:
                    logme.debug(__name__ + ':Twint:main:favorites')
                    await self.favorite()
                elif self.config.Profile:
                    logme.debug(__name__ + ':Twint:main:profile')
                    await self.profile()
                elif self.config.TwitterSearch:
                    logme.debug(__name__ + ':Twint:main:twitter-search')
                    await self.tweets()
            else:
                logme.debug(__name__ + ':Twint:main:no-more-tweets')
                break

            # logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
            if get.Limit(self.config.Limit, self.count):
                logme.debug(__name__ + ':Twint:main:reachedLimit')
                break

    if self.config.Count:
        verbose.Count(self.count, self.config)
|
299 |
-
async def Lookup(self):
    """Resolve the configured account and store its full profile.

    If only a User_id is configured, the username is fetched first.
    Any exception is logged and re-raised.
    """
    logme.debug(__name__ + ':Twint:Lookup')

    try:
        if self.config.User_id is not None and self.config.Username is None:
            logme.debug(__name__ + ':Twint:Lookup:user_id')
            self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token,
                                                      self.config.Guest_token)
        await get.User(self.config.Username, self.config, db.Conn(self.config.Database))

    except Exception as e:
        logme.exception(__name__ + ':Twint:Lookup:Unexpected exception occurred.')
        raise
-
|
314 |
-
def run(config, callback=None):
    """Entry point: ensure an asyncio event loop exists, then run a Twint session.

    A fresh loop is created when the current thread has none ("no current
    event loop" RuntimeError); any other failure is logged and re-raised.
    """
    logme.debug(__name__ + ':run')
    try:
        get_event_loop()
    except RuntimeError as e:
        if "no current event loop" in str(e):
            set_event_loop(new_event_loop())
        else:
            logme.exception(__name__ + ':run:Unexpected exception while handling an expected RuntimeError.')
            raise
    except Exception as e:
        logme.exception(
            __name__ + ':run:Unexpected exception occurred while attempting to get or create a new event loop.')
        raise

    get_event_loop().run_until_complete(Twint(config).main(callback))
|
332 |
-
def Favorites(config):
    """Scrape the tweets liked by config.Username.

    Resets the other mutually-exclusive mode flags before running.
    """
    logme.debug(__name__ + ':Favorites')
    config.Favorites = True
    config.Following = False
    config.Followers = False
    config.Profile = False
    config.TwitterSearch = False
    run(config)
    if config.Pandas_au:
        storage.panda._autoget("tweet")
-
|
344 |
-
def Followers(config):
    """Scrape the accounts following config.Username.

    Resets the other mode flags, then optionally materializes pandas frames
    and clears the in-memory follow list.
    """
    logme.debug(__name__ + ':Followers')
    config.Followers = True
    config.Following = False
    config.Profile = False
    config.Favorites = False
    config.TwitterSearch = False
    run(config)
    if config.Pandas_au:
        storage.panda._autoget("followers")
        if config.User_full:
            storage.panda._autoget("user")
    if config.Pandas_clean and not config.Store_object:
        # storage.panda.clean()
        output._clean_follow_list()
360 |
-
|
361 |
-
def Following(config):
    """Scrape the accounts that config.Username follows.

    Resets the other mode flags, then optionally materializes pandas frames
    and clears the in-memory follow list.
    """
    logme.debug(__name__ + ':Following')
    config.Following = True
    config.Followers = False
    config.Profile = False
    config.Favorites = False
    config.TwitterSearch = False
    run(config)
    if config.Pandas_au:
        storage.panda._autoget("following")
        if config.User_full:
            storage.panda._autoget("user")
    if config.Pandas_clean and not config.Store_object:
        # storage.panda.clean()
        output._clean_follow_list()
-
|
378 |
-
def Lookup(config):
    """Look up and store the profile of the configured account.

    Resets the mutually-exclusive mode flags so only Lookup runs, then
    optionally materializes the pandas "user" frame.
    """
    logme.debug(__name__ + ':Lookup')
    config.Profile = False
    config.Lookup = True
    config.Favorites = False
    # Bugfix: was `config.FOllowing = False`, which created a junk attribute
    # and left a previously-set config.Following flag enabled, so a Lookup
    # after a Following run could dispatch to the follow scraper instead.
    config.Following = False
    config.Followers = False
    config.TwitterSearch = False
    run(config)
    if config.Pandas_au:
        storage.panda._autoget("user")
-
|
391 |
-
def Profile(config):
    """Scrape the profile timeline of config.Username.

    Resets the other mutually-exclusive mode flags before running.
    """
    logme.debug(__name__ + ':Profile')
    config.Profile = True
    config.Favorites = False
    config.Following = False
    config.Followers = False
    config.TwitterSearch = False
    run(config)
    if config.Pandas_au:
        storage.panda._autoget("tweet")
-
|
403 |
-
def Search(config, callback=None):
    """Run a Twitter search with the configured query.

    callback is forwarded to run() and attached to the session task.
    """
    logme.debug(__name__ + ':Search')
    config.TwitterSearch = True
    config.Favorites = False
    config.Following = False
    config.Followers = False
    config.Profile = False
    run(config, callback)
    if config.Pandas_au:
        storage.panda._autoget("tweet")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/storage/__init__.py
DELETED
File without changes
|
twitter-scraper/twint-master/twint/storage/db.py
DELETED
@@ -1,297 +0,0 @@
|
|
1 |
-
import sqlite3
|
2 |
-
import sys
|
3 |
-
import time
|
4 |
-
import hashlib
|
5 |
-
|
6 |
-
from datetime import datetime
|
7 |
-
|
8 |
-
def Conn(database):
    """Return an open sqlite3 connection for `database`, or "" when unset.

    Note: init() returns an error *string* on failure; in that case the
    error is printed and the process exits with status 1.
    """
    if database:
        print("[+] Inserting into Database: " + str(database))
        conn = init(database)
        if isinstance(conn, str):  # error
            print(conn)
            sys.exit(1)
    else:
        conn = ""

    return conn
|
20 |
-
def init(db):
    """Open (creating if needed) the sqlite schema used by twint.

    Creates the users/tweets/retweets/replies/favorites/followers/following
    tables plus the screen-name follow tables. Returns the open connection,
    or the exception message as a string on failure (checked by Conn()).
    """
    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()

        # Profile snapshots; (id, hex_dig) key allows multiple snapshots per
        # user, one row per distinct profile hash.
        table_users = """
            CREATE TABLE IF NOT EXISTS
                users(
                    id integer not null,
                    id_str text not null,
                    name text,
                    username text not null,
                    bio text,
                    location text,
                    url text,
                    join_date text not null,
                    join_time text not null,
                    tweets integer,
                    following integer,
                    followers integer,
                    likes integer,
                    media integer,
                    private integer not null,
                    verified integer not null,
                    profile_image_url text not null,
                    background_image text,
                    hex_dig text not null,
                    time_update integer not null,
                    CONSTRAINT users_pk PRIMARY KEY (id, hex_dig)
                );
            """
        cursor.execute(table_users)

        table_tweets = """
            CREATE TABLE IF NOT EXISTS
                tweets (
                    id integer not null,
                    id_str text not null,
                    tweet text default '',
                    language text default '',
                    conversation_id text not null,
                    created_at integer not null,
                    date text not null,
                    time text not null,
                    timezone text not null,
                    place text default '',
                    replies_count integer,
                    likes_count integer,
                    retweets_count integer,
                    user_id integer not null,
                    user_id_str text not null,
                    screen_name text not null,
                    name text default '',
                    link text,
                    mentions text,
                    hashtags text,
                    cashtags text,
                    urls text,
                    photos text,
                    thumbnail text,
                    quote_url text,
                    video integer,
                    geo text,
                    near text,
                    source text,
                    time_update integer not null,
                    `translate` text default '',
                    trans_src text default '',
                    trans_dest text default '',
                    PRIMARY KEY (id)
                );
            """
        cursor.execute(table_tweets)

        table_retweets = """
            CREATE TABLE IF NOT EXISTS
                retweets(
                    user_id integer not null,
                    username text not null,
                    tweet_id integer not null,
                    retweet_id integer not null,
                    retweet_date integer,
                    CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
                    CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
                    CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
                );
            """
        cursor.execute(table_retweets)

        table_reply_to = """
            CREATE TABLE IF NOT EXISTS
                replies(
                    tweet_id integer not null,
                    user_id integer not null,
                    username text not null,
                    CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id),
                    CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
                );
            """
        cursor.execute(table_reply_to)

        table_favorites = """
            CREATE TABLE IF NOT EXISTS
                favorites(
                    user_id integer not null,
                    tweet_id integer not null,
                    CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id),
                    CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id),
                    CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
                );
            """
        cursor.execute(table_favorites)

        table_followers = """
            CREATE TABLE IF NOT EXISTS
                followers (
                    id integer not null,
                    follower_id integer not null,
                    CONSTRAINT followers_pk PRIMARY KEY (id, follower_id),
                    CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
                    CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id)
                );
            """
        cursor.execute(table_followers)

        table_following = """
            CREATE TABLE IF NOT EXISTS
                following (
                    id integer not null,
                    following_id integer not null,
                    CONSTRAINT following_pk PRIMARY KEY (id, following_id),
                    CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
                    CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id)
                );
            """
        cursor.execute(table_following)

        # Screen-name variants of the follow tables (no id resolution needed).
        table_followers_names = """
            CREATE TABLE IF NOT EXISTS
                followers_names (
                    user text not null,
                    time_update integer not null,
                    follower text not null,
                    PRIMARY KEY (user, follower)
                );
            """
        cursor.execute(table_followers_names)

        table_following_names = """
            CREATE TABLE IF NOT EXISTS
                following_names (
                    user text not null,
                    time_update integer not null,
                    follows text not null,
                    PRIMARY KEY (user, follows)
                );
            """
        cursor.execute(table_following_names)

        return conn
    except Exception as e:
        return str(e)
|
183 |
-
def fTable(Followers):
    """Return the screen-name follow table for the requested direction.

    True -> "followers_names", False -> "following_names".
    """
    return "followers_names" if Followers else "following_names"
|
191 |
-
def uTable(Followers):
    """Return the user-id follow table for the requested direction.

    True -> "followers", False -> "following".
    """
    return "followers" if Followers else "following"
|
199 |
-
def follow(conn, Username, Followers, User):
    """Insert one screen-name follow edge; duplicate edges are ignored.

    Direction (followers vs. following table) is picked by the Followers flag.
    time_update is stored as epoch milliseconds.
    """
    try:
        time_ms = round(time.time()*1000)
        cursor = conn.cursor()
        entry = (User, time_ms, Username,)
        table = fTable(Followers)
        query = f"INSERT INTO {table} VALUES(?,?,?)"
        cursor.execute(query, entry)
        conn.commit()
    except sqlite3.IntegrityError:
        pass  # edge already recorded; primary key rejected the duplicate
211 |
-
def get_hash_id(conn, id):
    """Return the stored profile hash (hex_dig) for a user id, or -1 if absent."""
    cursor = conn.cursor()
    cursor.execute('SELECT hex_dig FROM users WHERE id = ? LIMIT 1', (id,))
    resultset = cursor.fetchall()
    return resultset[0][0] if resultset else -1
|
217 |
-
def user(conn, config, User):
    """Store a user snapshot (only when the profile changed) plus follow edges.

    A sha256 over the snapshot fields detects profile changes between runs;
    an unchanged profile is skipped. When scraping followers/following, the
    (config.User_id, User.id) edge is also recorded. Duplicates are ignored.
    """
    try:
        time_ms = round(time.time()*1000)
        cursor = conn.cursor()
        # Field order must match the users table column order.
        user = [int(User.id), User.id, User.name, User.username, User.bio, User.location, User.url,User.join_date, User.join_time, User.tweets, User.following, User.followers, User.likes, User.media_count, User.is_private, User.is_verified, User.avatar, User.background_image]

        hex_dig = hashlib.sha256(','.join(str(v) for v in user).encode()).hexdigest()
        entry = tuple(user) + (hex_dig,time_ms,)
        old_hash = get_hash_id(conn, User.id)

        if old_hash == -1 or old_hash != hex_dig:
            # New user, or the profile changed since the last stored snapshot.
            query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
            cursor.execute(query, entry)
        else:
            pass  # unchanged profile: keep the existing snapshot

        if config.Followers or config.Following:
            table = uTable(config.Followers)
            query = f"INSERT INTO {table} VALUES(?,?)"
            cursor.execute(query, (config.User_id, int(User.id)))

        conn.commit()
    except sqlite3.IntegrityError:
        pass  # row (or edge) already present
242 |
-
def tweets(conn, Tweet, config):
    """Insert one tweet row plus related favorites/retweets/replies rows.

    The entry tuple order must match the 33 columns of the tweets table.
    Duplicate inserts (same primary key) are silently ignored.
    """
    try:
        time_ms = round(time.time()*1000)
        cursor = conn.cursor()
        entry = (Tweet.id,
                 Tweet.id_str,
                 Tweet.tweet,
                 Tweet.lang,
                 Tweet.conversation_id,
                 Tweet.datetime,
                 Tweet.datestamp,
                 Tweet.timestamp,
                 Tweet.timezone,
                 Tweet.place,
                 Tweet.replies_count,
                 Tweet.likes_count,
                 Tweet.retweets_count,
                 Tweet.user_id,
                 Tweet.user_id_str,
                 Tweet.username,
                 Tweet.name,
                 Tweet.link,
                 ",".join(Tweet.mentions),
                 ",".join(Tweet.hashtags),
                 ",".join(Tweet.cashtags),
                 ",".join(Tweet.urls),
                 ",".join(Tweet.photos),
                 Tweet.thumbnail,
                 Tweet.quote_url,
                 Tweet.video,
                 Tweet.geo,
                 Tweet.near,
                 Tweet.source,
                 time_ms,
                 Tweet.translate,
                 Tweet.trans_src,
                 Tweet.trans_dest)
        cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)

        if config.Favorites:
            # Favorites mode: link the tweet to the account whose likes we scrape.
            query = 'INSERT INTO favorites VALUES(?,?)'
            cursor.execute(query, (config.User_id, Tweet.id))

        if Tweet.retweet:
            query = 'INSERT INTO retweets VALUES(?,?,?,?,?)'
            # retweet_date is stored as a POSIX timestamp.
            _d = datetime.timestamp(datetime.strptime(Tweet.retweet_date, "%Y-%m-%d %H:%M:%S"))
            cursor.execute(query, (int(Tweet.user_rt_id), Tweet.user_rt, Tweet.id, int(Tweet.retweet_id), _d))

        if Tweet.reply_to:
            for reply in Tweet.reply_to:
                query = 'INSERT INTO replies VALUES(?,?,?)'
                cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username']))

        conn.commit()
    except sqlite3.IntegrityError:
        pass  # tweet already stored
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/storage/elasticsearch.py
DELETED
@@ -1,364 +0,0 @@
|
|
1 |
-
## TODO - Fix Weekday situation
|
2 |
-
from elasticsearch import Elasticsearch, helpers
|
3 |
-
from geopy.geocoders import Nominatim
|
4 |
-
from datetime import datetime
|
5 |
-
import contextlib
|
6 |
-
import sys
|
7 |
-
|
8 |
-
_index_tweet_status = False
|
9 |
-
_index_follow_status = False
|
10 |
-
_index_user_status = False
|
11 |
-
_is_near_def = False
|
12 |
-
_is_location_def = False
|
13 |
-
_near = {}
|
14 |
-
_location = {}
|
15 |
-
|
16 |
-
geolocator = Nominatim(user_agent="twint-1.2")
|
17 |
-
|
18 |
-
class RecycleObject(object):
    """Write-sink that discards everything; swapped in for sys.stdout by nostdout()."""
    def write(self, junk): pass
    def flush(self): pass
|
22 |
-
def getLocation(place, **options):
    """Geocode `place` with Nominatim.

    With near=True or location=True, the coordinates are stored in the
    module globals `_near` / `_location` and True is returned. Otherwise
    the {"lat", "lon"} dict is returned directly, or {} when geocoding
    found nothing.
    """
    location = geolocator.geocode(place,timeout=1000)
    if location:
        if options.get("near"):
            global _near
            _near = {"lat": location.latitude, "lon": location.longitude}
            return True
        elif options.get("location"):
            global _location
            _location = {"lat": location.latitude, "lon": location.longitude}
            return True
        return {"lat": location.latitude, "lon": location.longitude}
    else:
        return {}
|
37 |
-
def handleIndexResponse(response):
    """Interpret an Elasticsearch index-creation response.

    Returns True when the index is usable (already existed — status 400
    with ignore=400 — or was created with shards acknowledged), else False.
    """
    try:
        if response["status"] == 400:
            # 400 with ignore=400 means the index already exists: fine.
            return True
    except KeyError:
        pass
    if response["acknowledged"]:
        print("[+] Index \"" + response["index"] + "\" created!")
    else:
        print("[x] error index creation :: storage.elasticsearch.handleIndexCreation")
    if response["shards_acknowledged"]:
        print("[+] Shards acknowledged, everything is ready to be used!")
        return True
    else:
        print("[x] error with shards :: storage.elasticsearch.HandleIndexCreation")
        return False
|
54 |
-
def createIndex(config, instance, **scope):
    """Create the tweet/follow/user index (scope="tweet"|"follow"|"user").

    Each branch defines the mapping + settings for its index and creates it
    with ignore=400 (already-exists is OK). Returns True on success/exists,
    False for an unknown scope or creation failure.
    """
    if scope.get("scope") == "tweet":
        tweets_body = {
            "mappings": {
                "properties": {
                    "id": {"type": "long"},
                    "conversation_id": {"type": "long"},
                    "created_at": {"type": "text"},
                    "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "timezone": {"type": "keyword"},
                    "place": {"type": "keyword"},
                    "location": {"type": "keyword"},
                    "tweet": {"type": "text"},
                    "lang": {"type": "keyword"},
                    "hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
                    "cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
                    "user_id_str": {"type": "keyword"},
                    "username": {"type": "keyword", "normalizer": "hashtag_normalizer"},
                    "name": {"type": "text"},
                    "profile_image_url": {"type": "text"},
                    "day": {"type": "integer"},
                    "hour": {"type": "integer"},
                    "link": {"type": "text"},
                    "retweet": {"type": "text"},
                    "essid": {"type": "keyword"},
                    "nlikes": {"type": "integer"},
                    "nreplies": {"type": "integer"},
                    "nretweets": {"type": "integer"},
                    "quote_url": {"type": "text"},
                    "video": {"type":"integer"},
                    "thumbnail": {"type":"text"},
                    "search": {"type": "text"},
                    "near": {"type": "text"},
                    "geo_near": {"type": "geo_point"},
                    "geo_tweet": {"type": "geo_point"},
                    "photos": {"type": "text"},
                    "user_rt_id": {"type": "keyword"},
                    "mentions": {"type": "keyword", "normalizer": "hashtag_normalizer"},
                    "source": {"type": "keyword"},
                    "user_rt": {"type": "keyword"},
                    "retweet_id": {"type": "keyword"},
                    "reply_to": {
                        "type": "nested",
                        "properties": {
                            "user_id": {"type": "keyword"},
                            "username": {"type": "keyword"}
                        }
                    },
                    "retweet_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss", "ignore_malformed": True},
                    "urls": {"type": "keyword"},
                    "translate": {"type": "text"},
                    "trans_src": {"type": "keyword"},
                    "trans_dest": {"type": "keyword"},
                }
            },
            "settings": {
                "number_of_shards": 1,
                "analysis": {
                    "normalizer": {
                        # Case/accent-insensitive keyword matching for tags and names.
                        "hashtag_normalizer": {
                            "type": "custom",
                            "char_filter": [],
                            "filter": ["lowercase", "asciifolding"]
                        }
                    }
                }
            }
        }
        with nostdout():
            resp = instance.indices.create(index=config.Index_tweets, body=tweets_body, ignore=400)
        return handleIndexResponse(resp)
    elif scope.get("scope") == "follow":
        follow_body = {
            "mappings": {
                "properties": {
                    "user": {"type": "keyword"},
                    "follow": {"type": "keyword"},
                    "essid": {"type": "keyword"}
                }
            },
            "settings": {
                "number_of_shards": 1
            }
        }
        with nostdout():
            resp = instance.indices.create(index=config.Index_follow, body=follow_body, ignore=400)
        return handleIndexResponse(resp)
    elif scope.get("scope") == "user":
        user_body = {
            "mappings": {
                "properties": {
                    "id": {"type": "keyword"},
                    "name": {"type": "keyword"},
                    "username": {"type": "keyword"},
                    "bio": {"type": "text"},
                    "location": {"type": "keyword"},
                    "url": {"type": "text"},
                    "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "tweets": {"type": "integer"},
                    "following": {"type": "integer"},
                    "followers": {"type": "integer"},
                    "likes": {"type": "integer"},
                    "media": {"type": "integer"},
                    "private": {"type": "integer"},
                    "verified": {"type": "integer"},
                    "avatar": {"type": "text"},
                    "background_image": {"type": "text"},
                    "session": {"type": "keyword"},
                    "geo_user": {"type": "geo_point"}
                }
            },
            "settings": {
                "number_of_shards": 1
            }
        }
        with nostdout():
            resp = instance.indices.create(index=config.Index_users, body=user_body, ignore=400)
        return handleIndexResponse(resp)
    else:
        print("[x] error index pre-creation :: storage.elasticsearch.createIndex")
        return False
|
176 |
-
@contextlib.contextmanager
def nostdout():
    """Temporarily replace sys.stdout with a discarding sink.

    Restoration happens in a finally clause, so stdout is put back even
    when the with-body raises (the original leaked the replacement on
    exceptions, silencing all subsequent output).
    """
    savestdout = sys.stdout
    sys.stdout = RecycleObject()
    try:
        yield
    finally:
        sys.stdout = savestdout
|
183 |
-
def weekday(day):
    """Map an English weekday name to its 1-based number (Monday=1 .. Sunday=7).

    Raises KeyError for anything that is not a capitalized weekday name.
    """
    names = ("Monday", "Tuesday", "Wednesday", "Thursday",
             "Friday", "Saturday", "Sunday")
    lookup = {name: num for num, name in enumerate(names, start=1)}
    return lookup[day]
|
196 |
-
def Tweet(Tweet, config):
    """Index a single tweet document into the configured tweets index.

    Builds the bulk-action document (base fields plus optional retweet,
    reply, media, mention, url, geo and translation fields), lazily creates
    the index on first use, and bulk-inserts the single action.
    """
    global _index_tweet_status
    global _is_near_def
    date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z")

    actions = []

    # Tweet objects from some feeds lack a .retweet attribute entirely.
    try:
        retweet = Tweet.retweet
    except AttributeError:
        retweet = None

    dt = f"{Tweet.datestamp} {Tweet.timestamp}"

    j_data = {
        "_index": config.Index_tweets,
        "_id": str(Tweet.id) + "_raw_" + config.Essid,
        "_source": {
            "id": str(Tweet.id),
            "conversation_id": Tweet.conversation_id,
            "created_at": Tweet.datetime,
            "date": dt,
            "timezone": Tweet.timezone,
            "place": Tweet.place,
            "tweet": Tweet.tweet,
            "language": Tweet.lang,
            "hashtags": Tweet.hashtags,
            "cashtags": Tweet.cashtags,
            "user_id_str": Tweet.user_id_str,
            "username": Tweet.username,
            "name": Tweet.name,
            "day": date_obj.weekday(),
            "hour": date_obj.hour,
            "link": Tweet.link,
            "retweet": retweet,
            "essid": config.Essid,
            "nlikes": int(Tweet.likes_count),
            "nreplies": int(Tweet.replies_count),
            "nretweets": int(Tweet.retweets_count),
            "quote_url": Tweet.quote_url,
            "video": Tweet.video,
            "search": str(config.Search),
            "near": config.Near
        }
    }
    if retweet is not None:
        j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
        j_data["_source"].update({"user_rt": Tweet.user_rt})
        j_data["_source"].update({"retweet_id": Tweet.retweet_id})
        j_data["_source"].update({"retweet_date": Tweet.retweet_date})
    if Tweet.reply_to:
        j_data["_source"].update({"reply_to": Tweet.reply_to})
    if Tweet.photos:
        _photos = []
        for photo in Tweet.photos:
            _photos.append(photo)
        j_data["_source"].update({"photos": _photos})
    if Tweet.thumbnail:
        j_data["_source"].update({"thumbnail": Tweet.thumbnail})
    if Tweet.mentions:
        _mentions = []
        for mention in Tweet.mentions:
            _mentions.append(mention)
        j_data["_source"].update({"mentions": _mentions})
    if Tweet.urls:
        _urls = []
        for url in Tweet.urls:
            _urls.append(url)
        j_data["_source"].update({"urls": _urls})
    if config.Near or config.Geo:
        # Geocode the search origin only once per session (cached in _near).
        if not _is_near_def:
            __geo = ""
            __near = ""
            if config.Geo:
                __geo = config.Geo
            if config.Near:
                __near = config.Near
            _is_near_def = getLocation(__near + __geo, near=True)
        if _near:
            j_data["_source"].update({"geo_near": _near})
    if Tweet.place:
        _t_place = getLocation(Tweet.place)
        if _t_place:
            j_data["_source"].update({"geo_tweet": getLocation(Tweet.place)})
    if Tweet.source:
        # Bugfix: was `Tweet.Source` (capital S), which raised AttributeError
        # for every tweet that carried a source field; the attribute used
        # everywhere else in this module is lowercase `source`.
        j_data["_source"].update({"source": Tweet.source})
    if config.Translate:
        j_data["_source"].update({"translate": Tweet.translate})
        j_data["_source"].update({"trans_src": Tweet.trans_src})
        j_data["_source"].update({"trans_dest": Tweet.trans_dest})

    actions.append(j_data)

    es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
    if not _index_tweet_status:
        _index_tweet_status = createIndex(config, es, scope="tweet")
    with nostdout():
        helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
    actions = []
|
296 |
-
def Follow(user, config):
    """Index one follow edge; direction depends on config.Following.

    Following mode: config.Username follows `user`.
    Followers mode: `user` follows config.Username.
    """
    global _index_follow_status
    actions = []

    if config.Following:
        _user = config.Username
        _follow = user
    else:
        _user = user
        _follow = config.Username
    j_data = {
        "_index": config.Index_follow,
        "_id": _user + "_" + _follow + "_" + config.Essid,
        "_source": {
            "user": _user,
            "follow": _follow,
            "essid": config.Essid
        }
    }
    actions.append(j_data)

    es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
    if not _index_follow_status:
        # Lazily create the follow index on first use.
        _index_follow_status = createIndex(config, es, scope="follow")
    with nostdout():
        helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
    actions = []
|
324 |
-
def UserProfile(user, config):
    """Index a scraped user-profile document into Elasticsearch."""
    global _index_user_status
    global _is_location_def

    doc = {
        "_index": config.Index_users,
        "_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + config.Essid,
        "_source": {
            "id": user.id,
            "name": user.name,
            "username": user.username,
            "bio": user.bio,
            "location": user.location,
            "url": user.url,
            "join_datetime": user.join_date + " " + user.join_time,
            "tweets": user.tweets,
            "following": user.following,
            "followers": user.followers,
            "likes": user.likes,
            "media": user.media_count,
            "private": user.is_private,
            "verified": user.is_verified,
            "avatar": user.avatar,
            "background_image": user.background_image,
            "session": config.Essid
        }
    }
    if config.Location:
        if not _is_location_def:
            _is_location_def = getLocation(user.location, location=True)
        # NOTE(review): `_location` is not defined in this function; it is
        # presumably a module-level global populated elsewhere — confirm.
        if _location:
            doc["_source"].update({"geo_user": _location})
    actions = [doc]

    es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
    if not _index_user_status:
        _index_user_status = createIndex(config, es, scope="user")
    # Silence the chatter emitted by the bulk helper.
    with nostdout():
        helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
    actions = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/storage/panda.py
DELETED
@@ -1,196 +0,0 @@
|
|
1 |
-
import datetime, pandas as pd, warnings
from time import strftime, localtime
from twint.tweet import Tweet_formats

# Lazily built DataFrames; populated by _autoget() from the row buffers.
Tweets_df = None
Follow_df = None
User_df = None

# Per-type row buffers that update() appends to and _autoget() flushes.
_object_blocks = {
    "tweet": [],
    "user": [],
    "following": [],
    "followers": []
}

# Weekday name -> 1..7 numbering used for the tweet "day" column.
weekdays = {
    "Monday": 1,
    "Tuesday": 2,
    "Wednesday": 3,
    "Thursday": 4,
    "Friday": 5,
    "Saturday": 6,
    "Sunday": 7,
}

# Type of the object most recently passed to update().
_type = ""
|
27 |
-
|
28 |
-
def _concat(df, _type):
    """Merge the buffered rows of *_type* into *df*.

    Returns a fresh DataFrame when *df* is None, otherwise the
    concatenation of *df* with the buffered rows.
    """
    block_df = pd.DataFrame(_object_blocks[_type])
    if df is None:
        return block_df
    return pd.concat([df, block_df], sort=True)
|
35 |
-
|
36 |
-
def _autoget(_type):
    """Flush the buffered rows of *_type* into the matching module DataFrame."""
    global Tweets_df
    global Follow_df
    global User_df

    if _type == "tweet":
        Tweets_df = _concat(Tweets_df, _type)
    elif _type in ("followers", "following"):
        Follow_df = _concat(Follow_df, _type)
    elif _type == "user":
        User_df = _concat(User_df, _type)
    else:
        # Bug fix: the original called an undefined `error(...)` helper here,
        # which raised NameError instead of reporting the bad type.
        print("[x] Wrong type of object passed")
|
49 |
-
|
50 |
-
|
51 |
-
def update(object, config):
    """Buffer one scraped object (tweet / user / follow dict) as a row.

    The row is appended to ``_object_blocks`` under the type inferred from
    the object's class.  The parameter keeps its original name ``object``
    (shadowing the builtin) to preserve the call signature.
    """
    global _type

    # Infer the row type; plain dicts carry follower/following lists.
    if object.__class__.__name__ == "tweet":
        _type = "tweet"
    elif object.__class__.__name__ == "user":
        _type = "user"
    elif object.__class__.__name__ == "dict":
        _type = config.Following*"following" + config.Followers*"followers"

    if _type == "tweet":
        Tweet = object
        # Epoch milliseconds for the "created_at" column.
        datetime_ms = datetime.datetime.strptime(Tweet.datetime, Tweet_formats['datetime']).timestamp() * 1000
        day = weekdays[strftime("%A", localtime(datetime_ms/1000))]
        dt = f"{object.datestamp} {object.timestamp}"
        _data = {
            "id": str(Tweet.id),
            "conversation_id": Tweet.conversation_id,
            "created_at": datetime_ms,
            "date": dt,
            "timezone": Tweet.timezone,
            "place": Tweet.place,
            "tweet": Tweet.tweet,
            "language": Tweet.lang,
            "hashtags": Tweet.hashtags,
            "cashtags": Tweet.cashtags,
            "user_id": Tweet.user_id,
            "user_id_str": Tweet.user_id_str,
            "username": Tweet.username,
            "name": Tweet.name,
            "day": day,
            "hour": strftime("%H", localtime(datetime_ms/1000)),
            "link": Tweet.link,
            "urls": Tweet.urls,
            "photos": Tweet.photos,
            "video": Tweet.video,
            "thumbnail": Tweet.thumbnail,
            "retweet": Tweet.retweet,
            "nlikes": int(Tweet.likes_count),
            "nreplies": int(Tweet.replies_count),
            "nretweets": int(Tweet.retweets_count),
            "quote_url": Tweet.quote_url,
            "search": str(config.Search),
            "near": Tweet.near,
            "geo": Tweet.geo,
            "source": Tweet.source,
            "user_rt_id": Tweet.user_rt_id,
            "user_rt": Tweet.user_rt,
            "retweet_id": Tweet.retweet_id,
            "reply_to": Tweet.reply_to,
            "retweet_date": Tweet.retweet_date,
            "translate": Tweet.translate,
            "trans_src": Tweet.trans_src,
            "trans_dest": Tweet.trans_dest
        }
        _object_blocks[_type].append(_data)
    elif _type == "user":
        user = object
        try:
            background_image = user.background_image
        except AttributeError:
            # Bug fix: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit; only a missing attribute
            # is expected here.
            background_image = ""
        _data = {
            "id": user.id,
            "name": user.name,
            "username": user.username,
            "bio": user.bio,
            "url": user.url,
            "join_datetime": user.join_date + " " + user.join_time,
            "join_date": user.join_date,
            "join_time": user.join_time,
            "tweets": user.tweets,
            "location": user.location,
            "following": user.following,
            "followers": user.followers,
            "likes": user.likes,
            "media": user.media_count,
            "private": user.is_private,
            "verified": user.is_verified,
            "avatar": user.avatar,
            "background_image": background_image,
        }
        _object_blocks[_type].append(_data)
    elif _type == "followers" or _type == "following":
        _data = {
            config.Following*"following" + config.Followers*"followers" :
                {config.Username: object[_type]}
        }
        _object_blocks[_type] = _data
    else:
        print("Wrong type of object passed!")
|
147 |
-
|
148 |
-
|
149 |
-
def clean():
    """Reset every row buffer and drop the module-level DataFrames."""
    global Tweets_df
    global Follow_df
    global User_df
    for key in ("tweet", "following", "followers", "user"):
        _object_blocks[key].clear()
    Tweets_df = None
    Follow_df = None
    User_df = None
|
160 |
-
|
161 |
-
def save(_filename, _dataframe, **options):
    """Persist *_dataframe* under *_filename* as HDF5 (default) or Pickle.

    Options: ``dataname`` (HDF5 key, default "twint") and ``type``
    (unset for HDF5, or "Pickle").
    """
    _dataname = options.get("dataname") or "twint"

    fmt = options.get("type")
    if not fmt:
        # pandas emits performance warnings for object columns; suppress them.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _store = pd.HDFStore(_filename + ".h5")
            _store[_dataname] = _dataframe
            _store.close()
    elif fmt == "Pickle":
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _dataframe.to_pickle(_filename + ".pkl")
    else:
        print("""Please specify: filename, DataFrame, DataFrame name and type
              (HDF5, default, or Pickle)""")
|
180 |
-
|
181 |
-
def read(_filename, **options):
    """Load a DataFrame previously written by :func:`save`.

    Options: ``dataname`` (HDF5 key, default "twint") and ``type``
    (unset for HDF5, or "Pickle").
    """
    _dataname = options.get("dataname") or "twint"

    fmt = options.get("type")
    if not fmt:
        # Bug fix: the original never closed the HDFStore (resource leak);
        # close it once the DataFrame has been pulled out.
        _store = pd.HDFStore(_filename + ".h5")
        try:
            return _store[_dataname]
        finally:
            _store.close()
    elif fmt == "Pickle":
        return pd.read_pickle(_filename + ".pkl")
    else:
        print("""Please specify: DataFrame, DataFrame name (twint as default),
              filename and type (HDF5, default, or Pickle""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/storage/write.py
DELETED
@@ -1,77 +0,0 @@
|
|
1 |
-
from . import write_meta as meta
|
2 |
-
import csv
|
3 |
-
import json
|
4 |
-
import os
|
5 |
-
|
6 |
-
def outputExt(objType, fType):
    """Return the default output suffix for *objType*, e.g. ``/tweets.csv``."""
    # Bare strings represent usernames, so pluralise them as such.
    if objType == "str":
        objType = "username"
    return f"/{objType}s.{fType}"
|
12 |
-
|
13 |
-
def addExt(base, objType, fType):
    """Append the conventional file name to *base* when it lacks an extension.

    A dot-free *base* is treated as a directory: it is created if missing
    and the default ``/<objType>s.<fType>`` name is appended.
    """
    if len(base.split('.')) == 1:
        createDirIfMissing(base)
        base += outputExt(objType, fType)
    return base
|
19 |
-
|
20 |
-
def Text(entry, f):
    """Append *entry* to text file *f* as one line (newlines flattened)."""
    # Bug fix: the original opened the file inside print() and never closed
    # it, leaking the handle until garbage collection.
    with open(f, "a", encoding="utf-8") as out:
        print(entry.replace('\n', ' '), file=out)
|
22 |
-
|
23 |
-
def Type(config):
    """Map the scrape configuration to the object type being written."""
    if config.User_full:
        return "user"
    if config.Followers or config.Following:
        return "username"
    return "tweet"
|
32 |
-
|
33 |
-
def struct(obj, custom, _type):
    """Resolve the output fieldnames and the row dict for *obj*.

    When *custom* lists fields, only those are extracted; otherwise the
    full default field set for *_type* is used.
    """
    if custom:
        fieldnames = custom
        # Perf fix: the original rebuilt the entire meta.Data(...) dict once
        # per requested field; build it a single time instead.
        data = meta.Data(obj, _type)
        row = {field: data[field] for field in fieldnames}
    else:
        fieldnames = meta.Fieldnames(_type)
        row = meta.Data(obj, _type)
    return fieldnames, row
|
44 |
-
|
45 |
-
def createDirIfMissing(dirname):
    """Create *dirname* (and any parents) if it does not already exist."""
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() guard.
    os.makedirs(dirname, exist_ok=True)
|
48 |
-
|
49 |
-
def Csv(obj, config):
    """Append *obj* as one row to the configured CSV output file."""
    _obj_type = obj.__class__.__name__
    if _obj_type == "str":
        _obj_type = "username"
    fieldnames, row = struct(obj, config.Custom[_obj_type], _obj_type)

    base = addExt(config.Output, _obj_type, "csv")
    dialect = 'excel-tab' if 'Tabs' in config.__dict__ else 'excel'

    # First write creates the file and emits the header row.
    if not os.path.exists(base):
        with open(base, "w", newline='', encoding="utf-8") as csv_file:
            csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect).writeheader()

    with open(base, "a", newline='', encoding="utf-8") as csv_file:
        csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect).writerow(row)
|
66 |
-
|
67 |
-
def Json(obj, config):
    """Append *obj* as one JSON line to the configured output file."""
    _obj_type = obj.__class__.__name__
    if _obj_type == "str":
        _obj_type = "username"
    _, data = struct(obj, config.Custom[_obj_type], _obj_type)

    base = addExt(config.Output, _obj_type, "json")

    with open(base, "a", newline='', encoding="utf-8") as json_file:
        json.dump(data, json_file, ensure_ascii=False)
        json_file.write("\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/storage/write_meta.py
DELETED
@@ -1,151 +0,0 @@
|
|
1 |
-
def tweetData(t):
    """Build the flat output-row dict for tweet object *t*.

    Count fields are coerced to int; the id is coerced from its string form.
    """
    return {
        "id": int(t.id),
        "conversation_id": t.conversation_id,
        "created_at": t.datetime,
        "date": t.datestamp,
        "time": t.timestamp,
        "timezone": t.timezone,
        "user_id": t.user_id,
        "username": t.username,
        "name": t.name,
        "place": t.place,
        "tweet": t.tweet,
        "language": t.lang,
        "mentions": t.mentions,
        "urls": t.urls,
        "photos": t.photos,
        "replies_count": int(t.replies_count),
        "retweets_count": int(t.retweets_count),
        "likes_count": int(t.likes_count),
        "hashtags": t.hashtags,
        "cashtags": t.cashtags,
        "link": t.link,
        "retweet": t.retweet,
        "quote_url": t.quote_url,
        "video": t.video,
        "thumbnail": t.thumbnail,
        "near": t.near,
        "geo": t.geo,
        "source": t.source,
        "user_rt_id": t.user_rt_id,
        "user_rt": t.user_rt,
        "retweet_id": t.retweet_id,
        "reply_to": t.reply_to,
        "retweet_date": t.retweet_date,
        "translate": t.translate,
        "trans_src": t.trans_src,
        "trans_dest": t.trans_dest,
    }
|
41 |
-
|
42 |
-
def tweetFieldnames():
    """Return the ordered column names for tweet rows (matches tweetData)."""
    return [
        "id", "conversation_id", "created_at", "date", "time",
        "timezone", "user_id", "username", "name", "place",
        "tweet", "language", "mentions", "urls", "photos",
        "replies_count", "retweets_count", "likes_count", "hashtags", "cashtags",
        "link", "retweet", "quote_url", "video", "thumbnail",
        "near", "geo", "source", "user_rt_id", "user_rt",
        "retweet_id", "reply_to", "retweet_date", "translate", "trans_src",
        "trans_dest",
    ]
|
82 |
-
|
83 |
-
def userData(u):
    """Build the flat output-row dict for user object *u*.

    Numeric profile counters are coerced to int.
    """
    return {
        "id": int(u.id),
        "name": u.name,
        "username": u.username,
        "bio": u.bio,
        "location": u.location,
        "url": u.url,
        "join_date": u.join_date,
        "join_time": u.join_time,
        "tweets": int(u.tweets),
        "following": int(u.following),
        "followers": int(u.followers),
        "likes": int(u.likes),
        "media": int(u.media_count),
        "private": u.is_private,
        "verified": u.is_verified,
        "profile_image_url": u.avatar,
        "background_image": u.background_image,
    }
|
104 |
-
|
105 |
-
def userFieldnames():
    """Return the ordered column names for user rows (matches userData)."""
    return [
        "id", "name", "username", "bio", "location",
        "url", "join_date", "join_time", "tweets", "following",
        "followers", "likes", "media", "private", "verified",
        "profile_image_url", "background_image",
    ]
|
126 |
-
|
127 |
-
def usernameData(u):
    """Wrap a bare username string *u* as a single-column row dict."""
    return {"username": u}
|
129 |
-
|
130 |
-
def usernameFieldnames():
    """Return the single column name used for username rows."""
    return ["username"]
|
132 |
-
|
133 |
-
def Data(obj, _type):
    """Dispatch *obj* to the row-builder for *_type* (default: tweet)."""
    if _type == "user":
        return userData(obj)
    if _type == "username":
        return usernameData(obj)
    return tweetData(obj)
|
142 |
-
|
143 |
-
def Fieldnames(_type):
    """Dispatch to the fieldname list for *_type* (default: tweet)."""
    if _type == "user":
        return userFieldnames()
    if _type == "username":
        return usernameFieldnames()
    return tweetFieldnames()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/token.py
DELETED
@@ -1,94 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
import time
|
3 |
-
|
4 |
-
import requests
|
5 |
-
import logging as logme
|
6 |
-
|
7 |
-
|
8 |
-
class TokenExpiryException(Exception):
    """Signals that the current guest token is no longer accepted."""

    def __init__(self, msg):
        super().__init__(msg)
|
11 |
-
|
12 |
-
|
13 |
-
class RefreshTokenException(Exception):
    """Signals that a new guest token could not be obtained."""

    def __init__(self, msg):
        super().__init__(msg)
|
16 |
-
|
17 |
-
|
18 |
-
class Token:
    """Fetches the Twitter guest token needed for unauthenticated API calls.

    First tries to scrape the token from the twitter.com HTML; if that
    fails, falls back to the guest/activate API endpoint.
    """

    def __init__(self, config):
        self._session = requests.Session()
        self._session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'})
        self.config = config
        self._retries = 5    # retries after the first attempt
        self._timeout = 10   # seconds per HTTP request
        self.url = 'https://twitter.com'

    def _request(self):
        """GET self.url with retries and exponential back-off.

        Raises RefreshTokenException after all attempts fail, clearing
        any stale guest token from the config.
        """
        for attempt in range(self._retries + 1):
            # The request is newly prepared on each retry because of
            # potential cookie updates.
            req = self._session.prepare_request(requests.Request('GET', self.url))
            logme.debug(f'Retrieving {req.url}')
            try:
                r = self._session.send(req, allow_redirects=True, timeout=self._timeout)
            except requests.exceptions.RequestException as exc:
                if attempt < self._retries:
                    retrying = ', retrying'
                    level = logme.WARNING
                else:
                    retrying = ''
                    level = logme.ERROR
                logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
            else:
                success, msg = (True, None)
                msg = f': {msg}' if msg else ''
                if success:
                    logme.debug(f'{req.url} retrieved successfully{msg}')
                    return r
            if attempt < self._retries:
                # TODO : might wanna tweak this back-off timer
                sleep_time = 2.0 * 2 ** attempt
                logme.info(f'Waiting {sleep_time:.0f} seconds')
                time.sleep(sleep_time)
        else:
            msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
            logme.fatal(msg)
            self.config.Guest_token = None
            raise RefreshTokenException(msg)

    def refresh(self):
        """Obtain a fresh guest token and store it on self.config."""
        logme.debug('Retrieving guest token')
        res = self._request()
        match = re.search(r'\("gt=(\d+);', res.text)
        if match:
            logme.debug('Found guest token in HTML')
            self.config.Guest_token = str(match.group(1))
        else:
            # Fall back to the activation API with browser-like headers.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
                'authority': 'api.twitter.com',
                'content-length': '0',
                'authorization': self.config.Bearer_token,
                'x-twitter-client-language': 'en',
                'x-csrf-token': res.cookies.get("ct0"),
                'x-twitter-active-user': 'yes',
                'content-type': 'application/x-www-form-urlencoded',
                'accept': '*/*',
                'sec-gpc': '1',
                'origin': 'https://twitter.com',
                'sec-fetch-site': 'same-site',
                'sec-fetch-mode': 'cors',
                'sec-fetch-dest': 'empty',
                'referer': 'https://twitter.com/',
                'accept-language': 'en-US',
            }
            self._session.headers.update(headers)
            req = self._session.prepare_request(requests.Request('POST', 'https://api.twitter.com/1.1/guest/activate.json'))
            res = self._session.send(req, allow_redirects=True, timeout=self._timeout)
            if 'guest_token' in res.json():
                logme.debug('Found guest token in JSON')
                self.config.Guest_token = res.json()['guest_token']
            else:
                self.config.Guest_token = None
                raise RefreshTokenException('Could not find the Guest token in HTML')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/tweet.py
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
from time import strftime, localtime
|
2 |
-
from datetime import datetime, timezone
|
3 |
-
|
4 |
-
import logging as logme
|
5 |
-
from googletransx import Translator
|
6 |
-
# ref.
|
7 |
-
# - https://github.com/x0rzkov/py-googletrans#basic-usage
|
8 |
-
translator = Translator()
|
9 |
-
|
10 |
-
|
11 |
-
class tweet:
    """Bare attribute container for a single scraped tweet.

    Attributes are assigned dynamically by the Tweet() factory.
    """
    # Discriminator used by the output layer.
    type = "tweet"

    def __init__(self):
        pass
|
18 |
-
|
19 |
-
|
20 |
-
def utc_to_local(utc_dt):
    """Interpret naive *utc_dt* as UTC and convert it to the local timezone."""
    return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=None)
|
22 |
-
|
23 |
-
|
24 |
-
# strftime patterns for the tweet's datetime / datestamp / timestamp fields.
Tweet_formats = {
    'datetime': '%Y-%m-%d %H:%M:%S %Z',
    'datestamp': '%Y-%m-%d',
    'timestamp': '%H:%M:%S'
}
|
29 |
-
|
30 |
-
|
31 |
-
def _get_mentions(tw):
|
32 |
-
"""Extract mentions from tweet
|
33 |
-
"""
|
34 |
-
logme.debug(__name__ + ':get_mentions')
|
35 |
-
try:
|
36 |
-
mentions = [
|
37 |
-
{
|
38 |
-
'screen_name': _mention['screen_name'],
|
39 |
-
'name': _mention['name'],
|
40 |
-
'id': _mention['id_str'],
|
41 |
-
} for _mention in tw['entities']['user_mentions']
|
42 |
-
if tw['display_text_range'][0] < _mention['indices'][0]
|
43 |
-
]
|
44 |
-
except KeyError:
|
45 |
-
mentions = []
|
46 |
-
return mentions
|
47 |
-
|
48 |
-
|
49 |
-
def _get_reply_to(tw):
|
50 |
-
try:
|
51 |
-
reply_to = [
|
52 |
-
{
|
53 |
-
'screen_name': _mention['screen_name'],
|
54 |
-
'name': _mention['name'],
|
55 |
-
'id': _mention['id_str'],
|
56 |
-
} for _mention in tw['entities']['user_mentions']
|
57 |
-
if tw['display_text_range'][0] > _mention['indices'][1]
|
58 |
-
]
|
59 |
-
except KeyError:
|
60 |
-
reply_to = []
|
61 |
-
return reply_to
|
62 |
-
|
63 |
-
|
64 |
-
def getText(tw):
    """Return the tweet text with links spaced out and newlines flattened."""
    logme.debug(__name__ + ':getText')
    # Insert a space before links so they are not glued to preceding text.
    return (tw['full_text']
            .replace("http", " http")
            .replace("pic.twitter", " pic.twitter")
            .replace("\n", " "))
|
74 |
-
|
75 |
-
|
76 |
-
def Tweet(tw, config):
    """Build a ``tweet`` object from the raw API dict *tw*.

    Missing optional keys (media, urls, retweet data, ...) degrade to
    empty defaults instead of raising.
    """
    logme.debug(__name__ + ':Tweet')
    t = tweet()
    t.id = int(tw['id_str'])
    t.id_str = tw["id_str"]
    t.conversation_id = tw["conversation_id_str"]

    # Parse Twitter's creation timestamp into local, user-friendly forms.
    _dt = tw['created_at']
    _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
    _dt = utc_to_local(_dt)
    t.datetime = str(_dt.strftime(Tweet_formats['datetime']))
    t.datestamp = _dt.strftime(Tweet_formats['datestamp'])
    t.timestamp = _dt.strftime(Tweet_formats['timestamp'])

    t.user_id = int(tw["user_id_str"])
    t.user_id_str = tw["user_id_str"]
    t.username = tw["user_data"]['screen_name']
    t.name = tw["user_data"]['name']
    t.place = tw['geo'] if 'geo' in tw and tw['geo'] else ""
    t.timezone = strftime("%z", localtime())
    t.mentions = _get_mentions(tw)
    t.reply_to = _get_reply_to(tw)
    try:
        t.urls = [_url['expanded_url'] for _url in tw['entities']['urls']]
    except KeyError:
        t.urls = []
    try:
        t.photos = [_img['media_url_https'] for _img in tw['entities']['media'] if _img['type'] == 'photo' and
                    _img['expanded_url'].find('/photo/') != -1]
    except KeyError:
        t.photos = []
    try:
        t.video = 1 if len(tw['extended_entities']['media']) else 0
    except KeyError:
        t.video = 0
    try:
        t.thumbnail = tw['extended_entities']['media'][0]['media_url_https']
    except KeyError:
        t.thumbnail = ''
    t.tweet = getText(tw)
    t.lang = tw['lang']
    try:
        t.hashtags = [hashtag['text'] for hashtag in tw['entities']['hashtags']]
    except KeyError:
        t.hashtags = []
    try:
        t.cashtags = [cashtag['text'] for cashtag in tw['entities']['symbols']]
    except KeyError:
        t.cashtags = []
    t.replies_count = tw['reply_count']
    t.retweets_count = tw['retweet_count']
    t.likes_count = tw['favorite_count']
    t.link = f"https://twitter.com/{t.username}/status/{t.id}"
    try:
        if 'user_rt_id' in tw['retweet_data']:
            t.retweet = True
            t.retweet_id = tw['retweet_data']['retweet_id']
            t.retweet_date = tw['retweet_data']['retweet_date']
            t.user_rt = tw['retweet_data']['user_rt']
            t.user_rt_id = tw['retweet_data']['user_rt_id']
    except KeyError:
        t.retweet = False
        t.retweet_id = ''
        t.retweet_date = ''
        t.user_rt = ''
        t.user_rt_id = ''
    try:
        t.quote_url = tw['quoted_status_permalink']['expanded'] if tw['is_quote_status'] else ''
    except KeyError:
        # means that the quoted tweet have been deleted
        t.quote_url = 0
    t.near = config.Near if config.Near else ""
    t.geo = config.Geo if config.Geo else ""
    t.source = config.Source if config.Source else ""
    t.translate = ''
    t.trans_src = ''
    t.trans_dest = ''
    if config.Translate:
        try:
            ts = translator.translate(text=t.tweet, dest=config.TranslateDest)
            t.translate = ts.text
            t.trans_src = ts.src
            t.trans_dest = ts.dest
        # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
        except ValueError as e:
            logme.debug(__name__ + ':Tweet:translator.translate:' + str(e))
            raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
    return t
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/url.py
DELETED
@@ -1,195 +0,0 @@
|
|
1 |
-
import datetime
|
2 |
-
import json
|
3 |
-
from sys import platform
|
4 |
-
import logging as logme
|
5 |
-
from urllib.parse import urlencode
|
6 |
-
from urllib.parse import quote
|
7 |
-
|
8 |
-
mobile = "https://mobile.twitter.com"
|
9 |
-
base = "https://api.twitter.com/2/search/adaptive.json"
|
10 |
-
|
11 |
-
|
12 |
-
def _sanitizeQuery(_url, params):
|
13 |
-
_serialQuery = ""
|
14 |
-
_serialQuery = urlencode(params, quote_via=quote)
|
15 |
-
_serialQuery = _url + "?" + _serialQuery
|
16 |
-
return _serialQuery
|
17 |
-
|
18 |
-
|
19 |
-
def _formatDate(date):
|
20 |
-
if "win" in platform:
|
21 |
-
return f'\"{date.split()[0]}\"'
|
22 |
-
try:
|
23 |
-
return int(datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S").timestamp())
|
24 |
-
except ValueError:
|
25 |
-
return int(datetime.datetime.strptime(date, "%Y-%m-%d").timestamp())
|
26 |
-
|
27 |
-
|
28 |
-
async def Favorites(username, init):
    """Build the mobile-site URL for a user's favorites page."""
    logme.debug(__name__ + ':Favorites')
    url = f"https://mobile.twitter.com/{username}/favorites?lang=en"
    # '-1' denotes the first page (no pagination token yet).
    if init != '-1':
        url += f"&max_id={init}"
    return url
|
36 |
-
|
37 |
-
|
38 |
-
async def Followers(username, init):
    """Build the mobile-site URL for a user's followers page."""
    logme.debug(__name__ + ':Followers')
    url = f"https://mobile.twitter.com/{username}/followers?lang=en"
    # '-1' denotes the first page (no pagination cursor yet).
    if init != '-1':
        url += f"&cursor={init}"
    return url
|
46 |
-
|
47 |
-
|
48 |
-
async def Following(username, init):
    """Build the mobile-site URL for a user's following page."""
    logme.debug(__name__ + ':Following')
    url = f"https://mobile.twitter.com/{username}/following?lang=en"
    # '-1' denotes the first page (no pagination cursor yet).
    if init != '-1':
        url += f"&cursor={init}"
    return url
|
56 |
-
|
57 |
-
|
58 |
-
async def MobileProfile(username, init):
    """Build the mobile-site URL for a user's profile timeline."""
    logme.debug(__name__ + ':MobileProfile')
    url = f"https://mobile.twitter.com/{username}?lang=en"
    # '-1' denotes the first page (no pagination token yet).
    if init != '-1':
        url += f"&max_id={init}"
    return url
|
66 |
-
|
67 |
-
|
68 |
-
async def Search(config, init):
|
69 |
-
logme.debug(__name__ + ':Search')
|
70 |
-
url = base
|
71 |
-
tweet_count = 100 if not config.Limit else config.Limit
|
72 |
-
q = ""
|
73 |
-
params = [
|
74 |
-
# ('include_blocking', '1'),
|
75 |
-
# ('include_blocked_by', '1'),
|
76 |
-
# ('include_followed_by', '1'),
|
77 |
-
# ('include_want_retweets', '1'),
|
78 |
-
# ('include_mute_edge', '1'),
|
79 |
-
# ('include_can_dm', '1'),
|
80 |
-
('include_can_media_tag', '1'),
|
81 |
-
# ('skip_status', '1'),
|
82 |
-
# ('include_cards', '1'),
|
83 |
-
('include_ext_alt_text', 'true'),
|
84 |
-
('include_quote_count', 'true'),
|
85 |
-
('include_reply_count', '1'),
|
86 |
-
('tweet_mode', 'extended'),
|
87 |
-
('include_entities', 'true'),
|
88 |
-
('include_user_entities', 'true'),
|
89 |
-
('include_ext_media_availability', 'true'),
|
90 |
-
('send_error_codes', 'true'),
|
91 |
-
('simple_quoted_tweet', 'true'),
|
92 |
-
('count', tweet_count),
|
93 |
-
('query_source', 'typed_query'),
|
94 |
-
# ('pc', '1'),
|
95 |
-
('cursor', str(init)),
|
96 |
-
('spelling_corrections', '1'),
|
97 |
-
('ext', 'mediaStats%2ChighlightedLabel'),
|
98 |
-
('tweet_search_mode', 'live'), # this can be handled better, maybe take an argument and set it then
|
99 |
-
]
|
100 |
-
if not config.Popular_tweets:
|
101 |
-
params.append(('f', 'tweets'))
|
102 |
-
if config.Lang:
|
103 |
-
params.append(("l", config.Lang))
|
104 |
-
params.append(("lang", "en"))
|
105 |
-
if config.Query:
|
106 |
-
q += f" from:{config.Query}"
|
107 |
-
if config.Username:
|
108 |
-
q += f" from:{config.Username}"
|
109 |
-
if config.Geo:
|
110 |
-
config.Geo = config.Geo.replace(" ", "")
|
111 |
-
q += f" geocode:{config.Geo}"
|
112 |
-
if config.Search:
|
113 |
-
|
114 |
-
q += f" {config.Search}"
|
115 |
-
if config.Year:
|
116 |
-
q += f" until:{config.Year}-1-1"
|
117 |
-
if config.Since:
|
118 |
-
q += f" since:{_formatDate(config.Since)}"
|
119 |
-
if config.Until:
|
120 |
-
q += f" until:{_formatDate(config.Until)}"
|
121 |
-
if config.Email:
|
122 |
-
q += ' "mail" OR "email" OR'
|
123 |
-
q += ' "gmail" OR "e-mail"'
|
124 |
-
if config.Phone:
|
125 |
-
q += ' "phone" OR "call me" OR "text me"'
|
126 |
-
if config.Verified:
|
127 |
-
q += " filter:verified"
|
128 |
-
if config.To:
|
129 |
-
q += f" to:{config.To}"
|
130 |
-
if config.All:
|
131 |
-
q += f" to:{config.All} OR from:{config.All} OR @{config.All}"
|
132 |
-
if config.Near:
|
133 |
-
q += f' near:"{config.Near}"'
|
134 |
-
if config.Images:
|
135 |
-
q += " filter:images"
|
136 |
-
if config.Videos:
|
137 |
-
q += " filter:videos"
|
138 |
-
if config.Media:
|
139 |
-
q += " filter:media"
|
140 |
-
if config.Replies:
|
141 |
-
q += " filter:replies"
|
142 |
-
# although this filter can still be used, but I found it broken in my preliminary testing, needs more testing
|
143 |
-
if config.Native_retweets:
|
144 |
-
q += " filter:nativeretweets"
|
145 |
-
if config.Min_likes:
|
146 |
-
q += f" min_faves:{config.Min_likes}"
|
147 |
-
if config.Min_retweets:
|
148 |
-
q += f" min_retweets:{config.Min_retweets}"
|
149 |
-
if config.Min_replies:
|
150 |
-
q += f" min_replies:{config.Min_replies}"
|
151 |
-
if config.Links == "include":
|
152 |
-
q += " filter:links"
|
153 |
-
elif config.Links == "exclude":
|
154 |
-
q += " exclude:links"
|
155 |
-
if config.Source:
|
156 |
-
q += f" source:\"{config.Source}\""
|
157 |
-
if config.Members_list:
|
158 |
-
q += f" list:{config.Members_list}"
|
159 |
-
if config.Filter_retweets:
|
160 |
-
q += f" exclude:nativeretweets exclude:retweets"
|
161 |
-
if config.Custom_query:
|
162 |
-
q = config.Custom_query
|
163 |
-
|
164 |
-
q = q.strip()
|
165 |
-
params.append(("q", q))
|
166 |
-
_serialQuery = _sanitizeQuery(url, params)
|
167 |
-
return url, params, _serialQuery
|
168 |
-
|
169 |
-
|
170 |
-
def SearchProfile(config, init=None):
|
171 |
-
logme.debug(__name__ + ':SearchProfile')
|
172 |
-
_url = 'https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies'
|
173 |
-
tweet_count = 100
|
174 |
-
variables = {
|
175 |
-
"userId": config.User_id,
|
176 |
-
"count": tweet_count,
|
177 |
-
"includePromotedContent": True,
|
178 |
-
"withCommunity": True,
|
179 |
-
"withSuperFollowsUserFields": True,
|
180 |
-
"withBirdwatchPivots": False,
|
181 |
-
"withDownvotePerspective": False,
|
182 |
-
"withReactionsMetadata": False,
|
183 |
-
"withReactionsPerspective": False,
|
184 |
-
"withSuperFollowsTweetFields": True,
|
185 |
-
"withVoice": True,
|
186 |
-
"withV2Timeline": False,
|
187 |
-
"__fs_interactive_text": False,
|
188 |
-
"__fs_dont_mention_me_view_api_enabled": False,
|
189 |
-
}
|
190 |
-
if type(init) == str:
|
191 |
-
variables['cursor'] = init
|
192 |
-
params = [('variables', json.dumps(variables, separators=(',',':')))]
|
193 |
-
|
194 |
-
_serialQuery = _sanitizeQuery(_url, params)
|
195 |
-
return _serialQuery, [], _serialQuery
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/user.py
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
import datetime
|
2 |
-
import logging as logme
|
3 |
-
|
4 |
-
|
5 |
-
class user:
|
6 |
-
type = "user"
|
7 |
-
|
8 |
-
def __init__(self):
|
9 |
-
pass
|
10 |
-
|
11 |
-
|
12 |
-
User_formats = {
|
13 |
-
'join_date': '%Y-%m-%d',
|
14 |
-
'join_time': '%H:%M:%S %Z'
|
15 |
-
}
|
16 |
-
|
17 |
-
|
18 |
-
# ur object must be a json from the endpoint https://api.twitter.com/graphql
|
19 |
-
def User(ur):
|
20 |
-
logme.debug(__name__ + ':User')
|
21 |
-
if 'data' not in ur and 'user' not in ur['data']:
|
22 |
-
msg = 'malformed json! cannot be parsed to get user data'
|
23 |
-
logme.fatal(msg)
|
24 |
-
raise KeyError(msg)
|
25 |
-
_usr = user()
|
26 |
-
_usr.id = ur['data']['user']['rest_id']
|
27 |
-
_usr.name = ur['data']['user']['legacy']['name']
|
28 |
-
_usr.username = ur['data']['user']['legacy']['screen_name']
|
29 |
-
_usr.bio = ur['data']['user']['legacy']['description']
|
30 |
-
_usr.location = ur['data']['user']['legacy']['location']
|
31 |
-
_usr.url = ur['data']['user']['legacy']['url']
|
32 |
-
# parsing date to user-friendly format
|
33 |
-
_dt = ur['data']['user']['legacy']['created_at']
|
34 |
-
_dt = datetime.datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
|
35 |
-
# date is of the format year,
|
36 |
-
_usr.join_date = _dt.strftime(User_formats['join_date'])
|
37 |
-
_usr.join_time = _dt.strftime(User_formats['join_time'])
|
38 |
-
|
39 |
-
# :type `int`
|
40 |
-
_usr.tweets = int(ur['data']['user']['legacy']['statuses_count'])
|
41 |
-
_usr.following = int(ur['data']['user']['legacy']['friends_count'])
|
42 |
-
_usr.followers = int(ur['data']['user']['legacy']['followers_count'])
|
43 |
-
_usr.likes = int(ur['data']['user']['legacy']['favourites_count'])
|
44 |
-
_usr.media_count = int(ur['data']['user']['legacy']['media_count'])
|
45 |
-
|
46 |
-
_usr.is_private = ur['data']['user']['legacy']['protected']
|
47 |
-
_usr.is_verified = ur['data']['user']['legacy']['verified']
|
48 |
-
_usr.avatar = ur['data']['user']['legacy']['profile_image_url_https']
|
49 |
-
_usr.background_image = ur['data']['user']['legacy']['profile_banner_url']
|
50 |
-
# TODO : future implementation
|
51 |
-
# legacy_extended_profile is also available in some cases which can be used to get DOB of user
|
52 |
-
return _usr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twint/verbose.py
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
def Count(count, config):
|
2 |
-
msg = "[+] Finished: Successfully collected "
|
3 |
-
if config.Followers:
|
4 |
-
msg += f"all {count} users who follow @{config.Username}"
|
5 |
-
elif config.Following:
|
6 |
-
msg += f"all {count} users who @{config.Username} follows"
|
7 |
-
elif config.Favorites:
|
8 |
-
msg += f"{count} Tweets that @{config.Username} liked"
|
9 |
-
else:
|
10 |
-
msg += f"{count} Tweets_and_replies"
|
11 |
-
if config.Username:
|
12 |
-
msg += f" from @{config.Username}"
|
13 |
-
msg += "."
|
14 |
-
print(msg)
|
15 |
-
|
16 |
-
def Elastic(elasticsearch):
|
17 |
-
if elasticsearch:
|
18 |
-
print("[+] Indexing to Elasticsearch @ " + str(elasticsearch))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twint-master/twitter_scraper.ipynb
DELETED
@@ -1,265 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"id": "a5361789",
|
6 |
-
"metadata": {},
|
7 |
-
"source": [
|
8 |
-
"## Have to install these packages \n"
|
9 |
-
]
|
10 |
-
},
|
11 |
-
{
|
12 |
-
"cell_type": "code",
|
13 |
-
"execution_count": null,
|
14 |
-
"id": "c9021300",
|
15 |
-
"metadata": {
|
16 |
-
"scrolled": true
|
17 |
-
},
|
18 |
-
"outputs": [],
|
19 |
-
"source": [
|
20 |
-
"%%capture \n",
|
21 |
-
"!pip3 install Twint \n"
|
22 |
-
]
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"cell_type": "markdown",
|
26 |
-
"id": "5c857dbf",
|
27 |
-
"metadata": {},
|
28 |
-
"source": [
|
29 |
-
"## Nessessary Imports"
|
30 |
-
]
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"cell_type": "code",
|
34 |
-
"execution_count": null,
|
35 |
-
"id": "1413ab2b",
|
36 |
-
"metadata": {},
|
37 |
-
"outputs": [],
|
38 |
-
"source": [
|
39 |
-
"# import asyncio\n",
|
40 |
-
"# import os\n",
|
41 |
-
"# loop = asyncio.get_event_loop()\n",
|
42 |
-
"# loop.is_running()\n",
|
43 |
-
"# import twint\n",
|
44 |
-
"# import nest_asyncio\n",
|
45 |
-
"# nest_asyncio.apply()"
|
46 |
-
]
|
47 |
-
},
|
48 |
-
{
|
49 |
-
"cell_type": "code",
|
50 |
-
"execution_count": null,
|
51 |
-
"id": "d38514f3",
|
52 |
-
"metadata": {},
|
53 |
-
"outputs": [],
|
54 |
-
"source": [
|
55 |
-
"import scrape\n"
|
56 |
-
]
|
57 |
-
},
|
58 |
-
{
|
59 |
-
"cell_type": "code",
|
60 |
-
"execution_count": null,
|
61 |
-
"id": "a7912a91",
|
62 |
-
"metadata": {},
|
63 |
-
"outputs": [],
|
64 |
-
"source": [
|
65 |
-
"from_date=\"2022-6-10 10:30:22\"\n",
|
66 |
-
"to_date= \"2022-6-30\"\n",
|
67 |
-
"num_tweets = 20\n",
|
68 |
-
"_data=scrape.scraper.get_tweets(\"jimmieakesson\",u_or_s=\"u\",from_date=221232,to_date=2313)\n"
|
69 |
-
]
|
70 |
-
},
|
71 |
-
{
|
72 |
-
"cell_type": "code",
|
73 |
-
"execution_count": null,
|
74 |
-
"id": "48d50b46",
|
75 |
-
"metadata": {},
|
76 |
-
"outputs": [],
|
77 |
-
"source": [
|
78 |
-
"tweets= _data.keys()\n",
|
79 |
-
"for i in tweets:\n",
|
80 |
-
" _data[i][\"tweet\"]\n",
|
81 |
-
" print(_data[i][\"tweet\"], \"\\n\", \"__________________________________________________________\")"
|
82 |
-
]
|
83 |
-
},
|
84 |
-
{
|
85 |
-
"cell_type": "code",
|
86 |
-
"execution_count": null,
|
87 |
-
"id": "72cabcb5",
|
88 |
-
"metadata": {},
|
89 |
-
"outputs": [],
|
90 |
-
"source": [
|
91 |
-
"from_date=\"2022-6-10 10:30:22\"\n",
|
92 |
-
"to_date= \"2022-6-30\"\n",
|
93 |
-
"num_tweets = 20\n",
|
94 |
-
"_data=scrape.scraper.string_search_user_tweets(\"jimmieakesson\",\"invandring\")\n"
|
95 |
-
]
|
96 |
-
},
|
97 |
-
{
|
98 |
-
"cell_type": "code",
|
99 |
-
"execution_count": null,
|
100 |
-
"id": "549e4fb3",
|
101 |
-
"metadata": {},
|
102 |
-
"outputs": [],
|
103 |
-
"source": [
|
104 |
-
"tweets= _data[\"tweet\"]\n",
|
105 |
-
"for i in tweets:\n",
|
106 |
-
" print(i, \"\\n\", \"__________________________________________________________\")"
|
107 |
-
]
|
108 |
-
},
|
109 |
-
{
|
110 |
-
"cell_type": "code",
|
111 |
-
"execution_count": 3,
|
112 |
-
"id": "733dd44a",
|
113 |
-
"metadata": {},
|
114 |
-
"outputs": [
|
115 |
-
{
|
116 |
-
"name": "stdout",
|
117 |
-
"output_type": "stream",
|
118 |
-
"text": [
|
119 |
-
"Defaulting to user installation because normal site-packages is not writeable\n",
|
120 |
-
"Requirement already satisfied: snscrape in /home/oxygen/.local/lib/python3.10/site-packages (0.3.4)\n",
|
121 |
-
"Requirement already satisfied: beautifulsoup4 in /home/oxygen/.local/lib/python3.10/site-packages (from snscrape) (4.11.1)\n",
|
122 |
-
"Requirement already satisfied: requests[socks] in /usr/lib/python3/dist-packages (from snscrape) (2.25.1)\n",
|
123 |
-
"Requirement already satisfied: lxml in /usr/lib/python3/dist-packages (from snscrape) (4.8.0)\n",
|
124 |
-
"Requirement already satisfied: soupsieve>1.2 in /home/oxygen/.local/lib/python3.10/site-packages (from beautifulsoup4->snscrape) (2.3.2.post1)\n",
|
125 |
-
"Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /home/oxygen/.local/lib/python3.10/site-packages (from requests[socks]->snscrape) (1.7.1)\n"
|
126 |
-
]
|
127 |
-
}
|
128 |
-
],
|
129 |
-
"source": [
|
130 |
-
"#%pip install -q snscrape==0.3.4\n",
|
131 |
-
"!pip3 install snscrape\n",
|
132 |
-
"#!pip3 install git+https://github.com/JustAnotherArchivist/snscrape.git"
|
133 |
-
]
|
134 |
-
},
|
135 |
-
{
|
136 |
-
"cell_type": "code",
|
137 |
-
"execution_count": 14,
|
138 |
-
"id": "0d16422c",
|
139 |
-
"metadata": {},
|
140 |
-
"outputs": [
|
141 |
-
{
|
142 |
-
"name": "stdout",
|
143 |
-
"output_type": "stream",
|
144 |
-
"text": [
|
145 |
-
"Note: you may need to restart the kernel to use updated packages.\n"
|
146 |
-
]
|
147 |
-
}
|
148 |
-
],
|
149 |
-
"source": [
|
150 |
-
"%pip install -q snscrape==0.3.4\n",
|
151 |
-
"from datetime import date\n",
|
152 |
-
"import os\n",
|
153 |
-
"import pandas as pd\n",
|
154 |
-
"\n",
|
155 |
-
"\n",
|
156 |
-
"def get_tweets(search_term, from_date, to_date=date.today(), num_tweets=100,u_or_s='s'):\n",
|
157 |
-
" if u_or_s.lower() =='u':\n",
|
158 |
-
" extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-user '{search_term} until:{to_date}' > extracted-tweets.txt\" \n",
|
159 |
-
" else:\n",
|
160 |
-
" extracted_tweets = \"snscrape --format '{content!r}'\"+ f\" --max-results {num_tweets} --since {from_date} twitter-search '{search_term} until:{to_date}' > extracted-tweets.txt\"\n",
|
161 |
-
" \n",
|
162 |
-
" os.system(extracted_tweets)\n",
|
163 |
-
" if os.stat(\"extracted-tweets.txt\").st_size == 0:\n",
|
164 |
-
" print('No Tweets found')\n",
|
165 |
-
" else:\n",
|
166 |
-
" df = pd.read_csv('extracted-tweets.txt', names=['content'])\n",
|
167 |
-
" data_list=[]\n",
|
168 |
-
" for row in df['content'].iteritems():\n",
|
169 |
-
" temp= str(row[0])+str(row[1])\n",
|
170 |
-
" temp= temp.replace(\"\\'\",\"\")\n",
|
171 |
-
" data_list.append(temp)\n",
|
172 |
-
" return data_list\n",
|
173 |
-
"\n"
|
174 |
-
]
|
175 |
-
},
|
176 |
-
{
|
177 |
-
"cell_type": "code",
|
178 |
-
"execution_count": 12,
|
179 |
-
"id": "8e2adb35",
|
180 |
-
"metadata": {},
|
181 |
-
"outputs": [
|
182 |
-
{
|
183 |
-
"name": "stdout",
|
184 |
-
"output_type": "stream",
|
185 |
-
"text": [
|
186 |
-
"No Tweets found\n"
|
187 |
-
]
|
188 |
-
},
|
189 |
-
{
|
190 |
-
"name": "stderr",
|
191 |
-
"output_type": "stream",
|
192 |
-
"text": [
|
193 |
-
"Traceback (most recent call last):\n",
|
194 |
-
" File \"/home/oxygen/.local/bin/snscrape\", line 8, in <module>\n",
|
195 |
-
" sys.exit(main())\n",
|
196 |
-
" File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 224, in main\n",
|
197 |
-
" args = parse_args()\n",
|
198 |
-
" File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/cli.py\", line 159, in parse_args\n",
|
199 |
-
" import snscrape.modules\n",
|
200 |
-
" File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 15, in <module>\n",
|
201 |
-
" _import_modules()\n",
|
202 |
-
" File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/__init__.py\", line 12, in _import_modules\n",
|
203 |
-
" module = importlib.import_module(moduleName)\n",
|
204 |
-
" File \"/usr/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n",
|
205 |
-
" return _bootstrap._gcd_import(name[level:], package, level)\n",
|
206 |
-
" File \"/home/oxygen/.local/lib/python3.10/site-packages/snscrape/modules/instagram.py\", line 12, in <module>\n",
|
207 |
-
" class InstagramPost(typing.NamedTuple, snscrape.base.Item):\n",
|
208 |
-
" File \"/usr/lib/python3.10/typing.py\", line 2329, in _namedtuple_mro_entries\n",
|
209 |
-
" raise TypeError(\"Multiple inheritance with NamedTuple is not supported\")\n",
|
210 |
-
"TypeError: Multiple inheritance with NamedTuple is not supported\n"
|
211 |
-
]
|
212 |
-
},
|
213 |
-
{
|
214 |
-
"ename": "UnboundLocalError",
|
215 |
-
"evalue": "local variable 'df' referenced before assignment",
|
216 |
-
"output_type": "error",
|
217 |
-
"traceback": [
|
218 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
219 |
-
"\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)",
|
220 |
-
"\u001b[0;32m/tmp/ipykernel_26511/1892081786.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mget_tweets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"jimmieakesson\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfrom_date\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0;34m\"2022-06-01\"\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mnum_tweets\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu_or_s\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"u\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
221 |
-
"\u001b[0;32m/tmp/ipykernel_26511/275462205.py\u001b[0m in \u001b[0;36mget_tweets\u001b[0;34m(search_term, from_date, to_date, num_tweets, u_or_s)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'extracted-tweets.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mdata_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\'\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
222 |
-
"\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'df' referenced before assignment"
|
223 |
-
]
|
224 |
-
}
|
225 |
-
],
|
226 |
-
"source": [
|
227 |
-
"d= get_tweets(\"jimmieakesson\",from_date= \"2022-06-01\" ,num_tweets =5, u_or_s=\"u\")"
|
228 |
-
]
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"cell_type": "code",
|
232 |
-
"execution_count": null,
|
233 |
-
"id": "a2c837f4",
|
234 |
-
"metadata": {},
|
235 |
-
"outputs": [],
|
236 |
-
"source": []
|
237 |
-
}
|
238 |
-
],
|
239 |
-
"metadata": {
|
240 |
-
"kernelspec": {
|
241 |
-
"display_name": "Python 3.10.4 64-bit",
|
242 |
-
"language": "python",
|
243 |
-
"name": "python3"
|
244 |
-
},
|
245 |
-
"language_info": {
|
246 |
-
"codemirror_mode": {
|
247 |
-
"name": "ipython",
|
248 |
-
"version": 3
|
249 |
-
},
|
250 |
-
"file_extension": ".py",
|
251 |
-
"mimetype": "text/x-python",
|
252 |
-
"name": "python",
|
253 |
-
"nbconvert_exporter": "python",
|
254 |
-
"pygments_lexer": "ipython3",
|
255 |
-
"version": "3.10.4"
|
256 |
-
},
|
257 |
-
"vscode": {
|
258 |
-
"interpreter": {
|
259 |
-
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
260 |
-
}
|
261 |
-
}
|
262 |
-
},
|
263 |
-
"nbformat": 4,
|
264 |
-
"nbformat_minor": 5
|
265 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
twitter-scraper/twitter_scraper.ipynb
ADDED
@@ -0,0 +1,819 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "a5361789",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"## Have to install these packages \n"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "code",
|
13 |
+
"execution_count": 2,
|
14 |
+
"id": "c9021300",
|
15 |
+
"metadata": {
|
16 |
+
"scrolled": true
|
17 |
+
},
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"%%capture \n",
|
21 |
+
"!pip3 install Twint \n",
|
22 |
+
"\n"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "markdown",
|
27 |
+
"id": "5c857dbf",
|
28 |
+
"metadata": {},
|
29 |
+
"source": [
|
30 |
+
"## Nessessary Imports"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "code",
|
35 |
+
"execution_count": 3,
|
36 |
+
"id": "1413ab2b",
|
37 |
+
"metadata": {},
|
38 |
+
"outputs": [],
|
39 |
+
"source": [
|
40 |
+
"import asyncio\n",
|
41 |
+
"import os\n",
|
42 |
+
"loop = asyncio.get_event_loop()\n",
|
43 |
+
"loop.is_running()\n",
|
44 |
+
"import twint\n",
|
45 |
+
"import nest_asyncio\n",
|
46 |
+
"nest_asyncio.apply()"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cell_type": "code",
|
51 |
+
"execution_count": 4,
|
52 |
+
"id": "d38514f3",
|
53 |
+
"metadata": {},
|
54 |
+
"outputs": [],
|
55 |
+
"source": [
|
56 |
+
"import scrape\n",
|
57 |
+
"sc= scrape.TwitterScraper(num_tweets=10)\n"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"cell_type": "code",
|
62 |
+
"execution_count": 5,
|
63 |
+
"id": "d37e5cbf",
|
64 |
+
"metadata": {},
|
65 |
+
"outputs": [
|
66 |
+
{
|
67 |
+
"name": "stdout",
|
68 |
+
"output_type": "stream",
|
69 |
+
"text": [
|
70 |
+
"[+] Finished: Successfully collected 20 Tweets.\n"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"data": {
|
75 |
+
"text/html": [
|
76 |
+
"<div>\n",
|
77 |
+
"<style scoped>\n",
|
78 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
79 |
+
" vertical-align: middle;\n",
|
80 |
+
" }\n",
|
81 |
+
"\n",
|
82 |
+
" .dataframe tbody tr th {\n",
|
83 |
+
" vertical-align: top;\n",
|
84 |
+
" }\n",
|
85 |
+
"\n",
|
86 |
+
" .dataframe thead th {\n",
|
87 |
+
" text-align: right;\n",
|
88 |
+
" }\n",
|
89 |
+
"</style>\n",
|
90 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
91 |
+
" <thead>\n",
|
92 |
+
" <tr style=\"text-align: right;\">\n",
|
93 |
+
" <th></th>\n",
|
94 |
+
" <th>id</th>\n",
|
95 |
+
" <th>tweet</th>\n",
|
96 |
+
" <th>date</th>\n",
|
97 |
+
" <th>user_id</th>\n",
|
98 |
+
" <th>username</th>\n",
|
99 |
+
" <th>urls</th>\n",
|
100 |
+
" <th>nlikes</th>\n",
|
101 |
+
" <th>nreplies</th>\n",
|
102 |
+
" <th>nretweets</th>\n",
|
103 |
+
" </tr>\n",
|
104 |
+
" </thead>\n",
|
105 |
+
" <tbody>\n",
|
106 |
+
" <tr>\n",
|
107 |
+
" <th>0</th>\n",
|
108 |
+
" <td>1545194541006950400</td>\n",
|
109 |
+
" <td>kim sever benim gibi sevmeyecekler bıraktığın ...</td>\n",
|
110 |
+
" <td>2022-07-08 01:54:21</td>\n",
|
111 |
+
" <td>1396065566117466113</td>\n",
|
112 |
+
" <td>heja4r</td>\n",
|
113 |
+
" <td>[]</td>\n",
|
114 |
+
" <td>1</td>\n",
|
115 |
+
" <td>0</td>\n",
|
116 |
+
" <td>0</td>\n",
|
117 |
+
" </tr>\n",
|
118 |
+
" <tr>\n",
|
119 |
+
" <th>1</th>\n",
|
120 |
+
" <td>1545192735354806274</td>\n",
|
121 |
+
" <td>Kelimeler,albayım,bazı anlamalara gelmiyor..</td>\n",
|
122 |
+
" <td>2022-07-08 01:47:11</td>\n",
|
123 |
+
" <td>1481604485118140425</td>\n",
|
124 |
+
" <td>Theguapo6</td>\n",
|
125 |
+
" <td>[]</td>\n",
|
126 |
+
" <td>1</td>\n",
|
127 |
+
" <td>0</td>\n",
|
128 |
+
" <td>0</td>\n",
|
129 |
+
" </tr>\n",
|
130 |
+
" <tr>\n",
|
131 |
+
" <th>2</th>\n",
|
132 |
+
" <td>1545190168533008385</td>\n",
|
133 |
+
" <td>@shikan213 ptdr ? y’a aucune racisme à quel mo...</td>\n",
|
134 |
+
" <td>2022-07-08 01:36:59</td>\n",
|
135 |
+
" <td>1476042813741617155</td>\n",
|
136 |
+
" <td>srndz213__</td>\n",
|
137 |
+
" <td>[]</td>\n",
|
138 |
+
" <td>0</td>\n",
|
139 |
+
" <td>1</td>\n",
|
140 |
+
" <td>0</td>\n",
|
141 |
+
" </tr>\n",
|
142 |
+
" <tr>\n",
|
143 |
+
" <th>3</th>\n",
|
144 |
+
" <td>1545190106910171136</td>\n",
|
145 |
+
" <td>@guzzeida Men gud du har presterat så mkt bätt...</td>\n",
|
146 |
+
" <td>2022-07-08 01:36:44</td>\n",
|
147 |
+
" <td>34343541</td>\n",
|
148 |
+
" <td>lisaxamanda</td>\n",
|
149 |
+
" <td>[]</td>\n",
|
150 |
+
" <td>1</td>\n",
|
151 |
+
" <td>0</td>\n",
|
152 |
+
" <td>0</td>\n",
|
153 |
+
" </tr>\n",
|
154 |
+
" <tr>\n",
|
155 |
+
" <th>4</th>\n",
|
156 |
+
" <td>1545190096042860544</td>\n",
|
157 |
+
" <td>Heja, heja, heja Slovensko</td>\n",
|
158 |
+
" <td>2022-07-08 01:36:41</td>\n",
|
159 |
+
" <td>3158344237</td>\n",
|
160 |
+
" <td>ian_10_19</td>\n",
|
161 |
+
" <td>[]</td>\n",
|
162 |
+
" <td>0</td>\n",
|
163 |
+
" <td>0</td>\n",
|
164 |
+
" <td>0</td>\n",
|
165 |
+
" </tr>\n",
|
166 |
+
" </tbody>\n",
|
167 |
+
"</table>\n",
|
168 |
+
"</div>"
|
169 |
+
],
|
170 |
+
"text/plain": [
|
171 |
+
" id tweet \\\n",
|
172 |
+
"0 1545194541006950400 kim sever benim gibi sevmeyecekler bıraktığın ... \n",
|
173 |
+
"1 1545192735354806274 Kelimeler,albayım,bazı anlamalara gelmiyor.. \n",
|
174 |
+
"2 1545190168533008385 @shikan213 ptdr ? y’a aucune racisme à quel mo... \n",
|
175 |
+
"3 1545190106910171136 @guzzeida Men gud du har presterat så mkt bätt... \n",
|
176 |
+
"4 1545190096042860544 Heja, heja, heja Slovensko \n",
|
177 |
+
"\n",
|
178 |
+
" date user_id username urls nlikes \\\n",
|
179 |
+
"0 2022-07-08 01:54:21 1396065566117466113 heja4r [] 1 \n",
|
180 |
+
"1 2022-07-08 01:47:11 1481604485118140425 Theguapo6 [] 1 \n",
|
181 |
+
"2 2022-07-08 01:36:59 1476042813741617155 srndz213__ [] 0 \n",
|
182 |
+
"3 2022-07-08 01:36:44 34343541 lisaxamanda [] 1 \n",
|
183 |
+
"4 2022-07-08 01:36:41 3158344237 ian_10_19 [] 0 \n",
|
184 |
+
"\n",
|
185 |
+
" nreplies nretweets \n",
|
186 |
+
"0 0 0 \n",
|
187 |
+
"1 0 0 \n",
|
188 |
+
"2 1 0 \n",
|
189 |
+
"3 0 0 \n",
|
190 |
+
"4 0 0 "
|
191 |
+
]
|
192 |
+
},
|
193 |
+
"execution_count": 5,
|
194 |
+
"metadata": {},
|
195 |
+
"output_type": "execute_result"
|
196 |
+
}
|
197 |
+
],
|
198 |
+
"source": [
|
199 |
+
"string_tr_info=sc.scrape_by_string(\"heja\")\n",
|
200 |
+
"string_tr_info.head()\n"
|
201 |
+
]
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"cell_type": "code",
|
205 |
+
"execution_count": 6,
|
206 |
+
"id": "902170ad",
|
207 |
+
"metadata": {},
|
208 |
+
"outputs": [
|
209 |
+
{
|
210 |
+
"data": {
|
211 |
+
"text/html": [
|
212 |
+
"<div>\n",
|
213 |
+
"<style scoped>\n",
|
214 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
215 |
+
" vertical-align: middle;\n",
|
216 |
+
" }\n",
|
217 |
+
"\n",
|
218 |
+
" .dataframe tbody tr th {\n",
|
219 |
+
" vertical-align: top;\n",
|
220 |
+
" }\n",
|
221 |
+
"\n",
|
222 |
+
" .dataframe thead th {\n",
|
223 |
+
" text-align: right;\n",
|
224 |
+
" }\n",
|
225 |
+
"</style>\n",
|
226 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
227 |
+
" <thead>\n",
|
228 |
+
" <tr style=\"text-align: right;\">\n",
|
229 |
+
" <th></th>\n",
|
230 |
+
" <th>id</th>\n",
|
231 |
+
" <th>tweet</th>\n",
|
232 |
+
" <th>date</th>\n",
|
233 |
+
" <th>user_id</th>\n",
|
234 |
+
" <th>username</th>\n",
|
235 |
+
" <th>urls</th>\n",
|
236 |
+
" <th>nlikes</th>\n",
|
237 |
+
" <th>nreplies</th>\n",
|
238 |
+
" <th>nretweets</th>\n",
|
239 |
+
" </tr>\n",
|
240 |
+
" </thead>\n",
|
241 |
+
" <tbody>\n",
|
242 |
+
" <tr>\n",
|
243 |
+
" <th>0</th>\n",
|
244 |
+
" <td>1545194541006950400</td>\n",
|
245 |
+
" <td>kim sever benim gibi sevmeyecekler bıraktığın ...</td>\n",
|
246 |
+
" <td>2022-07-08 01:54:21</td>\n",
|
247 |
+
" <td>1396065566117466113</td>\n",
|
248 |
+
" <td>heja4r</td>\n",
|
249 |
+
" <td>[]</td>\n",
|
250 |
+
" <td>1</td>\n",
|
251 |
+
" <td>0</td>\n",
|
252 |
+
" <td>0</td>\n",
|
253 |
+
" </tr>\n",
|
254 |
+
" <tr>\n",
|
255 |
+
" <th>1</th>\n",
|
256 |
+
" <td>1545192735354806274</td>\n",
|
257 |
+
" <td>Kelimeler,albayım,bazı anlamalara gelmiyor..</td>\n",
|
258 |
+
" <td>2022-07-08 01:47:11</td>\n",
|
259 |
+
" <td>1481604485118140425</td>\n",
|
260 |
+
" <td>Theguapo6</td>\n",
|
261 |
+
" <td>[]</td>\n",
|
262 |
+
" <td>1</td>\n",
|
263 |
+
" <td>0</td>\n",
|
264 |
+
" <td>0</td>\n",
|
265 |
+
" </tr>\n",
|
266 |
+
" <tr>\n",
|
267 |
+
" <th>4</th>\n",
|
268 |
+
" <td>1545190096042860544</td>\n",
|
269 |
+
" <td>Heja, heja, heja Slovensko</td>\n",
|
270 |
+
" <td>2022-07-08 01:36:41</td>\n",
|
271 |
+
" <td>3158344237</td>\n",
|
272 |
+
" <td>ian_10_19</td>\n",
|
273 |
+
" <td>[]</td>\n",
|
274 |
+
" <td>0</td>\n",
|
275 |
+
" <td>0</td>\n",
|
276 |
+
" <td>0</td>\n",
|
277 |
+
" </tr>\n",
|
278 |
+
" <tr>\n",
|
279 |
+
" <th>6</th>\n",
|
280 |
+
" <td>1545189783747436545</td>\n",
|
281 |
+
" <td>Beni sorarsan dardayım..</td>\n",
|
282 |
+
" <td>2022-07-08 01:35:27</td>\n",
|
283 |
+
" <td>1481604485118140425</td>\n",
|
284 |
+
" <td>Theguapo6</td>\n",
|
285 |
+
" <td>[]</td>\n",
|
286 |
+
" <td>2</td>\n",
|
287 |
+
" <td>0</td>\n",
|
288 |
+
" <td>0</td>\n",
|
289 |
+
" </tr>\n",
|
290 |
+
" <tr>\n",
|
291 |
+
" <th>12</th>\n",
|
292 |
+
" <td>1545186234623991813</td>\n",
|
293 |
+
" <td>Heja strandhäll. Vilket jävla block mongo</td>\n",
|
294 |
+
" <td>2022-07-08 01:21:21</td>\n",
|
295 |
+
" <td>1160537136250195968</td>\n",
|
296 |
+
" <td>Siggydunn</td>\n",
|
297 |
+
" <td>[]</td>\n",
|
298 |
+
" <td>0</td>\n",
|
299 |
+
" <td>0</td>\n",
|
300 |
+
" <td>0</td>\n",
|
301 |
+
" </tr>\n",
|
302 |
+
" </tbody>\n",
|
303 |
+
"</table>\n",
|
304 |
+
"</div>"
|
305 |
+
],
|
306 |
+
"text/plain": [
|
307 |
+
" id tweet \\\n",
|
308 |
+
"0 1545194541006950400 kim sever benim gibi sevmeyecekler bıraktığın ... \n",
|
309 |
+
"1 1545192735354806274 Kelimeler,albayım,bazı anlamalara gelmiyor.. \n",
|
310 |
+
"4 1545190096042860544 Heja, heja, heja Slovensko \n",
|
311 |
+
"6 1545189783747436545 Beni sorarsan dardayım.. \n",
|
312 |
+
"12 1545186234623991813 Heja strandhäll. Vilket jävla block mongo \n",
|
313 |
+
"\n",
|
314 |
+
" date user_id username urls nlikes \\\n",
|
315 |
+
"0 2022-07-08 01:54:21 1396065566117466113 heja4r [] 1 \n",
|
316 |
+
"1 2022-07-08 01:47:11 1481604485118140425 Theguapo6 [] 1 \n",
|
317 |
+
"4 2022-07-08 01:36:41 3158344237 ian_10_19 [] 0 \n",
|
318 |
+
"6 2022-07-08 01:35:27 1481604485118140425 Theguapo6 [] 2 \n",
|
319 |
+
"12 2022-07-08 01:21:21 1160537136250195968 Siggydunn [] 0 \n",
|
320 |
+
"\n",
|
321 |
+
" nreplies nretweets \n",
|
322 |
+
"0 0 0 \n",
|
323 |
+
"1 0 0 \n",
|
324 |
+
"4 0 0 \n",
|
325 |
+
"6 0 0 \n",
|
326 |
+
"12 0 0 "
|
327 |
+
]
|
328 |
+
},
|
329 |
+
"execution_count": 6,
|
330 |
+
"metadata": {},
|
331 |
+
"output_type": "execute_result"
|
332 |
+
}
|
333 |
+
],
|
334 |
+
"source": [
|
335 |
+
"string_t_info=sc.get_only_tweets(tr_info)\n",
|
336 |
+
"string_t_info.head()"
|
337 |
+
]
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"cell_type": "code",
|
341 |
+
"execution_count": 7,
|
342 |
+
"id": "a7912a91",
|
343 |
+
"metadata": {},
|
344 |
+
"outputs": [
|
345 |
+
{
|
346 |
+
"name": "stdout",
|
347 |
+
"output_type": "stream",
|
348 |
+
"text": [
|
349 |
+
"[+] Finished: Successfully collected 20 Tweets.\n"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"data": {
|
354 |
+
"text/html": [
|
355 |
+
"<div>\n",
|
356 |
+
"<style scoped>\n",
|
357 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
358 |
+
" vertical-align: middle;\n",
|
359 |
+
" }\n",
|
360 |
+
"\n",
|
361 |
+
" .dataframe tbody tr th {\n",
|
362 |
+
" vertical-align: top;\n",
|
363 |
+
" }\n",
|
364 |
+
"\n",
|
365 |
+
" .dataframe thead th {\n",
|
366 |
+
" text-align: right;\n",
|
367 |
+
" }\n",
|
368 |
+
"</style>\n",
|
369 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
370 |
+
" <thead>\n",
|
371 |
+
" <tr style=\"text-align: right;\">\n",
|
372 |
+
" <th></th>\n",
|
373 |
+
" <th>id</th>\n",
|
374 |
+
" <th>tweet</th>\n",
|
375 |
+
" <th>date</th>\n",
|
376 |
+
" <th>user_id</th>\n",
|
377 |
+
" <th>username</th>\n",
|
378 |
+
" <th>urls</th>\n",
|
379 |
+
" <th>nlikes</th>\n",
|
380 |
+
" <th>nreplies</th>\n",
|
381 |
+
" <th>nretweets</th>\n",
|
382 |
+
" </tr>\n",
|
383 |
+
" </thead>\n",
|
384 |
+
" <tbody>\n",
|
385 |
+
" <tr>\n",
|
386 |
+
" <th>0</th>\n",
|
387 |
+
" <td>1544748873767424001</td>\n",
|
388 |
+
" <td>Fruktansvärt att nås av beskedet att kvinnan s...</td>\n",
|
389 |
+
" <td>2022-07-06 20:23:26</td>\n",
|
390 |
+
" <td>95972673</td>\n",
|
391 |
+
" <td>jimmieakesson</td>\n",
|
392 |
+
" <td>[]</td>\n",
|
393 |
+
" <td>3397</td>\n",
|
394 |
+
" <td>167</td>\n",
|
395 |
+
" <td>140</td>\n",
|
396 |
+
" </tr>\n",
|
397 |
+
" <tr>\n",
|
398 |
+
" <th>1</th>\n",
|
399 |
+
" <td>1538948369611210764</td>\n",
|
400 |
+
" <td>@annieloof Nej, jag håller med. Tänk mer som M...</td>\n",
|
401 |
+
" <td>2022-06-20 20:14:18</td>\n",
|
402 |
+
" <td>95972673</td>\n",
|
403 |
+
" <td>jimmieakesson</td>\n",
|
404 |
+
" <td>[]</td>\n",
|
405 |
+
" <td>1513</td>\n",
|
406 |
+
" <td>89</td>\n",
|
407 |
+
" <td>115</td>\n",
|
408 |
+
" </tr>\n",
|
409 |
+
" <tr>\n",
|
410 |
+
" <th>2</th>\n",
|
411 |
+
" <td>1537770920621879297</td>\n",
|
412 |
+
" <td>Man kan ha synpunkter på en sådan lösning, men...</td>\n",
|
413 |
+
" <td>2022-06-17 14:15:32</td>\n",
|
414 |
+
" <td>95972673</td>\n",
|
415 |
+
" <td>jimmieakesson</td>\n",
|
416 |
+
" <td>[]</td>\n",
|
417 |
+
" <td>694</td>\n",
|
418 |
+
" <td>17</td>\n",
|
419 |
+
" <td>41</td>\n",
|
420 |
+
" </tr>\n",
|
421 |
+
" <tr>\n",
|
422 |
+
" <th>3</th>\n",
|
423 |
+
" <td>1537770809225273344</td>\n",
|
424 |
+
" <td>Är det ont om plats på anstalterna så får man ...</td>\n",
|
425 |
+
" <td>2022-06-17 14:15:05</td>\n",
|
426 |
+
" <td>95972673</td>\n",
|
427 |
+
" <td>jimmieakesson</td>\n",
|
428 |
+
" <td>[]</td>\n",
|
429 |
+
" <td>810</td>\n",
|
430 |
+
" <td>26</td>\n",
|
431 |
+
" <td>57</td>\n",
|
432 |
+
" </tr>\n",
|
433 |
+
" <tr>\n",
|
434 |
+
" <th>4</th>\n",
|
435 |
+
" <td>1537770713368735744</td>\n",
|
436 |
+
" <td>Döms man för brott, särskilt våldsbrott, ska m...</td>\n",
|
437 |
+
" <td>2022-06-17 14:14:43</td>\n",
|
438 |
+
" <td>95972673</td>\n",
|
439 |
+
" <td>jimmieakesson</td>\n",
|
440 |
+
" <td>[]</td>\n",
|
441 |
+
" <td>1020</td>\n",
|
442 |
+
" <td>26</td>\n",
|
443 |
+
" <td>86</td>\n",
|
444 |
+
" </tr>\n",
|
445 |
+
" </tbody>\n",
|
446 |
+
"</table>\n",
|
447 |
+
"</div>"
|
448 |
+
],
|
449 |
+
"text/plain": [
|
450 |
+
" id tweet \\\n",
|
451 |
+
"0 1544748873767424001 Fruktansvärt att nås av beskedet att kvinnan s... \n",
|
452 |
+
"1 1538948369611210764 @annieloof Nej, jag håller med. Tänk mer som M... \n",
|
453 |
+
"2 1537770920621879297 Man kan ha synpunkter på en sådan lösning, men... \n",
|
454 |
+
"3 1537770809225273344 Är det ont om plats på anstalterna så får man ... \n",
|
455 |
+
"4 1537770713368735744 Döms man för brott, särskilt våldsbrott, ska m... \n",
|
456 |
+
"\n",
|
457 |
+
" date user_id username urls nlikes nreplies \\\n",
|
458 |
+
"0 2022-07-06 20:23:26 95972673 jimmieakesson [] 3397 167 \n",
|
459 |
+
"1 2022-06-20 20:14:18 95972673 jimmieakesson [] 1513 89 \n",
|
460 |
+
"2 2022-06-17 14:15:32 95972673 jimmieakesson [] 694 17 \n",
|
461 |
+
"3 2022-06-17 14:15:05 95972673 jimmieakesson [] 810 26 \n",
|
462 |
+
"4 2022-06-17 14:14:43 95972673 jimmieakesson [] 1020 26 \n",
|
463 |
+
"\n",
|
464 |
+
" nretweets \n",
|
465 |
+
"0 140 \n",
|
466 |
+
"1 115 \n",
|
467 |
+
"2 41 \n",
|
468 |
+
"3 57 \n",
|
469 |
+
"4 86 "
|
470 |
+
]
|
471 |
+
},
|
472 |
+
"execution_count": 7,
|
473 |
+
"metadata": {},
|
474 |
+
"output_type": "execute_result"
|
475 |
+
}
|
476 |
+
],
|
477 |
+
"source": [
|
478 |
+
"user__tr_info=sc.scrape_by_user(\"jimmieakesson\")\n",
|
479 |
+
"df.head()"
|
480 |
+
]
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"cell_type": "code",
|
484 |
+
"execution_count": null,
|
485 |
+
"id": "7db69757",
|
486 |
+
"metadata": {},
|
487 |
+
"outputs": [],
|
488 |
+
"source": [
|
489 |
+
"user__t_info=sc.get_only_tweets(tr_info)\n",
|
490 |
+
"user__t_info.head()"
|
491 |
+
]
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"cell_type": "code",
|
495 |
+
"execution_count": 8,
|
496 |
+
"id": "9d6b1bdf",
|
497 |
+
"metadata": {},
|
498 |
+
"outputs": [
|
499 |
+
{
|
500 |
+
"name": "stdout",
|
501 |
+
"output_type": "stream",
|
502 |
+
"text": [
|
503 |
+
"[+] Finished: Successfully collected 16 Tweets from @jimmieakesson.\n"
|
504 |
+
]
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"data": {
|
508 |
+
"text/html": [
|
509 |
+
"<div>\n",
|
510 |
+
"<style scoped>\n",
|
511 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
512 |
+
" vertical-align: middle;\n",
|
513 |
+
" }\n",
|
514 |
+
"\n",
|
515 |
+
" .dataframe tbody tr th {\n",
|
516 |
+
" vertical-align: top;\n",
|
517 |
+
" }\n",
|
518 |
+
"\n",
|
519 |
+
" .dataframe thead th {\n",
|
520 |
+
" text-align: right;\n",
|
521 |
+
" }\n",
|
522 |
+
"</style>\n",
|
523 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
524 |
+
" <thead>\n",
|
525 |
+
" <tr style=\"text-align: right;\">\n",
|
526 |
+
" <th></th>\n",
|
527 |
+
" <th>id</th>\n",
|
528 |
+
" <th>tweet</th>\n",
|
529 |
+
" <th>date</th>\n",
|
530 |
+
" <th>user_id</th>\n",
|
531 |
+
" <th>username</th>\n",
|
532 |
+
" <th>urls</th>\n",
|
533 |
+
" <th>nlikes</th>\n",
|
534 |
+
" <th>nreplies</th>\n",
|
535 |
+
" <th>nretweets</th>\n",
|
536 |
+
" </tr>\n",
|
537 |
+
" </thead>\n",
|
538 |
+
" <tbody>\n",
|
539 |
+
" <tr>\n",
|
540 |
+
" <th>0</th>\n",
|
541 |
+
" <td>1363067834260201475</td>\n",
|
542 |
+
" <td>Utan massiv, asylrelaterad invandring från frä...</td>\n",
|
543 |
+
" <td>2021-02-20 11:07:50</td>\n",
|
544 |
+
" <td>95972673</td>\n",
|
545 |
+
" <td>jimmieakesson</td>\n",
|
546 |
+
" <td>[]</td>\n",
|
547 |
+
" <td>1277</td>\n",
|
548 |
+
" <td>22</td>\n",
|
549 |
+
" <td>105</td>\n",
|
550 |
+
" </tr>\n",
|
551 |
+
" <tr>\n",
|
552 |
+
" <th>1</th>\n",
|
553 |
+
" <td>1363067613660778496</td>\n",
|
554 |
+
" <td>Många vänsterliberaler tycks ha reagerat på de...</td>\n",
|
555 |
+
" <td>2021-02-20 11:06:58</td>\n",
|
556 |
+
" <td>95972673</td>\n",
|
557 |
+
" <td>jimmieakesson</td>\n",
|
558 |
+
" <td>[]</td>\n",
|
559 |
+
" <td>625</td>\n",
|
560 |
+
" <td>9</td>\n",
|
561 |
+
" <td>68</td>\n",
|
562 |
+
" </tr>\n",
|
563 |
+
" <tr>\n",
|
564 |
+
" <th>2</th>\n",
|
565 |
+
" <td>1363067558409158656</td>\n",
|
566 |
+
" <td>Jag förstår — uppriktigt — inte den närmast hy...</td>\n",
|
567 |
+
" <td>2021-02-20 11:06:45</td>\n",
|
568 |
+
" <td>95972673</td>\n",
|
569 |
+
" <td>jimmieakesson</td>\n",
|
570 |
+
" <td>[]</td>\n",
|
571 |
+
" <td>2458</td>\n",
|
572 |
+
" <td>199</td>\n",
|
573 |
+
" <td>336</td>\n",
|
574 |
+
" </tr>\n",
|
575 |
+
" <tr>\n",
|
576 |
+
" <th>3</th>\n",
|
577 |
+
" <td>1362748777552113670</td>\n",
|
578 |
+
" <td>Invandring av hundratusentals människor från f...</td>\n",
|
579 |
+
" <td>2021-02-19 14:00:01</td>\n",
|
580 |
+
" <td>95972673</td>\n",
|
581 |
+
" <td>jimmieakesson</td>\n",
|
582 |
+
" <td>[]</td>\n",
|
583 |
+
" <td>1334</td>\n",
|
584 |
+
" <td>55</td>\n",
|
585 |
+
" <td>101</td>\n",
|
586 |
+
" </tr>\n",
|
587 |
+
" <tr>\n",
|
588 |
+
" <th>4</th>\n",
|
589 |
+
" <td>1362409505557012490</td>\n",
|
590 |
+
" <td>Vårt land behöver ett totalstopp för all asyl-...</td>\n",
|
591 |
+
" <td>2021-02-18 15:31:53</td>\n",
|
592 |
+
" <td>95972673</td>\n",
|
593 |
+
" <td>jimmieakesson</td>\n",
|
594 |
+
" <td>[]</td>\n",
|
595 |
+
" <td>3044</td>\n",
|
596 |
+
" <td>268</td>\n",
|
597 |
+
" <td>404</td>\n",
|
598 |
+
" </tr>\n",
|
599 |
+
" </tbody>\n",
|
600 |
+
"</table>\n",
|
601 |
+
"</div>"
|
602 |
+
],
|
603 |
+
"text/plain": [
|
604 |
+
" id tweet \\\n",
|
605 |
+
"0 1363067834260201475 Utan massiv, asylrelaterad invandring från frä... \n",
|
606 |
+
"1 1363067613660778496 Många vänsterliberaler tycks ha reagerat på de... \n",
|
607 |
+
"2 1363067558409158656 Jag förstår — uppriktigt — inte den närmast hy... \n",
|
608 |
+
"3 1362748777552113670 Invandring av hundratusentals människor från f... \n",
|
609 |
+
"4 1362409505557012490 Vårt land behöver ett totalstopp för all asyl-... \n",
|
610 |
+
"\n",
|
611 |
+
" date user_id username urls nlikes nreplies \\\n",
|
612 |
+
"0 2021-02-20 11:07:50 95972673 jimmieakesson [] 1277 22 \n",
|
613 |
+
"1 2021-02-20 11:06:58 95972673 jimmieakesson [] 625 9 \n",
|
614 |
+
"2 2021-02-20 11:06:45 95972673 jimmieakesson [] 2458 199 \n",
|
615 |
+
"3 2021-02-19 14:00:01 95972673 jimmieakesson [] 1334 55 \n",
|
616 |
+
"4 2021-02-18 15:31:53 95972673 jimmieakesson [] 3044 268 \n",
|
617 |
+
"\n",
|
618 |
+
" nretweets \n",
|
619 |
+
"0 105 \n",
|
620 |
+
"1 68 \n",
|
621 |
+
"2 336 \n",
|
622 |
+
"3 101 \n",
|
623 |
+
"4 404 "
|
624 |
+
]
|
625 |
+
},
|
626 |
+
"execution_count": 8,
|
627 |
+
"metadata": {},
|
628 |
+
"output_type": "execute_result"
|
629 |
+
}
|
630 |
+
],
|
631 |
+
"source": [
|
632 |
+
"user__string_tr_info=sc.scrape_by_user_and_string(\"jimmieakesson\",\"invandring\")\n",
|
633 |
+
"user__string_tr_info.head()\n"
|
634 |
+
]
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"cell_type": "code",
|
638 |
+
"execution_count": 9,
|
639 |
+
"id": "a1aede79",
|
640 |
+
"metadata": {},
|
641 |
+
"outputs": [
|
642 |
+
{
|
643 |
+
"data": {
|
644 |
+
"text/html": [
|
645 |
+
"<div>\n",
|
646 |
+
"<style scoped>\n",
|
647 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
648 |
+
" vertical-align: middle;\n",
|
649 |
+
" }\n",
|
650 |
+
"\n",
|
651 |
+
" .dataframe tbody tr th {\n",
|
652 |
+
" vertical-align: top;\n",
|
653 |
+
" }\n",
|
654 |
+
"\n",
|
655 |
+
" .dataframe thead th {\n",
|
656 |
+
" text-align: right;\n",
|
657 |
+
" }\n",
|
658 |
+
"</style>\n",
|
659 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
660 |
+
" <thead>\n",
|
661 |
+
" <tr style=\"text-align: right;\">\n",
|
662 |
+
" <th></th>\n",
|
663 |
+
" <th>id</th>\n",
|
664 |
+
" <th>tweet</th>\n",
|
665 |
+
" <th>date</th>\n",
|
666 |
+
" <th>user_id</th>\n",
|
667 |
+
" <th>username</th>\n",
|
668 |
+
" <th>urls</th>\n",
|
669 |
+
" <th>nlikes</th>\n",
|
670 |
+
" <th>nreplies</th>\n",
|
671 |
+
" <th>nretweets</th>\n",
|
672 |
+
" </tr>\n",
|
673 |
+
" </thead>\n",
|
674 |
+
" <tbody>\n",
|
675 |
+
" <tr>\n",
|
676 |
+
" <th>0</th>\n",
|
677 |
+
" <td>1363067834260201475</td>\n",
|
678 |
+
" <td>Utan massiv, asylrelaterad invandring från frä...</td>\n",
|
679 |
+
" <td>2021-02-20 11:07:50</td>\n",
|
680 |
+
" <td>95972673</td>\n",
|
681 |
+
" <td>jimmieakesson</td>\n",
|
682 |
+
" <td>[]</td>\n",
|
683 |
+
" <td>1277</td>\n",
|
684 |
+
" <td>22</td>\n",
|
685 |
+
" <td>105</td>\n",
|
686 |
+
" </tr>\n",
|
687 |
+
" <tr>\n",
|
688 |
+
" <th>1</th>\n",
|
689 |
+
" <td>1363067613660778496</td>\n",
|
690 |
+
" <td>Många vänsterliberaler tycks ha reagerat på de...</td>\n",
|
691 |
+
" <td>2021-02-20 11:06:58</td>\n",
|
692 |
+
" <td>95972673</td>\n",
|
693 |
+
" <td>jimmieakesson</td>\n",
|
694 |
+
" <td>[]</td>\n",
|
695 |
+
" <td>625</td>\n",
|
696 |
+
" <td>9</td>\n",
|
697 |
+
" <td>68</td>\n",
|
698 |
+
" </tr>\n",
|
699 |
+
" <tr>\n",
|
700 |
+
" <th>2</th>\n",
|
701 |
+
" <td>1363067558409158656</td>\n",
|
702 |
+
" <td>Jag förstår — uppriktigt — inte den närmast hy...</td>\n",
|
703 |
+
" <td>2021-02-20 11:06:45</td>\n",
|
704 |
+
" <td>95972673</td>\n",
|
705 |
+
" <td>jimmieakesson</td>\n",
|
706 |
+
" <td>[]</td>\n",
|
707 |
+
" <td>2458</td>\n",
|
708 |
+
" <td>199</td>\n",
|
709 |
+
" <td>336</td>\n",
|
710 |
+
" </tr>\n",
|
711 |
+
" <tr>\n",
|
712 |
+
" <th>3</th>\n",
|
713 |
+
" <td>1362748777552113670</td>\n",
|
714 |
+
" <td>Invandring av hundratusentals människor från f...</td>\n",
|
715 |
+
" <td>2021-02-19 14:00:01</td>\n",
|
716 |
+
" <td>95972673</td>\n",
|
717 |
+
" <td>jimmieakesson</td>\n",
|
718 |
+
" <td>[]</td>\n",
|
719 |
+
" <td>1334</td>\n",
|
720 |
+
" <td>55</td>\n",
|
721 |
+
" <td>101</td>\n",
|
722 |
+
" </tr>\n",
|
723 |
+
" <tr>\n",
|
724 |
+
" <th>4</th>\n",
|
725 |
+
" <td>1362409505557012490</td>\n",
|
726 |
+
" <td>Vårt land behöver ett totalstopp för all asyl-...</td>\n",
|
727 |
+
" <td>2021-02-18 15:31:53</td>\n",
|
728 |
+
" <td>95972673</td>\n",
|
729 |
+
" <td>jimmieakesson</td>\n",
|
730 |
+
" <td>[]</td>\n",
|
731 |
+
" <td>3044</td>\n",
|
732 |
+
" <td>268</td>\n",
|
733 |
+
" <td>404</td>\n",
|
734 |
+
" </tr>\n",
|
735 |
+
" </tbody>\n",
|
736 |
+
"</table>\n",
|
737 |
+
"</div>"
|
738 |
+
],
|
739 |
+
"text/plain": [
|
740 |
+
" id tweet \\\n",
|
741 |
+
"0 1363067834260201475 Utan massiv, asylrelaterad invandring från frä... \n",
|
742 |
+
"1 1363067613660778496 Många vänsterliberaler tycks ha reagerat på de... \n",
|
743 |
+
"2 1363067558409158656 Jag förstår — uppriktigt — inte den närmast hy... \n",
|
744 |
+
"3 1362748777552113670 Invandring av hundratusentals människor från f... \n",
|
745 |
+
"4 1362409505557012490 Vårt land behöver ett totalstopp för all asyl-... \n",
|
746 |
+
"\n",
|
747 |
+
" date user_id username urls nlikes nreplies \\\n",
|
748 |
+
"0 2021-02-20 11:07:50 95972673 jimmieakesson [] 1277 22 \n",
|
749 |
+
"1 2021-02-20 11:06:58 95972673 jimmieakesson [] 625 9 \n",
|
750 |
+
"2 2021-02-20 11:06:45 95972673 jimmieakesson [] 2458 199 \n",
|
751 |
+
"3 2021-02-19 14:00:01 95972673 jimmieakesson [] 1334 55 \n",
|
752 |
+
"4 2021-02-18 15:31:53 95972673 jimmieakesson [] 3044 268 \n",
|
753 |
+
"\n",
|
754 |
+
" nretweets \n",
|
755 |
+
"0 105 \n",
|
756 |
+
"1 68 \n",
|
757 |
+
"2 336 \n",
|
758 |
+
"3 101 \n",
|
759 |
+
"4 404 "
|
760 |
+
]
|
761 |
+
},
|
762 |
+
"execution_count": 9,
|
763 |
+
"metadata": {},
|
764 |
+
"output_type": "execute_result"
|
765 |
+
}
|
766 |
+
],
|
767 |
+
"source": [
|
768 |
+
"user__string_t_info = sc.get_only_tweets(user__string_tr_info)\n",
|
769 |
+
"user__string_t_info.head()"
|
770 |
+
]
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"cell_type": "code",
|
774 |
+
"execution_count": null,
|
775 |
+
"id": "48d50b46",
|
776 |
+
"metadata": {},
|
777 |
+
"outputs": [],
|
778 |
+
"source": [
|
779 |
+
"tweets= df[\"tweet\"]\n",
|
780 |
+
"for tweet in tweets:\n",
|
781 |
+
" print(tweet, \"\\n\", \"__________________________________________________________\")"
|
782 |
+
]
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"cell_type": "code",
|
786 |
+
"execution_count": null,
|
787 |
+
"id": "530c26e2",
|
788 |
+
"metadata": {},
|
789 |
+
"outputs": [],
|
790 |
+
"source": []
|
791 |
+
}
|
792 |
+
],
|
793 |
+
"metadata": {
|
794 |
+
"kernelspec": {
|
795 |
+
"display_name": "Python 3.10.4 64-bit",
|
796 |
+
"language": "python",
|
797 |
+
"name": "python3"
|
798 |
+
},
|
799 |
+
"language_info": {
|
800 |
+
"codemirror_mode": {
|
801 |
+
"name": "ipython",
|
802 |
+
"version": 3
|
803 |
+
},
|
804 |
+
"file_extension": ".py",
|
805 |
+
"mimetype": "text/x-python",
|
806 |
+
"name": "python",
|
807 |
+
"nbconvert_exporter": "python",
|
808 |
+
"pygments_lexer": "ipython3",
|
809 |
+
"version": "3.10.4"
|
810 |
+
},
|
811 |
+
"vscode": {
|
812 |
+
"interpreter": {
|
813 |
+
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
814 |
+
}
|
815 |
+
}
|
816 |
+
},
|
817 |
+
"nbformat": 4,
|
818 |
+
"nbformat_minor": 5
|
819 |
+
}
|