Mosa committed on
Commit 35c6ca6
1 Parent(s): b24e23b
Files changed (46)
  1. twitter-scraper/scrape.py +103 -0
  2. twitter-scraper/twint-master/.github/FUNDING.yml +0 -3
  3. twitter-scraper/twint-master/.github/ISSUE_TEMPLATE.md +0 -20
  4. twitter-scraper/twint-master/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md +0 -17
  5. twitter-scraper/twint-master/.gitignore +0 -115
  6. twitter-scraper/twint-master/.travis.yml +0 -23
  7. twitter-scraper/twint-master/Dockerfile +0 -10
  8. twitter-scraper/twint-master/LICENSE +0 -21
  9. twitter-scraper/twint-master/MANIFEST.in +0 -1
  10. twitter-scraper/twint-master/README.md +0 -272
  11. twitter-scraper/twint-master/Untitled.ipynb +0 -282
  12. twitter-scraper/twint-master/automate.py +0 -65
  13. twitter-scraper/twint-master/elasticsearch/README.md +0 -5
  14. twitter-scraper/twint-master/scrape.py +0 -102
  15. twitter-scraper/twint-master/scrape__init__.py +0 -14
  16. twitter-scraper/twint-master/setup.py +0 -65
  17. twitter-scraper/twint-master/test.py +0 -92
  18. twitter-scraper/twint-master/twint/__init__.py +0 -32
  19. twitter-scraper/twint-master/twint/__version__.py +0 -3
  20. twitter-scraper/twint-master/twint/cli.py +0 -342
  21. twitter-scraper/twint-master/twint/config.py +0 -87
  22. twitter-scraper/twint-master/twint/datelock.py +0 -44
  23. twitter-scraper/twint-master/twint/feed.py +0 -145
  24. twitter-scraper/twint-master/twint/format.py +0 -91
  25. twitter-scraper/twint-master/twint/get.py +0 -298
  26. twitter-scraper/twint-master/twint/output.py +0 -241
  27. twitter-scraper/twint-master/twint/run.py +0 -412
  28. twitter-scraper/twint-master/twint/storage/__init__.py +0 -0
  29. twitter-scraper/twint-master/twint/storage/db.py +0 -297
  30. twitter-scraper/twint-master/twint/storage/elasticsearch.py +0 -364
  31. twitter-scraper/twint-master/twint/storage/panda.py +0 -196
  32. twitter-scraper/twint-master/twint/storage/write.py +0 -77
  33. twitter-scraper/twint-master/twint/storage/write_meta.py +0 -151
  34. twitter-scraper/twint-master/twint/token.py +0 -94
  35. twitter-scraper/twint-master/twint/tweet.py +0 -166
  36. twitter-scraper/twint-master/twint/url.py +0 -195
  37. twitter-scraper/twint-master/twint/user.py +0 -52
  38. twitter-scraper/twint-master/twint/verbose.py +0 -18
  39. twitter-scraper/{twint-master/twitter_scraper.ipynb → twitter_scraper.ipynb} +0 -0
  40. twitter_scraper/twint_master/elasticsearch/dashboard.json +18 -0
  41. twitter_scraper/twint_master/elasticsearch/index-follow.json +15 -0
  42. twitter_scraper/twint_master/elasticsearch/index-tweets.json +48 -0
  43. twitter_scraper/twint_master/elasticsearch/index-user.json +33 -0
  44. twitter_scraper/twint_master/elasticsearch/visualizations.json +100 -0
  45. twitter_scraper/twint_master/extracted-tweets.txt +5 -0
  46. twitter_scraper/twint_master/requirements.txt +13 -0
twitter-scraper/scrape.py ADDED
@@ -0,0 +1,103 @@
+
+from tkinter import EXCEPTION
+import twint
+from datetime import date
+import pandas as pd
+import sys
+import io
+import time
+class scraper:
+    def get_tweets(search_str, from_date="2006-07-01", to_date=str(date.today()), num_tweets=10, u_or_s='s',
+                   acceptable_range=10):
+        if (type(from_date) or type("str")) is not type("str"):
+            print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
+            raise EXCEPTION("Incorrect date type Exception!")
+
+        time_out = time.time() + 2 * 60
+        _dict = {}
+        c = twint.Config()
+        if u_or_s.lower() == "u":
+            c.Search = "from:@" + search_str  # topic
+        else:
+            c.Search = search_str  # topic
+        c.Pandas = True
+        num_tweets_and_replies = num_tweets
+        c.Count = True
+        # for j in range(1, 5):
+        c.Limit = num_tweets_and_replies
+        c.Since = from_date
+        c.Until = to_date
+        c.Hide_output = True
+        old_stdout = sys.stdout
+        new_stdout = io.StringIO()
+        sys.stdout = new_stdout
+        twint.run.Search(c)
+        output = new_stdout.getvalue()
+        sys.stdout = old_stdout
+        print(output[0:-2])
+        tweet_info = twint.output.panda.Tweets_df
+        indx_replies = []
+        tweet = tweet_info['tweet']
+        for i in range(len(tweet)):
+            if tweet[i].startswith("@"):
+                indx_replies.append(i)
+        tweet_info.drop(indx_replies, axis=0, inplace=True)
+        print(len(tweet_info['tweet']), " of them are Tweets")
+        # df.drop([5,6], axis=0, inplace=True)
+        return tweet_info
+
+
+
+
+        # try:
+        #     _keys = tweet_info[["id","tweet","date","user_id","urls" ,'nlikes', 'nreplies', 'nretweets']]
+        #     # tweet_info is a dataframe with the following columns:
+        #     # Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
+        #     #        'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
+        #     #        'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
+        #     #        'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
+        #     #        'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
+        #     #        'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
+        #     #        'trans_dest'],
+        #     #       dtype='object')
+
+        #     for i in range(len(_keys)):
+        #         if _keys[i] in _dict.keys() or tweet_info["tweet"][i].startswith("@"):
+        #             pass
+        #         else:
+        #             _dict[int(_keys[i])] = {"tweet": tweet_info["tweet"][i],
+        #                                     "date": tweet_info["date"][i],
+        #                                     "nlikes": tweet_info["nlikes"][i],
+        #                                     "nreplies": tweet_info["nreplies"][i],
+        #                                     "nretweets": tweet_info["nretweets"][i], "topic": ""}
+        #             if len(list(_dict.keys())) == num_tweets:
+        #                 break
+        # except:
+        #     pass
+        # print(len(list(_dict.keys())), " of them are Tweets")
+        # if (num_tweets - len(list(_dict.keys()))) < acceptable_range:
+        #     return _dict
+        # if len(list(_dict.keys())) < num_tweets:
+        #     num_tweets_and_replies = num_tweets_and_replies + 100 * 3 ** j
+        # else:
+        #     break
+        # if time_out < time.time():
+        #     break
+        # if output.startswith("[!] No more data!"):
+        #     break
+        # return _dict
+
+    def string_search_user_tweets(user_name, search_str, from_date="2006-07-01", to_date=str(date.today()),
+                                  num_tweets=10):
+        c = twint.Config()
+        c.Username = user_name
+        c.Search = search_str  # topic
+        c.Pandas = True
+        num_tweets_and_replies = num_tweets
+        c.Count = True
+        c.Limit = num_tweets_and_replies
+        c.Since = from_date
+        c.Until = to_date
+        c.Hide_output = True
+        twint.run.Search(c)
+        return twint.output.panda.Tweets_df
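
Review note: as committed, `get_tweets` takes no `self` and is not decorated, so it is effectively a plain function reached through the class. Also, the guard `(type(from_date) or type("str")) is not type("str")` only ever tests `type(from_date)`, and `tkinter.EXCEPTION` is a file-handler constant, not an exception class, so the `raise` line would itself fail with a `TypeError`. A minimal usage sketch under those caveats (hypothetical calling script, assuming the file is importable as `scrape` and twint's search endpoints still respond):

```python
# Hypothetical usage sketch -- not part of the commit.
from scrape import scraper  # the file added above

# Keyword search: up to 10 tweets mentioning "pineapple" since 2021-01-01.
df = scraper.get_tweets("pineapple", from_date="2021-01-01", num_tweets=10)

# Passing u_or_s="u" rewrites the query as "from:@<search_str>",
# i.e. a user-timeline search instead of a keyword search.
user_df = scraper.get_tweets("nasa", u_or_s="u", num_tweets=10)

print(df[["date", "tweet", "nlikes", "nretweets"]].head())
```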
twitter-scraper/twint-master/.github/FUNDING.yml DELETED
@@ -1,3 +0,0 @@
-# These are supported funding model platforms
-patreon: twintproject
-custom: paypal.me/noneprivacy
twitter-scraper/twint-master/.github/ISSUE_TEMPLATE.md DELETED
@@ -1,20 +0,0 @@
-# Issue Template
-Please use this template!
-
-## Initial Check
-> If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks.
-
-> Make sure you've checked the following:
-
-- [ ] Python version is 3.6 or later;
-- [ ] Updated Twint with `pip3 install --user --upgrade -e git+https://github.com/minamotorin/twint.git@origin/master#egg=twint`;
-- [ ] I have searched the issues and there are no duplicates of this issue/question/request (please link to related issues of twintproject/twint for reference).
-
-## Command Ran
-> Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue.
-
-## Description of Issue
-> Please use **as much detail as possible.**
-
-## Environment Details
-> Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal?
twitter-scraper/twint-master/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md DELETED
@@ -1,17 +0,0 @@
-### Initial Check
-> If the issue is a request please specify that it is a request in the title (Example: [REQUEST] more features). If this is a question regarding 'twint' please specify that it's a question in the title (Example: [QUESTION] What is x?). Please **only** submit issues related to 'twint'. Thanks.
-
-> Make sure you've checked the following:
-
-- [ ] Python version is 3.6;
-- [ ] Using the latest version of Twint;
-- [ ] Updated Twint with `pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint`;
-
-### Command Ran
-> Please provide the _exact_ command ran including the username/search/code so I may reproduce the issue.
-
-### Description of Issue
-> Please use **as much detail as possible.**
-
-### Environment Details
-> Using Windows, Linux? What OS version? Running this in Anaconda? Jupyter Notebook? Terminal?
twitter-scraper/twint-master/.gitignore DELETED
@@ -1,115 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-tweets.db
-# C extensions
-*.so
-
-config.ini
-twint/storage/mysql.py
-
-# Node Dependency directories
-node_modules/
-jspm_packages/
-tests/
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# dotenv
-.env
-
-# virtualenv
-.venv
-venv/
-ENV/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-
-# output
-*.csv
-*.json
-*.txt
-
-test_twint.py
twitter-scraper/twint-master/.travis.yml DELETED
@@ -1,23 +0,0 @@
-dist: bionic
-language: python
-python:
-  - "3.6"
-  - "3.7"
-  - "3.8"
-  - "nightly"
-matrix:
-  allow_failures:
-    - python: "nightly"
-    - python: "3.8"
-install:
-  - pip install -r requirements.txt
-script:
-  - python test.py
-deploy:
-  provider: pypi
-  user: "codyzacharias"
-  password:
-    secure: sWWvx50F7KJBtf8z2njc+Q31WIAHiQs4zKEiGD4/7xrshw55H5z+WnqZ9VIP83qm9yKefoRKp7WnaJeXZ3ulZSLn64ue45lqFozWMyGvelRPOKvZi9XPMqBA7+qllR/GseTHSGC3G5EGxac6UEI3irYe3mZXxfjpxNOXVti8rJ2xX8TiJM0AVKRrdDiAstOhMMkXkB7fYXMQALwEp8UoW/UbjbeqsKueXydjStaESNP/QzRFZ3/tuNu+3HMz/olniLUhUWcF/xDbJVpXuaRMUalgqe+BTbDdtUVt/s/GKtpg5GAzJyhQphiCM/huihedUIKSoI+6A8PTzuxrLhB5BMi9pcllED02v7w1enpu5L2l5cRDgQJSOpkxkA5Eese8nxKOOq0KzwDQa3JByrRor8R4yz+p5s4u2r0Rs2A9fkjQYwd/uWBSEIRF4K9WZoniiikahwXq070DMRgV7HbovKSjo5NK5F8j+psrtqPF+OHN2aVfWxbGnezrOOkmzuTHhWZVj3pPSpQU1WFWHo9fPo4I6YstR4q6XjNNjrpY3ojSlv0ThMbUem7zhHTRkRsSA2SpPfqw5E3Jf7vaiQb4M5zkBVqxuq4tXb14GJ26tGD8tel8u8b+ccpkAE9xf+QavP8UHz4PbBhqgFX5TbV/H++cdsICyoZnT35yiaDOELM=
-  on:
-    tags: true
-    python: "3.7"
twitter-scraper/twint-master/Dockerfile DELETED
@@ -1,10 +0,0 @@
-FROM python:3.6-buster
-LABEL maintainer="codyzacharias@pm.me"
-
-WORKDIR /root
-
-RUN git clone --depth=1 https://github.com/twintproject/twint.git && \
-    cd /root/twint && \
-    pip3 install . -r requirements.txt
-
-CMD /bin/bash
twitter-scraper/twint-master/LICENSE DELETED
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2018 Cody Zacharias
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
twitter-scraper/twint-master/MANIFEST.in DELETED
@@ -1 +0,0 @@
-include README.md LICENSE
twitter-scraper/twint-master/README.md DELETED
@@ -1,272 +0,0 @@
-20220207.0
-
-# About this fork
-
-[This repository](https://github.com/minamotorin/twint) is the fork of [https://github.com/twintproject/twint](https://github.com/twintproject/twint) and for myself.
-
-Modified by [minamotorin](https://github.com/minamotorin).
-
-## Updates from twintproject/twint
-
-### twint.token.RefreshTokenException: Could not find the Guest token in HTML
-
-This problem doesn't happen recently.
-
-#### Related
-
-- [twintproject/twint#1320](https://github.com/twintproject/twint/issues/1320)
-- [twintproject/twint#1322](https://github.com/twintproject/twint/pull/1322)
-- [twintproject/twint#1328](https://github.com/twintproject/twint/pull/1328)
-- [twintproject/twint#1061](https://github.com/twintproject/twint/issues/1061)
-- [twintproject/twint#1114](https://github.com/twintproject/twint/issues/1114)
-
-### json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
-
-The fix is **not complete**.
-`twint.run.Profile` will work but `twint.run.db` will not.
-This means [`test.py`](./test.py) causes an error.
-
-I think this is because the fields of the result table are not exactly the same as the traditional ones.
-
-#### Related
-
-- [twintproject/twint#1335](https://github.com/twintproject/twint/issues/1335)
-
-### [-] TWINT requires Python version 3.6+.
-
-#### Related
-
-- [twintproject/twint#1344](https://github.com/twintproject/twint/issues/1344)
-- [twintproject/twint#1345](https://github.com/twintproject/twint/pull/1345)
-- [twintproject/twint#1344](https://github.com/twintproject/twint/issues/1346)
-- [twintproject/twint#1309](https://github.com/twintproject/twint/pull/1309)
-- [twintproject/twint#1313](https://github.com/twintproject/twint/issues/1313)
-
-## References
-
-- [snscrape](https://github.com/JustAnotherArchivist/snscrape)
-- [gallery-dl](https://github.com/mikf/gallery-dl)
-
-## License
-
-This repository is also under the [MIT License](https://opensource.org/licenses/mit-license.php).
-
----
-
-# TWINT - Twitter Intelligence Tool
-![2](https://i.imgur.com/iaH3s7z.png)
-![3](https://i.imgur.com/hVeCrqL.png)
-
-[![PyPI](https://img.shields.io/pypi/v/twint.svg)](https://pypi.org/project/twint/) [![Build Status](https://travis-ci.org/twintproject/twint.svg?branch=master)](https://travis-ci.org/twintproject/twint) [![Python 3.6|3.7|3.8](https://img.shields.io/badge/Python-3.6%2F3.7%2F3.8-blue.svg)](https://www.python.org/download/releases/3.0/) [![GitHub license](https://img.shields.io/github/license/haccer/tweep.svg)](https://github.com/haccer/tweep/blob/master/LICENSE) [![Downloads](https://pepy.tech/badge/twint)](https://pepy.tech/project/twint) [![Downloads](https://pepy.tech/badge/twint/week)](https://pepy.tech/project/twint/week) [![Patreon](https://img.shields.io/endpoint.svg?url=https:%2F%2Fshieldsio-patreon.herokuapp.com%2Ftwintproject)](https://www.patreon.com/twintproject) ![](https://img.shields.io/twitter/follow/noneprivacy.svg?label=Follow&style=social)
-
->No authentication. No API. No limits.
-
-Twint is an advanced Twitter scraping tool written in Python that allows for scraping Tweets from Twitter profiles **without** using Twitter's API.
-
-Twint utilizes Twitter's search operators to let you scrape Tweets from specific users, scrape Tweets relating to certain topics, hashtags & trends, or sort out *sensitive* information from Tweets like e-mail and phone numbers. I find this very useful, and you can get really creative with it too.
-
-Twint also makes special queries to Twitter allowing you to also scrape a Twitter user's followers, Tweets a user has liked, and who they follow **without** any authentication, API, Selenium, or browser emulation.
-
-## tl;dr Benefits
-Some of the benefits of using Twint vs Twitter API:
-- Can fetch almost __all__ Tweets (Twitter API limits to last 3200 Tweets only);
-- Fast initial setup;
-- Can be used anonymously and without Twitter sign up;
-- **No rate limitations**.
-
-## Limits imposed by Twitter
-Twitter limits scrolls while browsing the user timeline. This means that with `.Profile` or with `.Favorites` you will be able to get ~3200 tweets.
-
-## Requirements
-- Python 3.6;
-- aiohttp;
-- aiodns;
-- beautifulsoup4;
-- cchardet;
-- dataclasses
-- elasticsearch;
-- pysocks;
-- pandas (>=0.23.0);
-- aiohttp_socks;
-- schedule;
-- geopy;
-- fake-useragent;
-- py-googletransx.
-
-## Installing
-
-**Git:**
-```bash
-git clone --depth=1 https://github.com/twintproject/twint.git
-cd twint
-pip3 install . -r requirements.txt
-```
-
-**Pip:**
-```bash
-pip3 install twint
-```
-
-or
-
-```bash
-pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint
-```
-
-**Pipenv**:
-```bash
-pipenv install git+https://github.com/twintproject/twint.git#egg=twint
-```
-
-### March 2, 2021 Update
-
-**Added**: Dockerfile
-
-Noticed a lot of people are having issues installing (including me). Please use the Dockerfile temporarily while I look into them.
-
-## CLI Basic Examples and Combos
-A few simple examples to help you understand the basics:
-
-- `twint -u username` - Scrape all the Tweets of a *user* (doesn't include **retweets** but includes **replies**).
-- `twint -u username -s pineapple` - Scrape all Tweets from the *user*'s timeline containing _pineapple_.
-- `twint -s pineapple` - Collect every Tweet containing *pineapple* from everyone's Tweets.
-- `twint -u username --year 2014` - Collect Tweets that were tweeted **before** 2014.
-- `twint -u username --since "2015-12-20 20:30:15"` - Collect Tweets that were tweeted since 2015-12-20 20:30:15.
-- `twint -u username --since 2015-12-20` - Collect Tweets that were tweeted since 2015-12-20 00:00:00.
-- `twint -u username -o file.txt` - Scrape Tweets and save to file.txt.
-- `twint -u username -o file.csv --csv` - Scrape Tweets and save as a csv file.
-- `twint -u username --email --phone` - Show Tweets that might have phone numbers or email addresses.
-- `twint -s "Donald Trump" --verified` - Display Tweets by verified users that Tweeted about Donald Trump.
-- `twint -g="48.880048,2.385939,1km" -o file.csv --csv` - Scrape Tweets from a radius of 1km around a place in Paris and export them to a csv file.
-- `twint -u username -es localhost:9200` - Output Tweets to Elasticsearch
-- `twint -u username -o file.json --json` - Scrape Tweets and save as a json file.
-- `twint -u username --database tweets.db` - Save Tweets to a SQLite database.
-- `twint -u username --followers` - Scrape a Twitter user's followers.
-- `twint -u username --following` - Scrape who a Twitter user follows.
-- `twint -u username --favorites` - Collect all the Tweets a user has favorited (gathers ~3200 tweet).
-- `twint -u username --following --user-full` - Collect full user information a person follows
-- `twint -u username --timeline` - Use an effective method to gather Tweets from a user's profile (Gathers ~3200 Tweets, including **retweets** & **replies**).
-- `twint -u username --retweets` - Use a quick method to gather the last 900 Tweets (that includes retweets) from a user's profile.
-- `twint -u username --resume resume_file.txt` - Resume a search starting from the last saved scroll-id.
-
-More detail about the commands and options are located in the [wiki](https://github.com/twintproject/twint/wiki/Commands)
-
-## Module Example
-
-Twint can now be used as a module and supports custom formatting. **More details are located in the [wiki](https://github.com/twintproject/twint/wiki/Module)**
-
-```python
-import twint
-
-# Configure
-c = twint.Config()
-c.Username = "realDonaldTrump"
-c.Search = "great"
-
-# Run
-twint.run.Search(c)
-```
-> Output
-
-`955511208597184512 2018-01-22 18:43:19 GMT <now> pineapples are the best fruit`
-
-```python
-import twint
-
-c = twint.Config()
-
-c.Username = "noneprivacy"
-c.Custom["tweet"] = ["id"]
-c.Custom["user"] = ["bio"]
-c.Limit = 10
-c.Store_csv = True
-c.Output = "none"
-
-twint.run.Search(c)
-```
-
-## Storing Options
-- Write to file;
-- CSV;
-- JSON;
-- SQLite;
-- Elasticsearch.
-
-## Elasticsearch Setup
-
-Details on setting up Elasticsearch with Twint is located in the [wiki](https://github.com/twintproject/twint/wiki/Elasticsearch).
-
-## Graph Visualization
-![graph](https://i.imgur.com/EEJqB8n.png)
-
-[Graph](https://github.com/twintproject/twint/wiki/Graph) details are also located in the [wiki](https://github.com/twintproject/twint/wiki/Graph).
-
-We are developing a Twint Desktop App.
-
-![4](https://i.imgur.com/DzcfIgL.png)
-
-## FAQ
-> I tried scraping tweets from a user, I know that they exist but I'm not getting them
-
-Twitter can shadow-ban accounts, which means that their tweets will not be available via search. To solve this, pass `--profile-full` if you are using Twint via CLI or, if are using Twint as module, add `config.Profile_full = True`. Please note that this process will be quite slow.
-## More Examples
-
-#### Followers/Following
-
-> To get only follower usernames/following usernames
-
-`twint -u username --followers`
-
-`twint -u username --following`
-
-> To get user info of followers/following users
-
-`twint -u username --followers --user-full`
-
-`twint -u username --following --user-full`
-
-#### userlist
-
-> To get only user info of user
-
-`twint -u username --user-full`
-
-> To get user info of users from a userlist
-
-`twint --userlist inputlist --user-full`
-
-
-#### tweet translation (experimental)
-
-> To get 100 english tweets and translate them to italian
-
-`twint -u noneprivacy --csv --output none.csv --lang en --translate --translate-dest it --limit 100`
-
-or
-
-```python
-import twint
-
-c = twint.Config()
-c.Username = "noneprivacy"
-c.Limit = 100
-c.Store_csv = True
-c.Output = "none.csv"
-c.Lang = "en"
-c.Translate = True
-c.TranslateDest = "it"
-twint.run.Search(c)
-```
-
-Notes:
-- [Google translate has some quotas](https://cloud.google.com/translate/quotas)
-
-## Featured Blog Posts:
-- [How to use Twint as an OSINT tool](https://pielco11.ovh/posts/twint-osint/)
-- [Basic tutorial made by Null Byte](https://null-byte.wonderhowto.com/how-to/mine-twitter-for-targeted-information-with-twint-0193853/)
-- [Analyzing Tweets with NLP in minutes with Spark, Optimus and Twint](https://towardsdatascience.com/analyzing-tweets-with-nlp-in-minutes-with-spark-optimus-and-twint-a0c96084995f)
-- [Loading tweets into Kafka and Neo4j](https://markhneedham.com/blog/2019/05/29/loading-tweets-twint-kafka-neo4j/)
-
-## Contact
-
-If you have any question, want to join in discussions, or need extra help, you are welcome to join our Twint focused channel at [OSINT team](https://osint.team)
twitter-scraper/twint-master/Untitled.ipynb DELETED
@@ -1,282 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 67,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "text= \"\\n\\n0. Brottslighet, 1. Miljö, 2. Skola, 3. Sjukvård, 4. Militär, 5. Invandring, 6. Integration \""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.\n",
-      "Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n",
-      "To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n",
-      "Requirement already satisfied: regex in /home/oxygen/snap/jupyter/common/lib/python3.7/site-packages (2022.6.2)\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install regex\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['0']"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "re.findall(\"[0-9]+\", tl[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'0. Äldrefrågor'"
-      ]
-     },
-     "execution_count": 48,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tl[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['0', ' Äldrefrågor']"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "f=tl[0].split('.')\n",
-    "\n",
-    "f#int(f[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'str_topics_to_dict' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-29-b05d9860dbcf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstr_topics_to_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m: name 'str_topics_to_dict' is not defined"
-     ]
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "def str_topics_to_dict(topics):\n",
-    "    topic_list=topics.split(\",\")\n",
-    "    ind_topic_dict={}\n",
-    "    for i inrange(len(topic_list)): \n",
-    "        index_topic_list=\n",
-    "        ind=index_topic_list[0]\n",
-    "        just_topic=index_topic_list[1][1:]\n",
-    "        ind_topic_dict[int(ind)]=just_topic\n",
-    "    return ind_topic_dict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{0: 'Brottslighet',\n",
-       " 1: 'Miljö',\n",
-       " 2: 'Skola',\n",
-       " 3: 'Sjukvård',\n",
-       " 4: 'Militär',\n",
-       " 5: 'Invandring',\n",
-       " 6: 'Integration '}"
-      ]
-     },
-     "execution_count": 68,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "str_topics_to_dict(text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 109,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "' Brottslighet, Miljö, Skola, Sjukvård, Militär stöd, Invandring, Integration '"
-      ]
-     },
-     "execution_count": 109,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "text=\"\\n\\n0. Brottslighet, 1. Miljö, 2. Skola, 3. Sjukvård, 4. Militär stöd, 5. Invandring, 6. Integration \"\n",
-    "text=re.sub(r\"(\\n+)\",\" \",text)\n",
-    "text=re.sub(\"(\\.)|\\d+\",\"\",text )\n",
-    "text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 100,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[' Brottslighet',\n",
-       " ' Miljö',\n",
-       " ' Skola',\n",
-       " ' Sjukvård',\n",
-       " ' Militär stöd',\n",
-       " ' Invandring',\n",
-       " ' Integration ']"
-      ]
-     },
-     "execution_count": 100,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "text.split(\",\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 116,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import regex as re \n",
-    "def str_topics_to_dict(topics):\n",
-    "    text=re.sub(r\"(\\n+)\",\" \",topics)\n",
-    "    text=re.sub(\"(\\.)|\\d+\",\"\",topics )\n",
-    "    topics=re.sub(r\"(\\n+)|(\\.)|\\d+\",\"\",topics)\n",
-    "    topic_list=topics.split(\",\")\n",
-    "    ind_topic_dict={}\n",
-    "    for i in range(len(topic_list)): \n",
-    "        ind=i\n",
-    "        just_topic=topic_list[i]\n",
-    "        ind_topic_dict[ind]=just_topic\n",
-    "    return ind_topic_dict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 117,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{0: ' Brottslighet',\n",
-       " 1: ' Miljö',\n",
-       " 2: ' Skola',\n",
-       " 3: ' Sjukvård',\n",
-       " 4: ' Militär stöd',\n",
-       " 5: ' Invandring',\n",
-       " 6: ' Integration '}"
-      ]
-     },
-     "execution_count": 117,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "str_topics_to_dict(text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
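
Review note: the notebook cell with execution count 65 is broken as committed (`for i inrange(...)` and a dangling `index_topic_list=`); the cell with count 116 supersedes it. A cleaned sketch of that final parser (hypothetical helper, not in the commit; it also strips the leading spaces the committed version leaves on each topic):

```python
import re


def str_topics_to_dict(topics: str) -> dict:
    """Parse "\\n\\n0. Brottslighet, 1. Miljö, ..." into {0: "Brottslighet", ...}."""
    # Drop newline runs, the "N." index prefixes, and stray digits,
    # then enumerate the comma-separated topics in order.
    cleaned = re.sub(r"(\n+)|(\.)|\d+", "", topics)
    return {i: topic.strip() for i, topic in enumerate(cleaned.split(","))}
```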
twitter-scraper/twint-master/automate.py DELETED
@@ -1,65 +0,0 @@
-import twint
-import schedule
-import time
-
-# you can change the name of each "job" after "def" if you'd like.
-def jobone():
-    print ("Fetching Tweets")
-    c = twint.Config()
-    # choose username (optional)
-    c.Username = "insert username here"
-    # choose search term (optional)
-    c.Search = "insert search term here"
-    # choose beginning time (narrow results)
-    c.Since = "2018-01-01"
-    # set limit on total tweets
-    c.Limit = 1000
-    # no idea, but makes the csv format properly
-    c.Store_csv = True
-    # format of the csv
-    c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"]
-    # change the name of the csv file
-    c.Output = "filename.csv"
-    twint.run.Search(c)
-
-def jobtwo():
-    print ("Fetching Tweets")
-    c = twint.Config()
-    # choose username (optional)
-    c.Username = "insert username here"
-    # choose search term (optional)
-    c.Search = "insert search term here"
-    # choose beginning time (narrow results)
-    c.Since = "2018-01-01"
-    # set limit on total tweets
-    c.Limit = 1000
-    # no idea, but makes the csv format properly
-    c.Store_csv = True
-    # format of the csv
-    c.Custom = ["date", "time", "username", "tweet", "link", "likes", "retweets", "replies", "mentions", "hashtags"]
-    # change the name of the csv file
-    c.Output = "filename2.csv"
-    twint.run.Search(c)
-
-# run once when you start the program
-
-jobone()
-jobtwo()
-
-# run every minute(s), hour, day at, day of the week, day of the week and time. Use "#" to block out which ones you don't want to use. Remove it to active. Also, replace "jobone" and "jobtwo" with your new function names (if applicable)
-
-# schedule.every(1).minutes.do(jobone)
-schedule.every().hour.do(jobone)
-# schedule.every().day.at("10:30").do(jobone)
-# schedule.every().monday.do(jobone)
-# schedule.every().wednesday.at("13:15").do(jobone)
-
-# schedule.every(1).minutes.do(jobtwo)
-schedule.every().hour.do(jobtwo)
-# schedule.every().day.at("10:30").do(jobtwo)
-# schedule.every().monday.do(jobtwo)
-# schedule.every().wednesday.at("13:15").do(jobtwo)
-
-while True:
-    schedule.run_pending()
-    time.sleep(1)
twitter-scraper/twint-master/elasticsearch/README.md DELETED
@@ -1,5 +0,0 @@
-# Elasticsearch How-To
-
-![dashboard](https://i.imgur.com/BEbtdo5.png)
-
-Please read the Wiki [here](https://github.com/twintproject/twint/wiki/Elasticsearch)
twitter-scraper/twint-master/scrape.py DELETED
@@ -1,102 +0,0 @@
-import sys
-import io
-import time
-import asyncio
-import os
-from tkinter import EXCEPTION
-from numpy import not_equal
-
-loop = asyncio.get_event_loop()
-loop.is_running()
-import twint
-import nest_asyncio
-
-nest_asyncio.apply()
-from datetime import date
-
-
-class scraper:
-    def get_tweets(search_str, from_date="2006-07-01", to_date=str(date.today()), num_tweets=10, u_or_s='s',
-                   acceptable_range=10):
-
-        if (type(from_date) or type("str")) is not type("str"):
-            print("[!] Please make sure the date is a string in this format \"yyyy-mm-dd\" ")
-            raise EXCEPTION("Incorrect date type Exception!")
-
-        time_out = time.time() + 2 * 60
-        _dict = {}
-        c = twint.Config()
-        if u_or_s.lower() == "u":
-            c.Search = "from:@" + search_str  # topic
-        else:
-            c.Search = search_str  # topic
-        c.Pandas = True
-        num_tweets_and_replies = num_tweets
-        c.Count = True
-        for j in range(1, 5):
-            c.Limit = num_tweets_and_replies
-            c.Since = from_date
-            c.Until = to_date
-            c.Hide_output = True
-            old_stdout = sys.stdout
-            new_stdout = io.StringIO()
-            sys.stdout = new_stdout
-            twint.run.Search(c)
-            output = new_stdout.getvalue()
-            sys.stdout = old_stdout
-            print(output[0:-2])
-            tweet_info = twint.output.panda.Tweets_df
-
-            t_count = 0
-            try:
-                _keys = tweet_info["id"]
-                # tweet infor is a dataframe with fallowing columns
-                '''Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
-                    'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
-                    'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
-                    'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
-                    'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
-                    'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
-                    'trans_dest'],
-                    dtype='object')'''
-
-                for i in range(len(_keys)):
-                    if _keys[i] in _dict.keys() or tweet_info["tweet"][i].startswith("@"):
-                        pass
-                    else:
-                        _dict[int(_keys[i])] = {"tweet": tweet_info["tweet"][i],
-                                                "date": tweet_info["date"][i],
-                                                "nlikes": tweet_info["nlikes"][i],
-                                                "nreplies": tweet_info["nreplies"][i],
-                                                "nretweets": tweet_info["nretweets"][i], "topic": ""}
-                        if len(list(_dict.keys())) == num_tweets:
-                            break
-            except:
-                pass
-            print(len(list(_dict.keys())), " of them are Tweets")
-            if (num_tweets - len(list(_dict.keys()))) < acceptable_range:
-                return _dict
-            if len(list(_dict.keys())) < num_tweets:
-                num_tweets_and_replies = num_tweets_and_replies + 100 * 3 ** j
-            else:
-                break
-            if time_out < time.time():
-                break
-            if output.startswith("[!] No more data!"):
-                break
-        return _dict
-
-    def string_search_user_tweets(user_name, search_str, from_date="2006-07-01", to_date=str(date.today()),
-                                  num_tweets=10):
-        c = twint.Config()
-        c.Username = user_name
-        c.Search = search_str  # topic
-        c.Pandas = True
-        num_tweets_and_replies = num_tweets
-        c.Count = True
-        c.Limit = num_tweets_and_replies
-        c.Since = from_date
-        c.Until = to_date
-        c.Hide_output = True
-        twint.run.Search(c)
-        return twint.output.panda.Tweets_df
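
Review note: both versions of `scrape.py` silence and recapture twint's console output by swapping `sys.stdout` by hand, which leaks the redirected stream if `twint.run.Search` raises mid-call. A sketch of the same capture using `contextlib.redirect_stdout`, which restores stdout even on error (hypothetical helper, not in the commit):

```python
import io
from contextlib import redirect_stdout

import twint


def run_search_captured(c: twint.Config) -> str:
    """Run an already-configured twint search and return everything it printed."""
    buf = io.StringIO()
    with redirect_stdout(buf):  # sys.stdout is restored even if Search raises
        twint.run.Search(c)
    return buf.getvalue()
```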
twitter-scraper/twint-master/scrape__init__.py DELETED
@@ -1,14 +0,0 @@
-def scraper_libs():
-    import sys
-    import io
-    import time
-    import asyncio
-    import os
-    from tkinter import EXCEPTION
-    from numpy import not_equal
-    loop = asyncio.get_event_loop()
-    loop.is_running()
-    import twint
-    import nest_asyncio
-    nest_asyncio.apply()
-    from datetime import date
twitter-scraper/twint-master/setup.py DELETED
@@ -1,65 +0,0 @@
-#!/usr/bin/python3
-from setuptools import setup
-import io
-import os
-
-# Package meta-data
-NAME = 'twint'
-DESCRIPTION = 'An advanced Twitter scraping & OSINT tool.'
-URL = 'https://github.com/twintproject/twint'
-EMAIL = 'codyzacharias@pm.me'
-AUTHOR = 'Cody Zacharias'
-REQUIRES_PYTHON = '>=3.6.0'
-VERSION = None
-
-# Packages required
-REQUIRED = [
-    'aiohttp', 'aiodns', 'beautifulsoup4', 'cchardet', 'dataclasses',
-    'elasticsearch', 'pysocks', 'pandas', 'aiohttp_socks',
-    'schedule', 'geopy', 'fake-useragent', 'googletransx'
-]
-
-here = os.path.abspath(os.path.dirname(__file__))
-
-with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
-    long_description = '\n' + f.read()
-
-# Load the package's __version__.py
-about = {}
-if not VERSION:
-    with open(os.path.join(here, NAME, '__version__.py')) as f:
-        exec(f.read(), about)
-else:
-    about['__version__'] = VERSION
-
-setup(
-    name=NAME,
-    version=about['__version__'],
-    description=DESCRIPTION,
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author=AUTHOR,
-    author_email=EMAIL,
-    python_requires=REQUIRES_PYTHON,
-    url=URL,
-    packages=['twint', 'twint.storage'],
-    entry_points={
-        'console_scripts': [
-            'twint = twint.cli:run_as_command',
-        ],
-    },
-    install_requires=REQUIRED,
-    dependency_links=[
-        'git+https://github.com/x0rzkov/py-googletrans#egg=googletrans'
-    ],
-    license='MIT',
-    classifiers=[
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: Implementation :: CPython',
-    ],
-)
twitter-scraper/twint-master/test.py DELETED
@@ -1,92 +0,0 @@
-import twint
-import os
-
-'''
-Test.py - Testing TWINT to make sure everything works.
-'''
-
-
-def test_reg(c, run):
-    print("[+] Beginning vanilla test in {}".format(str(run)))
-    run(c)
-
-
-def test_db(c, run):
-    print("[+] Beginning DB test in {}".format(str(run)))
-    c.Database = "test_twint.db"
-    run(c)
-
-
-def custom(c, run, _type):
-    print("[+] Beginning custom {} test in {}".format(_type, str(run)))
-    c.Custom['tweet'] = ["id", "username"]
-    c.Custom['user'] = ["id", "username"]
-    run(c)
-
-
-def test_json(c, run):
-    c.Store_json = True
-    c.Output = "test_twint.json"
-    custom(c, run, "JSON")
-    print("[+] Beginning JSON test in {}".format(str(run)))
-    run(c)
-
-
-def test_csv(c, run):
-    c.Store_csv = True
-    c.Output = "test_twint.csv"
-    custom(c, run, "CSV")
-    print("[+] Beginning CSV test in {}".format(str(run)))
-    run(c)
-
-
-def main():
-    c = twint.Config()
-    c.Username = "verified"
-    c.Limit = 20
-    c.Store_object = True
-
-    # Separate objects are necessary.
-
-    f = twint.Config()
-    f.Username = "verified"
-    f.Limit = 20
-    f.Store_object = True
-    f.User_full = True
-
-    runs = [
-        twint.run.Profile,  # this doesn't
-        twint.run.Search,  # this works
-        twint.run.Following,
-        twint.run.Followers,
-        twint.run.Favorites,
-    ]
-
-    tests = [test_reg, test_json, test_csv, test_db]
-
-    # Something breaks if we don't split these up
-
-    for run in runs[:3]:
-        if run == twint.run.Search:
-            c.Since = "2012-1-1 20:30:22"
-            c.Until = "2017-1-1"
-        else:
-            c.Since = ""
-            c.Until = ""
-
-        for test in tests:
-            test(c, run)
-
-    for run in runs[3:]:
-        for test in tests:
-            test(f, run)
-
-    files = ["test_twint.db", "test_twint.json", "test_twint.csv"]
-    for _file in files:
-        os.remove(_file)
-
-    print("[+] Testing complete!")
-
-
-if __name__ == '__main__':
-    main()
twitter-scraper/twint-master/twint/__init__.py DELETED
@@ -1,32 +0,0 @@
-'''
-TWINT - Twitter Intelligence Tool (formerly known as Tweep).
-
-See wiki on Github for in-depth details.
-https://github.com/twintproject/twint/wiki
-
-Licensed under MIT License
-Copyright (c) 2018 Cody Zacharias
-'''
-import logging, os
-
-from .config import Config
-from .__version__ import __version__
-from . import run
-
-_levels = {
-    'info': logging.INFO,
-    'debug': logging.DEBUG
-}
-
-_level = os.getenv('TWINT_DEBUG', 'info')
-_logLevel = _levels[_level]
-
-if _level == "debug":
-    logger = logging.getLogger()
-    _output_fn = 'twint.log'
-    logger.setLevel(_logLevel)
-    formatter = logging.Formatter('%(levelname)s:%(asctime)s:%(name)s:%(message)s')
-    fileHandler = logging.FileHandler(_output_fn)
-    fileHandler.setLevel(_logLevel)
-    fileHandler.setFormatter(formatter)
-    logger.addHandler(fileHandler)
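
Review note: `twint/__init__.py` reads the `TWINT_DEBUG` environment variable once, at import time, so debug logging has to be requested before the first `import twint`. A sketch:

```python
# Sketch: enable twint's file logging (writes twint.log in the working dir).
import os

os.environ["TWINT_DEBUG"] = "debug"  # must be set before twint is imported

import twint  # the root logger gains a FileHandler during this import
```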
twitter-scraper/twint-master/twint/__version__.py DELETED
@@ -1,3 +0,0 @@
-VERSION = (2, 1, 21)
-
-__version__ = '.'.join(map(str, VERSION))
twitter-scraper/twint-master/twint/cli.py DELETED
@@ -1,342 +0,0 @@
-#!/usr/bin/env python3
-'''
-Twint.py - Twitter Intelligence Tool (formerly known as Tweep).
-
-See wiki on Github for in-depth details.
-https://github.com/twintproject/twint/wiki
-
-Licensed under MIT License
-Copyright (c) 2018 The Twint Project
-'''
-import sys
-import os
-import argparse
-
-from . import run
-from . import config
-from . import storage
-
-
-def error(_error, message):
-    """ Print errors to stdout
-    """
-    print("[-] {}: {}".format(_error, message))
-    sys.exit(0)
-
-
-def check(args):
-    """ Error checking
-    """
-    if args.username is not None or args.userlist or args.members_list:
-        if args.verified:
-            error("Contradicting Args",
-                  "Please use --verified in combination with -s.")
-        if args.userid:
-            error("Contradicting Args",
-                  "--userid and -u cannot be used together.")
-        if args.all:
-            error("Contradicting Args",
-                  "--all and -u cannot be used together.")
-    elif args.search and args.timeline:
-        error("Contradicting Args",
-              "--s and --tl cannot be used together.")
-    elif args.timeline and not args.username:
-        error("Error", "-tl cannot be used without -u.")
-    elif args.search is None:
-        if args.custom_query is not None:
-            pass
-        elif (args.geo or args.near) is None and not (args.all or args.userid):
-            error("Error", "Please use at least -u, -s, -g or --near.")
-    elif args.all and args.userid:
-        error("Contradicting Args",
-              "--all and --userid cannot be used together")
-    if args.output is None:
-        if args.csv:
-            error("Error", "Please specify an output file (Example: -o file.csv).")
-        elif args.json:
-            error("Error", "Please specify an output file (Example: -o file.json).")
-    if args.backoff_exponent <= 0:
-        error("Error", "Please specifiy a positive value for backoff_exponent")
-    if args.min_wait_time < 0:
-        error("Error", "Please specifiy a non negative value for min_wait_time")
-
-
-def loadUserList(ul, _type):
-    """ Concatenate users
-    """
-    if os.path.exists(os.path.abspath(ul)):
-        userlist = open(os.path.abspath(ul), "r").read().splitlines()
-    else:
-        userlist = ul.split(",")
-    if _type == "search":
-        un = ""
-        for user in userlist:
-            un += "%20OR%20from%3A" + user
-        return un[15:]
-    return userlist
-
-
-def initialize(args):
-    """ Set default values for config from args
-    """
-    c = config.Config()
-    c.Username = args.username
-    c.User_id = args.userid
-    c.Search = args.search
-    c.Geo = args.geo
-    c.Location = args.location
-    c.Near = args.near
-    c.Lang = args.lang
-    c.Output = args.output
-    c.Elasticsearch = args.elasticsearch
-    c.Year = args.year
-    c.Since = args.since
-    c.Until = args.until
-    c.Email = args.email
-    c.Phone = args.phone
-    c.Verified = args.verified
-    c.Store_csv = args.csv
-    c.Tabs = args.tabs
-    c.Store_json = args.json
-    c.Show_hashtags = args.hashtags
-    c.Show_cashtags = args.cashtags
-    c.Limit = args.limit
-    c.Count = args.count
-    c.Stats = args.stats
-    c.Database = args.database
-    c.To = args.to
-    c.All = args.all
-    c.Essid = args.essid
-    c.Format = args.format
-    c.User_full = args.user_full
-    # c.Profile_full = args.profile_full
-    c.Pandas_type = args.pandas_type
-    c.Index_tweets = args.index_tweets
-    c.Index_follow = args.index_follow
-    c.Index_users = args.index_users
-    c.Debug = args.debug
-    c.Resume = args.resume
-    c.Images = args.images
-    c.Videos = args.videos
-    c.Media = args.media
-    c.Replies = args.replies
-    c.Pandas_clean = args.pandas_clean
-    c.Proxy_host = args.proxy_host
-    c.Proxy_port = args.proxy_port
-    c.Proxy_type = args.proxy_type
-    c.Tor_control_port = args.tor_control_port
-    c.Tor_control_password = args.tor_control_password
-    c.Retweets = args.retweets
-    c.Custom_query = args.custom_query
-    c.Popular_tweets = args.popular_tweets
-    c.Skip_certs = args.skip_certs
-    c.Hide_output = args.hide_output
-    c.Native_retweets = args.native_retweets
-    c.Min_likes = args.min_likes
-    c.Min_retweets = args.min_retweets
-    c.Min_replies = args.min_replies
-    c.Links = args.links
-    c.Source = args.source
-    c.Members_list = args.members_list
-    c.Filter_retweets = args.filter_retweets
-    c.Translate = args.translate
-    c.TranslateDest = args.translate_dest
-    c.Backoff_exponent = args.backoff_exponent
-    c.Min_wait_time = args.min_wait_time
-    return c
-
-
-def options():
-    """ Parse arguments
-    """
-    ap = argparse.ArgumentParser(prog="twint",
-                                 usage="python3 %(prog)s [options]",
-                                 description="TWINT - An Advanced Twitter Scraping Tool.")
-    ap.add_argument("-u", "--username", help="User's Tweets you want to scrape.")
-    ap.add_argument("-s", "--search", help="Search for Tweets containing this word or phrase.")
-    ap.add_argument("-g", "--geo", help="Search for geocoded Tweets.")
-    ap.add_argument("--near", help="Near a specified city.")
-    ap.add_argument("--location", help="Show user's location (Experimental).", action="store_true")
-    ap.add_argument("-l", "--lang", help="Search for Tweets in a specific language.")
-    ap.add_argument("-o", "--output", help="Save output to a file.")
-    ap.add_argument("-es", "--elasticsearch", help="Index to Elasticsearch.")
-    ap.add_argument("--year", help="Filter Tweets before specified year.")
-    ap.add_argument("--since", help="Filter Tweets sent since date (Example: \"2017-12-27 20:30:15\" or 2017-12-27).",
-                    metavar="DATE")
-    ap.add_argument("--until", help="Filter Tweets sent until date (Example: \"2017-12-27 20:30:15\" or 2017-12-27).",
-                    metavar="DATE")
-    ap.add_argument("--email", help="Filter Tweets that might have email addresses", action="store_true")
-    ap.add_argument("--phone", help="Filter Tweets that might have phone numbers", action="store_true")
-    ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).",
-                    action="store_true")
-    ap.add_argument("--csv", help="Write as .csv file.", action="store_true")
-    ap.add_argument("--tabs", help="Separate CSV fields with tab characters, not commas.", action="store_true")
-    ap.add_argument("--json", help="Write as .json file", action="store_true")
-    ap.add_argument("--hashtags", help="Output hashtags in seperate column.", action="store_true")
-    ap.add_argument("--cashtags", help="Output cashtags in seperate column.", action="store_true")
-    ap.add_argument("--userid", help="Twitter user id.")
-    ap.add_argument("--limit", help="Number of Tweets to pull (Increments of 20).")
-    ap.add_argument("--count", help="Display number of Tweets scraped at the end of session.",
-                    action="store_true")
-    ap.add_argument("--stats", help="Show number of replies, retweets, and likes.",
-                    action="store_true")
-    ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 database.")
-    ap.add_argument("--to", help="Search Tweets to a user.", metavar="USERNAME")
-    ap.add_argument("--all", help="Search all Tweets associated with a user.", metavar="USERNAME")
-    ap.add_argument("--followers", help="Scrape a person's followers.", action="store_true")
-    ap.add_argument("--following", help="Scrape a person's follows", action="store_true")
-    ap.add_argument("--favorites", help="Scrape Tweets a user has liked.", action="store_true")
-    ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.")
-    ap.add_argument("--proxy-host", help="Proxy hostname or IP.")
-    ap.add_argument("--proxy-port", help="The port of the proxy server.")
-    ap.add_argument("--tor-control-port", help="If proxy-host is set to tor, this is the control port", default=9051)
-    ap.add_argument("--tor-control-password",
-                    help="If proxy-host is set to tor, this is the password for the control port",
-                    default="my_password")
-    ap.add_argument("--essid",
-                    help="Elasticsearch Session ID, use this to differentiate scraping sessions.",
-                    nargs="?", default="")
-    ap.add_argument("--userlist", help="Userlist from list or file.")
-    ap.add_argument("--retweets",
-                    help="Include user's Retweets (Warning: limited).",
-                    action="store_true")
-    ap.add_argument("--format", help="Custom output format (See wiki for details).")
-    ap.add_argument("--user-full",
-                    help="Collect all user information (Use with followers or following only).",
-                    action="store_true")
-    # I am removing this this feature for the time being, because it is no longer required, default method will do this
-    # ap.add_argument("--profile-full",
-    #                 help="Slow, but effective method of collecting a user's Tweets and RT.",
-    #                 action="store_true")
-    ap.add_argument(
-        "-tl",
-        "--timeline",
-        help="Collects every tweet from a User's Timeline. (Tweets, RTs & Replies)",
-        action="store_true",
-    )
-    ap.add_argument("--translate",
-                    help="Get tweets translated by Google Translate.",
-                    action="store_true")
-    ap.add_argument("--translate-dest", help="Translate tweet to language (ISO2).",
-                    default="en")
-    ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
-    ap.add_argument("--pandas-type",
-                    help="Specify HDF5 or Pickle (HDF5 as default)", nargs="?", default="HDF5")
-    ap.add_argument("-it", "--index-tweets",
-                    help="Custom Elasticsearch Index name for Tweets.", nargs="?", default="twinttweets")
-    ap.add_argument("-if", "--index-follow",
-                    help="Custom Elasticsearch Index name for Follows.",
-                    nargs="?", default="twintgraph")
-    ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.",
-                    nargs="?", default="twintuser")
-    ap.add_argument("--debug",
-                    help="Store information in debug logs", action="store_true")
-    ap.add_argument("--resume", help="Resume from Tweet ID.", metavar="TWEET_ID")
-    ap.add_argument("--videos", help="Display only Tweets with videos.", action="store_true")
-    ap.add_argument("--images", help="Display only Tweets with images.", action="store_true")
-    ap.add_argument("--media",
-                    help="Display Tweets with only images or videos.", action="store_true")
-    ap.add_argument("--replies", help="Display replies to a subject.", action="store_true")
-    ap.add_argument("-pc", "--pandas-clean",
-                    help="Automatically clean Pandas dataframe at every scrape.")
-    ap.add_argument("-cq", "--custom-query", help="Custom search query.")
-    ap.add_argument("-pt", "--popular-tweets", help="Scrape popular tweets instead of recent ones.",
-                    action="store_true")
-    ap.add_argument("-sc", "--skip-certs", help="Skip certs verification, useful for SSC.", action="store_false")
-    ap.add_argument("-ho", "--hide-output", help="Hide output, no tweets will be displayed.", action="store_true")
-    ap.add_argument("-nr", "--native-retweets", help="Filter the results for retweets only.", action="store_true")
-    ap.add_argument("--min-likes", help="Filter the tweets by minimum number of likes.")
-    ap.add_argument("--min-retweets", help="Filter the tweets by minimum number of retweets.")
-    ap.add_argument("--min-replies", help="Filter the tweets by minimum number of replies.")
-    ap.add_argument("--links", help="Include or exclude tweets containing one o more links. If not specified" +
-                    " you will get both tweets that might contain links or not.")
-    ap.add_argument("--source", help="Filter the tweets for specific source client.")
-    ap.add_argument("--members-list", help="Filter the tweets sent by users in a given list.")
-    ap.add_argument("-fr", "--filter-retweets", help="Exclude retweets from the results.", action="store_true")
-    ap.add_argument("--backoff-exponent", help="Specify a exponent for the polynomial backoff in case of errors.",
-                    type=float, default=3.0)
-    ap.add_argument("--min-wait-time", type=float, default=15,
-                    help="specifiy a minimum wait time in case of scraping limit error. This value will be adjusted by twint if the value provided does not satisfy the limits constraints")
-    args = ap.parse_args()
-
-    return args
-
-
-def main():
-    """ Main
-    """
-    args = options()
-    check(args)
-
-    if args.pandas_clean:
-        storage.panda.clean()
-
-    c = initialize(args)
-
-    if args.userlist:
-        c.Query = loadUserList(args.userlist, "search")
-
-    if args.pandas_clean:
-        storage.panda.clean()
-
-    if args.favorites:
-        if args.userlist:
-            _userlist = loadUserList(args.userlist, "favorites")
-            for _user in _userlist:
-                args.username = _user
-                c = initialize(args)
-                run.Favorites(c)
-        else:
-            run.Favorites(c)
-    elif args.following:
-        if args.userlist:
-            _userlist = loadUserList(args.userlist, "following")
-            for _user in _userlist:
-                args.username = _user
-                c = initialize(args)
-                run.Following(c)
-        else:
-            run.Following(c)
-    elif args.followers:
-        if args.userlist:
-            _userlist = loadUserList(args.userlist, "followers")
-            for _user in _userlist:
-                args.username = _user
-                c = initialize(args)
-                run.Followers(c)
-        else:
-            run.Followers(c)
- elif args.retweets: # or args.profile_full:
310
- if args.userlist:
311
- _userlist = loadUserList(args.userlist, "profile")
312
- for _user in _userlist:
313
- args.username = _user
314
- c = initialize(args)
315
- run.Profile(c)
316
- else:
317
- run.Profile(c)
318
- elif args.user_full:
319
- if args.userlist:
320
- _userlist = loadUserList(args.userlist, "userlist")
321
- for _user in _userlist:
322
- args.username = _user
323
- c = initialize(args)
324
- run.Lookup(c)
325
- else:
326
- run.Lookup(c)
327
- elif args.timeline:
328
- run.Profile(c)
329
- else:
330
- run.Search(c)
331
-
332
-
333
- def run_as_command():
334
- if(sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 6)):
335
- print("[-] TWINT requires Python version 3.6+.")
336
- sys.exit(0)
337
-
338
- main()
339
-
340
-
341
- if __name__ == '__main__':
342
- main()
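
The CLI above is a thin wrapper over twint's module API: options() builds the argparse namespace, check() and initialize() map it onto a Config, and main() dispatches to the matching run.* entry point. A minimal sketch of driving the same default Search path programmatically (the flag-to-attribute mapping is inferred from the code above):

    import twint

    c = twint.Config()
    c.Search = "data science"   # the positional search argument
    c.Limit = 20                # --limit 20, fetched in increments of 20
    c.Store_csv = True          # --csv
    c.Output = "tweets.csv"     # -o tweets.csv
    twint.run.Search(c)         # the branch main() falls through to by default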
twitter-scraper/twint-master/twint/config.py DELETED
@@ -1,87 +0,0 @@
- from dataclasses import dataclass, field
- from typing import Optional
-
-
- @dataclass
- class Config:
-     Username: Optional[str] = None
-     User_id: Optional[str] = None
-     Search: Optional[str] = None
-     Lookup: bool = False
-     Geo: str = ""
-     Location: bool = False
-     Near: Optional[str] = None
-     Lang: Optional[str] = None
-     Output: Optional[str] = None
-     Elasticsearch: object = None
-     Year: Optional[int] = None
-     Since: Optional[str] = None
-     Until: Optional[str] = None
-     Email: Optional[str] = None
-     Phone: Optional[str] = None
-     Verified: bool = False
-     Store_csv: bool = False
-     Store_json: bool = False
-     Custom: dict = field(default_factory=lambda: {"tweet": None, "user": None, "username": None})
-     Show_hashtags: bool = False
-     Show_cashtags: bool = False
-     Limit: Optional[int] = None
-     Count: Optional[int] = None
-     Stats: bool = False
-     Database: object = None
-     To: Optional[str] = None
-     All: Optional[str] = None
-     Debug: bool = False
-     Format: Optional[str] = None
-     Essid: str = ""
-     Profile: bool = False
-     Followers: bool = False
-     Following: bool = False
-     Favorites: bool = False
-     TwitterSearch: bool = False
-     User_full: bool = False
-     # Profile_full: bool = False
-     Store_object: bool = False
-     Store_object_tweets_list: Optional[list] = None
-     Store_object_users_list: Optional[list] = None
-     Store_object_follow_list: Optional[list] = None
-     Pandas_type: Optional[type] = None
-     Pandas: bool = False
-     Index_tweets: str = "twinttweets"
-     Index_follow: str = "twintgraph"
-     Index_users: str = "twintuser"
-     Retries_count: int = 10
-     Resume: object = None
-     Images: bool = False
-     Videos: bool = False
-     Media: bool = False
-     Replies: bool = False
-     Pandas_clean: bool = True
-     Lowercase: bool = True
-     Pandas_au: bool = True
-     Proxy_host: str = ""
-     Proxy_port: int = 0
-     Proxy_type: object = None
-     Tor_control_port: int = 9051
-     Tor_control_password: Optional[str] = None
-     Retweets: bool = False
-     Query: Optional[str] = None
-     Hide_output: bool = False
-     Custom_query: str = ""
-     Popular_tweets: bool = False
-     Skip_certs: bool = False
-     Native_retweets: bool = False
-     Min_likes: int = 0
-     Min_retweets: int = 0
-     Min_replies: int = 0
-     Links: Optional[str] = None
-     Source: Optional[str] = None
-     Members_list: Optional[str] = None
-     Filter_retweets: bool = False
-     Translate: bool = False
-     TranslateSrc: str = "en"
-     TranslateDest: str = "en"
-     Backoff_exponent: float = 3.0
-     Min_wait_time: int = 0
-     Bearer_token: Optional[str] = None
-     Guest_token: Optional[str] = None
-     deleted: Optional[list] = None
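
Config is a dataclass that serves as a plain options bag; every CLI flag ends up as one of these capitalized attributes, and nothing is validated at assignment time. A small usage sketch (field names taken from the declarations above):

    from twint.config import Config

    c = Config(Username="jack", Limit=40)   # annotated fields accept keyword init
    c.Store_object = True                   # collect results in memory...
    c.Store_object_tweets_list = []         # ...into a caller-owned list
    print(c.Index_tweets)                   # untouched fields keep their defaults: "twinttweets"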
twitter-scraper/twint-master/twint/datelock.py DELETED
@@ -1,44 +0,0 @@
- import datetime
-
- import logging as logme
-
- from .tweet import utc_to_local
-
-
- class Datelock:
-     until = None
-     since = None
-     _since_def_user = None
-
-
- def convertToDateTime(string):
-     dateTimeList = string.split()
-     ListLength = len(dateTimeList)
-     if ListLength == 2:
-         return string
-     if ListLength == 1:
-         return string + " 00:00:00"
-     else:
-         return ""
-
-
- def Set(Until, Since):
-     logme.debug(__name__ + ':Set')
-     d = Datelock()
-
-     if Until:
-         d.until = datetime.datetime.strptime(convertToDateTime(Until), "%Y-%m-%d %H:%M:%S")
-         d.until = utc_to_local(d.until)
-     else:
-         d.until = datetime.datetime.today()
-
-     if Since:
-         d.since = datetime.datetime.strptime(convertToDateTime(Since), "%Y-%m-%d %H:%M:%S")
-         d.since = utc_to_local(d.since)
-         d._since_def_user = True
-     else:
-         d.since = datetime.datetime.strptime("2006-03-21 00:00:00", "%Y-%m-%d %H:%M:%S")
-         d.since = utc_to_local(d.since)
-         d._since_def_user = False
-
-     return d
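
Set(Until, Since) is the whole contract of this module: date-only strings are padded to "YYYY-MM-DD 00:00:00" by convertToDateTime(), both bounds are shifted to local time, and since falls back to 2006-03-21 (around Twitter's launch) when not supplied. A short sketch of the returned object (attribute names from the class above; _since_def_user is internal):

    from twint import datelock

    d = datelock.Set("2020-01-31", "2020-01-01")   # (Until, Since)
    print(d.since, d.until)                        # timezone-adjusted datetime bounds
    print(d._since_def_user)                       # True only when Since was user-supplied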
twitter-scraper/twint-master/twint/feed.py DELETED
@@ -1,145 +0,0 @@
- import time
- from datetime import datetime
-
- from bs4 import BeautifulSoup
- from re import findall
- from json import loads
-
- import logging as logme
-
- from .tweet import utc_to_local, Tweet_formats
-
-
- class NoMoreTweetsException(Exception):
-     def __init__(self, msg):
-         super().__init__(msg)
-
-
- def Follow(response):
-     logme.debug(__name__ + ':Follow')
-     soup = BeautifulSoup(response, "html.parser")
-     follow = soup.find_all("td", "info fifty screenname")
-     cursor = soup.find_all("div", "w-button-more")
-     try:
-         cursor = findall(r'cursor=(.*?)">', str(cursor))[0]
-     except IndexError:
-         logme.critical(__name__ + ':Follow:IndexError')
-
-     return follow, cursor
-
-
- # TODO: this won't be used by --profile-full anymore. If it isn't used anywhere else, perhaps remove it in the future.
- def Mobile(response):
-     logme.debug(__name__ + ':Mobile')
-     soup = BeautifulSoup(response, "html.parser")
-     tweets = soup.find_all("span", "metadata")
-     max_id = soup.find_all("div", "w-button-more")
-     try:
-         max_id = findall(r'max_id=(.*?)">', str(max_id))[0]
-     except Exception as e:
-         logme.critical(__name__ + ':Mobile:' + str(e))
-
-     return tweets, max_id
-
-
- def MobileFav(response):
-     soup = BeautifulSoup(response, "html.parser")
-     tweets = soup.find_all("table", "tweet")
-     max_id = soup.find_all("div", "w-button-more")
-     try:
-         max_id = findall(r'max_id=(.*?)">', str(max_id))[0]
-     except Exception as e:
-         print(str(e) + " [x] feed.MobileFav")
-
-     return tweets, max_id
-
-
- def _get_cursor(response):
-     if isinstance(response, dict):  # case 1
-         try:
-             next_cursor = response['timeline']['instructions'][0]['addEntries']['entries'][-1]['content'][
-                 'operation']['cursor']['value']
-         except KeyError:
-             # this is needed because the location of the cursor changes after the first request
-             next_cursor = response['timeline']['instructions'][-1]['replaceEntry']['entry']['content']['operation'][
-                 'cursor']['value']
-     else:  # case 2
-         next_cursor = response[-1]['content']['value']
-     return next_cursor
-
-
- def Json(response):
-     logme.debug(__name__ + ':Json')
-     json_response = loads(response)
-     html = json_response["items_html"]
-     soup = BeautifulSoup(html, "html.parser")
-     feed = soup.find_all("div", "tweet")
-     return feed, json_response["min_position"]
-
-
- def parse_tweets(config, response):
-     logme.debug(__name__ + ':parse_tweets')
-     response = loads(response)
-     feed = []
-     if 'globalObjects' in response:
-         if len(response['globalObjects']['tweets']) == 0:
-             msg = 'No more data!'
-             raise NoMoreTweetsException(msg)
-         for timeline_entry in response['timeline']['instructions'][0]['addEntries']['entries']:
-             # this handles the cases when the timeline entry is a tweet
-             if (config.TwitterSearch or config.Profile) and (timeline_entry['entryId'].startswith('sq-I-t-') or
-                                                              timeline_entry['entryId'].startswith('tweet-')):
-                 if 'tweet' in timeline_entry['content']['item']['content']:
-                     _id = timeline_entry['content']['item']['content']['tweet']['id']
-                     # skip the ads
-                     if 'promotedMetadata' in timeline_entry['content']['item']['content']['tweet']:
-                         continue
-                 elif 'tombstone' in timeline_entry['content']['item']['content'] and 'tweet' in \
-                         timeline_entry['content']['item']['content']['tombstone']:
-                     _id = timeline_entry['content']['item']['content']['tombstone']['tweet']['id']
-                 else:
-                     _id = None
-                 if _id is None:
-                     raise ValueError('Unable to find ID of tweet in timeline.')
-                 try:
-                     temp_obj = response['globalObjects']['tweets'][_id]
-                 except KeyError:
-                     logme.info('encountered a deleted tweet with id {}'.format(_id))
-                     config.deleted.append(_id)
-                     continue
-                 temp_obj['user_data'] = response['globalObjects']['users'][temp_obj['user_id_str']]
-                 if 'retweeted_status_id_str' in temp_obj:
-                     rt_id = temp_obj['retweeted_status_id_str']
-                     _dt = response['globalObjects']['tweets'][rt_id]['created_at']
-                     _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
-                     _dt = utc_to_local(_dt)
-                     _dt = str(_dt.strftime(Tweet_formats['datetime']))
-                     temp_obj['retweet_data'] = {
-                         'user_rt_id': response['globalObjects']['tweets'][rt_id]['user_id_str'],
-                         'user_rt': response['globalObjects']['tweets'][rt_id]['full_text'],
-                         'retweet_id': rt_id,
-                         'retweet_date': _dt,
-                     }
-                 feed.append(temp_obj)
-         next_cursor = _get_cursor(response)  # case 1
-     else:
-         response = response['data']['user']['result']['timeline']
-         entries = response['timeline']['instructions']
-         for e in entries:
-             if e.get('entries'):
-                 entries = e['entries']
-                 break
-         if len(entries) == 2:
-             msg = 'No more data!'
-             raise NoMoreTweetsException(msg)
-         for timeline_entry in entries:
-             if timeline_entry['content'].get('itemContent'):
-                 try:
-                     temp_obj = timeline_entry['content']['itemContent']['tweet_results']['result']['legacy']
-                     temp_obj['user_data'] = timeline_entry['content']['itemContent']['tweet_results']['result']['core']['user_results']['result']['legacy']
-                     feed.append(temp_obj)
-                 except KeyError:  # skip malformed entries
-                     continue
-         next_cursor = _get_cursor(entries)  # case 2
-     return feed, next_cursor
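
parse_tweets() is the piece the runner consumes: it takes the raw JSON string of a search or profile response and returns (feed, next_cursor), raising NoMoreTweetsException once a page comes back empty. A hedged sketch of the pagination loop built around it (drain and raw_json_pages are illustrative names, not part of twint):

    from twint.feed import parse_tweets, NoMoreTweetsException

    def drain(config, raw_json_pages):
        # raw_json_pages: response strings as returned by get.RequestUrl
        for page in raw_json_pages:
            try:
                tweets, cursor = parse_tweets(config, page)
            except NoMoreTweetsException:
                break
            yield from tweets   # raw tweet dicts, incl. 'user_data'/'retweet_data'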
twitter-scraper/twint-master/twint/format.py DELETED
@@ -1,91 +0,0 @@
- import logging as logme
-
-
- def Tweet(config, t):
-     if config.Format:
-         logme.debug(__name__ + ':Tweet:Format')
-         output = config.Format.replace("{id}", t.id_str)
-         output = output.replace("{conversation_id}", t.conversation_id)
-         output = output.replace("{date}", t.datestamp)
-         output = output.replace("{time}", t.timestamp)
-         output = output.replace("{user_id}", t.user_id_str)
-         output = output.replace("{username}", t.username)
-         output = output.replace("{name}", t.name)
-         output = output.replace("{place}", t.place)
-         output = output.replace("{timezone}", t.timezone)
-         output = output.replace("{urls}", ",".join(t.urls))
-         output = output.replace("{photos}", ",".join(t.photos))
-         output = output.replace("{video}", str(t.video))
-         output = output.replace("{thumbnail}", t.thumbnail)
-         output = output.replace("{tweet}", t.tweet)
-         output = output.replace("{language}", t.lang)
-         output = output.replace("{hashtags}", ",".join(t.hashtags))
-         output = output.replace("{cashtags}", ",".join(t.cashtags))
-         output = output.replace("{replies}", t.replies_count)
-         output = output.replace("{retweets}", t.retweets_count)
-         output = output.replace("{likes}", t.likes_count)
-         output = output.replace("{link}", t.link)
-         output = output.replace("{is_retweet}", str(t.retweet))
-         output = output.replace("{user_rt_id}", str(t.user_rt_id))
-         output = output.replace("{quote_url}", t.quote_url)
-         output = output.replace("{near}", t.near)
-         output = output.replace("{geo}", t.geo)
-         output = output.replace("{mentions}", ",".join(t.mentions))
-         output = output.replace("{translate}", t.translate)
-         output = output.replace("{trans_src}", t.trans_src)
-         output = output.replace("{trans_dest}", t.trans_dest)
-     else:
-         logme.debug(__name__ + ':Tweet:notFormat')
-         output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
-
-         # TODO: someone who is familiar with this code, needs to take a look at what this is <also see tweet.py>
-         # if t.retweet:
-         #     output += "RT "
-
-         output += f"<{t.username}> {t.tweet}"
-
-         if config.Show_hashtags:
-             hashtags = ",".join(t.hashtags)
-             output += f" {hashtags}"
-         if config.Show_cashtags:
-             cashtags = ",".join(t.cashtags)
-             output += f" {cashtags}"
-         if config.Stats:
-             output += f" | {t.replies_count} replies {t.retweets_count} retweets {t.likes_count} likes"
-         if config.Translate:
-             output += f" {t.translate} {t.trans_src} {t.trans_dest}"
-     return output
-
-
- def User(_format, u):
-     if _format:
-         logme.debug(__name__ + ':User:Format')
-         output = _format.replace("{id}", str(u.id))
-         output = output.replace("{name}", u.name)
-         output = output.replace("{username}", u.username)
-         output = output.replace("{bio}", u.bio)
-         output = output.replace("{location}", u.location)
-         output = output.replace("{url}", u.url)
-         output = output.replace("{join_date}", u.join_date)
-         output = output.replace("{join_time}", u.join_time)
-         output = output.replace("{tweets}", str(u.tweets))
-         output = output.replace("{following}", str(u.following))
-         output = output.replace("{followers}", str(u.followers))
-         output = output.replace("{likes}", str(u.likes))
-         output = output.replace("{media}", str(u.media_count))
-         output = output.replace("{private}", str(u.is_private))
-         output = output.replace("{verified}", str(u.is_verified))
-         output = output.replace("{avatar}", u.avatar)
-         if u.background_image:
-             output = output.replace("{background_image}", u.background_image)
-         else:
-             output = output.replace("{background_image}", "")
-     else:
-         logme.debug(__name__ + ':User:notFormat')
-         output = f"{u.id} | {u.name} | @{u.username} | Private: "
-         output += f"{u.is_private} | Verified: {u.is_verified} |"
-         output += f" Bio: {u.bio} | Location: {u.location} | Url: "
-         output += f"{u.url} | Joined: {u.join_date} {u.join_time} "
-         output += f"| Tweets: {u.tweets} | Following: {u.following}"
-         output += f" | Followers: {u.followers} | Likes: {u.likes} "
-         output += f"| Media: {u.media_count} | Avatar: {u.avatar}"
-
-     return output
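
Both functions do plain placeholder substitution: every "{field}" token in the format string is replaced by the matching tweet or user attribute. Sketch of supplying a custom format (field names from the replace() calls above):

    import twint

    c = twint.Config()
    c.Search = "pandas"
    c.Limit = 20
    c.Format = "{date} {time} @{username}: {tweet}"   # consumed by format.Tweet()
    twint.run.Search(c)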
twitter-scraper/twint-master/twint/get.py DELETED
@@ -1,298 +0,0 @@
- from async_timeout import timeout
- from datetime import datetime
- from bs4 import BeautifulSoup
- import sys
- import socket
- import aiohttp
- from fake_useragent import UserAgent
- import asyncio
- import concurrent.futures
- import random
- from json import loads, dumps
- from aiohttp_socks import ProxyConnector, ProxyType
- from urllib.parse import quote
- import time
-
- from . import url
- from .output import Tweets, Users
- from .token import TokenExpiryException
-
- import logging as logme
-
- httpproxy = None
-
- user_agent_list = [
-     # 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/60.0.3112.113 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/60.0.3112.90 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/60.0.3112.90 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/60.0.3112.90 Safari/537.36',
-     # 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/44.0.2403.157 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/60.0.3112.113 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/57.0.2987.133 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/57.0.2987.133 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/55.0.2883.87 Safari/537.36',
-     # 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-     # ' Chrome/55.0.2883.87 Safari/537.36',
-
-     'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)',
-     'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
-     'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)',
-     'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
-     'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
-     'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
-     'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
-     'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET '
-     'CLR 3.5.30729)',
- ]
-
-
- # Convert a Python `dict` to JSON and URL-encode it so it can be passed as a URL parameter;
- # some URLs require this format.
- def dict_to_url(dct):
-     return quote(dumps(dct))
-
-
- def get_connector(config):
-     logme.debug(__name__ + ':get_connector')
-     _connector = None
-     if config.Proxy_host:
-         if config.Proxy_host.lower() == "tor":
-             _connector = ProxyConnector(
-                 host='127.0.0.1',
-                 port=9050,
-                 rdns=True)
-         elif config.Proxy_port and config.Proxy_type:
-             if config.Proxy_type.lower() == "socks5":
-                 _type = ProxyType.SOCKS5
-             elif config.Proxy_type.lower() == "socks4":
-                 _type = ProxyType.SOCKS4
-             elif config.Proxy_type.lower() == "http":
-                 global httpproxy
-                 httpproxy = "http://" + config.Proxy_host + ":" + str(config.Proxy_port)
-                 return _connector
-             else:
-                 logme.critical("get_connector:proxy-type-error")
-                 print("Error: Proxy types allowed are: http, socks5 and socks4. No https.")
-                 sys.exit(1)
-             _connector = ProxyConnector(
-                 proxy_type=_type,
-                 host=config.Proxy_host,
-                 port=config.Proxy_port,
-                 rdns=True)
-         else:
-             logme.critical(__name__ + ':get_connector:proxy-port-type-error')
-             print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
-             sys.exit(1)
-     else:
-         if config.Proxy_port or config.Proxy_type:
-             logme.critical(__name__ + ':get_connector:proxy-host-arg-error')
-             print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
-             sys.exit(1)
-
-     return _connector
-
-
- async def RequestUrl(config, init):
-     logme.debug(__name__ + ':RequestUrl')
-     _connector = get_connector(config)
-     _serialQuery = ""
-     params = []
-     _url = ""
-     _headers = [("authorization", config.Bearer_token), ("x-guest-token", config.Guest_token)]
-
-     # TODO: do this later
-     if config.Profile:
-         logme.debug(__name__ + ':RequestUrl:Profile')
-         _url, params, _serialQuery = url.SearchProfile(config, init)
-     elif config.TwitterSearch:
-         logme.debug(__name__ + ':RequestUrl:TwitterSearch')
-         _url, params, _serialQuery = await url.Search(config, init)
-     else:
-         if config.Following:
-             logme.debug(__name__ + ':RequestUrl:Following')
-             _url = await url.Following(config.Username, init)
-         elif config.Followers:
-             logme.debug(__name__ + ':RequestUrl:Followers')
-             _url = await url.Followers(config.Username, init)
-         else:
-             logme.debug(__name__ + ':RequestUrl:Favorites')
-             _url = await url.Favorites(config.Username, init)
-         _serialQuery = _url
-
-     response = await Request(_url, params=params, connector=_connector, headers=_headers)
-
-     if config.Debug:
-         print(_serialQuery, file=open("twint-request_urls.log", "a", encoding="utf-8"))
-
-     return response
-
-
- def ForceNewTorIdentity(config):
-     logme.debug(__name__ + ':ForceNewTorIdentity')
-     try:
-         tor_c = socket.create_connection(('127.0.0.1', config.Tor_control_port))
-         tor_c.send('AUTHENTICATE "{}"\r\nSIGNAL NEWNYM\r\n'.format(config.Tor_control_password).encode())
-         response = tor_c.recv(1024)
-         if response != b'250 OK\r\n250 OK\r\n':
-             sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
-             logme.critical(__name__ + ':ForceNewTorIdentity:unexpectedResponse')
-     except Exception as e:
-         logme.debug(__name__ + ':ForceNewTorIdentity:errorConnectingTor')
-         sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
-         sys.stderr.write('If you want to rotate Tor ports automatically - enable Tor control port\n')
-
-
- async def Request(_url, connector=None, params=None, headers=None):
-     logme.debug(__name__ + ':Request:Connector')
-     async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
-         return await Response(session, _url, params)
-
-
- async def Response(session, _url, params=None):
-     logme.debug(__name__ + ':Response')
-     retries = 5
-     wait = 10  # no firm basis; may even work with 0
-     for attempt in range(retries + 1):
-         try:
-             with timeout(120):
-                 async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
-                     resp = await response.text()
-                     if response.status == 429:  # 429 means Too Many Requests, i.e. the rate limit was exceeded
-                         raise TokenExpiryException(loads(resp)['errors'][0]['message'])
-                     return resp
-         except aiohttp.client_exceptions.ClientConnectorError as exc:
-             if attempt < retries:
-                 retrying = ', retrying'
-                 level = logme.WARNING
-             else:
-                 retrying = ''
-                 level = logme.ERROR
-             logme.log(level, f'Error retrieving {_url}: {exc!r}{retrying}')
-             if attempt < retries:
-                 time.sleep(wait)
-             else:
-                 logme.fatal(f'{retries + 1} requests to {_url} failed, giving up.')
-                 raise TokenExpiryException(f'{exc!r}')
-
-
- async def RandomUserAgent(wa=None):
-     logme.debug(__name__ + ':RandomUserAgent')
-     try:
-         if wa:
-             return "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
-         return UserAgent(verify_ssl=False, use_cache_server=False).random
-     except Exception:
-         return random.choice(user_agent_list)
-
-
- async def Username(_id, bearer_token, guest_token):
-     logme.debug(__name__ + ':Username')
-     _dct = {'userId': _id, 'withHighlightedLabel': False}
-     _url = "https://api.twitter.com/graphql/B9FuNQVmyx32rdbIPEZKag/UserByRestId?variables={}".format(dict_to_url(_dct))
-     _headers = {
-         'authorization': bearer_token,
-         'x-guest-token': guest_token,
-     }
-     r = await Request(_url, headers=_headers)
-     j_r = loads(r)
-     username = j_r['data']['user']['legacy']['screen_name']
-     return username
-
-
- async def Tweet(url, config, conn):
-     logme.debug(__name__ + ':Tweet')
-     try:
-         response = await Request(url)
-         soup = BeautifulSoup(response, "html.parser")
-         tweets = soup.find_all("div", "tweet")
-         await Tweets(tweets, config, conn, url)
-     except Exception as e:
-         logme.critical(__name__ + ':Tweet:' + str(e))
-
-
- async def User(username, config, conn, user_id=False):
-     logme.debug(__name__ + ':User')
-     _dct = {'screen_name': username, 'withHighlightedLabel': False}
-     _url = 'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}' \
-         .format(dict_to_url(_dct))
-     _headers = {
-         'authorization': config.Bearer_token,
-         'x-guest-token': config.Guest_token,
-     }
-     try:
-         response = await Request(_url, headers=_headers)
-         j_r = loads(response)
-         if user_id:
-             try:
-                 _id = j_r['data']['user']['rest_id']
-                 return _id
-             except KeyError as e:
-                 logme.critical(__name__ + ':User:' + str(e))
-                 return
-         await Users(j_r, config, conn)
-     except Exception as e:
-         logme.critical(__name__ + ':User:' + str(e))
-         raise
-
-
- def Limit(Limit, count):
-     logme.debug(__name__ + ':Limit')
-     if Limit is not None and count >= int(Limit):
-         return True
-
-
- async def Multi(feed, config, conn):
-     logme.debug(__name__ + ':Multi')
-     count = 0
-     try:
-         with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
-             loop = asyncio.get_event_loop()
-             futures = []
-             for tweet in feed:
-                 count += 1
-                 if config.Favorites or config.Profile_full:
-                     logme.debug(__name__ + ':Multi:Favorites-profileFull')
-                     link = tweet.find("a")["href"]
-                     url = f"https://twitter.com{link}&lang=en"
-                 elif config.User_full:
-                     logme.debug(__name__ + ':Multi:userFull')
-                     username = tweet.find("a")["name"]
-                     url = f"http://twitter.com/{username}?lang=en"
-                 else:
-                     logme.debug(__name__ + ':Multi:else-url')
-                     link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"]
-                     url = f"https://twitter.com{link}?lang=en"
-
-                 if config.User_full:
-                     logme.debug(__name__ + ':Multi:user-full-Run')
-                     futures.append(loop.run_in_executor(executor, await User(url, config, conn)))
-                 else:
-                     logme.debug(__name__ + ':Multi:notUser-full-Run')
-                     futures.append(loop.run_in_executor(executor, await Tweet(url, config, conn)))
-             logme.debug(__name__ + ':Multi:asyncioGather')
-             await asyncio.gather(*futures)
-     except Exception as e:
-         # TODO: fix error not error
-         # print(str(e) + " [x] get.Multi")
-         # will return "'NoneType' object is not callable"
-         # but still works
-         # logme.critical(__name__ + ':Multi:' + str(e))
-         pass
-
-     return count
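
get_connector() is where the proxy settings take effect: "tor", "socks4" and "socks5" hosts become an aiohttp-socks ProxyConnector, while "http" only populates the module-level httpproxy string that Response() hands to session.get(..., proxy=...). A sketch of routing a search through a local SOCKS5 proxy (the host/port values are placeholders):

    import twint

    c = twint.Config()
    c.Search = "osint"
    c.Proxy_type = "socks5"
    c.Proxy_host = "127.0.0.1"
    c.Proxy_port = 9050
    twint.run.Search(c)   # requests now go through the ProxyConnector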
twitter-scraper/twint-master/twint/output.py DELETED
@@ -1,241 +0,0 @@
- from datetime import datetime
-
- from . import format, get
- from .tweet import Tweet
- from .user import User
- from .storage import db, elasticsearch, write, panda
-
- import logging as logme
-
- follows_list = []
- tweets_list = []
- users_list = []
-
- author_list = set()
-
- # used by Pandas
- _follows_object = {}
-
-
- def _formatDateTime(datetimestamp):
-     try:
-         return int(datetime.strptime(datetimestamp, "%Y-%m-%d %H:%M:%S").timestamp())
-     except ValueError:
-         return int(datetime.strptime(datetimestamp, "%Y-%m-%d").timestamp())
-
-
- def _clean_follow_list():
-     logme.debug(__name__ + ':clean_follow_list')
-     global _follows_object
-     _follows_object = {}
-
-
- def clean_lists():
-     logme.debug(__name__ + ':clean_lists')
-     global follows_list
-     global tweets_list
-     global users_list
-     follows_list = []
-     tweets_list = []
-     users_list = []
-
-
- def datecheck(datetimestamp, config):
-     logme.debug(__name__ + ':datecheck')
-     if config.Since:
-         logme.debug(__name__ + ':datecheck:SinceTrue')
-         d = _formatDateTime(datetimestamp)
-         s = _formatDateTime(config.Since)
-         if d < s:
-             return False
-     if config.Until:
-         logme.debug(__name__ + ':datecheck:UntilTrue')
-         d = _formatDateTime(datetimestamp)
-         s = _formatDateTime(config.Until)
-         if d > s:
-             return False
-     logme.debug(__name__ + ':datecheck:dateRangeFalse')
-     return True
-
-
- # TODO: quoted tweets should be removed here, because Twitter also sends the quoted tweets in the
- # `tweets` list along with the other tweets
- def is_tweet(tw):
-     try:
-         tw["data-item-id"]
-         logme.debug(__name__ + ':is_tweet:True')
-         return True
-     except KeyError:
-         logme.critical(__name__ + ':is_tweet:False')
-         return False
-
-
- def _output(obj, output, config, **extra):
-     logme.debug(__name__ + ':_output')
-     if config.Lowercase:
-         if isinstance(obj, str):
-             logme.debug(__name__ + ':_output:Lowercase:username')
-             obj = obj.lower()
-         elif obj.__class__.__name__ == "user":
-             logme.debug(__name__ + ':_output:Lowercase:user')
-             pass
-         elif obj.__class__.__name__ == "tweet":
-             logme.debug(__name__ + ':_output:Lowercase:tweet')
-             obj.username = obj.username.lower()
-             author_list.update({obj.username})
-             for dct in obj.mentions:
-                 for key, val in dct.items():
-                     dct[key] = val.lower()
-             for i in range(len(obj.hashtags)):
-                 obj.hashtags[i] = obj.hashtags[i].lower()
-             for i in range(len(obj.cashtags)):
-                 obj.cashtags[i] = obj.cashtags[i].lower()
-         else:
-             logme.info('_output:Lowercase:hiddenTweetFound')
-             print("[x] Hidden tweet found, account suspended due to violation of TOS")
-             return
-     if config.Output is not None:
-         if config.Store_csv:
-             try:
-                 write.Csv(obj, config)
-                 logme.debug(__name__ + ':_output:CSV')
-             except Exception as e:
-                 logme.critical(__name__ + ':_output:CSV:Error:' + str(e))
-                 print(str(e) + " [x] output._output")
-         elif config.Store_json:
-             write.Json(obj, config)
-             logme.debug(__name__ + ':_output:JSON')
-         else:
-             write.Text(output, config.Output)
-             logme.debug(__name__ + ':_output:Text')
-
-     if config.Elasticsearch:
-         logme.debug(__name__ + ':_output:Elasticsearch')
-         print("", end=".", flush=True)
-     else:
-         if not config.Hide_output:
-             try:
-                 print(output.replace('\n', ' '))
-             except UnicodeEncodeError:
-                 logme.critical(__name__ + ':_output:UnicodeEncodeError')
-                 print("unicode error [x] output._output")
-
-
- async def checkData(tweet, config, conn):
-     logme.debug(__name__ + ':checkData')
-     tweet = Tweet(tweet, config)
-     if not tweet.datestamp:
-         logme.critical(__name__ + ':checkData:hiddenTweetFound')
-         print("[x] Hidden tweet found, account suspended due to violation of TOS")
-         return
-     if datecheck(tweet.datestamp + " " + tweet.timestamp, config):
-         output = format.Tweet(config, tweet)
-         if config.Database:
-             logme.debug(__name__ + ':checkData:Database')
-             db.tweets(conn, tweet, config)
-         if config.Pandas:
-             logme.debug(__name__ + ':checkData:Pandas')
-             panda.update(tweet, config)
-         if config.Store_object:
-             logme.debug(__name__ + ':checkData:Store_object')
-             if hasattr(config.Store_object_tweets_list, 'append'):
-                 config.Store_object_tweets_list.append(tweet)
-             else:
-                 tweets_list.append(tweet)
-         if config.Elasticsearch:
-             logme.debug(__name__ + ':checkData:Elasticsearch')
-             elasticsearch.Tweet(tweet, config)
-         _output(tweet, output, config)
-     # else:
-     #     logme.critical(__name__ + ':checkData:copyrightedTweet')
-
-
- async def Tweets(tweets, config, conn):
-     logme.debug(__name__ + ':Tweets')
-     if config.Favorites or config.Location:
-         logme.debug(__name__ + ':Tweets:fav+full+loc')
-         for tw in tweets:
-             await checkData(tw, config, conn)
-     elif config.TwitterSearch or config.Profile:
-         logme.debug(__name__ + ':Tweets:TwitterSearch')
-         await checkData(tweets, config, conn)
-     else:
-         logme.debug(__name__ + ':Tweets:else')
-         if int(tweets["data-user-id"]) == config.User_id or config.Retweets:
-             await checkData(tweets, config, conn)
-
-
- async def Users(u, config, conn):
-     logme.debug(__name__ + ':User')
-     global users_list
-
-     user = User(u)
-     output = format.User(config.Format, user)
-
-     if config.Database:
-         logme.debug(__name__ + ':User:Database')
-         db.user(conn, config, user)
-
-     if config.Elasticsearch:
-         logme.debug(__name__ + ':User:Elasticsearch')
-         _save_date = user.join_date
-         _save_time = user.join_time
-         user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0]
-         user.join_time = str(datetime.strptime(user.join_time, "%I:%M %p")).split()[1]
-         elasticsearch.UserProfile(user, config)
-         user.join_date = _save_date
-         user.join_time = _save_time
-
-     if config.Store_object:
-         logme.debug(__name__ + ':User:Store_object')
-         if hasattr(config.Store_object_follow_list, 'append'):
-             config.Store_object_follow_list.append(user)
-         elif hasattr(config.Store_object_users_list, 'append'):
-             config.Store_object_users_list.append(user)
-         else:
-             users_list.append(user)  # twint.user.user
-
-     if config.Pandas:
-         logme.debug(__name__ + ':User:Pandas+user')
-         panda.update(user, config)
-
-     _output(user, output, config)
-
-
- async def Username(username, config, conn):
-     logme.debug(__name__ + ':Username')
-     global _follows_object
-     global follows_list
-     follow_var = config.Following * "following" + config.Followers * "followers"
-
-     if config.Database:
-         logme.debug(__name__ + ':Username:Database')
-         db.follow(conn, config.Username, config.Followers, username)
-
-     if config.Elasticsearch:
-         logme.debug(__name__ + ':Username:Elasticsearch')
-         elasticsearch.Follow(username, config)
-
-     if config.Store_object:
-         if hasattr(config.Store_object_follow_list, 'append'):
-             config.Store_object_follow_list.append(username)
-         else:
-             follows_list.append(username)  # twint.user.user
-
-     if config.Pandas:
-         logme.debug(__name__ + ':Username:object+pandas')
-         try:
-             _ = _follows_object[config.Username][follow_var]
-         except KeyError:
-             _follows_object.update({config.Username: {follow_var: []}})
-         _follows_object[config.Username][follow_var].append(username)
-         if config.Pandas_au:
-             logme.debug(__name__ + ':Username:object+pandas+au')
-             panda.update(_follows_object[config.Username], config)
-     _output(username, username, config)
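
checkData(), Users() and Username() all funnel results the same way: into the database, Elasticsearch, Pandas, or in-memory lists when Store_object is set, falling back to the module-level lists when no caller-owned list was supplied. A sketch of the in-memory path (attribute names on the tweet objects follow twint.tweet):

    import twint

    c = twint.Config()
    c.Search = "from:jack"
    c.Limit = 20
    c.Store_object = True
    c.Store_object_tweets_list = []   # otherwise output.tweets_list is used
    twint.run.Search(c)
    for t in c.Store_object_tweets_list:
        print(t.id, t.username, t.tweet)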
twitter-scraper/twint-master/twint/run.py DELETED
@@ -1,412 +0,0 @@
- import sys, os, datetime
- from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop
-
- from . import datelock, feed, get, output, verbose, storage
- from .token import TokenExpiryException
- from . import token
- from .storage import db
- from .feed import NoMoreTweetsException
-
- import logging as logme
-
- import time
-
- bearer = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs' \
-          '%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
-
-
- class Twint:
-     def __init__(self, config):
-         logme.debug(__name__ + ':Twint:__init__')
-         if config.Resume is not None and (config.TwitterSearch or config.Followers or config.Following):
-             logme.debug(__name__ + ':Twint:__init__:Resume')
-             self.init = self.get_resume(config.Resume)
-         else:
-             self.init = -1
-
-         config.deleted = []
-         self.feed: list = [-1]
-         self.count = 0
-         self.user_agent = ""
-         self.config = config
-         self.config.Bearer_token = bearer
-         # TODO: might have to make some adjustments for it to work with multi-threading
-         # USAGE: to get a new guest token simply do `self.token.refresh()`
-         self.token = token.Token(config)
-         self.token.refresh()
-         self.conn = db.Conn(config.Database)
-         self.d = datelock.Set(self.config.Until, self.config.Since)
-         verbose.Elastic(config.Elasticsearch)
-
-         if self.config.Store_object:
-             logme.debug(__name__ + ':Twint:__init__:clean_follow_list')
-             output._clean_follow_list()
-
-         if self.config.Pandas_clean:
-             logme.debug(__name__ + ':Twint:__init__:pandas_clean')
-             storage.panda.clean()
-
-     def get_resume(self, resumeFile):
-         if not os.path.exists(resumeFile):
-             return '-1'
-         with open(resumeFile, 'r') as rFile:
-             _init = rFile.readlines()[-1].strip('\n')
-             return _init
-
-     async def Feed(self):
-         logme.debug(__name__ + ':Twint:Feed')
-         consecutive_errors_count = 0
-         while True:
-             # this will receive a JSON string, parse it into a `dict` and do the required stuff
-             try:
-                 response = await get.RequestUrl(self.config, self.init)
-             except TokenExpiryException as e:
-                 logme.debug(__name__ + 'Twint:Feed:' + str(e))
-                 self.token.refresh()
-                 response = await get.RequestUrl(self.config, self.init)
-
-             if self.config.Debug:
-                 print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
-
-             self.feed = []
-             try:
-                 if self.config.Favorites:
-                     self.feed, self.init = feed.MobileFav(response)
-                     favorite_err_cnt = 0
-                     if len(self.feed) == 0 and len(self.init) == 0:
-                         while (len(self.feed) == 0 or len(self.init) == 0) and favorite_err_cnt < 5:
-                             self.user_agent = await get.RandomUserAgent(wa=False)
-                             response = await get.RequestUrl(self.config, self.init,
-                                                             headers=[("User-Agent", self.user_agent)])
-                             self.feed, self.init = feed.MobileFav(response)
-                             favorite_err_cnt += 1
-                             time.sleep(1)
-                         if favorite_err_cnt == 5:
-                             print("Favorite page could not be fetched")
-                     if not self.count % 40:
-                         time.sleep(5)
-                 elif self.config.Followers or self.config.Following:
-                     self.feed, self.init = feed.Follow(response)
-                     if not self.count % 40:
-                         time.sleep(5)
-                 elif self.config.Profile or self.config.TwitterSearch:
-                     try:
-                         self.feed, self.init = feed.parse_tweets(self.config, response)
-                     except NoMoreTweetsException as e:
-                         logme.debug(__name__ + ':Twint:Feed:' + str(e))
-                         print('[!] ' + str(e) + ' Scraping will stop now.')
-                         print('found {} deleted tweets in this search.'.format(len(self.config.deleted)))
-                         break
-                 break
-             except TimeoutError as e:
-                 if self.config.Proxy_host.lower() == "tor":
-                     print("[?] Timed out, changing Tor identity...")
-                     if self.config.Tor_control_password is None:
-                         logme.critical(__name__ + ':Twint:Feed:tor-password')
-                         sys.stderr.write("Error: config.Tor_control_password must be set for proxy auto-rotation!\r\n")
-                         sys.stderr.write(
-                             "Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors"
-                             "-controller-interface-directly\r\n")
-                         break
-                     else:
-                         get.ForceNewTorIdentity(self.config)
-                         continue
-                 else:
-                     logme.critical(__name__ + ':Twint:Feed:' + str(e))
-                     print(str(e))
-                     break
-             except Exception as e:
-                 if self.config.Profile or self.config.Favorites:
-                     print("[!] Twitter does not return more data, scrape stops here.")
-                     break
-
-                 logme.critical(__name__ + ':Twint:Feed:noData' + str(e))
-                 # Sometimes Twitter says there is no data. But it's a lie.
-                 # raise
-                 consecutive_errors_count += 1
-                 if consecutive_errors_count < self.config.Retries_count:
-                     # back off polynomially, but never below the user-set minimum wait time
-                     delay = round(consecutive_errors_count ** self.config.Backoff_exponent, 1)
-                     if self.config.Min_wait_time > delay:
-                         delay = self.config.Min_wait_time
-                     sys.stderr.write('sleeping for {} secs\n'.format(delay))
-                     time.sleep(delay)
-                     self.user_agent = await get.RandomUserAgent(wa=True)
-                     continue
-                 logme.critical(__name__ + ':Twint:Feed:Tweets_known_error:' + str(e))
-                 sys.stderr.write(str(e) + " [x] run.Feed")
-                 sys.stderr.write(
-                     "[!] if you get this error but you know for sure that more tweets exist, please open an issue and "
-                     "we will investigate it!")
-                 break
-         if self.config.Resume:
-             print(self.init, file=open(self.config.Resume, "a", encoding="utf-8"))
-
-     async def follow(self):
-         await self.Feed()
-         if self.config.User_full:
-             logme.debug(__name__ + ':Twint:follow:userFull')
-             self.count += await get.Multi(self.feed, self.config, self.conn)
-         else:
-             logme.debug(__name__ + ':Twint:follow:notUserFull')
-             for user in self.feed:
-                 self.count += 1
-                 username = user.find("a")["name"]
-                 await output.Username(username, self.config, self.conn)
-
-     async def favorite(self):
-         logme.debug(__name__ + ':Twint:favorite')
-         await self.Feed()
-         favorited_tweets_list = []
-         for tweet in self.feed:
-             tweet_dict = {}
-             self.count += 1
-             try:
-                 tweet_dict['data-item-id'] = tweet.find("div", {"class": "tweet-text"})['data-id']
-                 t_url = tweet.find("span", {"class": "metadata"}).find("a")["href"]
-                 tweet_dict['data-conversation-id'] = t_url.split('?')[0].split('/')[-1]
-                 tweet_dict['username'] = tweet.find("div", {"class": "username"}).text.replace('\n', '').replace(' ', '')
-                 tweet_dict['tweet'] = tweet.find("div", {"class": "tweet-text"}).find("div", {"class": "dir-ltr"}).text
-                 date_str = tweet.find("td", {"class": "timestamp"}).find("a").text
-                 # test_dates = ["1m", "2h", "Jun 21, 2019", "Mar 12", "28 Jun 19"]
-                 # date_str = test_dates[3]
-                 if len(date_str) <= 3 and (date_str[-1] == "m" or date_str[-1] == "h"):  # 25m 1h
-                     dateu = str(datetime.date.today())
-                     tweet_dict['date'] = dateu
-                 elif ',' in date_str:  # Aug 21, 2019
-                     sp = date_str.replace(',', '').split(' ')
-                     date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + sp[2]
-                     dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
-                     tweet_dict['date'] = dateu
-                 elif len(date_str.split(' ')) == 3:  # 28 Jun 19
-                     sp = date_str.split(' ')
-                     if len(sp[2]) == 2:
-                         sp[2] = '20' + sp[2]
-                     date_str_formatted = sp[0] + ' ' + sp[1] + ' ' + sp[2]
-                     dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
-                     tweet_dict['date'] = dateu
-                 else:  # Aug 21
-                     sp = date_str.split(' ')
-                     date_str_formatted = sp[1] + ' ' + sp[0] + ' ' + str(datetime.date.today().year)
-                     dateu = datetime.datetime.strptime(date_str_formatted, "%d %b %Y").strftime("%Y-%m-%d")
-                     tweet_dict['date'] = dateu
-
-                 favorited_tweets_list.append(tweet_dict)
-
-             except Exception as e:
-                 logme.critical(__name__ + ':Twint:favorite:favorite_field_lack')
-                 print("failed to parse favorite: ", date_str, " ", str(e))
-
-         try:
-             self.config.favorited_tweets_list += favorited_tweets_list
-         except AttributeError:
-             self.config.favorited_tweets_list = favorited_tweets_list
-
-     async def profile(self):
-         await self.Feed()
-         logme.debug(__name__ + ':Twint:profile')
-         for tweet in self.feed:
-             self.count += 1
-             await output.Tweets(tweet, self.config, self.conn)
-
-     async def tweets(self):
-         await self.Feed()
-         # TODO: need to take care of this later
-         if self.config.Location:
-             logme.debug(__name__ + ':Twint:tweets:location')
-             self.count += await get.Multi(self.feed, self.config, self.conn)
-         else:
-             logme.debug(__name__ + ':Twint:tweets:notLocation')
-             for tweet in self.feed:
-                 self.count += 1
-                 await output.Tweets(tweet, self.config, self.conn)
-
-     async def main(self, callback=None):
-         task = ensure_future(self.run())  # Might be changed to create_task in 3.7+.
-
-         if callback:
-             task.add_done_callback(callback)
-
-         await task
-
-     async def run(self):
-         if self.config.TwitterSearch:
-             self.user_agent = await get.RandomUserAgent(wa=True)
-         else:
-             self.user_agent = await get.RandomUserAgent()
-
-         if self.config.User_id is not None and self.config.Username is None:
-             logme.debug(__name__ + ':Twint:main:user_id')
-             self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token,
-                                                       self.config.Guest_token)
-
-         if self.config.Username is not None and self.config.User_id is None:
-             logme.debug(__name__ + ':Twint:main:username')
-             self.config.User_id = await get.User(self.config.Username, self.config, self.conn, True)
-             if self.config.User_id is None:
-                 raise ValueError("Cannot find twitter account with name = " + self.config.Username)
-
-         # TODO: will need to modify this to work with the new endpoints
-         if self.config.TwitterSearch and self.config.Since and self.config.Until:
-             logme.debug(__name__ + ':Twint:main:search+since+until')
-             while self.d.since < self.d.until:
-                 self.config.Since = datetime.datetime.strftime(self.d.since, "%Y-%m-%d %H:%M:%S")
-                 self.config.Until = datetime.datetime.strftime(self.d.until, "%Y-%m-%d %H:%M:%S")
-                 if len(self.feed) > 0:
-                     await self.tweets()
-                 else:
-                     logme.debug(__name__ + ':Twint:main:gettingNewTweets')
-                     break
-
-                 if get.Limit(self.config.Limit, self.count):
-                     break
-         elif self.config.Lookup:
-             await self.Lookup()
-         else:
-             logme.debug(__name__ + ':Twint:main:not-search+since+until')
-             while True:
-                 if len(self.feed) > 0:
-                     if self.config.Followers or self.config.Following:
-                         logme.debug(__name__ + ':Twint:main:follow')
-                         await self.follow()
-                     elif self.config.Favorites:
-                         logme.debug(__name__ + ':Twint:main:favorites')
-                         await self.favorite()
-                     elif self.config.Profile:
-                         logme.debug(__name__ + ':Twint:main:profile')
-                         await self.profile()
-                     elif self.config.TwitterSearch:
-                         logme.debug(__name__ + ':Twint:main:twitter-search')
-                         await self.tweets()
-                 else:
-                     logme.debug(__name__ + ':Twint:main:no-more-tweets')
-                     break
-
-                 # logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
-                 if get.Limit(self.config.Limit, self.count):
-                     logme.debug(__name__ + ':Twint:main:reachedLimit')
-                     break
-
-         if self.config.Count:
-             verbose.Count(self.count, self.config)
-
-     async def Lookup(self):
-         logme.debug(__name__ + ':Twint:Lookup')
-         try:
-             if self.config.User_id is not None and self.config.Username is None:
-                 logme.debug(__name__ + ':Twint:Lookup:user_id')
-                 self.config.Username = await get.Username(self.config.User_id, self.config.Bearer_token,
-                                                           self.config.Guest_token)
-             await get.User(self.config.Username, self.config, db.Conn(self.config.Database))
-         except Exception:
-             logme.exception(__name__ + ':Twint:Lookup:Unexpected exception occurred.')
-             raise
-
-
- def run(config, callback=None):
-     logme.debug(__name__ + ':run')
-     try:
-         get_event_loop()
-     except RuntimeError as e:
-         if "no current event loop" in str(e):
-             set_event_loop(new_event_loop())
-         else:
-             logme.exception(__name__ + ':run:Unexpected exception while handling an expected RuntimeError.')
-             raise
-     except Exception:
-         logme.exception(
-             __name__ + ':run:Unexpected exception occurred while attempting to get or create a new event loop.')
-         raise
-
-     get_event_loop().run_until_complete(Twint(config).main(callback))
-
-
- def Favorites(config):
-     logme.debug(__name__ + ':Favorites')
-     config.Favorites = True
-     config.Following = False
-     config.Followers = False
-     config.Profile = False
-     config.TwitterSearch = False
-     run(config)
-     if config.Pandas_au:
-         storage.panda._autoget("tweet")
-
-
- def Followers(config):
-     logme.debug(__name__ + ':Followers')
-     config.Followers = True
-     config.Following = False
-     config.Profile = False
-     config.Favorites = False
-     config.TwitterSearch = False
-     run(config)
-     if config.Pandas_au:
-         storage.panda._autoget("followers")
-         if config.User_full:
-             storage.panda._autoget("user")
-     if config.Pandas_clean and not config.Store_object:
-         # storage.panda.clean()
-         output._clean_follow_list()
-
-
- def Following(config):
-     logme.debug(__name__ + ':Following')
-     config.Following = True
-     config.Followers = False
-     config.Profile = False
-     config.Favorites = False
-     config.TwitterSearch = False
-     run(config)
-     if config.Pandas_au:
-         storage.panda._autoget("following")
-         if config.User_full:
-             storage.panda._autoget("user")
-     if config.Pandas_clean and not config.Store_object:
-         # storage.panda.clean()
-         output._clean_follow_list()
-
-
- def Lookup(config):
-     logme.debug(__name__ + ':Lookup')
-     config.Profile = False
-     config.Lookup = True
-     config.Favorites = False
-     config.Following = False
-     config.Followers = False
-     config.TwitterSearch = False
-     run(config)
-     if config.Pandas_au:
-         storage.panda._autoget("user")
-
-
- def Profile(config):
-     logme.debug(__name__ + ':Profile')
-     config.Profile = True
-     config.Favorites = False
-     config.Following = False
-     config.Followers = False
-     config.TwitterSearch = False
-     run(config)
-     if config.Pandas_au:
-         storage.panda._autoget("tweet")
-
-
- def Search(config, callback=None):
-     logme.debug(__name__ + ':Search')
-     config.TwitterSearch = True
-     config.Favorites = False
-     config.Following = False
-     config.Followers = False
-     config.Profile = False
-     run(config, callback)
-     if config.Pandas_au:
-         storage.panda._autoget("tweet")
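
Each public wrapper (Favorites, Followers, Following, Lookup, Profile, Search) just flips the matching Config flags and calls run(), which obtains an asyncio event loop and drives Twint.main(); Search additionally threads a completion callback through to add_done_callback. Sketch of that hook (done is an illustrative name):

    import twint

    def done(task):
        # invoked by asyncio once the scrape task completes
        print("search finished:", task)

    c = twint.Config()
    c.Search = "#python"
    c.Limit = 40
    twint.run.Search(c, callback=done)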
twitter-scraper/twint-master/twint/storage/__init__.py DELETED
File without changes
twitter-scraper/twint-master/twint/storage/db.py DELETED
@@ -1,297 +0,0 @@
1
- import sqlite3
2
- import sys
3
- import time
4
- import hashlib
5
-
6
- from datetime import datetime
7
-
8
- def Conn(database):
9
- if database:
10
- print("[+] Inserting into Database: " + str(database))
11
- conn = init(database)
12
- if isinstance(conn, str): # error
13
- print(conn)
14
- sys.exit(1)
15
- else:
16
- conn = ""
17
-
18
- return conn
19
-
20
- def init(db):
21
- try:
22
- conn = sqlite3.connect(db)
23
- cursor = conn.cursor()
24
-
25
- table_users = """
26
- CREATE TABLE IF NOT EXISTS
27
- users(
28
- id integer not null,
29
- id_str text not null,
30
- name text,
31
- username text not null,
32
- bio text,
33
- location text,
34
- url text,
35
- join_date text not null,
36
- join_time text not null,
37
- tweets integer,
38
- following integer,
39
- followers integer,
40
- likes integer,
41
- media integer,
42
- private integer not null,
43
- verified integer not null,
44
- profile_image_url text not null,
45
- background_image text,
46
- hex_dig text not null,
47
- time_update integer not null,
48
- CONSTRAINT users_pk PRIMARY KEY (id, hex_dig)
49
- );
50
- """
51
- cursor.execute(table_users)
52
-
53
- table_tweets = """
54
- CREATE TABLE IF NOT EXISTS
55
- tweets (
56
- id integer not null,
57
- id_str text not null,
58
- tweet text default '',
59
- language text default '',
60
- conversation_id text not null,
61
- created_at integer not null,
62
- date text not null,
63
- time text not null,
64
- timezone text not null,
65
- place text default '',
66
- replies_count integer,
67
- likes_count integer,
68
- retweets_count integer,
69
- user_id integer not null,
70
- user_id_str text not null,
71
- screen_name text not null,
72
- name text default '',
73
- link text,
74
- mentions text,
75
- hashtags text,
76
- cashtags text,
77
- urls text,
78
- photos text,
79
- thumbnail text,
80
- quote_url text,
81
- video integer,
82
- geo text,
83
- near text,
84
- source text,
85
- time_update integer not null,
86
- `translate` text default '',
87
- trans_src text default '',
88
- trans_dest text default '',
89
- PRIMARY KEY (id)
90
- );
91
- """
92
- cursor.execute(table_tweets)
93
-
94
- table_retweets = """
95
- CREATE TABLE IF NOT EXISTS
96
- retweets(
97
- user_id integer not null,
98
- username text not null,
99
- tweet_id integer not null,
100
- retweet_id integer not null,
101
- retweet_date integer,
102
- CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
103
- CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
104
- CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
105
- );
106
- """
107
- cursor.execute(table_retweets)
108
-
109
- table_reply_to = """
110
- CREATE TABLE IF NOT EXISTS
111
- replies(
112
- tweet_id integer not null,
113
- user_id integer not null,
114
- username text not null,
115
- CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id),
116
- CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
117
- );
118
- """
119
- cursor.execute(table_reply_to)
120
-
121
- table_favorites = """
122
- CREATE TABLE IF NOT EXISTS
123
- favorites(
124
- user_id integer not null,
125
- tweet_id integer not null,
126
- CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id),
127
- CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id),
128
- CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
129
- );
130
- """
131
- cursor.execute(table_favorites)
132
-
133
- table_followers = """
134
- CREATE TABLE IF NOT EXISTS
135
- followers (
136
- id integer not null,
137
- follower_id integer not null,
138
- CONSTRAINT followers_pk PRIMARY KEY (id, follower_id),
139
- CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
140
- CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id)
141
- );
142
- """
143
- cursor.execute(table_followers)
144
-
145
- table_following = """
146
- CREATE TABLE IF NOT EXISTS
147
- following (
148
- id integer not null,
149
- following_id integer not null,
150
- CONSTRAINT following_pk PRIMARY KEY (id, following_id),
151
- CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
152
- CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id)
153
- );
154
- """
155
- cursor.execute(table_following)
156
-
157
- table_followers_names = """
158
- CREATE TABLE IF NOT EXISTS
159
- followers_names (
160
- user text not null,
161
- time_update integer not null,
162
- follower text not null,
163
- PRIMARY KEY (user, follower)
164
- );
165
- """
166
- cursor.execute(table_followers_names)
167
-
168
- table_following_names = """
169
- CREATE TABLE IF NOT EXISTS
170
- following_names (
171
- user text not null,
172
- time_update integer not null,
173
- follows text not null,
174
- PRIMARY KEY (user, follows)
175
- );
176
- """
177
- cursor.execute(table_following_names)
178
-
179
- return conn
180
- except Exception as e:
181
- return str(e)
182
-
183
- def fTable(Followers):
184
- if Followers:
185
- table = "followers_names"
186
- else:
187
- table = "following_names"
188
-
189
- return table
190
-
191
- def uTable(Followers):
192
- if Followers:
193
- table = "followers"
194
- else:
195
- table = "following"
196
-
197
- return table
198
-
199
- def follow(conn, Username, Followers, User):
200
- try:
201
- time_ms = round(time.time()*1000)
202
- cursor = conn.cursor()
203
- entry = (User, time_ms, Username,)
204
- table = fTable(Followers)
205
- query = f"INSERT INTO {table} VALUES(?,?,?)"
206
- cursor.execute(query, entry)
207
- conn.commit()
208
- except sqlite3.IntegrityError:
209
- pass
210
-
211
- def get_hash_id(conn, id):
212
- cursor = conn.cursor()
213
- cursor.execute('SELECT hex_dig FROM users WHERE id = ? LIMIT 1', (id,))
214
- resultset = cursor.fetchall()
215
- return resultset[0][0] if resultset else -1
216
-
217
- def user(conn, config, User):
218
- try:
219
- time_ms = round(time.time()*1000)
220
- cursor = conn.cursor()
221
- user = [int(User.id), User.id, User.name, User.username, User.bio, User.location, User.url,User.join_date, User.join_time, User.tweets, User.following, User.followers, User.likes, User.media_count, User.is_private, User.is_verified, User.avatar, User.background_image]
222
-
223
- hex_dig = hashlib.sha256(','.join(str(v) for v in user).encode()).hexdigest()
224
- entry = tuple(user) + (hex_dig,time_ms,)
225
- old_hash = get_hash_id(conn, User.id)
226
-
227
- if old_hash == -1 or old_hash != hex_dig:
228
- query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
229
- cursor.execute(query, entry)
230
- else:
231
- pass
232
-
233
- if config.Followers or config.Following:
234
- table = uTable(config.Followers)
235
- query = f"INSERT INTO {table} VALUES(?,?)"
236
- cursor.execute(query, (config.User_id, int(User.id)))
237
-
238
- conn.commit()
239
- except sqlite3.IntegrityError:
240
- pass
241
-
242
- def tweets(conn, Tweet, config):
243
- try:
244
- time_ms = round(time.time()*1000)
245
- cursor = conn.cursor()
246
- entry = (Tweet.id,
247
- Tweet.id_str,
248
- Tweet.tweet,
249
- Tweet.lang,
250
- Tweet.conversation_id,
251
- Tweet.datetime,
252
- Tweet.datestamp,
253
- Tweet.timestamp,
254
- Tweet.timezone,
255
- Tweet.place,
256
- Tweet.replies_count,
257
- Tweet.likes_count,
258
- Tweet.retweets_count,
259
- Tweet.user_id,
260
- Tweet.user_id_str,
261
- Tweet.username,
262
- Tweet.name,
263
- Tweet.link,
264
- ",".join(Tweet.mentions),
265
- ",".join(Tweet.hashtags),
266
- ",".join(Tweet.cashtags),
267
- ",".join(Tweet.urls),
268
- ",".join(Tweet.photos),
269
- Tweet.thumbnail,
270
- Tweet.quote_url,
271
- Tweet.video,
272
- Tweet.geo,
273
- Tweet.near,
274
- Tweet.source,
275
- time_ms,
276
- Tweet.translate,
277
- Tweet.trans_src,
278
- Tweet.trans_dest)
279
- cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
280
-
281
- if config.Favorites:
282
- query = 'INSERT INTO favorites VALUES(?,?)'
283
- cursor.execute(query, (config.User_id, Tweet.id))
284
-
285
- if Tweet.retweet:
286
- query = 'INSERT INTO retweets VALUES(?,?,?,?,?)'
287
- _d = datetime.timestamp(datetime.strptime(Tweet.retweet_date, "%Y-%m-%d %H:%M:%S"))
288
- cursor.execute(query, (int(Tweet.user_rt_id), Tweet.user_rt, Tweet.id, int(Tweet.retweet_id), _d))
289
-
290
- if Tweet.reply_to:
291
- for reply in Tweet.reply_to:
292
- query = 'INSERT INTO replies VALUES(?,?,?)'
293
- cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username']))
294
-
295
- conn.commit()
296
- except sqlite3.IntegrityError:
297
- pass
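Conn() in the deleted db.py above opens (and on first use creates) the whole SQLite schema; the insert helpers then deduplicate via the sqlite3.IntegrityError handlers. A short sketch, assuming the module is importable and with an illustrative database path:

    from twint.storage import db

    conn = db.Conn("twint_archive.db")  # prints "[+] Inserting into Database: ..."
    # The scraping pipeline then calls db.tweets(conn, tweet, config) and
    # db.user(conn, config, user); rows with duplicate primary keys are skipped.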
 
twitter-scraper/twint-master/twint/storage/elasticsearch.py DELETED
@@ -1,364 +0,0 @@
1
- ## TODO - Fix Weekday situation
2
- from elasticsearch import Elasticsearch, helpers
3
- from geopy.geocoders import Nominatim
4
- from datetime import datetime
5
- import contextlib
6
- import sys
7
-
8
- _index_tweet_status = False
9
- _index_follow_status = False
10
- _index_user_status = False
11
- _is_near_def = False
12
- _is_location_def = False
13
- _near = {}
14
- _location = {}
15
-
16
- geolocator = Nominatim(user_agent="twint-1.2")
17
-
18
- class RecycleObject(object):
19
- def write(self, junk): pass
20
- def flush(self): pass
21
-
22
- def getLocation(place, **options):
23
- location = geolocator.geocode(place,timeout=1000)
24
- if location:
25
- if options.get("near"):
26
- global _near
27
- _near = {"lat": location.latitude, "lon": location.longitude}
28
- return True
29
- elif options.get("location"):
30
- global _location
31
- _location = {"lat": location.latitude, "lon": location.longitude}
32
- return True
33
- return {"lat": location.latitude, "lon": location.longitude}
34
- else:
35
- return {}
36
-
37
- def handleIndexResponse(response):
38
- try:
39
- if response["status"] == 400:
40
- return True
41
- except KeyError:
42
- pass
43
- if response["acknowledged"]:
44
- print("[+] Index \"" + response["index"] + "\" created!")
45
- else:
46
- print("[x] error index creation :: storage.elasticsearch.handleIndexCreation")
47
- if response["shards_acknowledged"]:
48
- print("[+] Shards acknowledged, everything is ready to be used!")
49
- return True
50
- else:
51
- print("[x] error with shards :: storage.elasticsearch.HandleIndexCreation")
52
- return False
53
-
54
- def createIndex(config, instance, **scope):
55
- if scope.get("scope") == "tweet":
56
- tweets_body = {
57
- "mappings": {
58
- "properties": {
59
- "id": {"type": "long"},
60
- "conversation_id": {"type": "long"},
61
- "created_at": {"type": "text"},
62
- "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
63
- "timezone": {"type": "keyword"},
64
- "place": {"type": "keyword"},
65
- "location": {"type": "keyword"},
66
- "tweet": {"type": "text"},
67
- "lang": {"type": "keyword"},
68
- "hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
69
- "cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
70
- "user_id_str": {"type": "keyword"},
71
- "username": {"type": "keyword", "normalizer": "hashtag_normalizer"},
72
- "name": {"type": "text"},
73
- "profile_image_url": {"type": "text"},
74
- "day": {"type": "integer"},
75
- "hour": {"type": "integer"},
76
- "link": {"type": "text"},
77
- "retweet": {"type": "text"},
78
- "essid": {"type": "keyword"},
79
- "nlikes": {"type": "integer"},
80
- "nreplies": {"type": "integer"},
81
- "nretweets": {"type": "integer"},
82
- "quote_url": {"type": "text"},
83
- "video": {"type":"integer"},
84
- "thumbnail": {"type":"text"},
85
- "search": {"type": "text"},
86
- "near": {"type": "text"},
87
- "geo_near": {"type": "geo_point"},
88
- "geo_tweet": {"type": "geo_point"},
89
- "photos": {"type": "text"},
90
- "user_rt_id": {"type": "keyword"},
91
- "mentions": {"type": "keyword", "normalizer": "hashtag_normalizer"},
92
- "source": {"type": "keyword"},
93
- "user_rt": {"type": "keyword"},
94
- "retweet_id": {"type": "keyword"},
95
- "reply_to": {
96
- "type": "nested",
97
- "properties": {
98
- "user_id": {"type": "keyword"},
99
- "username": {"type": "keyword"}
100
- }
101
- },
102
- "retweet_date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss", "ignore_malformed": True},
103
- "urls": {"type": "keyword"},
104
- "translate": {"type": "text"},
105
- "trans_src": {"type": "keyword"},
106
- "trans_dest": {"type": "keyword"},
107
- }
108
- },
109
- "settings": {
110
- "number_of_shards": 1,
111
- "analysis": {
112
- "normalizer": {
113
- "hashtag_normalizer": {
114
- "type": "custom",
115
- "char_filter": [],
116
- "filter": ["lowercase", "asciifolding"]
117
- }
118
- }
119
- }
120
- }
121
- }
122
- with nostdout():
123
- resp = instance.indices.create(index=config.Index_tweets, body=tweets_body, ignore=400)
124
- return handleIndexResponse(resp)
125
- elif scope.get("scope") == "follow":
126
- follow_body = {
127
- "mappings": {
128
- "properties": {
129
- "user": {"type": "keyword"},
130
- "follow": {"type": "keyword"},
131
- "essid": {"type": "keyword"}
132
- }
133
- },
134
- "settings": {
135
- "number_of_shards": 1
136
- }
137
- }
138
- with nostdout():
139
- resp = instance.indices.create(index=config.Index_follow, body=follow_body, ignore=400)
140
- return handleIndexResponse(resp)
141
- elif scope.get("scope") == "user":
142
- user_body = {
143
- "mappings": {
144
- "properties": {
145
- "id": {"type": "keyword"},
146
- "name": {"type": "keyword"},
147
- "username": {"type": "keyword"},
148
- "bio": {"type": "text"},
149
- "location": {"type": "keyword"},
150
- "url": {"type": "text"},
151
- "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
152
- "tweets": {"type": "integer"},
153
- "following": {"type": "integer"},
154
- "followers": {"type": "integer"},
155
- "likes": {"type": "integer"},
156
- "media": {"type": "integer"},
157
- "private": {"type": "integer"},
158
- "verified": {"type": "integer"},
159
- "avatar": {"type": "text"},
160
- "background_image": {"type": "text"},
161
- "session": {"type": "keyword"},
162
- "geo_user": {"type": "geo_point"}
163
- }
164
- },
165
- "settings": {
166
- "number_of_shards": 1
167
- }
168
- }
169
- with nostdout():
170
- resp = instance.indices.create(index=config.Index_users, body=user_body, ignore=400)
171
- return handleIndexResponse(resp)
172
- else:
173
- print("[x] error index pre-creation :: storage.elasticsearch.createIndex")
174
- return False
175
-
176
- @contextlib.contextmanager
177
- def nostdout():
178
- savestdout = sys.stdout
179
- sys.stdout = RecycleObject()
180
- yield
181
- sys.stdout = savestdout
182
-
183
- def weekday(day):
184
- weekdays = {
185
- "Monday": 1,
186
- "Tuesday": 2,
187
- "Wednesday": 3,
188
- "Thursday": 4,
189
- "Friday": 5,
190
- "Saturday": 6,
191
- "Sunday": 7,
192
- }
193
-
194
- return weekdays[day]
195
-
196
- def Tweet(Tweet, config):
197
- global _index_tweet_status
198
- global _is_near_def
199
- date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z")
200
-
201
- actions = []
202
-
203
- try:
204
- retweet = Tweet.retweet
205
- except AttributeError:
206
- retweet = None
207
-
208
- dt = f"{Tweet.datestamp} {Tweet.timestamp}"
209
-
210
- j_data = {
211
- "_index": config.Index_tweets,
212
- "_id": str(Tweet.id) + "_raw_" + config.Essid,
213
- "_source": {
214
- "id": str(Tweet.id),
215
- "conversation_id": Tweet.conversation_id,
216
- "created_at": Tweet.datetime,
217
- "date": dt,
218
- "timezone": Tweet.timezone,
219
- "place": Tweet.place,
220
- "tweet": Tweet.tweet,
221
- "language": Tweet.lang,
222
- "hashtags": Tweet.hashtags,
223
- "cashtags": Tweet.cashtags,
224
- "user_id_str": Tweet.user_id_str,
225
- "username": Tweet.username,
226
- "name": Tweet.name,
227
- "day": date_obj.weekday(),
228
- "hour": date_obj.hour,
229
- "link": Tweet.link,
230
- "retweet": retweet,
231
- "essid": config.Essid,
232
- "nlikes": int(Tweet.likes_count),
233
- "nreplies": int(Tweet.replies_count),
234
- "nretweets": int(Tweet.retweets_count),
235
- "quote_url": Tweet.quote_url,
236
- "video": Tweet.video,
237
- "search": str(config.Search),
238
- "near": config.Near
239
- }
240
- }
241
- if retweet is not None:
242
- j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
243
- j_data["_source"].update({"user_rt": Tweet.user_rt})
244
- j_data["_source"].update({"retweet_id": Tweet.retweet_id})
245
- j_data["_source"].update({"retweet_date": Tweet.retweet_date})
246
- if Tweet.reply_to:
247
- j_data["_source"].update({"reply_to": Tweet.reply_to})
248
- if Tweet.photos:
249
- _photos = []
250
- for photo in Tweet.photos:
251
- _photos.append(photo)
252
- j_data["_source"].update({"photos": _photos})
253
- if Tweet.thumbnail:
254
- j_data["_source"].update({"thumbnail": Tweet.thumbnail})
255
- if Tweet.mentions:
256
- _mentions = []
257
- for mention in Tweet.mentions:
258
- _mentions.append(mention)
259
- j_data["_source"].update({"mentions": _mentions})
260
- if Tweet.urls:
261
- _urls = []
262
- for url in Tweet.urls:
263
- _urls.append(url)
264
- j_data["_source"].update({"urls": _urls})
265
- if config.Near or config.Geo:
266
- if not _is_near_def:
267
- __geo = ""
268
- __near = ""
269
- if config.Geo:
270
- __geo = config.Geo
271
- if config.Near:
272
- __near = config.Near
273
- _is_near_def = getLocation(__near + __geo, near=True)
274
- if _near:
275
- j_data["_source"].update({"geo_near": _near})
276
- if Tweet.place:
277
- _t_place = getLocation(Tweet.place)
278
- if _t_place:
279
- j_data["_source"].update({"geo_tweet": getLocation(Tweet.place)})
280
- if Tweet.source:
281
- j_data["_source"].update({"source": Tweet.Source})
282
- if config.Translate:
283
- j_data["_source"].update({"translate": Tweet.translate})
284
- j_data["_source"].update({"trans_src": Tweet.trans_src})
285
- j_data["_source"].update({"trans_dest": Tweet.trans_dest})
286
-
287
- actions.append(j_data)
288
-
289
- es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
290
- if not _index_tweet_status:
291
- _index_tweet_status = createIndex(config, es, scope="tweet")
292
- with nostdout():
293
- helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
294
- actions = []
295
-
296
- def Follow(user, config):
297
- global _index_follow_status
298
- actions = []
299
-
300
- if config.Following:
301
- _user = config.Username
302
- _follow = user
303
- else:
304
- _user = user
305
- _follow = config.Username
306
- j_data = {
307
- "_index": config.Index_follow,
308
- "_id": _user + "_" + _follow + "_" + config.Essid,
309
- "_source": {
310
- "user": _user,
311
- "follow": _follow,
312
- "essid": config.Essid
313
- }
314
- }
315
- actions.append(j_data)
316
-
317
- es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
318
- if not _index_follow_status:
319
- _index_follow_status = createIndex(config, es, scope="follow")
320
- with nostdout():
321
- helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
322
- actions = []
323
-
324
- def UserProfile(user, config):
325
- global _index_user_status
326
- global _is_location_def
327
- actions = []
328
-
329
- j_data = {
330
- "_index": config.Index_users,
331
- "_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + config.Essid,
332
- "_source": {
333
- "id": user.id,
334
- "name": user.name,
335
- "username": user.username,
336
- "bio": user.bio,
337
- "location": user.location,
338
- "url": user.url,
339
- "join_datetime": user.join_date + " " + user.join_time,
340
- "tweets": user.tweets,
341
- "following": user.following,
342
- "followers": user.followers,
343
- "likes": user.likes,
344
- "media": user.media_count,
345
- "private": user.is_private,
346
- "verified": user.is_verified,
347
- "avatar": user.avatar,
348
- "background_image": user.background_image,
349
- "session": config.Essid
350
- }
351
- }
352
- if config.Location:
353
- if not _is_location_def:
354
- _is_location_def = getLocation(user.location, location=True)
355
- if _location:
356
- j_data["_source"].update({"geo_user": _location})
357
- actions.append(j_data)
358
-
359
- es = Elasticsearch(config.Elasticsearch, verify_certs=config.Skip_certs)
360
- if not _index_user_status:
361
- _index_user_status = createIndex(config, es, scope="user")
362
- with nostdout():
363
- helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
364
- actions = []
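The deleted elasticsearch.py writer is driven entirely by config attributes; indices are created lazily before the first bulk write. A hedged sketch of enabling it (the host and index name are assumptions about a local setup):

    import twint

    c = twint.Config()
    c.Search = "#opensource"            # illustrative query
    c.Elasticsearch = "localhost:9200"  # passed to Elasticsearch(...) above
    c.Index_tweets = "twinttweets"      # created by createIndex(scope="tweet")
    twint.run.Search(c)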
 
twitter-scraper/twint-master/twint/storage/panda.py DELETED
@@ -1,196 +0,0 @@
1
- import datetime, pandas as pd, warnings
2
- from time import strftime, localtime
3
- from twint.tweet import Tweet_formats
4
-
5
- Tweets_df = None
6
- Follow_df = None
7
- User_df = None
8
-
9
- _object_blocks = {
10
- "tweet": [],
11
- "user": [],
12
- "following": [],
13
- "followers": []
14
- }
15
-
16
- weekdays = {
17
- "Monday": 1,
18
- "Tuesday": 2,
19
- "Wednesday": 3,
20
- "Thursday": 4,
21
- "Friday": 5,
22
- "Saturday": 6,
23
- "Sunday": 7,
24
- }
25
-
26
- _type = ""
27
-
28
- def _concat(df, _type):
29
- if df is None:
30
- df = pd.DataFrame(_object_blocks[_type])
31
- else:
32
- _df = pd.DataFrame(_object_blocks[_type])
33
- df = pd.concat([df, _df], sort=True)
34
- return df
35
-
36
- def _autoget(_type):
37
- global Tweets_df
38
- global Follow_df
39
- global User_df
40
-
41
- if _type == "tweet":
42
- Tweets_df = _concat(Tweets_df, _type)
43
- elif _type == "followers" or _type == "following":
44
- Follow_df = _concat(Follow_df, _type)
45
- elif _type == "user":
46
- User_df = _concat(User_df, _type)
47
- else:
48
- error("[x] Wrong type of object passed")
49
-
50
-
51
- def update(object, config):
52
- global _type
53
-
54
- #try:
55
- # _type = ((object.__class__.__name__ == "tweet")*"tweet" +
56
- # (object.__class__.__name__ == "user")*"user")
57
- #except AttributeError:
58
- # _type = config.Following*"following" + config.Followers*"followers"
59
- if object.__class__.__name__ == "tweet":
60
- _type = "tweet"
61
- elif object.__class__.__name__ == "user":
62
- _type = "user"
63
- elif object.__class__.__name__ == "dict":
64
- _type = config.Following*"following" + config.Followers*"followers"
65
-
66
- if _type == "tweet":
67
- Tweet = object
68
- datetime_ms = datetime.datetime.strptime(Tweet.datetime, Tweet_formats['datetime']).timestamp() * 1000
69
- day = weekdays[strftime("%A", localtime(datetime_ms/1000))]
70
- dt = f"{object.datestamp} {object.timestamp}"
71
- _data = {
72
- "id": str(Tweet.id),
73
- "conversation_id": Tweet.conversation_id,
74
- "created_at": datetime_ms,
75
- "date": dt,
76
- "timezone": Tweet.timezone,
77
- "place": Tweet.place,
78
- "tweet": Tweet.tweet,
79
- "language": Tweet.lang,
80
- "hashtags": Tweet.hashtags,
81
- "cashtags": Tweet.cashtags,
82
- "user_id": Tweet.user_id,
83
- "user_id_str": Tweet.user_id_str,
84
- "username": Tweet.username,
85
- "name": Tweet.name,
86
- "day": day,
87
- "hour": strftime("%H", localtime(datetime_ms/1000)),
88
- "link": Tweet.link,
89
- "urls": Tweet.urls,
90
- "photos": Tweet.photos,
91
- "video": Tweet.video,
92
- "thumbnail": Tweet.thumbnail,
93
- "retweet": Tweet.retweet,
94
- "nlikes": int(Tweet.likes_count),
95
- "nreplies": int(Tweet.replies_count),
96
- "nretweets": int(Tweet.retweets_count),
97
- "quote_url": Tweet.quote_url,
98
- "search": str(config.Search),
99
- "near": Tweet.near,
100
- "geo": Tweet.geo,
101
- "source": Tweet.source,
102
- "user_rt_id": Tweet.user_rt_id,
103
- "user_rt": Tweet.user_rt,
104
- "retweet_id": Tweet.retweet_id,
105
- "reply_to": Tweet.reply_to,
106
- "retweet_date": Tweet.retweet_date,
107
- "translate": Tweet.translate,
108
- "trans_src": Tweet.trans_src,
109
- "trans_dest": Tweet.trans_dest
110
- }
111
- _object_blocks[_type].append(_data)
112
- elif _type == "user":
113
- user = object
114
- try:
115
- background_image = user.background_image
116
- except:
117
- background_image = ""
118
- _data = {
119
- "id": user.id,
120
- "name": user.name,
121
- "username": user.username,
122
- "bio": user.bio,
123
- "url": user.url,
124
- "join_datetime": user.join_date + " " + user.join_time,
125
- "join_date": user.join_date,
126
- "join_time": user.join_time,
127
- "tweets": user.tweets,
128
- "location": user.location,
129
- "following": user.following,
130
- "followers": user.followers,
131
- "likes": user.likes,
132
- "media": user.media_count,
133
- "private": user.is_private,
134
- "verified": user.is_verified,
135
- "avatar": user.avatar,
136
- "background_image": background_image,
137
- }
138
- _object_blocks[_type].append(_data)
139
- elif _type == "followers" or _type == "following":
140
- _data = {
141
- config.Following*"following" + config.Followers*"followers" :
142
- {config.Username: object[_type]}
143
- }
144
- _object_blocks[_type] = _data
145
- else:
146
- print("Wrong type of object passed!")
147
-
148
-
149
- def clean():
150
- global Tweets_df
151
- global Follow_df
152
- global User_df
153
- _object_blocks["tweet"].clear()
154
- _object_blocks["following"].clear()
155
- _object_blocks["followers"].clear()
156
- _object_blocks["user"].clear()
157
- Tweets_df = None
158
- Follow_df = None
159
- User_df = None
160
-
161
- def save(_filename, _dataframe, **options):
162
- if options.get("dataname"):
163
- _dataname = options.get("dataname")
164
- else:
165
- _dataname = "twint"
166
-
167
- if not options.get("type"):
168
- with warnings.catch_warnings():
169
- warnings.simplefilter("ignore")
170
- _store = pd.HDFStore(_filename + ".h5")
171
- _store[_dataname] = _dataframe
172
- _store.close()
173
- elif options.get("type") == "Pickle":
174
- with warnings.catch_warnings():
175
- warnings.simplefilter("ignore")
176
- _dataframe.to_pickle(_filename + ".pkl")
177
- else:
178
- print("""Please specify: filename, DataFrame, DataFrame name and type
179
- (HDF5, default, or Pickle)""")
180
-
181
- def read(_filename, **options):
182
- if not options.get("dataname"):
183
- _dataname = "twint"
184
- else:
185
- _dataname = options.get("dataname")
186
-
187
- if not options.get("type"):
188
- _store = pd.HDFStore(_filename + ".h5")
189
- _df = _store[_dataname]
190
- return _df
191
- elif options.get("type") == "Pickle":
192
- _df = pd.read_pickle(_filename + ".pkl")
193
- return _df
194
- else:
195
- print("""Please specify: DataFrame, DataFrame name (twint as default),
196
- filename and type (HDF5, default, or Pickle""")
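panda.py above buffers rows in _object_blocks and exposes results as module-level DataFrames once _autoget() runs. Sketch of the usual flow (query and filename are illustrative):

    import twint
    from twint.storage import panda

    c = twint.Config()
    c.Search = "data science"   # illustrative query
    c.Pandas = True             # routes each result through panda.update()
    twint.run.Search(c)

    df = panda.Tweets_df        # filled by _autoget("tweet") after the run
    panda.save("tweets", df)    # the default type writes tweets.h5 via HDFStore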
 
twitter-scraper/twint-master/twint/storage/write.py DELETED
@@ -1,77 +0,0 @@
1
- from . import write_meta as meta
2
- import csv
3
- import json
4
- import os
5
-
6
- def outputExt(objType, fType):
7
- if objType == "str":
8
- objType = "username"
9
- outExt = f"/{objType}s.{fType}"
10
-
11
- return outExt
12
-
13
- def addExt(base, objType, fType):
14
- if len(base.split('.')) == 1:
15
- createDirIfMissing(base)
16
- base += outputExt(objType, fType)
17
-
18
- return base
19
-
20
- def Text(entry, f):
21
- print(entry.replace('\n', ' '), file=open(f, "a", encoding="utf-8"))
22
-
23
- def Type(config):
24
- if config.User_full:
25
- _type = "user"
26
- elif config.Followers or config.Following:
27
- _type = "username"
28
- else:
29
- _type = "tweet"
30
-
31
- return _type
32
-
33
- def struct(obj, custom, _type):
34
- if custom:
35
- fieldnames = custom
36
- row = {}
37
- for f in fieldnames:
38
- row[f] = meta.Data(obj, _type)[f]
39
- else:
40
- fieldnames = meta.Fieldnames(_type)
41
- row = meta.Data(obj, _type)
42
-
43
- return fieldnames, row
44
-
45
- def createDirIfMissing(dirname):
46
- if not os.path.exists(dirname):
47
- os.makedirs(dirname)
48
-
49
- def Csv(obj, config):
50
- _obj_type = obj.__class__.__name__
51
- if _obj_type == "str":
52
- _obj_type = "username"
53
- fieldnames, row = struct(obj, config.Custom[_obj_type], _obj_type)
54
-
55
- base = addExt(config.Output, _obj_type, "csv")
56
- dialect = 'excel-tab' if 'Tabs' in config.__dict__ else 'excel'
57
-
58
- if not (os.path.exists(base)):
59
- with open(base, "w", newline='', encoding="utf-8") as csv_file:
60
- writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect)
61
- writer.writeheader()
62
-
63
- with open(base, "a", newline='', encoding="utf-8") as csv_file:
64
- writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=dialect)
65
- writer.writerow(row)
66
-
67
- def Json(obj, config):
68
- _obj_type = obj.__class__.__name__
69
- if _obj_type == "str":
70
- _obj_type = "username"
71
- null, data = struct(obj, config.Custom[_obj_type], _obj_type)
72
-
73
- base = addExt(config.Output, _obj_type, "json")
74
-
75
- with open(base, "a", newline='', encoding="utf-8") as json_file:
76
- json.dump(data, json_file, ensure_ascii=False)
77
- json_file.write("\n")
 
twitter-scraper/twint-master/twint/storage/write_meta.py DELETED
@@ -1,151 +0,0 @@
1
- def tweetData(t):
2
- data = {
3
- "id": int(t.id),
4
- "conversation_id": t.conversation_id,
5
- "created_at": t.datetime,
6
- "date": t.datestamp,
7
- "time": t.timestamp,
8
- "timezone": t.timezone,
9
- "user_id": t.user_id,
10
- "username": t.username,
11
- "name": t.name,
12
- "place": t.place,
13
- "tweet": t.tweet,
14
- "language": t.lang,
15
- "mentions": t.mentions,
16
- "urls": t.urls,
17
- "photos": t.photos,
18
- "replies_count": int(t.replies_count),
19
- "retweets_count": int(t.retweets_count),
20
- "likes_count": int(t.likes_count),
21
- "hashtags": t.hashtags,
22
- "cashtags": t.cashtags,
23
- "link": t.link,
24
- "retweet": t.retweet,
25
- "quote_url": t.quote_url,
26
- "video": t.video,
27
- "thumbnail": t.thumbnail,
28
- "near": t.near,
29
- "geo": t.geo,
30
- "source": t.source,
31
- "user_rt_id": t.user_rt_id,
32
- "user_rt": t.user_rt,
33
- "retweet_id": t.retweet_id,
34
- "reply_to": t.reply_to,
35
- "retweet_date": t.retweet_date,
36
- "translate": t.translate,
37
- "trans_src": t.trans_src,
38
- "trans_dest": t.trans_dest,
39
- }
40
- return data
41
-
42
- def tweetFieldnames():
43
- fieldnames = [
44
- "id",
45
- "conversation_id",
46
- "created_at",
47
- "date",
48
- "time",
49
- "timezone",
50
- "user_id",
51
- "username",
52
- "name",
53
- "place",
54
- "tweet",
55
- "language",
56
- "mentions",
57
- "urls",
58
- "photos",
59
- "replies_count",
60
- "retweets_count",
61
- "likes_count",
62
- "hashtags",
63
- "cashtags",
64
- "link",
65
- "retweet",
66
- "quote_url",
67
- "video",
68
- "thumbnail",
69
- "near",
70
- "geo",
71
- "source",
72
- "user_rt_id",
73
- "user_rt",
74
- "retweet_id",
75
- "reply_to",
76
- "retweet_date",
77
- "translate",
78
- "trans_src",
79
- "trans_dest"
80
- ]
81
- return fieldnames
82
-
83
- def userData(u):
84
- data = {
85
- "id": int(u.id),
86
- "name": u.name,
87
- "username": u.username,
88
- "bio": u.bio,
89
- "location": u.location,
90
- "url": u.url,
91
- "join_date": u.join_date,
92
- "join_time": u.join_time,
93
- "tweets": int(u.tweets),
94
- "following": int(u.following),
95
- "followers": int(u.followers),
96
- "likes": int(u.likes),
97
- "media": int(u.media_count),
98
- "private": u.is_private,
99
- "verified": u.is_verified,
100
- "profile_image_url": u.avatar,
101
- "background_image": u.background_image
102
- }
103
- return data
104
-
105
- def userFieldnames():
106
- fieldnames = [
107
- "id",
108
- "name",
109
- "username",
110
- "bio",
111
- "location",
112
- "url",
113
- "join_date",
114
- "join_time",
115
- "tweets",
116
- "following",
117
- "followers",
118
- "likes",
119
- "media",
120
- "private",
121
- "verified",
122
- "profile_image_url",
123
- "background_image"
124
- ]
125
- return fieldnames
126
-
127
- def usernameData(u):
128
- return {"username": u}
129
-
130
- def usernameFieldnames():
131
- return ["username"]
132
-
133
- def Data(obj, _type):
134
- if _type == "user":
135
- ret = userData(obj)
136
- elif _type == "username":
137
- ret = usernameData(obj)
138
- else:
139
- ret = tweetData(obj)
140
-
141
- return ret
142
-
143
- def Fieldnames(_type):
144
- if _type == "user":
145
- ret = userFieldnames()
146
- elif _type == "username":
147
- ret = usernameFieldnames()
148
- else:
149
- ret = tweetFieldnames()
150
-
151
- return ret
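write_meta.py above is a pure dispatch table: Fieldnames(_type) yields the CSV header and Data(obj, _type) the matching row dict. A tiny sketch (the handle string is hypothetical):

    from twint.storage import write_meta as meta

    fields = meta.Fieldnames("user")            # 17 columns, "id" through "background_image"
    row = meta.Data("some_handle", "username")  # bare strings become {"username": ...}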
 
twitter-scraper/twint-master/twint/token.py DELETED
@@ -1,94 +0,0 @@
1
- import re
2
- import time
3
-
4
- import requests
5
- import logging as logme
6
-
7
-
8
- class TokenExpiryException(Exception):
9
- def __init__(self, msg):
10
- super().__init__(msg)
11
-
12
-
13
- class RefreshTokenException(Exception):
14
- def __init__(self, msg):
15
- super().__init__(msg)
16
-
17
-
18
- class Token:
19
- def __init__(self, config):
20
- self._session = requests.Session()
21
- self._session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'})
22
- self.config = config
23
- self._retries = 5
24
- self._timeout = 10
25
- self.url = 'https://twitter.com'
26
-
27
- def _request(self):
28
- for attempt in range(self._retries + 1):
29
- # The request is newly prepared on each retry because of potential cookie updates.
30
- req = self._session.prepare_request(requests.Request('GET', self.url))
31
- logme.debug(f'Retrieving {req.url}')
32
- try:
33
- r = self._session.send(req, allow_redirects=True, timeout=self._timeout)
34
- except requests.exceptions.RequestException as exc:
35
- if attempt < self._retries:
36
- retrying = ', retrying'
37
- level = logme.WARNING
38
- else:
39
- retrying = ''
40
- level = logme.ERROR
41
- logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
42
- else:
43
- success, msg = (True, None)
44
- msg = f': {msg}' if msg else ''
45
-
46
- if success:
47
- logme.debug(f'{req.url} retrieved successfully{msg}')
48
- return r
49
- if attempt < self._retries:
50
- # TODO : might wanna tweak this back-off timer
51
- sleep_time = 2.0 * 2 ** attempt
52
- logme.info(f'Waiting {sleep_time:.0f} seconds')
53
- time.sleep(sleep_time)
54
- else:
55
- msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
56
- logme.fatal(msg)
57
- self.config.Guest_token = None
58
- raise RefreshTokenException(msg)
59
-
60
- def refresh(self):
61
- logme.debug('Retrieving guest token')
62
- res = self._request()
63
- match = re.search(r'\("gt=(\d+);', res.text)
64
- if match:
65
- logme.debug('Found guest token in HTML')
66
- self.config.Guest_token = str(match.group(1))
67
- else:
68
- headers = {
69
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
70
- 'authority': 'api.twitter.com',
71
- 'content-length': '0',
72
- 'authorization': self.config.Bearer_token,
73
- 'x-twitter-client-language': 'en',
74
- 'x-csrf-token': res.cookies.get("ct0"),
75
- 'x-twitter-active-user': 'yes',
76
- 'content-type': 'application/x-www-form-urlencoded',
77
- 'accept': '*/*',
78
- 'sec-gpc': '1',
79
- 'origin': 'https://twitter.com',
80
- 'sec-fetch-site': 'same-site',
81
- 'sec-fetch-mode': 'cors',
82
- 'sec-fetch-dest': 'empty',
83
- 'referer': 'https://twitter.com/',
84
- 'accept-language': 'en-US',
85
- }
86
- self._session.headers.update(headers)
87
- req = self._session.prepare_request(requests.Request('POST', 'https://api.twitter.com/1.1/guest/activate.json'))
88
- res = self._session.send(req, allow_redirects=True, timeout=self._timeout)
89
- if 'guest_token' in res.json():
90
- logme.debug('Found guest token in JSON')
91
- self.config.Guest_token = res.json()['guest_token']
92
- else:
93
- self.config.Guest_token = None
94
- raise RefreshTokenException('Could not find the Guest token in the JSON response')
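Token.refresh() above first scrapes twitter.com for an embedded guest token and only then falls back to the activate endpoint; every failure surfaces as RefreshTokenException. A sketch, assuming the config's default Bearer_token is populated:

    import twint
    from twint.token import Token, RefreshTokenException

    config = twint.Config()          # assumes the default Bearer_token is set
    try:
        Token(config).refresh()      # stores the result on config.Guest_token
    except RefreshTokenException:
        pass                         # all retries failed; Guest_token stays None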
 
twitter-scraper/twint-master/twint/tweet.py DELETED
@@ -1,166 +0,0 @@
1
- from time import strftime, localtime
2
- from datetime import datetime, timezone
3
-
4
- import logging as logme
5
- from googletransx import Translator
6
- # ref.
7
- # - https://github.com/x0rzkov/py-googletrans#basic-usage
8
- translator = Translator()
9
-
10
-
11
- class tweet:
12
- """Define Tweet class
13
- """
14
- type = "tweet"
15
-
16
- def __init__(self):
17
- pass
18
-
19
-
20
- def utc_to_local(utc_dt):
21
- return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=None)
22
-
23
-
24
- Tweet_formats = {
25
- 'datetime': '%Y-%m-%d %H:%M:%S %Z',
26
- 'datestamp': '%Y-%m-%d',
27
- 'timestamp': '%H:%M:%S'
28
- }
29
-
30
-
31
- def _get_mentions(tw):
32
- """Extract mentions from tweet
33
- """
34
- logme.debug(__name__ + ':get_mentions')
35
- try:
36
- mentions = [
37
- {
38
- 'screen_name': _mention['screen_name'],
39
- 'name': _mention['name'],
40
- 'id': _mention['id_str'],
41
- } for _mention in tw['entities']['user_mentions']
42
- if tw['display_text_range'][0] < _mention['indices'][0]
43
- ]
44
- except KeyError:
45
- mentions = []
46
- return mentions
47
-
48
-
49
- def _get_reply_to(tw):
50
- try:
51
- reply_to = [
52
- {
53
- 'screen_name': _mention['screen_name'],
54
- 'name': _mention['name'],
55
- 'id': _mention['id_str'],
56
- } for _mention in tw['entities']['user_mentions']
57
- if tw['display_text_range'][0] > _mention['indices'][1]
58
- ]
59
- except KeyError:
60
- reply_to = []
61
- return reply_to
62
-
63
-
64
- def getText(tw):
65
- """Replace some text
66
- """
67
- logme.debug(__name__ + ':getText')
68
- text = tw['full_text']
69
- text = text.replace("http", " http")
70
- text = text.replace("pic.twitter", " pic.twitter")
71
- text = text.replace("\n", " ")
72
-
73
- return text
74
-
75
-
76
- def Tweet(tw, config):
77
- """Create Tweet object
78
- """
79
- logme.debug(__name__ + ':Tweet')
80
- t = tweet()
81
- t.id = int(tw['id_str'])
82
- t.id_str = tw["id_str"]
83
- t.conversation_id = tw["conversation_id_str"]
84
-
85
- # parsing date to user-friendly format
86
- _dt = tw['created_at']
87
- _dt = datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
88
- _dt = utc_to_local(_dt)
89
- t.datetime = str(_dt.strftime(Tweet_formats['datetime']))
90
- # date is of the format year-month-day
91
- t.datestamp = _dt.strftime(Tweet_formats['datestamp'])
92
- t.timestamp = _dt.strftime(Tweet_formats['timestamp'])
93
- t.user_id = int(tw["user_id_str"])
94
- t.user_id_str = tw["user_id_str"]
95
- t.username = tw["user_data"]['screen_name']
96
- t.name = tw["user_data"]['name']
97
- t.place = tw['geo'] if 'geo' in tw and tw['geo'] else ""
98
- t.timezone = strftime("%z", localtime())
99
- t.mentions = _get_mentions(tw)
100
- t.reply_to = _get_reply_to(tw)
101
- try:
102
- t.urls = [_url['expanded_url'] for _url in tw['entities']['urls']]
103
- except KeyError:
104
- t.urls = []
105
- try:
106
- t.photos = [_img['media_url_https'] for _img in tw['entities']['media'] if _img['type'] == 'photo' and
107
- _img['expanded_url'].find('/photo/') != -1]
108
- except KeyError:
109
- t.photos = []
110
- try:
111
- t.video = 1 if len(tw['extended_entities']['media']) else 0
112
- except KeyError:
113
- t.video = 0
114
- try:
115
- t.thumbnail = tw['extended_entities']['media'][0]['media_url_https']
116
- except KeyError:
117
- t.thumbnail = ''
118
- t.tweet = getText(tw)
119
- t.lang = tw['lang']
120
- try:
121
- t.hashtags = [hashtag['text'] for hashtag in tw['entities']['hashtags']]
122
- except KeyError:
123
- t.hashtags = []
124
- try:
125
- t.cashtags = [cashtag['text'] for cashtag in tw['entities']['symbols']]
126
- except KeyError:
127
- t.cashtags = []
128
- t.replies_count = tw['reply_count']
129
- t.retweets_count = tw['retweet_count']
130
- t.likes_count = tw['favorite_count']
131
- t.link = f"https://twitter.com/{t.username}/status/{t.id}"
132
- try:
133
- if 'user_rt_id' in tw['retweet_data']:
134
- t.retweet = True
135
- t.retweet_id = tw['retweet_data']['retweet_id']
136
- t.retweet_date = tw['retweet_data']['retweet_date']
137
- t.user_rt = tw['retweet_data']['user_rt']
138
- t.user_rt_id = tw['retweet_data']['user_rt_id']
139
- except KeyError:
140
- t.retweet = False
141
- t.retweet_id = ''
142
- t.retweet_date = ''
143
- t.user_rt = ''
144
- t.user_rt_id = ''
145
- try:
146
- t.quote_url = tw['quoted_status_permalink']['expanded'] if tw['is_quote_status'] else ''
147
- except KeyError:
148
- # means that the quoted tweet have been deleted
149
- t.quote_url = 0
150
- t.near = config.Near if config.Near else ""
151
- t.geo = config.Geo if config.Geo else ""
152
- t.source = config.Source if config.Source else ""
153
- t.translate = ''
154
- t.trans_src = ''
155
- t.trans_dest = ''
156
- if config.Translate:
157
- try:
158
- ts = translator.translate(text=t.tweet, dest=config.TranslateDest)
159
- t.translate = ts.text
160
- t.trans_src = ts.src
161
- t.trans_dest = ts.dest
162
- # ref. https://github.com/SuniTheFish/ChainTranslator/blob/master/ChainTranslator/__main__.py#L31
163
- except ValueError as e:
164
- logme.debug(__name__ + ':Tweet:translator.translate:' + str(e))
165
- raise Exception("Invalid destination language: {} / Tweet: {}".format(config.TranslateDest, t.tweet))
166
- return t
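Tweet() above flattens one raw adaptive-search status into an attribute-per-field object. A minimal hypothetical fixture, restricted to the keys the parser actually reads, shows the round trip:

    import twint
    from twint.tweet import Tweet

    raw = {  # hypothetical status dict; only keys consumed by Tweet() above
        "id_str": "1", "conversation_id_str": "1",
        "created_at": "Wed Oct 10 20:19:24 +0000 2018",
        "user_id_str": "42",
        "user_data": {"screen_name": "some_user", "name": "Some User"},
        "full_text": "hello world", "lang": "en", "display_text_range": [0, 11],
        "entities": {"user_mentions": [], "urls": [], "hashtags": [], "symbols": []},
        "reply_count": 0, "retweet_count": 0, "favorite_count": 0,
        "is_quote_status": False,
    }
    t = Tweet(raw, twint.Config())
    print(t.link)   # https://twitter.com/some_user/status/1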
 
twitter-scraper/twint-master/twint/url.py DELETED
@@ -1,195 +0,0 @@
1
- import datetime
2
- import json
3
- from sys import platform
4
- import logging as logme
5
- from urllib.parse import urlencode
6
- from urllib.parse import quote
7
-
8
- mobile = "https://mobile.twitter.com"
9
- base = "https://api.twitter.com/2/search/adaptive.json"
10
-
11
-
12
- def _sanitizeQuery(_url, params):
13
- _serialQuery = ""
14
- _serialQuery = urlencode(params, quote_via=quote)
15
- _serialQuery = _url + "?" + _serialQuery
16
- return _serialQuery
17
-
18
-
19
- def _formatDate(date):
20
- if "win" in platform:
21
- return f'\"{date.split()[0]}\"'
22
- try:
23
- return int(datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S").timestamp())
24
- except ValueError:
25
- return int(datetime.datetime.strptime(date, "%Y-%m-%d").timestamp())
26
-
27
-
28
- async def Favorites(username, init):
29
- logme.debug(__name__ + ':Favorites')
30
- url = f"{mobile}/{username}/favorites?lang=en"
31
-
32
- if init != '-1':
33
- url += f"&max_id={init}"
34
-
35
- return url
36
-
37
-
38
- async def Followers(username, init):
39
- logme.debug(__name__ + ':Followers')
40
- url = f"{mobile}/{username}/followers?lang=en"
41
-
42
- if init != '-1':
43
- url += f"&cursor={init}"
44
-
45
- return url
46
-
47
-
48
- async def Following(username, init):
49
- logme.debug(__name__ + ':Following')
50
- url = f"{mobile}/{username}/following?lang=en"
51
-
52
- if init != '-1':
53
- url += f"&cursor={init}"
54
-
55
- return url
56
-
57
-
58
- async def MobileProfile(username, init):
59
- logme.debug(__name__ + ':MobileProfile')
60
- url = f"{mobile}/{username}?lang=en"
61
-
62
- if init != '-1':
63
- url += f"&max_id={init}"
64
-
65
- return url
66
-
67
-
68
- async def Search(config, init):
69
- logme.debug(__name__ + ':Search')
70
- url = base
71
- tweet_count = 100 if not config.Limit else config.Limit
72
- q = ""
73
- params = [
74
- # ('include_blocking', '1'),
75
- # ('include_blocked_by', '1'),
76
- # ('include_followed_by', '1'),
77
- # ('include_want_retweets', '1'),
78
- # ('include_mute_edge', '1'),
79
- # ('include_can_dm', '1'),
80
- ('include_can_media_tag', '1'),
81
- # ('skip_status', '1'),
82
- # ('include_cards', '1'),
83
- ('include_ext_alt_text', 'true'),
84
- ('include_quote_count', 'true'),
85
- ('include_reply_count', '1'),
86
- ('tweet_mode', 'extended'),
87
- ('include_entities', 'true'),
88
- ('include_user_entities', 'true'),
89
- ('include_ext_media_availability', 'true'),
90
- ('send_error_codes', 'true'),
91
- ('simple_quoted_tweet', 'true'),
92
- ('count', tweet_count),
93
- ('query_source', 'typed_query'),
94
- # ('pc', '1'),
95
- ('cursor', str(init)),
96
- ('spelling_corrections', '1'),
97
- ('ext', 'mediaStats%2ChighlightedLabel'),
98
- ('tweet_search_mode', 'live'), # this can be handled better, maybe take an argument and set it then
99
- ]
100
- if not config.Popular_tweets:
101
- params.append(('f', 'tweets'))
102
- if config.Lang:
103
- params.append(("l", config.Lang))
104
- params.append(("lang", "en"))
105
- if config.Query:
106
- q += f" from:{config.Query}"
107
- if config.Username:
108
- q += f" from:{config.Username}"
109
- if config.Geo:
110
- config.Geo = config.Geo.replace(" ", "")
111
- q += f" geocode:{config.Geo}"
112
- if config.Search:
113
-
114
- q += f" {config.Search}"
115
- if config.Year:
116
- q += f" until:{config.Year}-1-1"
117
- if config.Since:
118
- q += f" since:{_formatDate(config.Since)}"
119
- if config.Until:
120
- q += f" until:{_formatDate(config.Until)}"
121
- if config.Email:
122
- q += ' "mail" OR "email" OR'
123
- q += ' "gmail" OR "e-mail"'
124
- if config.Phone:
125
- q += ' "phone" OR "call me" OR "text me"'
126
- if config.Verified:
127
- q += " filter:verified"
128
- if config.To:
129
- q += f" to:{config.To}"
130
- if config.All:
131
- q += f" to:{config.All} OR from:{config.All} OR @{config.All}"
132
- if config.Near:
133
- q += f' near:"{config.Near}"'
134
- if config.Images:
135
- q += " filter:images"
136
- if config.Videos:
137
- q += " filter:videos"
138
- if config.Media:
139
- q += " filter:media"
140
- if config.Replies:
141
- q += " filter:replies"
142
- # this filter can still be used, but it appeared broken in preliminary testing; needs more testing
143
- if config.Native_retweets:
144
- q += " filter:nativeretweets"
145
- if config.Min_likes:
146
- q += f" min_faves:{config.Min_likes}"
147
- if config.Min_retweets:
148
- q += f" min_retweets:{config.Min_retweets}"
149
- if config.Min_replies:
150
- q += f" min_replies:{config.Min_replies}"
151
- if config.Links == "include":
152
- q += " filter:links"
153
- elif config.Links == "exclude":
154
- q += " exclude:links"
155
- if config.Source:
156
- q += f" source:\"{config.Source}\""
157
- if config.Members_list:
158
- q += f" list:{config.Members_list}"
159
- if config.Filter_retweets:
160
- q += f" exclude:nativeretweets exclude:retweets"
161
- if config.Custom_query:
162
- q = config.Custom_query
163
-
164
- q = q.strip()
165
- params.append(("q", q))
166
- _serialQuery = _sanitizeQuery(url, params)
167
- return url, params, _serialQuery
168
-
169
-
170
- def SearchProfile(config, init=None):
171
- logme.debug(__name__ + ':SearchProfile')
172
- _url = 'https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies'
173
- tweet_count = 100
174
- variables = {
175
- "userId": config.User_id,
176
- "count": tweet_count,
177
- "includePromotedContent": True,
178
- "withCommunity": True,
179
- "withSuperFollowsUserFields": True,
180
- "withBirdwatchPivots": False,
181
- "withDownvotePerspective": False,
182
- "withReactionsMetadata": False,
183
- "withReactionsPerspective": False,
184
- "withSuperFollowsTweetFields": True,
185
- "withVoice": True,
186
- "withV2Timeline": False,
187
- "__fs_interactive_text": False,
188
- "__fs_dont_mention_me_view_api_enabled": False,
189
- }
190
- if type(init) == str:
191
- variables['cursor'] = init
192
- params = [('variables', json.dumps(variables, separators=(',',':')))]
193
-
194
- _serialQuery = _sanitizeQuery(_url, params)
195
- return _serialQuery, [], _serialQuery
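_sanitizeQuery() above percent-encodes every parameter with quote, so spaces become %20 rather than +. The same behavior reproduced in isolation:

    from urllib.parse import urlencode, quote

    base = "https://api.twitter.com/2/search/adaptive.json"
    params = [("q", "from:some_user since:1577836800"), ("count", 100)]
    print(base + "?" + urlencode(params, quote_via=quote))
    # ...adaptive.json?q=from%3Asome_user%20since%3A1577836800&count=100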
 
twitter-scraper/twint-master/twint/user.py DELETED
@@ -1,52 +0,0 @@
1
- import datetime
2
- import logging as logme
3
-
4
-
5
- class user:
6
- type = "user"
7
-
8
- def __init__(self):
9
- pass
10
-
11
-
12
- User_formats = {
13
- 'join_date': '%Y-%m-%d',
14
- 'join_time': '%H:%M:%S %Z'
15
- }
16
-
17
-
18
- # ur object must be a json from the endpoint https://api.twitter.com/graphql
19
- def User(ur):
20
- logme.debug(__name__ + ':User')
21
- if 'data' not in ur or 'user' not in ur['data']:
22
- msg = 'malformed json! cannot be parsed to get user data'
23
- logme.fatal(msg)
24
- raise KeyError(msg)
25
- _usr = user()
26
- _usr.id = ur['data']['user']['rest_id']
27
- _usr.name = ur['data']['user']['legacy']['name']
28
- _usr.username = ur['data']['user']['legacy']['screen_name']
29
- _usr.bio = ur['data']['user']['legacy']['description']
30
- _usr.location = ur['data']['user']['legacy']['location']
31
- _usr.url = ur['data']['user']['legacy']['url']
32
- # parsing date to user-friendly format
33
- _dt = ur['data']['user']['legacy']['created_at']
34
- _dt = datetime.datetime.strptime(_dt, '%a %b %d %H:%M:%S %z %Y')
35
- # join date is of the format year-month-day
36
- _usr.join_date = _dt.strftime(User_formats['join_date'])
37
- _usr.join_time = _dt.strftime(User_formats['join_time'])
38
-
39
- # :type `int`
40
- _usr.tweets = int(ur['data']['user']['legacy']['statuses_count'])
41
- _usr.following = int(ur['data']['user']['legacy']['friends_count'])
42
- _usr.followers = int(ur['data']['user']['legacy']['followers_count'])
43
- _usr.likes = int(ur['data']['user']['legacy']['favourites_count'])
44
- _usr.media_count = int(ur['data']['user']['legacy']['media_count'])
45
-
46
- _usr.is_private = ur['data']['user']['legacy']['protected']
47
- _usr.is_verified = ur['data']['user']['legacy']['verified']
48
- _usr.avatar = ur['data']['user']['legacy']['profile_image_url_https']
49
- _usr.background_image = ur['data']['user']['legacy']['profile_banner_url']
50
- # TODO : future implementation
51
- # legacy_extended_profile is also available in some cases which can be used to get DOB of user
52
- return _usr
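User() above walks a fixed path through the GraphQL payload. A minimal hypothetical payload, containing only the keys actually read, parses as follows:

    from twint.user import User

    ur = {"data": {"user": {
        "rest_id": "12",
        "legacy": {
            "name": "jack", "screen_name": "jack", "description": "", "location": "",
            "url": None, "created_at": "Tue Mar 21 20:50:14 +0000 2006",
            "statuses_count": 1, "friends_count": 0, "followers_count": 0,
            "favourites_count": 0, "media_count": 0, "protected": False,
            "verified": False, "profile_image_url_https": "",
            "profile_banner_url": "",
        },
    }}}
    u = User(ur)
    print(u.username, u.join_date)   # jack 2006-03-21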
 
twitter-scraper/twint-master/twint/verbose.py DELETED
@@ -1,18 +0,0 @@
1
- def Count(count, config):
2
- msg = "[+] Finished: Successfully collected "
3
- if config.Followers:
4
- msg += f"all {count} users who follow @{config.Username}"
5
- elif config.Following:
6
- msg += f"all {count} users who @{config.Username} follows"
7
- elif config.Favorites:
8
- msg += f"{count} Tweets that @{config.Username} liked"
9
- else:
10
- msg += f"{count} Tweets_and_replies"
11
- if config.Username:
12
- msg += f" from @{config.Username}"
13
- msg += "."
14
- print(msg)
15
-
16
- def Elastic(elasticsearch):
17
- if elasticsearch:
18
- print("[+] Indexing to Elasticsearch @ " + str(elasticsearch))
 
twitter-scraper/{twint-master/twitter_scraper.ipynb → twitter_scraper.ipynb} RENAMED
File without changes
twitter_scraper/twint_master/elasticsearch/dashboard.json ADDED
@@ -0,0 +1,18 @@
 
1
+ [
2
+ {
3
+ "_id": "e6d65380-bfe2-11e8-961a-d371b24d5d1d",
4
+ "_type": "dashboard",
5
+ "_source": {
6
+ "title": "Twint Dashboard",
7
+ "hits": 0,
8
+ "description": "",
9
+ "panelsJSON": "[{\"panelIndex\":\"1\",\"gridData\":{\"x\":0,\"y\":0,\"w\":40,\"h\":17,\"i\":\"1\"},\"embeddableConfig\":{},\"id\":\"d47421c0-bfd5-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":40,\"y\":6,\"w\":8,\"h\":11,\"i\":\"2\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"e2b89640-bfd4-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":32,\"w\":20,\"h\":17,\"i\":\"3\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"8a8bb420-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":17,\"w\":33,\"h\":15,\"i\":\"4\"},\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"id\":\"a8d3ee70-bfd9-11e8-8858-bbc566841533\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":40,\"y\":0,\"w\":8,\"h\":6,\"i\":\"6\"},\"embeddableConfig\":{},\"id\":\"37cd72e0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":33,\"y\":17,\"w\":15,\"h\":15,\"i\":\"7\"},\"embeddableConfig\":{},\"id\":\"149ecbc0-bfe4-11e8-961a-d371b24d5d1d\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"panelIndex\":\"8\",\"gridData\":{\"x\":20,\"y\":32,\"w\":28,\"h\":17,\"i\":\"8\"},\"version\":\"6.3.2\",\"type\":\"visualization\",\"id\":\"b45ec590-c267-11e8-bcd4-3956fe930db7\",\"embeddableConfig\":{}}]",
10
+ "optionsJSON": "{\"darkTheme\":true,\"hidePanelTitles\":true,\"useMargins\":true}",
11
+ "version": 1,
12
+ "timeRestore": false,
13
+ "kibanaSavedObjectMeta": {
14
+ "searchSourceJSON": "{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[],\"highlightAll\":true,\"version\":true}"
15
+ }
16
+ }
17
+ }
18
+ ]
twitter_scraper/twint_master/elasticsearch/index-follow.json ADDED
@@ -0,0 +1,15 @@
 
1
+ PUT twintgraph
2
+ {
3
+ "mappings": {
4
+ "items": {
5
+ "properties": {
6
+ "user": {"type": "keyword"},
7
+ "follow": {"type": "keyword"},
8
+ "essid": {"type": "keyword"}
9
+ }
10
+ }
11
+ },
12
+ "settings": {
13
+ "number_of_shards": 1
14
+ }
15
+ }
twitter_scraper/twint_master/elasticsearch/index-tweets.json ADDED
@@ -0,0 +1,48 @@
 
1
+ PUT twinttweets
2
+ {
3
+ "mappings": {
4
+ "items": {
5
+ "properties": {
6
+ "id": {"type": "long"},
7
+ "conversation_id": {"type": "long"},
8
+ "created_at": {"type": "long"},
9
+ "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
10
+ "timezone": {"type": "keyword"},
11
+ "place": {"type": "keyword"},
12
+ "location": {"type": "keyword"},
13
+ "tweet": {"type": "text"},
14
+ "hashtags": {"type": "keyword"},
15
+ "cashtags": {"type": "keyword"},
16
+ "user_id": {"type": "long"},
17
+ "user_id_str": {"type": "keyword"},
18
+ "username": {"type": "keyword"},
19
+ "name": {"type": "text"},
20
+ "profile_image_url": {"type": "text"},
21
+ "day": {"type": "integer"},
22
+ "hour": {"type": "integer"},
23
+ "link": {"type": "text"},
24
+ "retweet": {"type": "text"},
25
+ "essid": {"type": "keyword"},
26
+ "nlikes": {"type": "integer"},
27
+ "nreplies": {"type": "integer"},
28
+ "nretweets": {"type": "integer"},
29
+ "quote_url": {"type": "text"},
30
+ "video": {"type": "integer"},
31
+ "thumbnail": {"type": "text"},
32
+ "search": {"type": "text"},
33
+ "near": {"type": "text"},
34
+ "geo_near": {"type": "geo_point"},
35
+ "geo_tweet": {"type": "geo_point"},
36
+ "photos": {"type": "text"},
37
+ "mentions": {"type": "text"},
38
+ "translation": {"type": "text"},
39
+ "trans_src": {"type": "keyword"},
40
+ "trans_dev": {"type": "keyword"},
41
+ }
42
+ }
43
+ }
44
+ ,
45
+ "settings": {
46
+ "number_of_shards": 1
47
+ }
48
+ }
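A minimal sketch of writing one document against this mapping, again with the Python client and an assumed local 6.x node; every field value below is made up for illustration, and `date` must match the declared `yyyy-MM-dd HH:mm:ss` format:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# Illustrative document shaped after the twinttweets mapping above.
doc = {
    "id": 1048372819283745792,
    "date": "2018-10-06 09:15:00",
    "timezone": "UTC",
    "tweet": "example tweet text",
    "hashtags": ["osint"],
    "username": "someuser",
    "nlikes": 3,
    "nreplies": 0,
    "nretweets": 1,
    "geo_tweet": "59.33,18.06",  # geo_point accepts "lat,lon" strings
}

# doc_type must match the single mapping type ("items") on a 6.x cluster.
es.index(index="twinttweets", doc_type="items", id=doc["id"], body=doc)
```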
twitter_scraper/twint_master/elasticsearch/index-user.json ADDED
@@ -0,0 +1,33 @@
+ PUT twintuser
+ {
+   "mappings": {
+     "items": {
+       "properties": {
+         "id": {"type": "keyword"},
+         "name": {"type": "keyword"},
+         "username": {"type": "keyword"},
+         "bio": {"type": "text"},
+         "location": {"type": "keyword"},
+         "url": {"type": "text"},
+         "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
+         "join_date": {"type": "date", "format": "yyyy-MM-dd"},
+         "join_time": {"type": "date", "format": "HH:mm:ss"},
+         "tweets": {"type": "integer"},
+         "following": {"type": "integer"},
+         "followers": {"type": "integer"},
+         "likes": {"type": "integer"},
+         "media": {"type": "integer"},
+         "private": {"type": "integer"},
+         "verified": {"type": "integer"},
+         "avatar": {"type": "text"},
+         "background_image": {"type": "text"},
+         "session": {"type": "keyword"},
+         "geo_user": {"type": "geo_point"}
+       }
+     }
+   }
+   ,
+   "settings": {
+     "number_of_shards": 1
+   }
+ }
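Because `username` is mapped as `keyword`, it is stored unanalyzed and should be queried with an exact-match `term` query rather than a full-text `match`. A short sketch, with an assumed local node and a made-up username:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# term query = exact match against the unanalyzed keyword field.
resp = es.search(
    index="twintuser",
    body={"query": {"term": {"username": "someuser"}}},
)

for hit in resp["hits"]["hits"]:
    src = hit["_source"]
    print(src.get("name"), src.get("followers"), src.get("join_date"))
```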
twitter_scraper/twint_master/elasticsearch/visualizations.json ADDED
@@ -0,0 +1,100 @@
+ [
+   {
+     "_id": "d47421c0-bfd5-11e8-8858-bbc566841533",
+     "_type": "visualization",
+     "_source": {
+       "title": "Activity [twinttweets]",
+       "visState": "{\"title\":\"Activity [twinttweets]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"area\",\"mode\":\"stacked\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"cardinal\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":true},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Days\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"User ids\"}}]}",
+       "uiStateJSON": "{}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "e2b89640-bfd4-11e8-8858-bbc566841533",
+     "_type": "visualization",
+     "_source": {
+       "title": "Activity - pie [twinttweets]",
+       "visState": "{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"user_id\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"size\":5},\"schema\":\"segment\",\"type\":\"terms\"}],\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":true,\"labels\":{\"last_level\":true,\"show\":false,\"truncate\":100,\"values\":true},\"legendPosition\":\"right\",\"type\":\"pie\"},\"title\":\"Activity - pie [twinttweets]\",\"type\":\"pie\"}",
+       "uiStateJSON": "{}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "37cd72e0-bfe4-11e8-961a-d371b24d5d1d",
+     "_type": "visualization",
+     "_source": {
+       "title": "Tweets Count [twinttweet]",
+       "visState": "{\"title\":\"Tweets Count [twinttweet]\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":33}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}}]}",
+       "uiStateJSON": "{}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "149ecbc0-bfe4-11e8-961a-d371b24d5d1d",
+     "_type": "visualization",
+     "_source": {
+       "title": "Word Cloud [twinttweets]",
+       "visState": "{\"title\":\"Word Cloud [twinttweets]\",\"type\":\"tagcloud\",\"params\":{\"scale\":\"linear\",\"orientation\":\"single\",\"minFontSize\":10,\"maxFontSize\":50,\"showLabel\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"username\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}}]}",
+       "uiStateJSON": "{}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "a8d3ee70-bfd9-11e8-8858-bbc566841533",
+     "_type": "visualization",
+     "_source": {
+       "title": "Day-activity [twinttweet]",
+       "visState": "{\"title\":\"Day-activity [twinttweet]\",\"type\":\"histogram\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"position\":\"bottom\",\"scale\":{\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{},\"type\":\"category\"}],\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-3\"},\"legendPosition\":\"right\",\"orderBucketsBySum\":false,\"seriesParams\":[{\"data\":{\"id\":\"1\",\"label\":\"Tweets\"},\"drawLinesBetweenPoints\":true,\"mode\":\"normal\",\"show\":\"true\",\"showCircles\":true,\"type\":\"histogram\",\"valueAxis\":\"ValueAxis-3\"}],\"times\":[],\"type\":\"histogram\",\"valueAxes\":[{\"id\":\"ValueAxis-3\",\"labels\":{\"filter\":false,\"rotate\":0,\"show\":true,\"truncate\":100},\"name\":\"LeftAxis-1\",\"position\":\"left\",\"scale\":{\"mode\":\"normal\",\"type\":\"linear\"},\"show\":true,\"style\":{},\"title\":{\"text\":\"Tweets\"},\"type\":\"value\"}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{\"min\":0,\"max\":23}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"asc\",\"orderBy\":\"_term\",\"customLabel\":\"\"}}]}",
+       "uiStateJSON": "{\"vis\":{\"legendOpen\":true}}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"language\":\"lucene\",\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "8a8bb420-bfd9-11e8-8858-bbc566841533",
+     "_type": "visualization",
+     "_source": {
+       "title": "Week-activity [twinttweet]",
+       "visState": "{\"title\":\"Week-activity [twinttweet]\",\"type\":\"histogram\",\"params\":{\"type\":\"histogram\",\"grid\":{\"categoryLines\":true,\"style\":{\"color\":\"#eee\"},\"valueAxis\":\"ValueAxis-1\"},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100,\"rotate\":0},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Tweets\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"histogram\",\"mode\":\"normal\",\"data\":{\"label\":\"Tweets\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Tweets\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":true,\"extended_bounds\":{},\"customLabel\":\"Days of the week\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"user_id\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"\"}}]}",
+       "uiStateJSON": "{}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"query\":{\"query\":\"NOT _exists_:likes NOT _exists_:retweets NOT _exists_:replies\",\"language\":\"lucene\"},\"filter\":[]}"
+       }
+     }
+   },
+   {
+     "_id": "b45ec590-c267-11e8-bcd4-3956fe930db7",
+     "_type": "visualization",
+     "_source": {
+       "title": "Heat-map [twinttweets]",
+       "visState": "{\"title\":\"Heat-map [twinttweets]\",\"type\":\"heatmap\",\"params\":{\"type\":\"heatmap\",\"addTooltip\":true,\"addLegend\":true,\"enableHover\":true,\"legendPosition\":\"right\",\"times\":[],\"colorsNumber\":10,\"colorSchema\":\"Reds\",\"setColorRange\":false,\"colorsRange\":[{\"from\":0,\"to\":10},{\"from\":10,\"to\":100},{\"from\":100,\"to\":200},{\"from\":200,\"to\":500},{\"from\":500,\"to\":1000},{\"from\":1000,\"to\":2000},{\"from\":2000,\"to\":3000},{\"from\":3000,\"to\":4000},{\"from\":4000,\"to\":5000},{\"from\":7000,\"to\":null}],\"invertColors\":false,\"percentageMode\":false,\"valueAxes\":[{\"show\":false,\"id\":\"ValueAxis-1\",\"type\":\"value\",\"scale\":{\"type\":\"linear\",\"defaultYExtents\":true},\"labels\":{\"show\":false,\"rotate\":270,\"overwriteColor\":false,\"color\":\"#555\"}}]},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"hour\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"histogram\",\"schema\":\"group\",\"params\":{\"field\":\"day\",\"interval\":1,\"min_doc_count\":false,\"extended_bounds\":{\"min\":0,\"max\":2}}}]}",
+       "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"3 - 592\":\"rgb(255,245,240)\",\"592 - 1.180\":\"rgb(254,228,216)\",\"1.180 - 1.769\":\"rgb(253,202,181)\",\"1.769 - 2.357\":\"rgb(252,171,142)\",\"2.357 - 2.945\":\"rgb(252,138,106)\",\"2.945 - 3.534\":\"rgb(251,106,74)\",\"3.534 - 4.122\":\"rgb(241,68,50)\",\"4.122 - 4.711\":\"rgb(217,38,35)\",\"4.711 - 5.299\":\"rgb(188,20,26)\",\"5.299 - 5.887\":\"rgb(152,12,19)\"},\"colors\":{\"3 - 592\":\"#FCEACA\",\"592 - 1.180\":\"#F9E2D2\",\"1.180 - 1.769\":\"#F9BA8F\"}}}",
+       "description": "",
+       "version": 1,
+       "kibanaSavedObjectMeta": {
+         "searchSourceJSON": "{\"index\":\"755f4660-bfee-11e8-9911-5b8e1e9c87c6\",\"filter\":[],\"query\":{\"language\":\"lucene\",\"query\":\"\"}}"
+       }
+     }
+   }
+ ]
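These exports use the pre-7.x `_id`/`_type`/`_source` layout, so they can be loaded through Kibana's Management → Saved Objects import screen, or scripted against the Saved Objects API (still experimental in 6.x). A minimal sketch of the scripted route, assuming Kibana on `localhost:5601` and the third-party `requests` package, which is not in `requirements.txt`:

```python
import json
import requests

KIBANA = "http://localhost:5601"  # assumed local Kibana 6.x

with open("visualizations.json") as f:
    saved_objects = json.load(f)

for obj in saved_objects:
    # POST /api/saved_objects/<type>/<id> creates one object; the export's
    # _source becomes the request's "attributes". The kbn-xsrf header is
    # mandatory, and overwrite=true replaces any existing object.
    resp = requests.post(
        f"{KIBANA}/api/saved_objects/{obj['_type']}/{obj['_id']}",
        headers={"kbn-xsrf": "true"},
        params={"overwrite": "true"},
        json={"attributes": obj["_source"]},
    )
    resp.raise_for_status()
```

The same loop works for `dashboard.json` above, since it follows the identical export layout.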
twitter_scraper/twint_master/extracted-tweets.txt ADDED
@@ -0,0 +1,5 @@
+ '@annieloof Nej, jag håller med. Tänk mer som Mathias Andersson (SD). https://t.co/gSqQDz5N8z'
+ 'Man kan ha synpunkter på en sådan lösning, men den är naturligtvis att föredra framför frigående våldsverkare som fortsätter misshandla sina offer i väntan på fängelse.'
+ 'Är det ont om plats på anstalterna så får man sänka standarden rejält för att få rum med fler interner per kvadratmeter.'
+ 'Döms man för brott, särskilt våldsbrott, ska man vara inlåst från det att domen faller tills straffet är avtjänat. Allt annat är vansinne.'
+ 'Platsbrist? Jaha, vad spelar det för roll? \n\nDet gör mig förbannad och bestört att lösningen på problemet med överfulla fängelser verkar vara att dömda våldsbrottslingar får röra sig fritt i samhället istället för att sitta inlåsta. \n\nhttps://t.co/QDi9rM3kMC'
twitter_scraper/twint_master/requirements.txt ADDED
@@ -0,0 +1,13 @@
+ aiohttp
+ aiodns
+ beautifulsoup4
+ cchardet
+ dataclasses
+ elasticsearch
+ pysocks
+ pandas>=0.23.0
+ aiohttp_socks<=0.4.1
+ schedule
+ geopy
+ fake-useragent
+ googletransx