Spaces:
Runtime error
Runtime error
Upload radical_tweet_aggregator.py
Browse files- radical_tweet_aggregator.py +172 -0
radical_tweet_aggregator.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gc
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from datetime import date
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import unicodedata
|
8 |
+
|
9 |
+
# Twitter API credentials, read from the process environment.
# Each name is None when the corresponding variable is not set.
(
    consumer_token,
    consumer_secret,
    my_access_token,
    my_access_secret,
    bearer,
) = (
    os.environ.get(variable)
    for variable in (
        'CONSUMER_TOKEN',
        'CONSUMER_SECRET',
        'ACCESS_TOKEN',
        'ACCESS_SECRET',
        'BEARER',
    )
)
|
14 |
+
|
15 |
+
import time
|
16 |
+
import tweepy
|
17 |
+
from googletrans import Translator
|
18 |
+
|
19 |
+
from predictor import predictor
|
20 |
+
|
21 |
+
class grapher():
    """
    A wrapper class used for generating a graph for interactions between users.

    Every vertex carries two attributes: "label", the capitalized node name
    used for look-ups, and "id", the vertex index assigned when it was created.
    """
    # NOTE(review): ``Graph`` is not imported anywhere in the visible file;
    # presumably it is ``igraph.Graph`` - confirm and import it at file level.
    graph = None

    def __init__(self):
        """
        Constructor. Creates the empty underlying graph.
        """
        self.graph = Graph()

    def _find_node(self, node_name):
        """
        Return the first vertex whose label matches the capitalized name.

        :param node_name: name of the node to look for
        :return: the matching vertex, or None when no vertex has that label
        """
        label = node_name.capitalize()
        for node in self.graph.vs:
            if node["label"] == label:
                return node
        return None

    def _get_or_create_node(self, node_name):
        """
        Return the vertex for a name, appending a new vertex when it is missing.

        :param node_name: name of the node
        :return: the existing or newly created vertex
        """
        node = self._find_node(node_name)
        if node is not None:
            return node

        self.graph.add_vertices(1)
        # The new vertex is the last one, so its index doubles as its id.
        index = self.graph.vcount() - 1
        node = self.graph.vs[index]
        node["id"] = index
        node["label"] = node_name.capitalize()
        return node

    def add_edge_wrapper(self, node_1_name, node_2_name, weight=1, relationship=None):
        """
        A wrapper function used to add an edge between two nodes, creating
        either node first when it does not exist yet.

        :param node_1_name: from
        :param node_2_name: to
        :param weight: accepted for interface compatibility; currently not stored
        :param relationship: accepted for interface compatibility; currently not stored
        :return: None
        """
        node_1 = self._get_or_create_node(node_1_name)
        node_2 = self._get_or_create_node(node_2_name)
        self.graph.add_edges([(node_1["id"], node_2["id"])])

    def add_node(self, node_name):
        """
        A wrapper function that adds a node with no edges to the graph.
        Nothing happens when a node with that name already exists.

        :param node_name: name of the node to add
        :return: None
        """
        self._get_or_create_node(node_name)
|
94 |
+
|
95 |
+
# Module-level placeholder; nothing in the visible file assigns or reads it.
global_oauth1_user_handler = None

# tweepy.API handle authenticated with an OAuth 1 user handler built from
# the consumer and access credentials.
auth = tweepy.OAuth1UserHandler(
    consumer_token,
    consumer_secret,
    my_access_token,
    my_access_secret,
)
api = tweepy.API(auth)

# tweepy.Client handle that receives the bearer token as well as the
# consumer and access credentials.
_client_credentials = {
    "bearer_token": bearer,
    "consumer_key": consumer_token,
    "consumer_secret": consumer_secret,
    "access_token": my_access_token,
    "access_token_secret": my_access_secret,
}
client = tweepy.Client(**_client_credentials)
|
110 |
+
|
111 |
+
class IDPrinter(tweepy.StreamingClient):
    """
    Streaming client that screens incoming tweets with the project's
    ``predictor``.

    When a sufficiently long English tweet is flagged, a sample of the
    author's own tweets is scored too; if enough of them are flagged, a JSON
    report for that author is written into the report directory.
    """

    # Tweets whose text is not longer than this many characters are ignored.
    MIN_TWEET_LENGTH = 100
    # Number of the author's tweets requested for the follow-up scoring.
    TIMELINE_SAMPLE_SIZE = 10
    # Percentage of flagged timeline tweets above which the author is reported.
    REPORT_THRESHOLD_PERCENT = 1
    # Directory that receives the per-user report files.
    REPORT_DIRECTORY = "users"

    def on_tweet(self, tweet):
        """
        Handle one tweet delivered by the stream.

        :param tweet: the tweet object handed over by the streaming client
        """
        self.translator = Translator()
        gc.collect()
        self._process_tweet(tweet)
        gc.collect()

    def _process_tweet(self, tweet):
        """
        Screen a single streamed tweet and report its author when warranted.

        :param tweet: the tweet object handed over by the streaming client
        """
        # Validate the payload before indexing into it.
        if not tweet or not tweet.data:
            return
        raw_text = tweet.data.get("text") or ""
        if len(raw_text) <= self.MIN_TWEET_LENGTH:
            return
        if not tweet.data.get("author_id"):
            return

        tweet_data = raw_text.strip().replace("@", "").replace("\n", "")
        if not tweet_data:
            return

        # Detect the language first so that no user look-up is spent on
        # tweets that are going to be discarded anyway.
        lang = self.translator.detect(tweet_data).lang
        if lang != "en":
            return

        # Drop everything that cannot be represented in ASCII.
        tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
        if not tweet_data:
            return

        username = client.get_user(id=tweet.author_id).data

        # One predictor instance serves this tweet and the author's timeline.
        # NOTE(review): assumes predictor instances can be reused across calls
        # to predict() - confirm against the predictor module.
        classifier = predictor()
        is_extremist = classifier.predict(tweet_data)
        print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
        if is_extremist is None or not is_extremist == 1:
            return

        threshold = self._extremist_percentage(classifier, tweet.author_id)
        if threshold is None:
            return
        print("Threshold {}".format(threshold))
        if threshold > self.REPORT_THRESHOLD_PERCENT:
            self._write_report(username, threshold)

    def _extremist_percentage(self, classifier, author_id):
        """
        Score a sample of the author's tweets.

        :param classifier: object whose ``predict(text)`` flags a text with 1
        :param author_id: id of the user whose tweets are fetched
        :return: percentage (0-100) of fetched tweets that were flagged, or
                 None when no tweets were returned for the user
        """
        response = client.get_users_tweets(id=author_id, max_results=self.TIMELINE_SAMPLE_SIZE)
        tweets = response.data
        if not tweets:
            return None

        number_extreme = 0
        for users_tweet in tweets:
            if users_tweet.text is None:
                continue
            verdict = classifier.predict(users_tweet.text)
            if verdict is not None and verdict == 1:
                number_extreme = number_extreme + 1

        print(number_extreme)
        # Divide by the number of fetched tweets, not by the size of one tweet.
        return number_extreme / len(tweets) * 100

    def _write_report(self, username, threshold):
        """
        Write the JSON report for a flagged user.

        :param username: user object returned by the user look-up; its string
                         form goes into the file name and its ``id`` into the
                         report
        :param threshold: percentage of flagged tweets to record
        """
        if username is None:
            # The look-up returned no user data, so there is nothing to report.
            return

        today = date.today().strftime("%b-%d-%Y")
        file_path = Path(self.REPORT_DIRECTORY) / "{}-{}-radical_users.txt".format(username, today)
        print("User {} was found to be extremist".format(username))

        # The report directory may not exist yet on a fresh deployment.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, 'w') as outfile:
            # The "username" key holds the user's id, as in earlier reports.
            json_to_dump = [{"username": username.id, "threshold": threshold,
                             "date": today}]
            json.dump(json_to_dump, outfile, indent=4)
        print("Got user {}".format(username))
|
161 |
+
# calling the api
|
162 |
+
|
163 |
+
|
164 |
+
# Seconds to wait before reconnecting after the stream has failed.
RETRY_DELAY_SECONDS = 900

# Keep the stream alive: whenever it fails, report the error, wait, and
# start over with a fresh client.
while True:
    try:
        printer = IDPrinter(bearer_token=bearer, wait_on_rate_limit=True, chunk_size=10000)
        # NOTE(review): this rule is added on every reconnect and the stream
        # below is started with sample(); presumably rules only affect
        # filter() - confirm whether this call is needed at all.
        printer.add_rules(tweepy.StreamRule(value="en", tag="lang", id="lang-rule"))
        printer.sample(expansions=["author_id", "geo.place_id"], threaded=False)
        print("-" * 20)
        gc.collect()
    except Exception as error:
        # Only ordinary errors are retried; KeyboardInterrupt and SystemExit
        # propagate so the process can still be stopped.
        print("Stream failed with {!r}; retrying in {} seconds".format(error, RETRY_DELAY_SECONDS))
        time.sleep(RETRY_DELAY_SECONDS)
|