User1342 committed on
Commit
a814b1b
1 Parent(s): 0366733

Upload radical_tweet_aggregator.py

Browse files
Files changed (1) hide show
  1. radical_tweet_aggregator.py +172 -0
radical_tweet_aggregator.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gc
2
+ import json
3
+ import os
4
+ from datetime import date
5
+ from pathlib import Path
6
+
7
+ import unicodedata
8
+
9
# Twitter API credentials, read from the process environment.
# Each name is None when the corresponding variable is not set.
consumer_token = os.environ.get('CONSUMER_TOKEN')
consumer_secret = os.environ.get('CONSUMER_SECRET')
my_access_token = os.environ.get('ACCESS_TOKEN')
my_access_secret = os.environ.get('ACCESS_SECRET')
bearer = os.environ.get('BEARER')
14
+
15
+ import time
16
+ import tweepy
17
+ from googletrans import Translator
18
+
19
+ from predictor import predictor
20
+
21
class grapher():
    """
    A wrapper class used for generating a graph for interactions between users.

    Every vertex carries two attributes: "label" (the node name passed through
    str.capitalize()) and "id" (the vertex count minus one at the moment the
    vertex was appended). Lookups are done by label, edges are added by id.
    """
    graph = None

    def __init__(self):
        """
        Constructor. Creates the empty underlying graph.
        """
        # NOTE(review): Graph is not imported anywhere in the visible part of
        # this file; presumably igraph.Graph -- confirm and add the import.
        self.graph = Graph()

    def _get_or_create_vertex(self, name):
        """
        Return the vertex whose label matches the capitalised name, appending
        a new vertex when no such label exists yet.
        :param name: raw node name; compared after str.capitalize()
        :return: the existing or newly created vertex
        """
        label = name.capitalize()
        for vertex in self.graph.vs:
            if vertex["label"] == label:
                # Labels are only ever created here, so the first hit is the only one.
                return vertex

        self.graph.add_vertices(1)
        index = self.graph.vcount() - 1
        vertex = self.graph.vs[index]
        vertex["id"] = index
        vertex["label"] = label
        return vertex

    def add_edge_wrapper(self, node_1_name, node_2_name, weight=1, relationship=None):
        """
        A wrapper function used to add an edge between two named nodes,
        creating either node first when it is not in the graph yet.
        :param node_1_name: from
        :param node_2_name: to
        :param weight: accepted for interface compatibility; currently not stored
        :param relationship: accepted for interface compatibility; currently not stored
        :return: None
        """
        source = self._get_or_create_vertex(node_1_name)
        target = self._get_or_create_vertex(node_2_name)
        self.graph.add_edges([(source["id"], target["id"])])

    def add_node(self, node_name):
        """
        A wrapper function that adds a node with no edges to the graph.
        Does nothing when a node with the same capitalised name already exists.
        :param node_name: raw node name
        :return: None
        """
        self._get_or_create_vertex(node_name)
94
+
95
# Module-level placeholder; nothing in the visible part of the file assigns it.
global_oauth1_user_handler = None

# OAuth 1.0a user-context handler built from the four credentials above,
# and the tweepy.API handle that uses it.
auth = tweepy.OAuth1UserHandler(
    consumer_key=consumer_token,
    consumer_secret=consumer_secret,
    access_token=my_access_token,
    access_token_secret=my_access_secret,
)
api = tweepy.API(auth)

# tweepy.Client handle; receives the bearer token as well as the OAuth 1.0a
# credentials. The streaming handler uses it for user and timeline lookups.
client = tweepy.Client(
    access_token_secret=my_access_secret,
    access_token=my_access_token,
    consumer_secret=consumer_secret,
    consumer_key=consumer_token,
    bearer_token=bearer,
)
110
+
111
class IDPrinter(tweepy.StreamingClient):
    """
    Streaming client that screens incoming tweets with the project's predictor.

    For each sufficiently long English tweet the text is classified; when the
    prediction equals 1 the author's recent tweets are classified as well and,
    if the flagged share exceeds the threshold, a JSON record for the author
    is written under the output directory.
    """

    # Tweets whose raw text is not longer than this many characters are skipped.
    _MIN_TEXT_LENGTH = 100
    # Number of recent tweets requested when profiling a flagged author.
    _HISTORY_SIZE = 10
    # An author is recorded when the flagged percentage is strictly above this.
    _THRESHOLD_PERCENT = 1
    # Directory that receives one record file per flagged author and day.
    _OUTPUT_DIR = "users"

    def on_tweet(self, tweet):
        """
        Handle one tweet delivered by the stream.
        :param tweet: the tweet object passed in by the streaming client
        :return: None
        """
        gc.collect()
        try:
            self._process_tweet(tweet)
        finally:
            # Collect even when processing raised, before the error propagates.
            gc.collect()

    @staticmethod
    def _is_flagged(prediction):
        """Return a truthy value when the predictor output is present and equals 1."""
        return prediction is not None and prediction == 1

    def _process_tweet(self, tweet):
        """Screen a single tweet and, if warranted, profile and record its author."""
        # Validate the payload before indexing into it.
        if not tweet or not tweet.data:
            return
        raw_text = tweet.data.get("text")
        if not raw_text or len(raw_text) <= self._MIN_TEXT_LENGTH:
            return
        if not tweet.data.get("author_id"):
            return

        text = raw_text.strip().replace("@", "").replace("\n", "")
        if not text:
            return

        # Build the translator once per client instead of once per tweet.
        if getattr(self, "translator", None) is None:
            self.translator = Translator()
        if self.translator.detect(text).lang != "en":
            return

        # Drop everything that has no ASCII representation after decomposition.
        text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode()
        if not text:
            return

        # Resolved only for tweets that passed the language filter, so
        # discarded tweets do not cost a user lookup.
        username = client.get_user(id=tweet.author_id).data
        classifier = predictor()
        is_extremist = classifier.predict(text)
        print("user {} post extremist {} - message: {}".format(username, is_extremist, str(text)))
        if not self._is_flagged(is_extremist):
            return

        response = client.get_users_tweets(id=tweet.author_id, max_results=self._HISTORY_SIZE)
        # The response data is None when no tweets are returned.
        history = response.data or []
        number_extreme = sum(
            1
            for users_tweet in history
            if users_tweet.text is not None
            and self._is_flagged(classifier.predict(users_tweet.text))
        )
        print(number_extreme)
        if not history:
            return

        # Percentage of the fetched tweets that were flagged.
        threshold = number_extreme / len(history) * 100
        print("Threshold {}".format(threshold))
        if threshold <= self._THRESHOLD_PERCENT:
            return

        if username is None:
            # Without user data there is no id to store in the record.
            print("Could not resolve author {}; skipping".format(tweet.author_id))
            return
        self._record_user(username, threshold)

    def _record_user(self, username, threshold):
        """Write the JSON record for a flagged author, creating the directory if needed."""
        today = date.today().strftime("%b-%d-%Y")
        file_name = os.path.join(self._OUTPUT_DIR, "{}-{}-radical_users.txt".format(username, today))
        print("User {} was found to be extremist".format(username))
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)

        with open(file_name, 'w') as outfile:
            json_to_dump = [{"username": username.id, "threshold": threshold,
                             "date": today}]
            json.dump(json_to_dump, outfile, indent=4)
        print("Got user {}".format(username))
161
+ # calling the api
162
+
163
+
164
# Seconds to wait before reconnecting after the stream raised an error.
RETRY_DELAY_SECONDS = 900

# Run the sample stream forever, rebuilding the client whenever it stops.
while True:
    try:
        printer = IDPrinter(bearer_token=bearer, wait_on_rate_limit=True, chunk_size=10000)
        # NOTE(review): the rule is re-submitted on every reconnect, and stream
        # rules are documented for the filtered stream -- confirm that this
        # call has any effect on sample().
        printer.add_rules(tweepy.StreamRule(value="en", tag="lang", id="lang-rule"))
        printer.sample(expansions=["author_id", "geo.place_id"], threaded=False)
        print("-" * 20)
        gc.collect()
    except Exception as error:
        # Exception (not a bare except) so Ctrl+C and SystemExit still stop
        # the script; the failure is reported instead of silently dropped.
        print("Stream stopped with {!r}; retrying in {} seconds".format(error, RETRY_DELAY_SECONDS))
        time.sleep(RETRY_DELAY_SECONDS)