User1342 committed on
Commit
6421f36
1 Parent(s): ba59577

Update radical_tweet_aggregator.py

Browse files
Files changed (1) hide show
  1. radical_tweet_aggregator.py +17 -21
radical_tweet_aggregator.py CHANGED
@@ -3,21 +3,20 @@ import json
3
  import os
4
  from datetime import date
5
  from pathlib import Path
6
-
 
 
 
7
  import unicodedata
8
 
 
9
  consumer_token = os.getenv('CONSUMER_TOKEN')
10
  consumer_secret = os.getenv('CONSUMER_SECRET')
11
  my_access_token = os.getenv('ACCESS_TOKEN')
12
  my_access_secret = os.getenv('ACCESS_SECRET')
13
  bearer = os.getenv('BEARER')
14
 
15
- import time
16
- import tweepy
17
- from googletrans import Translator
18
-
19
- from predictor import predictor
20
-
21
  class grapher():
22
  """
23
  A wrapper class used for generating a graph for interactions between users
@@ -67,12 +66,7 @@ class grapher():
67
  self.graph.vs[node_count - 1]["label"] = node_2_name.capitalize()
68
  node_2 = self.graph.vs[node_count - 1]
69
 
70
-
71
-
72
- #print("User one {} - {}, user two {} - {}".format(node_1["label"], str(node_1["id"]),
73
- # node_2["label"], str(node_2["id"])))
74
  self.graph.add_edges([(node_1["id"], node_2["id"])])
75
- #self.graph.add_edge(node_1_name, node_2_name, weight=weight, relation=relationship) # , attr={""}
76
 
77
  def add_node(self, node_name):
78
  """
@@ -92,8 +86,7 @@ class grapher():
92
  self.graph.vs[node_count-1]["label"] = node_name.capitalize()
93
  node_1 = self.graph.vs[node_count-1]
94
 
95
- global_oauth1_user_handler = None
96
-
97
  auth = tweepy.OAuth1UserHandler(
98
  consumer_token, consumer_secret,
99
  my_access_token, my_access_secret
@@ -108,26 +101,30 @@ client = tweepy.Client(
108
  access_token_secret=my_access_secret
109
  )
110
 
 
111
  class IDPrinter(tweepy.StreamingClient):
112
-
113
  def on_tweet(self, tweet):
114
  self.translator = Translator()
115
  gc.collect()
116
  if len(tweet.data["text"]) > 100:
117
- #tweet = client.get_tweet(id=tweet.id)
118
  if tweet and tweet.data:
119
 
120
  if tweet.data["author_id"]:
121
  tweet_data = tweet.data["text"].strip().replace("@", "").replace("\n","")
122
  if tweet_data is not None or tweet != "":
123
  username = client.get_user(id=tweet.author_id).data
 
124
  lang = self.translator.detect(tweet_data).lang
125
 
126
  if lang == "en":
127
  tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
128
  if tweet_data != None:
 
129
  is_extremist = predictor().predict(tweet_data)
130
  print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
 
 
 
131
  if is_extremist != None and is_extremist == 1:
132
  tweets = client.get_users_tweets(id=tweet.author_id, max_results=10)
133
 
@@ -140,9 +137,9 @@ class IDPrinter(tweepy.StreamingClient):
140
  if is_extremist == True:
141
  number_extreme = number_extreme + 1
142
 
143
- print(number_extreme)
144
  threshold = number_extreme/len(tweets[0]) * 100
145
- print("Threshold {}".format(threshold))
146
  if threshold > 1: #
147
 
148
  file_name = os.path.join("users","{}-{}-radical_users.txt".format(username,date.today().strftime("%b-%d-%Y")))
@@ -150,7 +147,7 @@ class IDPrinter(tweepy.StreamingClient):
150
  file_path = Path(file_name)
151
  file_path.touch(exist_ok=True)
152
 
153
-
154
  with open(file_name, 'w') as outfile:
155
  json_to_dump = [{"username": username.id, "threshold": threshold,
156
  "date": date.today().strftime("%b-%d-%Y")}]
@@ -158,9 +155,8 @@ class IDPrinter(tweepy.StreamingClient):
158
  print("Got user {}".format(username))
159
 
160
  gc.collect()
161
- # calling the api
162
-
163
 
 
164
  while True:
165
  try:
166
  printer = IDPrinter(bearer_token=bearer,wait_on_rate_limit =True,chunk_size=10000)
 
3
  import os
4
  from datetime import date
5
  from pathlib import Path
6
+ import time
7
+ import tweepy
8
+ from googletrans import Translator
9
+ from predictor import predictor
10
  import unicodedata
11
 
12
+ # Twitter API keys
13
  consumer_token = os.getenv('CONSUMER_TOKEN')
14
  consumer_secret = os.getenv('CONSUMER_SECRET')
15
  my_access_token = os.getenv('ACCESS_TOKEN')
16
  my_access_secret = os.getenv('ACCESS_SECRET')
17
  bearer = os.getenv('BEARER')
18
 
19
+ # TODO: Is this needed for mapping the object type after reading the pickle files? If not, remove it.
 
 
 
 
 
20
  class grapher():
21
  """
22
  A wrapper class used for generating a graph for interactions between users
 
66
  self.graph.vs[node_count - 1]["label"] = node_2_name.capitalize()
67
  node_2 = self.graph.vs[node_count - 1]
68
 
 
 
 
 
69
  self.graph.add_edges([(node_1["id"], node_2["id"])])
 
70
 
71
  def add_node(self, node_name):
72
  """
 
86
  self.graph.vs[node_count-1]["label"] = node_name.capitalize()
87
  node_1 = self.graph.vs[node_count-1]
88
 
89
+ # Setup Tweepy API and client objects
 
90
  auth = tweepy.OAuth1UserHandler(
91
  consumer_token, consumer_secret,
92
  my_access_token, my_access_secret
 
101
  access_token_secret=my_access_secret
102
  )
103
 
104
+ # This class is used for streaming Tweets via Tweepy
105
  class IDPrinter(tweepy.StreamingClient):
 
106
  def on_tweet(self, tweet):
107
  self.translator = Translator()
108
  gc.collect()
109
  if len(tweet.data["text"]) > 100:
 
110
  if tweet and tweet.data:
111
 
112
  if tweet.data["author_id"]:
113
  tweet_data = tweet.data["text"].strip().replace("@", "").replace("\n","")
114
  if tweet_data is not None or tweet != "":
115
  username = client.get_user(id=tweet.author_id).data
116
+ # Ensure that Tweet is in English
117
  lang = self.translator.detect(tweet_data).lang
118
 
119
  if lang == "en":
120
  tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
121
  if tweet_data != None:
122
+ # Use Pinpoint to identify if a Tweet is extremist or not
123
  is_extremist = predictor().predict(tweet_data)
124
  print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
125
+
126
+ # If a tweet is extremist, go through 10 of that user's posts and identify the percentage
127
+ # of posts that are extremist
128
  if is_extremist != None and is_extremist == 1:
129
  tweets = client.get_users_tweets(id=tweet.author_id, max_results=10)
130
 
 
137
  if is_extremist == True:
138
  number_extreme = number_extreme + 1
139
 
140
+ #print(number_extreme)
141
  threshold = number_extreme/len(tweets[0]) * 100
142
+ #print("Threshold {}".format(threshold))
143
  if threshold > 1: #
144
 
145
  file_name = os.path.join("users","{}-{}-radical_users.txt".format(username,date.today().strftime("%b-%d-%Y")))
 
147
  file_path = Path(file_name)
148
  file_path.touch(exist_ok=True)
149
 
150
+ # Write user to a file in the user folder with the percentage of extremist posts
151
  with open(file_name, 'w') as outfile:
152
  json_to_dump = [{"username": username.id, "threshold": threshold,
153
  "date": date.today().strftime("%b-%d-%Y")}]
 
155
  print("Got user {}".format(username))
156
 
157
  gc.collect()
 
 
158
 
159
+ # Continue indefinitely, collecting Twitter posts
160
  while True:
161
  try:
162
  printer = IDPrinter(bearer_token=bearer,wait_on_rate_limit =True,chunk_size=10000)