Spaces:
Runtime error
Runtime error
Update radical_tweet_aggregator.py
Browse files- radical_tweet_aggregator.py +17 -21
radical_tweet_aggregator.py
CHANGED
@@ -3,21 +3,20 @@ import json
|
|
3 |
import os
|
4 |
from datetime import date
|
5 |
from pathlib import Path
|
6 |
-
|
|
|
|
|
|
|
7 |
import unicodedata
|
8 |
|
|
|
9 |
consumer_token = os.getenv('CONSUMER_TOKEN')
|
10 |
consumer_secret = os.getenv('CONSUMER_SECRET')
|
11 |
my_access_token = os.getenv('ACCESS_TOKEN')
|
12 |
my_access_secret = os.getenv('ACCESS_SECRET')
|
13 |
bearer = os.getenv('BEARER')
|
14 |
|
15 |
-
|
16 |
-
import tweepy
|
17 |
-
from googletrans import Translator
|
18 |
-
|
19 |
-
from predictor import predictor
|
20 |
-
|
21 |
class grapher():
|
22 |
"""
|
23 |
A wrapper class used for generating a graph for interactions between users
|
@@ -67,12 +66,7 @@ class grapher():
|
|
67 |
self.graph.vs[node_count - 1]["label"] = node_2_name.capitalize()
|
68 |
node_2 = self.graph.vs[node_count - 1]
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
#print("User one {} - {}, user two {} - {}".format(node_1["label"], str(node_1["id"]),
|
73 |
-
# node_2["label"], str(node_2["id"])))
|
74 |
self.graph.add_edges([(node_1["id"], node_2["id"])])
|
75 |
-
#self.graph.add_edge(node_1_name, node_2_name, weight=weight, relation=relationship) # , attr={""}
|
76 |
|
77 |
def add_node(self, node_name):
|
78 |
"""
|
@@ -92,8 +86,7 @@ class grapher():
|
|
92 |
self.graph.vs[node_count-1]["label"] = node_name.capitalize()
|
93 |
node_1 = self.graph.vs[node_count-1]
|
94 |
|
95 |
-
|
96 |
-
|
97 |
auth = tweepy.OAuth1UserHandler(
|
98 |
consumer_token, consumer_secret,
|
99 |
my_access_token, my_access_secret
|
@@ -108,26 +101,30 @@ client = tweepy.Client(
|
|
108 |
access_token_secret=my_access_secret
|
109 |
)
|
110 |
|
|
|
111 |
class IDPrinter(tweepy.StreamingClient):
|
112 |
-
|
113 |
def on_tweet(self, tweet):
|
114 |
self.translator = Translator()
|
115 |
gc.collect()
|
116 |
if len(tweet.data["text"]) > 100:
|
117 |
-
#tweet = client.get_tweet(id=tweet.id)
|
118 |
if tweet and tweet.data:
|
119 |
|
120 |
if tweet.data["author_id"]:
|
121 |
tweet_data = tweet.data["text"].strip().replace("@", "").replace("\n","")
|
122 |
if tweet_data is not None or tweet != "":
|
123 |
username = client.get_user(id=tweet.author_id).data
|
|
|
124 |
lang = self.translator.detect(tweet_data).lang
|
125 |
|
126 |
if lang == "en":
|
127 |
tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
|
128 |
if tweet_data != None:
|
|
|
129 |
is_extremist = predictor().predict(tweet_data)
|
130 |
print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
|
|
|
|
|
|
|
131 |
if is_extremist != None and is_extremist == 1:
|
132 |
tweets = client.get_users_tweets(id=tweet.author_id, max_results=10)
|
133 |
|
@@ -140,9 +137,9 @@ class IDPrinter(tweepy.StreamingClient):
|
|
140 |
if is_extremist == True:
|
141 |
number_extreme = number_extreme + 1
|
142 |
|
143 |
-
print(number_extreme)
|
144 |
threshold = number_extreme/len(tweets[0]) * 100
|
145 |
-
print("Threshold {}".format(threshold))
|
146 |
if threshold > 1: #
|
147 |
|
148 |
file_name = os.path.join("users","{}-{}-radical_users.txt".format(username,date.today().strftime("%b-%d-%Y")))
|
@@ -150,7 +147,7 @@ class IDPrinter(tweepy.StreamingClient):
|
|
150 |
file_path = Path(file_name)
|
151 |
file_path.touch(exist_ok=True)
|
152 |
|
153 |
-
|
154 |
with open(file_name, 'w') as outfile:
|
155 |
json_to_dump = [{"username": username.id, "threshold": threshold,
|
156 |
"date": date.today().strftime("%b-%d-%Y")}]
|
@@ -158,9 +155,8 @@ class IDPrinter(tweepy.StreamingClient):
|
|
158 |
print("Got user {}".format(username))
|
159 |
|
160 |
gc.collect()
|
161 |
-
# calling the api
|
162 |
-
|
163 |
|
|
|
164 |
while True:
|
165 |
try:
|
166 |
printer = IDPrinter(bearer_token=bearer,wait_on_rate_limit =True,chunk_size=10000)
|
|
|
3 |
import os
|
4 |
from datetime import date
|
5 |
from pathlib import Path
|
6 |
+
import time
|
7 |
+
import tweepy
|
8 |
+
from googletrans import Translator
|
9 |
+
from predictor import predictor
|
10 |
import unicodedata
|
11 |
|
12 |
+
# Twitter API keys
|
13 |
consumer_token = os.getenv('CONSUMER_TOKEN')
|
14 |
consumer_secret = os.getenv('CONSUMER_SECRET')
|
15 |
my_access_token = os.getenv('ACCESS_TOKEN')
|
16 |
my_access_secret = os.getenv('ACCESS_SECRET')
|
17 |
bearer = os.getenv('BEARER')
|
18 |
|
19 |
+
# TODO: Is this needed for mapping the object type after reading the pickle files? If not, remove it.
|
|
|
|
|
|
|
|
|
|
|
20 |
class grapher():
|
21 |
"""
|
22 |
A wrapper class used for generating a graph for interactions between users
|
|
|
66 |
self.graph.vs[node_count - 1]["label"] = node_2_name.capitalize()
|
67 |
node_2 = self.graph.vs[node_count - 1]
|
68 |
|
|
|
|
|
|
|
|
|
69 |
self.graph.add_edges([(node_1["id"], node_2["id"])])
|
|
|
70 |
|
71 |
def add_node(self, node_name):
|
72 |
"""
|
|
|
86 |
self.graph.vs[node_count-1]["label"] = node_name.capitalize()
|
87 |
node_1 = self.graph.vs[node_count-1]
|
88 |
|
89 |
+
# Setup Tweepy API and client objects
|
|
|
90 |
auth = tweepy.OAuth1UserHandler(
|
91 |
consumer_token, consumer_secret,
|
92 |
my_access_token, my_access_secret
|
|
|
101 |
access_token_secret=my_access_secret
|
102 |
)
|
103 |
|
104 |
+
# This class is used for streaming Tweets via Tweepy
|
105 |
class IDPrinter(tweepy.StreamingClient):
|
|
|
106 |
def on_tweet(self, tweet):
|
107 |
self.translator = Translator()
|
108 |
gc.collect()
|
109 |
if len(tweet.data["text"]) > 100:
|
|
|
110 |
if tweet and tweet.data:
|
111 |
|
112 |
if tweet.data["author_id"]:
|
113 |
tweet_data = tweet.data["text"].strip().replace("@", "").replace("\n","")
|
114 |
if tweet_data is not None or tweet != "":
|
115 |
username = client.get_user(id=tweet.author_id).data
|
116 |
+
# Ensure that the Tweet is in English
|
117 |
lang = self.translator.detect(tweet_data).lang
|
118 |
|
119 |
if lang == "en":
|
120 |
tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
|
121 |
if tweet_data != None:
|
122 |
+
# Use Pinpoint to identify if a Tweet is extremist or not
|
123 |
is_extremist = predictor().predict(tweet_data)
|
124 |
print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
|
125 |
+
|
126 |
+
# If a tweet is extremist, go through 10 of that user's posts and identify the percentage
|
127 |
+
# of posts that are extremist
|
128 |
if is_extremist != None and is_extremist == 1:
|
129 |
tweets = client.get_users_tweets(id=tweet.author_id, max_results=10)
|
130 |
|
|
|
137 |
if is_extremist == True:
|
138 |
number_extreme = number_extreme + 1
|
139 |
|
140 |
+
#print(number_extreme)
|
141 |
threshold = number_extreme/len(tweets[0]) * 100
|
142 |
+
#print("Threshold {}".format(threshold))
|
143 |
if threshold > 1: #
|
144 |
|
145 |
file_name = os.path.join("users","{}-{}-radical_users.txt".format(username,date.today().strftime("%b-%d-%Y")))
|
|
|
147 |
file_path = Path(file_name)
|
148 |
file_path.touch(exist_ok=True)
|
149 |
|
150 |
+
# Write the user to a file in the users folder, along with the percentage of extremist posts
|
151 |
with open(file_name, 'w') as outfile:
|
152 |
json_to_dump = [{"username": username.id, "threshold": threshold,
|
153 |
"date": date.today().strftime("%b-%d-%Y")}]
|
|
|
155 |
print("Got user {}".format(username))
|
156 |
|
157 |
gc.collect()
|
|
|
|
|
158 |
|
159 |
+
# Continue indefinitely, collecting Twitter posts
|
160 |
while True:
|
161 |
try:
|
162 |
printer = IDPrinter(bearer_token=bearer,wait_on_rate_limit =True,chunk_size=10000)
|