James Stevenson committed
Commit
32a03a4
1 Parent(s): b264f40

initial commit

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Pinpoint/Aggregator_NGram.py +103 -0
  2. Pinpoint/Aggregator_TfIdf.py +41 -0
  3. Pinpoint/Aggregator_Word2Vec.py +32 -0
  4. Pinpoint/Aggregator_WordingChoice.py +51 -0
  5. Pinpoint/ConfigManager.py +21 -0
  6. Pinpoint/FeatureExtraction.py +795 -0
  7. Pinpoint/Grapher.py +60 -0
  8. Pinpoint/Logger.py +21 -0
  9. Pinpoint/RandomForest.py +374 -0
  10. Pinpoint/Sanitizer.py +131 -0
  11. Pinpoint/Serializer.py +20 -0
  12. Pinpoint/__pycache__/Aggregator_NGram.cpython-310.pyc +0 -0
  13. Pinpoint/__pycache__/Aggregator_NGram.cpython-36.pyc +0 -0
  14. Pinpoint/__pycache__/Aggregator_NGram.cpython-38.pyc +0 -0
  15. Pinpoint/__pycache__/Aggregator_TfIdf.cpython-310.pyc +0 -0
  16. Pinpoint/__pycache__/Aggregator_TfIdf.cpython-36.pyc +0 -0
  17. Pinpoint/__pycache__/Aggregator_TfIdf.cpython-38.pyc +0 -0
  18. Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-310.pyc +0 -0
  19. Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-36.pyc +0 -0
  20. Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-38.pyc +0 -0
  21. Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-310.pyc +0 -0
  22. Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-36.pyc +0 -0
  23. Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-38.pyc +0 -0
  24. Pinpoint/__pycache__/FeatureExtraction.cpython-310.pyc +0 -0
  25. Pinpoint/__pycache__/FeatureExtraction.cpython-36.pyc +0 -0
  26. Pinpoint/__pycache__/FeatureExtraction.cpython-38.pyc +0 -0
  27. Pinpoint/__pycache__/Grapher.cpython-310.pyc +0 -0
  28. Pinpoint/__pycache__/Grapher.cpython-36.pyc +0 -0
  29. Pinpoint/__pycache__/Grapher.cpython-38.pyc +0 -0
  30. Pinpoint/__pycache__/Logger.cpython-310.pyc +0 -0
  31. Pinpoint/__pycache__/Logger.cpython-36.pyc +0 -0
  32. Pinpoint/__pycache__/Logger.cpython-38.pyc +0 -0
  33. Pinpoint/__pycache__/RandomForest.cpython-310.pyc +0 -0
  34. Pinpoint/__pycache__/RandomForest.cpython-36.pyc +0 -0
  35. Pinpoint/__pycache__/RandomForest.cpython-38.pyc +0 -0
  36. Pinpoint/__pycache__/Sanitizer.cpython-310.pyc +0 -0
  37. Pinpoint/__pycache__/Sanitizer.cpython-36.pyc +0 -0
  38. Pinpoint/__pycache__/Sanitizer.cpython-38.pyc +0 -0
  39. Pinpoint/__pycache__/predictor.cpython-38.pyc +0 -0
  40. Pinpoint/far-right-core.py +65 -0
  41. app.py +356 -0
  42. outputs/sanitized_text.txt +0 -0
  43. outputs/users.json +1 -0
  44. predictor.py +78 -0
  45. python-streamer.py +173 -0
  46. sign-in.png +0 -0
  47. swears/VIOLENT_TERRORIST_WORDS.txt +1 -0
  48. swears/bad_Words_list.txt +547 -0
  49. swears/badwords.txt +451 -0
  50. swears/cmu-bad-words.txt +1383 -0
Pinpoint/Aggregator_NGram.py ADDED
@@ -0,0 +1,103 @@
+ from sklearn.feature_extraction.text import CountVectorizer
+
+ from Pinpoint.Logger import *
+
+ c_vec = CountVectorizer(ngram_range=(1, 5))
+
+
+ class n_gram_aggregator():
+     """
+     This class is used to retrieve the most common ngrams for a given dataset corpus.
+     """
+
+     def _get_average_ngram_count(self, n_grams_dict):
+         """
+         Takes a dict of ngrams and identifies the average weighting
+         :param n_grams_dict:
+         :return: the average count across all ngrams
+         """
+         all_count = []
+         for n_gram in n_grams_dict:
+             ng_count = n_grams_dict[n_gram]
+             all_count.append(ng_count)
+
+         average_count = sum(all_count) / len(all_count)
+         return average_count
+
+     def _get_all_ngrams(self, data):
+         """
+         Returns all ngrams (tri, bi, and uni) for a given piece of text
+         :param data:
+         :return:
+         """
+
+         if type(data) is not list:
+             data = [data]
+
+         # input to fit_transform() should be an iterable with strings
+         ngrams = c_vec.fit_transform(data)
+
+         # needs to happen after fit_transform()
+         vocab = c_vec.vocabulary_
+
+         count_values = ngrams.toarray().sum(axis=0)
+
+         # output n-grams
+         uni_grams = {}
+         bi_grams = {}
+         tri_grams = {}
+
+         for ng_count, ng_text in sorted([(count_values[i], k) for k, i in vocab.items()], reverse=True):
+             sentence_length = len(ng_text.split(" "))
+
+             if sentence_length == 3:
+                 tri_grams[ng_text] = ng_count
+             elif sentence_length == 2:
+                 bi_grams[ng_text] = ng_count
+             elif sentence_length == 1:
+                 uni_grams[ng_text] = ng_count
+
+         return uni_grams, bi_grams, tri_grams
+
+     def _get_popular_ngrams(self, ngrams_dict):
+         """
+         Returns ngrams for a given piece of text that are the most popular (i.e. their weighting is
+         above the average ngram weighting)
+         :param ngrams_dict:
+         :return:
+         """
+         average_count = self._get_average_ngram_count(ngrams_dict)
+
+         popular_ngrams = {}
+         for n_gram in ngrams_dict:
+             ng_count = ngrams_dict[n_gram]
+
+             if ng_count >= average_count:
+                 popular_ngrams[n_gram] = ng_count
+         return popular_ngrams
+
+     def get_ngrams(self, data=None, file_name_to_read=None):
+         """
+         Wrapper function for returning uni, bi, and tri grams that are the most popular (above the average weighting
+         in a given piece of text).
+         :param data:
+         :param file_name_to_read:
+         :return:
+         """
+         logger().print_message("Getting Ngrams")
+
+         if data is None and file_name_to_read is None:
+             raise Exception("No data supplied to retrieve n_grams")
+
+         if data is None and file_name_to_read is not None:
+             with open(file_name_to_read, 'r') as file_to_read:
+                 data = file_to_read.read()
+
+         uni_grams, bi_grams, tri_grams = self._get_all_ngrams(data)
+
+         popular_uni_grams = list(self._get_popular_ngrams(uni_grams).keys())
+         popular_bi_grams = list(self._get_popular_ngrams(bi_grams).keys())
+         popular_tri_grams = list(self._get_popular_ngrams(tri_grams).keys())
+
+         return popular_uni_grams, popular_bi_grams, popular_tri_grams
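
A minimal usage sketch for this aggregator (illustrative only, not part of the commit; the corpus strings are made up):

    from Pinpoint.Aggregator_NGram import n_gram_aggregator

    corpus = ["the quick brown fox jumps", "the quick brown dog sleeps"]
    uni_grams, bi_grams, tri_grams = n_gram_aggregator().get_ngrams(data=corpus)
    print(uni_grams)  # unigrams with above-average counts: 'the', 'quick', 'brown'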
Pinpoint/Aggregator_TfIdf.py ADDED
@@ -0,0 +1,41 @@
+ from sklearn.feature_extraction.text import TfidfVectorizer
+
+ from Pinpoint.Logger import *
+
+
+ class tf_idf_aggregator():
+     """
+     A wrapper class around scikit-learn for retrieving TF-IDF scores.
+     """
+
+     def get_tf_idf_scores(self, ngrams_vocabulary, corpus_data=None, file_name_to_read=None):
+         """
+         Used to generate TF-IDF scores based on a vocabulary of ngrams and a data corpus.
+         :param ngrams_vocabulary:
+         :param corpus_data:
+         :param file_name_to_read:
+         :return: a dictionary mapping each ngram to its score
+         """
+         logger.print_message("Getting TF-IDF scores")
+
+         if corpus_data is None and file_name_to_read is None:
+             raise Exception("No data supplied to retrieve n_grams")
+
+         if corpus_data is None and file_name_to_read is not None:
+             with open(file_name_to_read, 'r') as file_to_read:
+                 corpus_data = file_to_read.read()
+
+         # ngram_range covers trigrams since the supplied vocabulary includes them
+         tfidf = TfidfVectorizer(vocabulary=ngrams_vocabulary, stop_words='english', ngram_range=(1, 3))
+         tfs = tfidf.fit_transform([corpus_data])
+
+         # Note: on scikit-learn >= 1.0 this method is get_feature_names_out()
+         feature_names = tfidf.get_feature_names()
+         rows, cols = tfs.nonzero()
+
+         dict_of_scores = {}
+
+         for row, col in zip(rows, cols):
+             dict_of_scores[feature_names[col]] = tfs[row, col]
+             logger.print_message("{}: {}".format(feature_names[col], tfs[row, col]))
+
+         return dict_of_scores
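
A hedged sketch of how this composes with the n-gram aggregator, mirroring how FeatureExtraction.py uses it later in this commit (the corpus text is made up):

    from Pinpoint.Aggregator_NGram import n_gram_aggregator
    from Pinpoint.Aggregator_TfIdf import tf_idf_aggregator

    corpus = "the system failed and the system was attacked"
    uni, bi, tri = n_gram_aggregator().get_ngrams(data=corpus)
    scores = tf_idf_aggregator().get_tf_idf_scores(uni + bi + tri, corpus_data=corpus)
    most_important = sorted(scores, key=scores.get, reverse=True)[:5]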
Pinpoint/Aggregator_Word2Vec.py ADDED
@@ -0,0 +1,32 @@
+ from gensim.models import Word2Vec
+
+
+ class word_2_vec_aggregator():
+     """
+     A wrapper class around gensim used for creating a word2vec model
+     """
+
+     def get_model(self, list_of_sentences):
+         """
+         Used to retrieve the model
+         :param list_of_sentences:
+         :return: the model
+         """
+
+         list_of_sentences_in_nested_list = []
+
+         for sentence in list_of_sentences:
+
+             # Skip unigrams
+             if " " not in sentence:
+                 continue
+
+             list_of_sentences_in_nested_list.append(sentence.split(" "))
+
+         model = Word2Vec(min_count=1, window=5)  # default vector size of 100, window size of 5
+         model.build_vocab(list_of_sentences_in_nested_list)  # prepare the model vocabulary
+         model.model_trimmed_post_training = False
+         model.train(list_of_sentences_in_nested_list, total_examples=model.corpus_count,
+                     epochs=model.epochs)  # train the word vectors
+
+         return model
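
An illustrative sketch (the phrases are made up; unigrams are skipped, each remaining phrase is split into tokens, and vectors default to 100 dimensions):

    from Pinpoint.Aggregator_Word2Vec import word_2_vec_aggregator

    phrases = ["violent words", "capital letters", "violent capital words"]
    model = word_2_vec_aggregator().get_model(phrases)
    print(len(model.wv["violent"]))  # 100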
Pinpoint/Aggregator_WordingChoice.py ADDED
@@ -0,0 +1,51 @@
+ import os
+
+
+ class wording_choice_aggregator():
+     """
+     A class used for retrieving frequencies based on wording in a message
+     """
+
+     def get_frequency_of_capatalised_words(self, text):
+         """
+         A function used to retrieve the frequency of capitalised words in a dataset
+         :param text:
+         :return: the frequency of capitalised words in a dataset
+         """
+         number_of_capatalised_words = 0
+         for word in text.split(" "):
+             if word.isupper():
+                 number_of_capatalised_words = number_of_capatalised_words + 1
+
+         total_number_of_words = len(text.split(" "))
+         frequency = number_of_capatalised_words / total_number_of_words
+
+         return frequency
+
+     def get_frequency_of_violent_or_curse_words(self, text, violent_words_datasets_location):
+         """
+         A function used for retrieving the frequency of violent words in a dataset
+         :param text:
+         :return: the frequency of violent words in a dataset
+         """
+
+         dataset_folder = os.path.join(os.getcwd(), violent_words_datasets_location)
+
+         list_of_violent_or_curse_words = []
+
+         # Retrieves all words from all of the files in the violent or curse word datasets
+         for filename in os.listdir(dataset_folder):
+             with open(os.path.join(dataset_folder, filename), 'r') as file:
+
+                 for line in file.readlines():
+                     line = line.strip().replace("\n", " ").replace(",", "")
+                     list_of_violent_or_curse_words.append(line)
+
+         number_of_swear_words = 0
+         for word in text.split(" "):
+             if word in list_of_violent_or_curse_words:
+                 number_of_swear_words = number_of_swear_words + 1
+
+         total_number_of_words = len(text.split(" "))
+         frequency = number_of_swear_words / total_number_of_words
+         return frequency
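
A quick sketch of the capitalisation feature (the value is easy to verify by hand):

    from Pinpoint.Aggregator_WordingChoice import wording_choice_aggregator

    freq = wording_choice_aggregator().get_frequency_of_capatalised_words("THIS is BAD")
    print(freq)  # 2 of 3 words are fully upper case -> 0.666...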
Pinpoint/ConfigManager.py ADDED
@@ -0,0 +1,21 @@
+ import json
+ from pathlib import Path
+
+
+ class ConfigManager:
+     """
+     A wrapper class used to abstract Twitter config options.
+     """
+
+     @staticmethod
+     def _get_config(config_path):
+         if not Path(config_path).is_file():
+             raise Exception("The {} config file was not found.".format(config_path))
+
+         with open(config_path) as json_file:
+             twitter_config_dict = json.load(json_file)
+
+         return twitter_config_dict
+
+     @staticmethod
+     def getTwitterConfig():
+         return ConfigManager._get_config("twitterConfig.json")
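
ConfigManager expects a twitterConfig.json in the working directory. A hedged sketch of what such a file might contain; the key names shown are assumptions about a typical Twitter API config, not taken from this commit:

    {
        "consumer_key": "...",
        "consumer_secret": "...",
        "access_token": "...",
        "access_token_secret": "..."
    }

and loading it:

    from Pinpoint.ConfigManager import ConfigManager

    config = ConfigManager.getTwitterConfig()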
Pinpoint/FeatureExtraction.py ADDED
@@ -0,0 +1,795 @@
+ import csv
+ import gc
+ import json
+ import os
+ import pickle
+ import re
+ import shutil
+ import time
+
+ import numpy
+ import pandas as pd
+ import uuid
+ from scipy.spatial import distance
+
+ from Pinpoint.Aggregator_NGram import n_gram_aggregator
+ from Pinpoint.Aggregator_TfIdf import tf_idf_aggregator
+ from Pinpoint.Aggregator_Word2Vec import word_2_vec_aggregator
+ from Pinpoint.Aggregator_WordingChoice import wording_choice_aggregator
+ from Pinpoint.Grapher import grapher
+ from Pinpoint.Logger import logger
+ from Pinpoint.Sanitizer import sanitization, sys
+
+
+ class feature_extraction():
+     """
+     This class is used to wrap the functionality of aggregating tweets from CSV files and extracting features
+     pertinent to building a random forest extremist classifier.
+     """
+
+     # A graph used to store connections between aggregated users
+     graph = grapher()
+     archived_graphs = []  # an archive of the previous graphs
+     # A list storing dictionaries of user ids and their features.
+     tweet_user_features = []
+     completed_tweet_user_features = []  # has centrality added
+     # the global TF-IDF model used for the word2vec model
+     saved_tf_idf_model = None
+     # A dictionary used for the translation of actual Twitter usernames to UUIDs
+     dict_of_users = {}
+
+     # The max size for all data entries (i.e. baseline tweets)
+     MAX_RECORD_SIZE = sys.maxsize  # 3050
+
+     # Datasets for training
+     violent_words_dataset_location = None
+     tf_idf_training_dataset_location = None
+     outputs_location = None
+
+     # Used for knowing which columns to access data from. For Twitter data.
+     # Summary variables
+     DEFAULT_USERNAME_COLUMN_ID = 0
+     DEFAULT_DATE_COLUMN_ID = 1
+     DEFAULT_MESSAGE_COLUMN_ID = 2
+     DEFAULT_ANALYTIC_COLUMN_ID = 4
+     DEFAULT_CLOUT_COLUMN_ID = 5
+     DEFAULT_AUTHENTIC_COLUMN_ID = 6
+     DEFAULT_TONE_COLUMN_ID = 7
+     # Emotional Analysis
+     DEFAULT_ANGER_COLUMN_ID = 36
+     DEFAULT_SADNESS_COLUMN_ID = 37
+     DEFAULT_ANXIETY_COLUMN_ID = 35
+     # Personal Drives:
+     DEFAULT_POWER_COLUMN_ID = 62
+     DEFAULT_REWARD_COLUMN_ID = 63
+     DEFAULT_RISK_COLUMN_ID = 64
+     DEFAULT_ACHIEVEMENT_COLUMN_ID = 61
+     DEFAULT_AFFILIATION_COLUMN_ID = 60
+     # Personal pronouns
+     DEFAULT_P_PRONOUN_COLUMN_ID = 13
+     DEFAULT_I_PRONOUN_COLUMN_ID = 19
+
+     # Constants for the fields in the baseline dataset (i.e. ISIS magazine/ Stormfront, etc)
+     DEFAULT_BASELINE_MESSAGE_COLUMN_ID = 5
+     # Summary variables
+     DEFAULT_BASELINE_CLOUT_COLUMN_ID = 10
+     DEFAULT_BASELINE_ANALYTIC_COLUMN_ID = 9
+     DEFAULT_BASELINE_TONE_COLUMN_ID = 12
+     DEFAULT_BASELINE_AUTHENTIC_COLUMN_ID = 11
+     # Emotional Analysis
+     DEFAULT_BASELINE_ANGER_COLUMN_ID = 41
+     DEFAULT_BASELINE_SADNESS_COLUMN_ID = 42
+     DEFAULT_BASELINE_ANXIETY_COLUMN_ID = 40
+     # Personal Drives
+     DEFAULT_BASELINE_POWER_COLUMN_ID = 67
+     DEFAULT_BASELINE_REWARD_COLUMN_ID = 68
+     DEFAULT_BASELINE_RISK_COLUMN_ID = 69
+     DEFAULT_BASELINE_ACHIEVEMENT_COLUMN_ID = 66
+     DEFAULT_BASELINE_AFFILIATION_COLUMN_ID = 65
+     # Personal pronouns
+     DEFAULT_BASELINE_P_PRONOUN_COLUMN_ID = 18
+     DEFAULT_BASELINE_I_PRONOUN_COLUMN_ID = 24
+
+     # Used for Minkowski distance
+     _average_clout = 0
+     _average_analytic = 0
+     _average_tone = 0
+     _average_authentic = 0
+     _average_anger = 0
+     _average_sadness = 0
+     average_anxiety = 0
+     average_power = 0
+     average_reward = 0
+     average_risk = 0
+     average_achievement = 0
+     average_affiliation = 0
+     average_p_pronoun = 0
+     average_i_pronoun = 0
+
+     # Used to cache messages to free memory
+     MESSAGE_TMP_CACHE_LOCATION = "message_cache"
+
+     def __init__(self, violent_words_dataset_location=None,
+                  baseline_training_dataset_location=None,
+                  outputs_location=r"outputs"):
+         """
+         Constructor
+
+         The feature_extraction() class can be initialised with violent_words_dataset_location,
+         baseline_training_dataset_location, and outputs_location. All files in the violent_words_dataset_location
+         will be read (one line at a time) and added to the corpus of violent and swear words. The CSV file at
+         baseline_training_dataset_location is used to train the TF-IDF model, and a Minkowski distance score is
+         calculated based on the LIWC scores present.
+
+         If the constant variables need to be changed, do this by setting the member variables.
+         """
+
+         # Error if datasets not provided
+         if violent_words_dataset_location is None:
+             raise Exception("No violent words dir provided. Provide a directory that contains newline-separated "
+                             "files where each line is a violent, extremist, etc word")
+
+         if baseline_training_dataset_location is None:
+             raise Exception("No baseline (TF-IDF/ Minkowski) dataset provided. This should be a CSV file containing "
+                             "extremist content and LIWC scores.")
+
+         # Set datasets to member variables
+         self.violent_words_dataset_location = violent_words_dataset_location
+         self.tf_idf_training_dataset_location = baseline_training_dataset_location
+         self.outputs_location = outputs_location
+
+         # Attempt to make the outputs folder if it doesn't exist
+         try:
+             os.makedirs(outputs_location)
+         except:
+             pass
+
+     def _reset_stored_feature_data(self):
+         """
+         Resets member variables from a previous run. Importantly, does not reset the TF-IDF model.
+         :return:
+         """
+
+         # A graph used to store connections between aggregated users
+         self.graph = grapher()
+         self.archived_graphs = []  # an archive of the previous graphs
+         # A list storing dictionaries of user ids and their features.
+         self.tweet_user_features = []
+         self.completed_tweet_user_features = []  # has centrality added
+         # the dictionary used for the translation of usernames to UUIDs
+         self.dict_of_users = {}
+
+         # Used for Minkowski distance
+         self._average_clout = 0
+         self._average_analytic = 0
+         self._average_tone = 0
+         self._average_authentic = 0
+         self._average_anger = 0
+         self._average_sadness = 0
+         self.average_anxiety = 0
+         self.average_power = 0
+         self.average_reward = 0
+         self.average_risk = 0
+         self.average_achievement = 0
+         self.average_affiliation = 0
+         self.average_p_pronoun = 0
+         self.average_i_pronoun = 0
+
+     def _get_unique_id_from_username(self, username):
+         """
+         A function used to retrieve a UUID based on a Twitter username. If a username has been seen before, the same
+         UUID is returned, as it is stored in a dictionary.
+         :param username:
+         :return: a string representation of a UUID relating to a Twitter username
+         """
+
+         if username in self.dict_of_users:
+             # username already in dictionary
+             unique_id = self.dict_of_users[username]
+         else:
+             # make new UUID
+             unique_id = uuid.uuid4().hex
+             # stops uuid collisions
+             while unique_id in self.dict_of_users.values():
+                 unique_id = uuid.uuid4().hex
+
+             # Add new user id to dictionary
+             self.dict_of_users[username] = unique_id
+
+             # todo it's less efficient writing the whole file every run
+             path = os.path.join(self.outputs_location, "users.json")
+
+             with open(path, 'w') as outfile:
+                 json.dump(self.dict_of_users, outfile)
+
+         return unique_id
+
+     def _add_to_graph(self, originating_user_name, message):
+         """
+         A wrapper function used for adding a node/ connection to the graph.
+         :param originating_user_name: the Twitter username
+         :param message: the Tweet
+         """
+
+         # Adds node to graph so that if they don't interact with anyone they still have a centrality
+         self.graph.add_node(originating_user_name)
+
+         # Process mentions
+         mentions = re.findall(r"@([a-zA-Z\-_]+)", message)
+
+         # For all mentions in the tweet add them to the graph as a node
+         for mention in mentions:
+             self.graph.add_edge_wrapper(originating_user_name, mention, 1, "mention")
+
+         # Process hashtags
+         hashtags = re.findall(r"#([a-zA-Z\-_]+)", message)
+
+         # For all hashtags in the tweet add them to the graph as a node
+         for hashtag in hashtags:
+             self.graph.add_edge_wrapper(originating_user_name, hashtag, 1, "hashtag")
+
+     def _get_capitalised_word_frequency(self, message):
+         """
+         A wrapper function for returning the frequency of capitalised words in a message.
+         :param message:
+         :return: the frequency of capitalised words in a message.
+         """
+         return wording_choice_aggregator().get_frequency_of_capatalised_words(
+             message)  # NEEDS TO BE DONE before lower case
+
+     def _get_violent_word_frequency(self, message):
+         """
+         A wrapper function used to retrieve the frequency of violent words in a message.
+         :param message: a string representation of a social media message
+         :return: the frequency of violent words in the message
+         """
+         return wording_choice_aggregator().get_frequency_of_violent_or_curse_words(message,
+                                                                                    self.violent_words_dataset_location)
+
+     def _get_tweet_vector(self, message):
+         """
+         A wrapper function used to retrieve the 200-size vector representation (average and max vectors
+         concatenated) of a message.
+         :param message: a string representation of a message
+         :return: a 200-size vector of the tweet
+         """
+         vectors = []
+         tf_idf_model = self._get_tf_idf_model()
+
+         for word in message.split(" "):
+             # todo add back word = sanitization().sanitize(word, self.outputs_location, force_new_data_and_dont_persisit=True)
+             try:
+                 vectors.append(tf_idf_model.wv[word])
+                 logger().print_message("Word '{}' in vocabulary...".format(word))
+             except KeyError as e:
+                 logger().print_message(e)
+                 logger().print_message("Word '{}' not in vocabulary...".format(word))
+
+         # Lists of the values used to store the max and average vector values
+         max_value_list = []
+         average_value_list = []
+
+         # Check if at least one word in the message is in the vocabulary of the model
+         final_array_of_vectors = numpy.zeros(100)
+         if len(vectors) > 0:
+
+             # Loop through the elements in the vectors
+             for iterator in range(vectors[0].size):
+
+                 list_of_all_values = []
+
+                 # Loop through each vector
+                 for vector in vectors:
+                     value = vector[iterator]
+                     list_of_all_values.append(value)
+
+                 average_value = sum(list_of_all_values) / len(list_of_all_values)
+                 max_value = max(list_of_all_values)
+                 max_value_list.append(max_value)
+                 average_value_list.append(average_value)
+
+             final_array_of_vectors = numpy.append(numpy.array([max_value_list]), numpy.array([average_value_list]))
+
+         # Convert array to list
+         list_of_vectors = []
+         for vector in final_array_of_vectors:
+             list_of_vectors.append(vector)
+
+         return list_of_vectors
+
+     def _process_tweet(self, user_name, message, row):
+         """
+         Wrapper function for taking a username and tweet and extracting the features.
+         :param user_name:
+         :param message:
+         :return: a dictionary of all features from the message
+         """
+         self._add_to_graph(user_name, message)
+
+         features_dict = {"cap_freq": self._get_capitalised_word_frequency(message),
+                          "violent_freq": self._get_violent_word_frequency(message),
+                          "message_vector": self._get_tweet_vector(message)}
+
+         return features_dict
+
+     def _get_average_liwc_scores_for_baseline_data(self):
+         """
+         Calculates the average LIWC scores for the baseline dataset, used as the reference point for the
+         Minkowski distance.
+         """
+
+         # Checks if the values have already been set this run; if so, don't calculate again
+         # TODO what of the edge case where average clout is 0?
+         if self._average_clout == 0:
+             logger.print_message("Opening dataset {} for LIWC feature extraction and Minkowski distance".format(
+                 self.tf_idf_training_dataset_location))
+             baseline_data_set_name = self.tf_idf_training_dataset_location
+
+             clout_list = []
+             analytic_list = []
+             tone_list = []
+             authentic_list = []
+             anger_list = []
+             sadness_list = []
+             anxiety_list = []
+             power_list = []
+             reward_list = []
+             risk_list = []
+             achievement_list = []
+             affiliation_list = []
+             p_pronoun_list = []
+             i_pronoun_list = []
+
+             with open(baseline_data_set_name, 'r', encoding='cp1252') as file:
+                 reader = csv.reader(file)
+
+                 is_header = True
+                 for row in reader:
+
+                     if is_header:
+                         is_header = False
+                         continue
+
+                     # Try to access columns; if we can't, the LIWC fields haven't been set and default to 0
+                     try:
+                         clout = row[self.DEFAULT_BASELINE_CLOUT_COLUMN_ID]
+                         analytic = row[self.DEFAULT_BASELINE_ANALYTIC_COLUMN_ID]
+                         tone = row[self.DEFAULT_BASELINE_TONE_COLUMN_ID]
+                         authentic = row[self.DEFAULT_BASELINE_AUTHENTIC_COLUMN_ID]
+                         anger = row[self.DEFAULT_BASELINE_ANGER_COLUMN_ID]
+                         sadness = row[self.DEFAULT_BASELINE_SADNESS_COLUMN_ID]
+                         anxiety = row[self.DEFAULT_BASELINE_ANXIETY_COLUMN_ID]
+                         power = row[self.DEFAULT_BASELINE_POWER_COLUMN_ID]
+                         reward = row[self.DEFAULT_BASELINE_REWARD_COLUMN_ID]
+                         risk = row[self.DEFAULT_BASELINE_RISK_COLUMN_ID]
+                         achievement = row[self.DEFAULT_BASELINE_ACHIEVEMENT_COLUMN_ID]
+                         affiliation = row[self.DEFAULT_BASELINE_AFFILIATION_COLUMN_ID]
+                         p_pronoun = row[self.DEFAULT_BASELINE_P_PRONOUN_COLUMN_ID]
+                         i_pronoun = row[self.DEFAULT_BASELINE_I_PRONOUN_COLUMN_ID]
+                     except:
+                         clout = 0
+                         analytic = 0
+                         tone = 0
+                         authentic = 0
+                         anger = 0
+                         sadness = 0
+                         anxiety = 0
+                         power = 0
+                         reward = 0
+                         risk = 0
+                         achievement = 0
+                         affiliation = 0
+                         p_pronoun = 0
+                         i_pronoun = 0
+
+                     clout_list.append(float(clout))
+                     analytic_list.append(float(analytic))
+                     tone_list.append(float(tone))
+                     authentic_list.append(float(authentic))
+                     anger_list.append(float(anger))
+                     sadness_list.append(float(sadness))
+                     anxiety_list.append(float(anxiety))
+                     power_list.append(float(power))
+                     reward_list.append(float(reward))
+                     risk_list.append(float(risk))
+                     achievement_list.append(float(achievement))
+                     affiliation_list.append(float(affiliation))
+                     p_pronoun_list.append(float(p_pronoun))
+                     i_pronoun_list.append(float(i_pronoun))
+
+             # Get averages for variables, used for the distance score. These are member variables so that they
+             # don't have to be re-calculated on later runs
+             self._average_clout = sum(clout_list) / len(clout_list)
+             self._average_analytic = sum(analytic_list) / len(analytic_list)
+             self._average_tone = sum(tone_list) / len(tone_list)
+             self._average_authentic = sum(authentic_list) / len(authentic_list)
+             self._average_anger = sum(anger_list) / len(anger_list)
+             self._average_sadness = sum(sadness_list) / len(sadness_list)
+             self.average_anxiety = sum(anxiety_list) / len(anxiety_list)
+             self.average_power = sum(power_list) / len(power_list)
+             self.average_reward = sum(reward_list) / len(reward_list)
+             self.average_risk = sum(risk_list) / len(risk_list)
+             self.average_achievement = sum(achievement_list) / len(achievement_list)
+             self.average_affiliation = sum(affiliation_list) / len(affiliation_list)
+             self.average_p_pronoun = sum(p_pronoun_list) / len(p_pronoun_list)
+             self.average_i_pronoun = sum(i_pronoun_list) / len(i_pronoun_list)
+
+         return [self._average_clout, self._average_analytic, self._average_tone, self._average_authentic,
+                 self._average_anger, self._average_sadness, self.average_anxiety,
+                 self.average_power, self.average_reward, self.average_risk, self.average_achievement,
+                 self.average_affiliation,
+                 self.average_p_pronoun, self.average_i_pronoun]
+
+     def _get_tf_idf_model(self):
+         """
+         A function used to retrieve the word2vec model trained on the most important (by TF-IDF score) ngrams in
+         the extremist dataset. If the model has already been created then the previously created model is reused.
+         :return: a trained gensim Word2Vec model
+         """
+
+         # if already made model, reuse
+         if self.saved_tf_idf_model is None:
+             logger.print_message("Opening dataset {} for TF-IDF".format(self.tf_idf_training_dataset_location))
+             baseline_data_set_name = self.tf_idf_training_dataset_location
+
+             data_set = ""
+
+             with open(baseline_data_set_name, 'r', encoding='cp1252') as file:
+                 reader = csv.reader(file)
+
+                 is_header = True
+                 for row in reader:
+
+                     if is_header:
+                         is_header = False
+                         continue
+
+                     # take quote from dataset and add it to the corpus
+                     message = row[self.DEFAULT_BASELINE_MESSAGE_COLUMN_ID]  # data column
+                     data_set = data_set + message + "\n"
+
+             # clean data set
+             # todo should we be doing sanitization clean_data = sanitization().sanitize(data_set, self.outputs_location) # if so remove line below
+             clean_data = data_set
+
+             # get ngrams
+             uni_grams, bi_grams, tri_grams = n_gram_aggregator().get_ngrams(clean_data)
+             ngrams = uni_grams + bi_grams + tri_grams
+
+             # todo The TF-IDF most important ngrams aren't being used. Should these be used instead of the other ngrams?
+             tf_idf_scores = tf_idf_aggregator().get_tf_idf_scores(ngrams, data_set)
+             number_of_most_important_ngrams = int(len(ngrams) / 2)  # number is half of all ngrams
+             list_of_most_important_ngrams = sorted(tf_idf_scores, key=tf_idf_scores.get, reverse=True)[
+                                             :number_of_most_important_ngrams]
+
+             # create a word2vec model
+             model = word_2_vec_aggregator().get_model(list_of_sentences=list_of_most_important_ngrams)
+             self.saved_tf_idf_model = model
+         else:
+             model = self.saved_tf_idf_model
+
+         return model
+
+     def open_wrapper(self, location, access_type, list_of_encodings=["utf-8", 'latin-1', 'cp1252']):
+         """
+         A wrapper around the open built-in function that has fallbacks for different encodings.
+         :return:
+         """
+
+         for encoding in list_of_encodings:
+             try:
+                 file = open(location, access_type, encoding=encoding)
+                 # Attempt to read the file; if it fails, try another encoding
+                 file.readlines()
+                 file.seek(0)
+                 file.close()
+                 file = open(location, access_type, encoding=encoding)
+                 return file
+             except LookupError as e:
+                 continue
+             except UnicodeDecodeError as e:
+                 continue
+
+         raise Exception(
+             "No valid encoding provided for file: '{}'. Encodings provided: '{}'".format(location, list_of_encodings))
+
+     def _add_user_post_db_cache(self, user_id, dict_to_add):
+         """
+         Used to add data to the post-message db cache used to free up memory.
+         """
+
+         if not os.path.isdir(self.MESSAGE_TMP_CACHE_LOCATION):
+             os.mkdir(self.MESSAGE_TMP_CACHE_LOCATION)
+
+         # Save file as pickle
+         file_name = "{}-{}.pickle".format(user_id, int(time.time()))
+         file_name = os.path.join(self.MESSAGE_TMP_CACHE_LOCATION, file_name)
+         with open(file_name, 'wb') as pickle_handle:
+             pickle.dump({"description": "a temporary file used for saving memory",
+                          "data": dict_to_add}, pickle_handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+     def _get_user_post_db_cache(self, file_name):
+         """
+         Retrieves data from the cache database used to free up memory.
+         """
+         if not os.path.isdir(self.MESSAGE_TMP_CACHE_LOCATION):
+             raise Exception("Attempted to access temporary cache files before files are created")
+
+         if not os.path.isfile(file_name):
+             raise Exception("Attempted to access cache file {}, however, it does not exist".format(file_name))
+
+         with open(file_name, "rb") as openfile:
+             cache_data = pickle.load(openfile)
+
+         return cache_data["data"]
+
+     def _delete_user_post_db_cache(self):
+         try:
+             if os.path.isdir(self.MESSAGE_TMP_CACHE_LOCATION):
+                 shutil.rmtree(self.MESSAGE_TMP_CACHE_LOCATION)
+         except:
+             pass
+
+     def _get_type_of_message_data(self, data_set_location, has_header=True, is_extremist=None):
+         # Ensure all temp files are deleted
+         self._delete_user_post_db_cache()
+
+         # Counts the total rows in the CSV. Used for progress reporting.
+         print("Starting entity count. Will count up to '{}' records".format(self.MAX_RECORD_SIZE))
+
+         # Read one entry at a time
+         max_chunksize = 1  # (currently unused)
+         row_count = 0
+
+         for row in pd.read_csv(data_set_location, iterator=True, encoding='latin-1'):
+
+             row_count = row_count + 1
+
+             if row_count >= self.MAX_RECORD_SIZE:
+                 break
+
+         print("Finished entity count. Count is: '{}'".format(row_count))
+         print("")
+         # Loops through all rows in the dataset CSV file.
+         current_processed_rows = 0
+         is_header = False
+
+         for row in pd.read_csv(data_set_location, iterator=True, encoding='latin-1'):
+             row = row.columns  # each chunk's column labels are used as the row values
+             # Makes sure the same number is used for each dataset
+             if current_processed_rows > row_count:
+                 break
+
+             # Skips the first entry, as it's the CSV header
+             if has_header and is_header:
+                 is_header = False
+                 continue
+
+             # Retrieve username
+             try:
+                 username = row[self.DEFAULT_USERNAME_COLUMN_ID]
+                 date = row[self.DEFAULT_DATE_COLUMN_ID]
+                 user_unique_id = self._get_unique_id_from_username(username)
+             except:
+                 # if empty entry
+                 continue
+
+             # Attempt to get LIWC scores from the csv; if not present, default to 0
+             try:
+                 # Summary variables
+                 clout = float(row[self.DEFAULT_CLOUT_COLUMN_ID])
+                 analytic = float(row[self.DEFAULT_ANALYTIC_COLUMN_ID])
+                 tone = float(row[self.DEFAULT_TONE_COLUMN_ID])
+                 authentic = float(row[self.DEFAULT_AUTHENTIC_COLUMN_ID])
+                 # Emotional Analysis
+                 anger = float(row[self.DEFAULT_ANGER_COLUMN_ID])
+                 sadness = float(row[self.DEFAULT_SADNESS_COLUMN_ID])
+                 anxiety = float(row[self.DEFAULT_ANXIETY_COLUMN_ID])
+                 # Personal Drives:
+                 power = float(row[self.DEFAULT_POWER_COLUMN_ID])
+                 reward = float(row[self.DEFAULT_REWARD_COLUMN_ID])
+                 risk = float(row[self.DEFAULT_RISK_COLUMN_ID])
+                 achievement = float(row[self.DEFAULT_ACHIEVEMENT_COLUMN_ID])
+                 affiliation = float(row[self.DEFAULT_AFFILIATION_COLUMN_ID])
+                 # Personal pronouns
+                 i_pronoun = float(row[self.DEFAULT_I_PRONOUN_COLUMN_ID])
+                 p_pronoun = float(row[self.DEFAULT_P_PRONOUN_COLUMN_ID])
+
+             except:
+                 # Summary variables
+                 clout = 0
+                 analytic = 0
+                 tone = 0
+                 authentic = 0
+                 # Emotional Analysis
+                 anger = 0
+                 sadness = 0
+                 anxiety = 0
+                 # Personal Drives:
+                 power = 0
+                 reward = 0
+                 risk = 0
+                 achievement = 0
+                 affiliation = 0
+                 # Personal pronouns
+                 i_pronoun = 0
+                 p_pronoun = 0
+
+             liwc_dict = {
+                 "clout": clout,
+                 "analytic": analytic,
+                 "tone": tone,
+                 "authentic": authentic,
+                 "anger": anger,
+                 "sadness": sadness,
+                 "anxiety": anxiety,
+                 "power": power,
+                 "reward": reward,
+                 "risk": risk,
+                 "achievement": achievement,
+                 "affiliation": affiliation,
+                 "i_pronoun": i_pronoun,
+                 "p_pronoun": p_pronoun,
+             }
+
+             # Calculate Minkowski distance
+             average_row = self._get_average_liwc_scores_for_baseline_data()
+
+             actual_row = [clout, analytic, tone, authentic,
+                           anger, sadness, anxiety,
+                           power, reward, risk, achievement, affiliation,
+                           p_pronoun, i_pronoun
+                           ]
+
+             try:
+                 liwc_dict["minkowski"] = distance.minkowski(actual_row, average_row, 1)
+             except ValueError:
+                 continue
+
+             # Retrieve Tweet for message
+             tweet = str(row[self.DEFAULT_MESSAGE_COLUMN_ID])
+
+             # clean/ remove markup in dataset
+             sanitised_message = sanitization().sanitize(tweet, self.outputs_location,
+                                                         force_new_data_and_dont_persisit=True)
+
+             # If no message, skip entry
+             if not len(tweet) > 0 or not len(sanitised_message) > 0 or sanitised_message == '' or not len(
+                     sanitised_message.split(" ")) > 0:
+                 continue
+
+             # Process Tweet and save as dict
+             tweet_dict = self._process_tweet(user_unique_id, tweet, row)
+
+             # If the message vector is not 200 skip (meaning that a blank message was processed)
+             if not len(tweet_dict["message_vector"]) == 200:
+                 continue
+
+             if is_extremist is not None:
+                 tweet_dict["is_extremist"] = is_extremist
+
+             tweet_dict["date"] = date
+
+             # Merge liwc dict with tweet dict
+             tweet_dict = {**tweet_dict, **liwc_dict}
+
+             # tweet_dict["user_unique_id"] = user_unique_id
+
+             self._add_user_post_db_cache(user_unique_id, {user_unique_id: tweet_dict})
+             # self.tweet_user_features.append()
+             # TODO here save to cache json instead of list and graph
+
+             logger().print_message("Added message from user: '{}', from dataset: '{}'. {} rows of {} completed."
+                                    .format(user_unique_id, data_set_location, current_processed_rows, row_count), 1)
+             current_processed_rows = current_processed_rows + 1
+             print("Finished reading row")
+
+         # Add the centrality (has to be done after all users are added to the graph)
+         completed_tweet_user_features = []
+
+         # Loop through all data in the cache files; each file represents one message/ tweet
+         for cached_message_file in os.listdir(self.MESSAGE_TMP_CACHE_LOCATION):
+             cached_message_file = os.fsdecode(cached_message_file)
+             cached_message_file = os.path.join(self.MESSAGE_TMP_CACHE_LOCATION, cached_message_file)
+
+             # Only process pickle files
+             if not cached_message_file.endswith(".pickle"):
+                 continue
+
+             print("Reading cache file: '{}'".format(cached_message_file))
+             cached_message_data = self._get_user_post_db_cache(cached_message_file)
+             # Loops through the data in that tweet (should only be one entry per tweet).
+             for user_id in cached_message_data.keys():
+                 updated_entry = {}
+                 updated_entry[user_id] = cached_message_data[user_id]
+                 # Adds centrality
+                 updated_entry[user_id]["centrality"] = self.graph.get_degree_centrality_for_user(user_id)
+                 logger().print_message(
+                     "Added '{}' Centrality for user '{}'".format(updated_entry[user_id]["centrality"], user_id), 1)
+                 completed_tweet_user_features.append(updated_entry)
+                 gc.collect()
+                 break  # Only one entry per list
+
+         self._delete_user_post_db_cache()
+         self.completed_tweet_user_features = self.completed_tweet_user_features + completed_tweet_user_features
+         self.tweet_user_features = []
+         # self.archived_graphs.append(self.graph)
+         self.graph = grapher()
+         print("Finished messages")
+
+     def _get_extremist_data(self, dataset_location):
+         """
+         This function is responsible for aggregating tweets from the extremist dataset, extracting the features,
+         and saving them to a file for a model to be created.
+         """
+
+         self._get_type_of_message_data(data_set_location=dataset_location, is_extremist=True)
+
+     def _get_counterpoise_data(self, dataset_location):
+         """
+         This function is responsible for aggregating tweets from the counterpoise dataset (related to the topic but
+         from legitimate sources, e.g. news outlets), extracting the features, and saving them to a file for a
+         model to be created.
+         """
+
+         self._get_type_of_message_data(data_set_location=dataset_location, is_extremist=False)
+
+     def _get_standard_tweets(self, dataset_location):
+         """
+         This function is responsible for aggregating tweets from the baseline (random sample of Twitter posts)
+         dataset, extracting the features, and saving them to a file for a model to be created.
+         """
+
+         self._get_type_of_message_data(data_set_location=dataset_location, is_extremist=False)
+
+     def dump_features_for_list_of_datasets(self, feature_file_path_to_save_to, list_of_dataset_locations,
+                                            force_new_dataset=True):
+         """
+         Saves features representing the provided datasets to a JSON file. Designed to be used for testing after a
+         model has been created.
+         :param feature_file_path_to_save_to:
+         :param list_of_dataset_locations:
+         :return:
+         """
+
+         self._reset_stored_feature_data()
+
+         if force_new_dataset or not os.path.isfile(feature_file_path_to_save_to):
+             for dataset in list_of_dataset_locations:
+                 self._get_type_of_message_data(data_set_location=dataset, is_extremist=None)
+
+             with open(feature_file_path_to_save_to, 'w') as outfile:
+                 json.dump(self.completed_tweet_user_features, outfile, indent=4)
+
+         else:
+             with open(feature_file_path_to_save_to, 'r') as file:
+                 data = file.read()
+
+             # parse file
+             self.completed_tweet_user_features = json.loads(data)
+
+     def dump_training_data_features(self, feature_file_path_to_save_to, extremist_data_location,
+                                     baseline_data_location, force_new_dataset=True):
+         """
+         The entrypoint function, used to dump all features, for all users in the extremist and baseline datasets,
+         to a JSON file.
+         :param feature_file_path_to_save_to: the filepath to save the datasets to
+         """
+
+         self._reset_stored_feature_data()
+
+         if force_new_dataset or not os.path.isfile(feature_file_path_to_save_to):
+             print("Starting baseline messages")
+             self._get_standard_tweets(baseline_data_location)
+             print("Starting extremist messages")
+             self._get_extremist_data(extremist_data_location)
+
+             with open(feature_file_path_to_save_to, 'w') as outfile:
+                 json.dump(self.completed_tweet_user_features, outfile, indent=4)
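
A hedged end-to-end sketch of how this class appears intended to be driven (the swears directory exists in this commit; the CSV paths are hypothetical and their layouts must match the column-ID constants above):

    from Pinpoint.FeatureExtraction import feature_extraction

    extractor = feature_extraction(
        violent_words_dataset_location="swears",
        baseline_training_dataset_location="baseline-liwc.csv")  # hypothetical CSV

    extractor.dump_training_data_features(
        feature_file_path_to_save_to="outputs/training_features.json",
        extremist_data_location="extremist-tweets.csv",  # hypothetical CSV
        baseline_data_location="baseline-tweets.csv")    # hypothetical CSV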
Pinpoint/Grapher.py ADDED
@@ -0,0 +1,60 @@
+ import networkx as nx
+
+
+ class grapher():
+     """
+     A wrapper class used for generating a graph for interactions between users
+     """
+     graph = None
+
+     def __init__(self):
+         """
+         Constructor.
+         """
+         self.graph = nx.DiGraph()
+
+     def add_edge_wrapper(self, node_1_name, node_2_name, weight, relationship):
+         """
+         A wrapper function used to add an edge connection or node.
+         :param node_1_name: from
+         :param node_2_name: to
+         :param weight:
+         :param relationship:
+         :return:
+         """
+         self.graph.add_edge(node_1_name, node_2_name, weight=weight, relation=relationship)
+
+     def add_node(self, node_name):
+         """
+         A wrapper function that adds a node with no edges to the graph
+         :param node_name:
+         """
+         self.graph.add_node(node_name)
+
+     def get_info(self):
+         """
+         Retrieves information about the graph
+         :return:
+         """
+         return nx.info(self.graph)  # note: nx.info was removed in networkx 3.0
+
+     def show_graph(self):
+         """
+         Computes a spring layout for the graph. Note that this does not render it; drawing would additionally
+         require e.g. nx.draw() and matplotlib.
+         :return:
+         """
+         nx.spring_layout(self.graph)
+
+     def get_degree_centrality_for_user(self, user_name):
+         """
+         Returns the degree centrality for a given user present in the graph
+         :param user_name:
+         :return: the degree centrality for a given user present in the graph
+         """
+         centrality = nx.degree_centrality(self.graph)
+         return centrality[user_name]
+
+     # todo implement
+     # def get_eigenvector_centrality_for_user(self, user_name):
+     #     centrality = nx.eigenvector_centrality(self.graph)
+     #     return centrality[user_name]
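
A small sketch of the grapher in use (the usernames are made up):

    from Pinpoint.Grapher import grapher

    g = grapher()
    g.add_node("carol")  # an isolated user still gets a centrality
    g.add_edge_wrapper("alice", "bob", 1, "mention")
    print(g.get_degree_centrality_for_user("alice"))  # degree 1 / (3 - 1) nodes = 0.5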
Pinpoint/Logger.py ADDED
@@ -0,0 +1,21 @@
+ from datetime import datetime
+
+
+ class logger():
+     """
+     A wrapper class around the Python print function, used to only print when debugging is enabled or the
+     message's logging level is high enough.
+     """
+     DEBUG = False
+
+     @staticmethod
+     def print_message(message, logging_level=0):
+         """
+         A wrapper function around the Python print function.
+         :param message: the message to print
+         :param logging_level: messages with a logging level of 1 or above are printed irrespective of whether
+         DEBUG mode is enabled.
+         """
+         if logging_level >= 1 or logger.DEBUG:
+             now = datetime.now()
+             current_time = now.strftime("%H:%M:%S")
+             print("{} | {}".format(current_time, message))
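
Usage is a one-liner; a brief sketch:

    from Pinpoint.Logger import logger

    logger.DEBUG = True  # print everything while debugging
    logger.print_message("only shown when DEBUG is True")
    logger.print_message("always shown", logging_level=1)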
Pinpoint/RandomForest.py ADDED
@@ -0,0 +1,374 @@
1
+ import csv
2
+ import json
3
+ import os
4
+ import pickle
5
+ from datetime import datetime
6
+
7
+ import pandas
8
+ import pandas as pd
9
+ from sklearn import metrics
10
+ from sklearn.ensemble import RandomForestClassifier
11
+ from sklearn.model_selection import train_test_split
12
+
13
+ from Pinpoint import Logger
14
+
15
+
16
+ class random_forest():
17
+ """
18
+ A class used for creating a random forest binary classifier.
19
+ """
20
+
21
+ model = None
22
+ accuracy = None
23
+ precision = None
24
+ recall = None
25
+ f_measure = None
26
+
27
+ # Model variables populated on creation or reading of file
28
+
29
+ original_name = None
30
+ creation_date = None
31
+
32
+ _FRAMEWORK_VERSION = 0.2 # Used when creating a new model file
33
+ # v0.1 - versioning added.
34
+ # v0.2 - Added more LIWC scores and minkowski distance
35
+
36
+ model_version = _FRAMEWORK_VERSION # can be updated if reading and using a model file of a different version
37
+
38
+ _outputs_folder = None
39
+ _model_folder = None
40
+
41
+ # Categories of features used in the model
42
+ RADICAL_LANGUAGE_ENABLED = True # RF-IDF Scores, Word Embeddings
43
+ PSYCHOLOGICAL_SIGNALS_ENABLED = True # LIWC Dictionaries, Minkowski distance
44
+ BEHAVIOURAL_FEATURES_ENABLED = True # frequency of tweets, followers / following ratio, centrality
45
+
46
+ def __init__(self, outputs_folder="outputs", model_folder=None):
47
+ """
48
+ Constructor
49
+
50
+ The random_forest() class can be initialised with outputs_folder() and model_folder(). The outputs folder is
51
+ where output files are stored and the model folder is where the model will be created if not overwritten.
52
+ """
53
+
54
+ if model_folder is None:
55
+ model_folder = outputs_folder
56
+
57
+ self._outputs_folder = outputs_folder
58
+ self._model_folder = model_folder
59
+
60
+ def get_features_as_df(self, features_file, force_new_dataset=True):
61
+ """
62
+ Reads a JSON file file and converts to a Pandas dataframe that can be used to train and test the classifier.
63
+ :param features_file: the location of the JSON features file to convert to a dataframe
64
+ :param force_new_dataset: if true a new CSV file will be created even if one already exists.
65
+ :return: a Pandas dataframe with the features.
66
+ """
67
+
68
+ with open(features_file) as json_features_file:
69
+ csv_file = "{}.csv".format(features_file)
70
+
71
+ if force_new_dataset or not os.path.isfile(csv_file):
72
+ features = json.load(json_features_file)
73
+
74
+ # todo remove the data for the features not being used.
75
+ filtered_list_after_filters_applied = []
76
+
77
+ # If any of the filters are not true remove the features not requested
78
+ column_names = []
79
+
80
+ if self.PSYCHOLOGICAL_SIGNALS_ENABLED:
81
+ column_names = column_names + ["clout", "analytic", "tone", "authentic",
82
+ "anger", "sadness", "anxiety",
83
+ "power", "reward", "risk", "achievement", "affiliation",
84
+ "i_pronoun", "p_pronoun",
85
+ "minkowski"]
86
+ if self.BEHAVIOURAL_FEATURES_ENABLED:
87
+ column_names = column_names + ['centrality']
88
+
89
+ if self.RADICAL_LANGUAGE_ENABLED:
90
+ # Add column names
91
+ column_names = column_names + ["cap_freq", "violent_freq"]
92
+ # Add the two hundred vectors columns
93
+ for iterator in range(1, 201):
94
+ column_names.append("message_vector_{}".format(iterator))
95
+
96
+ column_names = column_names + ['is_extremist']
97
+
98
+ if not self.BEHAVIOURAL_FEATURES_ENABLED or not self.PSYCHOLOGICAL_SIGNALS_ENABLED or self.RADICAL_LANGUAGE_ENABLED:
99
+
100
+ # Loops through list of dicts (messages)
101
+ number_of_processed_messages = 0
102
+ for message in features:
103
+ number_of_processed_messages = number_of_processed_messages + 1
104
+ Logger.logger.print_message(
105
+ "Extracting information from message {} of {} in file {}".format(
106
+ number_of_processed_messages,
107
+ len(features),
108
+ features_file),
109
+ logging_level=1)
110
+
111
+ # Loops through dict keys (usernames)
112
+ for user in message.keys():
113
+
114
+ message_features = message[user]
115
+
116
+ feature_dict = {}
117
+
118
+ if self.PSYCHOLOGICAL_SIGNALS_ENABLED:
119
+ # Summary variables
120
+ feature_dict["clout"] = message_features["clout"]
121
+ feature_dict["analytic"] = message_features["analytic"]
122
+ feature_dict["tone"] = message_features["tone"]
123
+ feature_dict["authentic"] = message_features["authentic"]
124
+
125
+ # Emotional Analysis
126
+ feature_dict["anger"] = message_features["anger"]
127
+ feature_dict["sadness"] = message_features["sadness"]
128
+ feature_dict["anxiety"] = message_features["anxiety"]
129
+
130
+ # Personal Drives
131
+ feature_dict["power"] = message_features["power"]
132
+ feature_dict["reward"] = message_features["reward"]
133
+ feature_dict["risk"] = message_features["risk"]
134
+ feature_dict["achievement"] = message_features["achievement"]
135
+ feature_dict["affiliation"] = message_features["affiliation"]
136
+
137
+ # Personal Pronouns
138
+ feature_dict["i_pronoun"] = message_features["i_pronoun"]
139
+ feature_dict["p_pronoun"] = message_features["p_pronoun"]
140
+
141
+ # Minkowski distance
142
+ feature_dict["minkowski"] = message_features["minkowski"]
143
+
144
+ if self.BEHAVIOURAL_FEATURES_ENABLED:
145
+ #feature_dict['post_freq'] = message_features['post_freq']
146
+ #feature_dict['follower_freq'] = message_features['follower_freq']
147
+ feature_dict['centrality'] = message_features['centrality']
148
+
149
+ if self.RADICAL_LANGUAGE_ENABLED:
150
+ feature_dict["message_vector"] = message_features["message_vector"]
151
+ feature_dict["violent_freq"] = message_features["violent_freq"]
152
+ feature_dict["cap_freq"] = message_features["cap_freq"]
153
+
154
+ feature_dict['is_extremist'] = message_features['is_extremist']
155
+
156
+ user = {user: feature_dict}
157
+ filtered_list_after_filters_applied.append(user)
158
+
159
+ number_of_features = len(filtered_list_after_filters_applied)
160
+
161
+ # Creates the columns for the data frame
162
+ df = pd.DataFrame(
163
+ columns=column_names)
164
+
165
+ completed_features = 0
166
+ iterator = 0
167
+ error_count = 0
168
+ for message in features:
169
+ # should only be one user per entry
170
+ for user_id in message:
171
+ feature_data = message[user_id]
172
+ # ID is not included as it's hexidecimal and not float
173
+
174
+ row = []
175
+
176
+ if self.PSYCHOLOGICAL_SIGNALS_ENABLED:
177
+ clout = feature_data['clout']
178
+ analytic = feature_data['analytic']
179
+ tone = feature_data['tone']
180
+ authentic = feature_data['authentic']
181
+
182
+ anger = feature_data["anger"]
183
+ sadness = feature_data["sadness"]
184
+ anxiety = feature_data["anxiety"]
185
+ power = feature_data["power"]
186
+ reward = feature_data["reward"]
187
+ risk = feature_data["risk"]
188
+ achievement = feature_data["achievement"]
189
+ affiliation = feature_data["affiliation"]
190
+ i_pronoun = feature_data["i_pronoun"]
191
+ p_pronoun = feature_data["p_pronoun"]
192
+ minkowski = feature_data["minkowski"]
193
+
194
+ row = row + [clout, analytic, tone, authentic, anger, sadness, anxiety, power,
195
+ reward, risk, achievement, affiliation, i_pronoun, p_pronoun, minkowski]
196
+
197
+ if self.BEHAVIOURAL_FEATURES_ENABLED:
198
+ #post_freq = feature_data['post_freq']
199
+ #follower_freq = feature_data['follower_freq']
200
+ centrality = feature_data['centrality']
201
+
202
+ row = row + [#post_freq, follower_freq,
203
+ centrality]
204
+
205
+ if self.RADICAL_LANGUAGE_ENABLED:
206
+ cap_freq = feature_data['cap_freq']
207
+ violent_freq = feature_data['violent_freq']
208
+ message_vector = feature_data['message_vector']
209
+
210
+ row = row + [cap_freq, violent_freq] + message_vector
211
+
212
+ is_extremist = feature_data['is_extremist']
213
+
214
+ row = row + [is_extremist]
215
+ try:
216
+ df.loc[iterator] = row
217
+ except ValueError as e:
218
+ print(e)
219
+ error_count = error_count + 1
220
+ pass # if error with value probably column mismatch which is down to taking a mesage with no data
221
+
222
+ iterator = iterator + 1
223
+ completed_features = completed_features + 1
224
+ user_name = list(message.keys())[0]
225
+ Logger.logger.print_message(
226
+ "Added a message from user {} to data frame - {} messages of {} completed".format(user_name,
227
+ completed_features,
228
+ number_of_features),
229
+ logging_level=1)
230
+
231
+ Logger.logger.print_message("Total errors when creating data frame: {}".format(error_count),
232
+ logging_level=1)
233
+
234
+ # Replace boolean with float
235
+ df.replace({False: 0, True: 1}, inplace=True)
236
+
237
+ # Sets ID field
238
+ df.index.name = "ID"
239
+ df.to_csv("{}.csv".format(features_file))
240
+
241
+ else:
242
+ df = pandas.read_csv(csv_file)
243
+
244
+ return df
245
+
246
+ def create_model_info_output_file(self, location_of_output_file = None, training_data_csv_location = None):
247
+ """
248
+ If the model has been loaded or trained this function will create a summary text file with information relating to
249
+ the model.
250
+ :param location_of_output_file: The location to save the output file to.
251
+ :param training_data_csv_location: The location of the training data csv. This is used to retrieve the name of the
252
+ feature columns.
253
+ """
254
+
255
+ # Check if model has been created
256
+ if not self.creation_date:
257
+ Logger.logger.print_message("Model has not been trained, created, or loaded. Cannot output model data in this state.",logging_level=1)
258
+ else:
259
+ Logger.logger.print_message("Creating model info text file")
260
+ output_text = ""
261
+
262
+ # Add summary information
263
+ output_text += "Model {}, version {}, created at {} \n".format(self.original_name, self.model_version, self.creation_date)
264
+ output_text += "\nAccuracy: {}\nRecall: {} \nPrecision: {}\nF-Measure: {}\n".format(self.accuracy, self.recall,
265
+ self.precision, self.f_measure)
266
+
267
+ # Retrieve the header names if available
268
+ if training_data_csv_location:
269
+ with open(training_data_csv_location, "r") as csv_file:
270
+ reader = csv.reader(csv_file)
271
+ headers = next(reader)
272
+
273
+ # Loop through all feature importance scores
274
+ for iterator in range(len(self.model.feature_importances_)):
275
+ if training_data_csv_location:
276
+ # Plus one to ignore ID field
277
+ output_text += "\n{}: {}".format(headers[iterator+1], self.model.feature_importances_[iterator])
278
+ else:
279
+ output_text += "\nFeature {}: {}".format(iterator,self.model.feature_importances_[iterator])
280
+
281
+ # If no name has been set write to outputs folder
282
+ if location_of_output_file:
283
+ file_name = location_of_output_file
284
+ else:
285
+ file_name = os.path.join(self._outputs_folder,"model-output-{}.txt".format(datetime.today().strftime('%Y-%m-%d-%H%M%S')))
286
+
287
+ # Write to file
288
+ with open(file_name, "w") as output_file:
289
+ output_file.write(output_text)
290
+
291
+ def train_model(self, features_file, force_new_dataset=True, model_location=None):
292
+ """
293
+ Trains the model on the provided data unless a model file already exists; a new dataset and model are always created when force_new_dataset is True.
294
+ :param features_file: the location of the feature file to be used to train the model
295
+ :param force_new_dataset: If True a new dataset will be created and new model created even if a model already exists.
296
+ :param model_location: the location to save the model file to
297
+ """
298
+
299
+ # Sets model location based on default folder location and placeholder name if none was given
300
+ if model_location is None:
301
+ model_location = os.path.join(self._model_folder, "predictor.model")
302
+
303
+ # if told to force the creation of a new dataset to train off or the model location does not exist then make a new model
304
+ if force_new_dataset or not os.path.isfile(model_location):
305
+
306
+ # Import train_test_split function
307
+ feature_data = self.get_features_as_df(features_file, force_new_dataset)
308
+
309
+ # Removes index column
310
+ if "ID" in feature_data.keys():
311
+ feature_data.drop(feature_data.columns[0], axis=1, inplace=True)
312
+ feature_data.reset_index(drop=True, inplace=True)
313
+
314
+ y = feature_data[['is_extremist']] # Labels
315
+ X = feature_data.drop(axis=1, labels=['is_extremist']) # Features
316
+
317
+ # Split dataset into training set and test set
318
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # 80% training and 20% test
319
+
320
+ # Create a random forest classifier
321
+ random_forest = RandomForestClassifier(n_estimators=100, max_depth=50, oob_score=True
322
+ ) # class_weight={0:1,1:5} # A higher weight for the minority class (is_extremist)
323
+
324
+ # Train the model using the training set
325
+ random_forest.fit(X_train, y_train.values.ravel())
326
+
327
+ y_pred = random_forest.predict(X_test)
328
+
329
+ # Model Accuracy, how often is the classifier correct?
330
+ self.accuracy = metrics.accuracy_score(y_test, y_pred)
331
+ self.recall = metrics.recall_score(y_test, y_pred)
332
+ self.precision = metrics.precision_score(y_test, y_pred)
333
+ self.f_measure = metrics.f1_score(y_test, y_pred)
334
+
335
+ Logger.logger.print_message("Accuracy: {}".format(self.accuracy), logging_level=1)
336
+ Logger.logger.print_message("Recall: {}".format(self.recall), logging_level=1)
337
+ Logger.logger.print_message("Precision: {}".format(self.precision), logging_level=1)
338
+ Logger.logger.print_message("F-Measure: {}".format(self.f_measure), logging_level=1)
339
+
340
+ self.model = random_forest
341
+ self.original_name = model_location
342
+ self.creation_date = datetime.today().strftime('%Y-%m-%d')
343
+
344
+ # write the model and its accuracy metrics to file
345
+ model_data = {"model": self.model,
346
+ "original_name": self.original_name,
347
+ "creation_date": self.creation_date,
348
+ "accuracy": self.accuracy,
349
+ "recall": self.recall,
350
+ "precision": self.precision,
351
+ "f1": self.f_measure,
352
+ "version": self._FRAMEWORK_VERSION
353
+ }
354
+
355
+ pickle.dump(model_data, open(model_location, "wb"))
356
+
357
+ else:
358
+ # Read model and accuracy from file
359
+ saved_file = pickle.load(open(model_location, "rb"))
360
+
361
+ self.accuracy = saved_file["accuracy"]
362
+ self.recall = saved_file["recall"]
363
+ self.precision = saved_file["precision"]
364
+ self.f_measure = saved_file["f1"]
365
+ self.model = saved_file["model"]
366
+ self.model_version = saved_file["version"]
367
+ self.original_name = saved_file["original_name"]
368
+ self.creation_date = saved_file["creation_date"]
369
+
370
+ # A check to identify if the loaded model is of the same version as the tooling
371
+ if self.model_version != self._FRAMEWORK_VERSION: # '!=' rather than 'is not': this is a value, not an identity, comparison
372
+ Logger.logger.print_message("Model provided is of version {}, tooling is of "
373
+ "version {}. Using the model may not work as expected."
374
+ .format(self.model_version, self._FRAMEWORK_VERSION))
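Since train_model pickles a metadata dictionary rather than the bare classifier, downstream code has to unpack it before predicting. A minimal sketch of consuming a saved model file (not part of this commit; the path is illustrative and feature_rows is a placeholder for a 2D array of feature values):

import pickle

# Load the dictionary written by train_model; the keys mirror model_data above.
with open("outputs/far-right-radical-language.model", "rb") as model_file:
    model_data = pickle.load(model_file)

print("Accuracy: {}, F1: {}".format(model_data["accuracy"], model_data["f1"]))
classifier = model_data["model"]  # the trained sklearn RandomForestClassifier
# predictions = classifier.predict(feature_rows)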
Pinpoint/Sanitizer.py ADDED
@@ -0,0 +1,131 @@
1
+ import os.path
+ import re # used by remove_non_alpha below but missing from the original imports
+ import uuid # used to name the sanitized cache file but missing from the original imports
2
+
3
+ from nltk import *
4
+ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
5
+
6
+ from Pinpoint.Logger import *
7
+
8
+ # If NLTK data doesn't exist, downloads it
9
+ try:
10
+ tagged = pos_tag(["test"])
11
+ except LookupError:
12
+ download()
13
+
14
+
15
+ # nltk.download() #todo how to get this to run once?
16
+
17
+ class sanitization():
18
+ """
19
+ This class is used to sanitize a given corpus of data: removing stop words, stemming words, removing short
20
+ words, removing non-alphabetic characters, and lower-casing words. To save on repeat runs, a local copy of the
21
+ sanitized corpus is saved and reused unless this behaviour is overridden.
22
+ """
23
+
24
+ def sanitize(self, text, output_folder, force_new_data_and_dont_persisit=False):
25
+ """
26
+ Entry function for sanitizing text
27
+ :param text:
28
+ :param force_new_data_and_dont_persisit:
29
+ :return: sanitized text
30
+ """
31
+ sanitize_file_name = os.path.join(output_folder, "{}-sanitized_text.txt".format(uuid.uuid4()))
32
+ final_text = ""
33
+
34
+ # If a file exists don't sanitize given text
35
+ if os.path.isfile(sanitize_file_name) and not force_new_data_and_dont_persisit:
36
+ logger.print_message("Sanitized file exists. Using data")
37
+
38
+ with open(sanitize_file_name, 'r', encoding="utf8") as file_to_write:
39
+ final_text = file_to_write.read()
40
+
41
+ else:
42
+ total_words = len(text.split(" "))
43
+ number = 0
44
+ logger.print_message("Starting sanitization... {} words to go".format(total_words))
45
+ for word in text.split(" "):
46
+ number = number + 1
47
+ word = self.remove_non_alpha(word)
48
+ word = self.lower(word)
49
+ word = self.stemmer(word)
50
+ word = self.remove_stop_words(word)
51
+ word = self.remove_small_words(word)
52
+
53
+ if word is None:
54
+ continue
55
+
56
+ final_text = final_text + word + " "
57
+ logger.print_message("Completed {} of {} sanitized words".format(number, total_words))
58
+
59
+ final_text = final_text.replace("  ", " ") # collapse double spaces
60
+
61
+ if not force_new_data_and_dont_persisit:
62
+ with open(sanitize_file_name, 'w', encoding="utf8") as file_to_write:
63
+ file_to_write.write(final_text)
64
+
65
+ final_text = final_text.strip()
66
+ return final_text
67
+
68
+ def stemmer(self, word):
69
+ """
70
+ Gets the stem of a word
71
+ :param word:
72
+ :return: the stemmed word, using the Porter stemmer
73
+ """
74
+
75
+ porter = PorterStemmer()
76
+
77
+ # todo: should another stemmer be assessed?
78
+ # lancaster = LancasterStemmer()
79
+ # stemmed_word = lancaster.stem(word)
80
+ stemmed_word = porter.stem(word)
81
+
82
+ return stemmed_word
83
+
84
+ def lower(self, word):
85
+ """
86
+ Gets the lower case representation of a word
87
+ :param word:
88
+ :return: the lowercase representation of the word
89
+ """
90
+ return word.lower()
91
+
92
+ def remove_stop_words(self, text):
93
+ """
94
+ Remove stop words
95
+ :param text:
96
+ :return: the text without stop words
97
+ """
98
+
99
+ text_without_stopwords = [word for word in text.split() if word not in ENGLISH_STOP_WORDS]
100
+
101
+ final_string = ""
102
+
103
+ for word in text_without_stopwords:
104
+ final_string = final_string + word + " "
105
+
106
+ return final_string
107
+
108
+ def remove_non_alpha(self, word):
109
+ """
110
+ Removes non-alphabetic characters (excluding spaces)
111
+ :param word:
112
+ :return: the word with non-alpha characters removed
113
+ """
114
+ word = word.replace("\n", " ").replace("\t", " ").replace("  ", " ")
115
+ regex = re.compile('[^a-zA-Z ]')
116
+
117
+ return regex.sub('', word)
118
+
119
+ def remove_small_words(self, word, length_to_remove_if_not_equal=4):
120
+ """
121
+ Removes words that are too short; by default, words of 3 characters or fewer are removed.
122
+ :param word:
123
+ :param length_to_remove_if_not_equal:
124
+ :return: "" if the word is below the length threshold (default 4), otherwise the word
125
+ """
126
+
127
+ new_word = ""
128
+ if len(word) >= length_to_remove_if_not_equal:
129
+ new_word = word
130
+
131
+ return new_word
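A short usage sketch for the sanitization class above (not part of this commit). Passing force_new_data_and_dont_persisit=True skips the cached-file lookup and avoids writing a cache file; the output folder name is illustrative:

from Pinpoint.Sanitizer import sanitization

sanitizer = sanitization()
clean = sanitizer.sanitize("The quick brown foxes were RUNNING!", "outputs",
                           force_new_data_and_dont_persisit=True)
# Stop words, non-alphabetic characters, and words under 4 characters are stripped,
# and the remaining words are lower-cased and stemmed.
print(clean)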
Pinpoint/Serializer.py ADDED
@@ -0,0 +1,20 @@
1
+ # todo This file should be used to store common serialisations used across data aggregation
2
+
3
+ def createPostDict(date, post_text, likes, comments, shares, source="self"):
4
+ '''
5
+ Creates a dictionary containing the pertinent information from a social media post. This should later be added to a list
6
+ of other posts from that account and then added to a master dictionary.
7
+ :param date:
8
+ :param post_text:
9
+ :param likes:
10
+ :param comments:
11
+ :param shares:
12
+ :param source:
13
+ :return: a dictionary containing pertinent post information
14
+ '''
15
+ return {"text": post_text, "likes": likes, "comments": comments, "shares": shares, "source": source, "date": date}
16
+
17
+
18
+ def createWholeUserDict(unique_id, reddit_list, instagram_list, twitter_list, survey_data):
19
+ return {"id": unique_id, "reddit": reddit_list, "instagram": instagram_list, "twitter": twitter_list,
20
+ "survey": survey_data}
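An illustrative call sequence for the two helpers above (all values are placeholders, not part of this commit):

from Pinpoint.Serializer import createPostDict, createWholeUserDict

post = createPostDict(date="2022-01-01", post_text="example post", likes=3,
                      comments=1, shares=0)
user = createWholeUserDict(unique_id="user-123", reddit_list=[], instagram_list=[],
                           twitter_list=[post], survey_data={})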
Pinpoint/__pycache__/Aggregator_NGram.cpython-310.pyc ADDED
Binary file (3.13 kB). View file
Pinpoint/__pycache__/Aggregator_NGram.cpython-36.pyc ADDED
Binary file (3.09 kB). View file
Pinpoint/__pycache__/Aggregator_NGram.cpython-38.pyc ADDED
Binary file (3.08 kB). View file
Pinpoint/__pycache__/Aggregator_TfIdf.cpython-310.pyc ADDED
Binary file (1.73 kB). View file
Pinpoint/__pycache__/Aggregator_TfIdf.cpython-36.pyc ADDED
Binary file (1.7 kB). View file
Pinpoint/__pycache__/Aggregator_TfIdf.cpython-38.pyc ADDED
Binary file (1.69 kB). View file
Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-310.pyc ADDED
Binary file (1.05 kB). View file
Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-36.pyc ADDED
Binary file (1.03 kB). View file
Pinpoint/__pycache__/Aggregator_Word2Vec.cpython-38.pyc ADDED
Binary file (1.02 kB). View file
Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-310.pyc ADDED
Binary file (1.86 kB). View file
Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-36.pyc ADDED
Binary file (1.83 kB). View file
Pinpoint/__pycache__/Aggregator_WordingChoice.cpython-38.pyc ADDED
Binary file (1.81 kB). View file
Pinpoint/__pycache__/FeatureExtraction.cpython-310.pyc ADDED
Binary file (19.7 kB). View file
Pinpoint/__pycache__/FeatureExtraction.cpython-36.pyc ADDED
Binary file (19.5 kB). View file
Pinpoint/__pycache__/FeatureExtraction.cpython-38.pyc ADDED
Binary file (19.4 kB). View file
Pinpoint/__pycache__/Grapher.cpython-310.pyc ADDED
Binary file (2.17 kB). View file
Pinpoint/__pycache__/Grapher.cpython-36.pyc ADDED
Binary file (2.13 kB). View file
Pinpoint/__pycache__/Grapher.cpython-38.pyc ADDED
Binary file (2.14 kB). View file
Pinpoint/__pycache__/Logger.cpython-310.pyc ADDED
Binary file (1.07 kB). View file
Pinpoint/__pycache__/Logger.cpython-36.pyc ADDED
Binary file (1.05 kB). View file
Pinpoint/__pycache__/Logger.cpython-38.pyc ADDED
Binary file (1.04 kB). View file
Pinpoint/__pycache__/RandomForest.cpython-310.pyc ADDED
Binary file (8.12 kB). View file
Pinpoint/__pycache__/RandomForest.cpython-36.pyc ADDED
Binary file (7.97 kB). View file
Pinpoint/__pycache__/RandomForest.cpython-38.pyc ADDED
Binary file (7.98 kB). View file
Pinpoint/__pycache__/Sanitizer.cpython-310.pyc ADDED
Binary file (3.99 kB). View file
Pinpoint/__pycache__/Sanitizer.cpython-36.pyc ADDED
Binary file (3.91 kB). View file
Pinpoint/__pycache__/Sanitizer.cpython-38.pyc ADDED
Binary file (3.92 kB). View file
Pinpoint/__pycache__/predictor.cpython-38.pyc ADDED
Binary file (2.39 kB). View file
Pinpoint/far-right-core.py ADDED
@@ -0,0 +1,65 @@
1
+ """
2
+ Example of training a model using this package.
3
+ """
4
+
5
+ from Pinpoint.FeatureExtraction import *
6
+ from Pinpoint.RandomForest import *
7
+
8
+ # Performs feature extraction from the provided Extremist, Counterpoise, and Baseline datasets.
9
+ extractor = feature_extraction(violent_words_dataset_location=r"datasets/swears",
10
+ baseline_training_dataset_location=r"datasets/far-right/LIWC2015 Results (Storm_Front_Posts).csv")
11
+
12
+ extractor.MAX_RECORD_SIZE = 50000
13
+
14
+ extractor.dump_training_data_features(
15
+ feature_file_path_to_save_to=r"outputs/training_features.json",
16
+ extremist_data_location=r"datasets/far-right/LIWC2015 Results (extreamist-messages.csv).csv",
17
+ baseline_data_location=r"datasets/far-right/LIWC2015 Results (non-extreamist-messages.csv).csv")
18
+
19
+ # Trains a model off the features file created in the previous stage
20
+ model = random_forest()
21
+
22
+ model.RADICAL_LANGUAGE_ENABLED = True
23
+ model.BEHAVIOURAL_FEATURES_ENABLED = True
24
+ model.PSYCHOLOGICAL_SIGNALS_ENABLED = True
25
+
26
+ model.train_model(features_file= r"outputs/training_features.json",
27
+ force_new_dataset=True, model_location=r"outputs/far-right-radical-language.model") # , model_location=r"Pinpoint/model/my.model"
28
+
29
+ model.create_model_info_output_file(location_of_output_file="outputs/far-right-radical-language-output.txt",
30
+ training_data_csv_location=r"outputs/training_features.json.csv")
31
+
32
+ #############################################################################################
33
+ model.RADICAL_LANGUAGE_ENABLED = False
34
+ model.BEHAVIOURAL_FEATURES_ENABLED = True
35
+ model.PSYCHOLOGICAL_SIGNALS_ENABLED = False
36
+
37
+ model.train_model(features_file= r"outputs/training_features.json",
38
+ force_new_dataset=True, model_location=r"outputs/far-right-behavioural.model") # , model_location=r"Pinpoint/model/my.model"
39
+
40
+ model.create_model_info_output_file(location_of_output_file="outputs/far-right-behavioural-output.txt",
41
+ training_data_csv_location=r"outputs/training_features.json.csv")
42
+
43
+ ############################################################################
44
+ model.RADICAL_LANGUAGE_ENABLED = False
45
+ model.BEHAVIOURAL_FEATURES_ENABLED = False
46
+ model.PSYCHOLOGICAL_SIGNALS_ENABLED = True
47
+
48
+ model.train_model(features_file= r"outputs/training_features.json",
49
+ force_new_dataset=True, model_location=r"outputs/far-right-psychological.model") # , model_location=r"Pinpoint/model/my.model"
50
+
51
+ model.create_model_info_output_file(location_of_output_file="outputs/far-right-psychological-output.txt",
52
+ training_data_csv_location=r"outputs/training_features.json.csv")
53
+
54
+ ##############################################################################################
55
+ model.RADICAL_LANGUAGE_ENABLED = True
56
+ model.BEHAVIOURAL_FEATURES_ENABLED = False
57
+ model.PSYCHOLOGICAL_SIGNALS_ENABLED = False
58
+
59
+ model.train_model(features_file= r"outputs/training_features.json",
60
+ force_new_dataset=True, model_location=r"outputs/far-right-baseline.model") # , model_location=r"Pinpoint/model/my.model"
61
+
62
+ model.create_model_info_output_file(location_of_output_file="outputs/far-right-baseline-output.txt",
63
+ training_data_csv_location=r"outputs/training_features.json.csv")
64
+
65
+ print("Finished")
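The four train/report passes above differ only in which of the three feature flags are enabled. An equivalent loop-based sketch (same flags, paths, and calls as above) keeps the configurations in one place:

configs = [
    ("radical-language", True,  True,  True),
    ("behavioural",      False, True,  False),
    ("psychological",    False, False, True),
    ("baseline",         True,  False, False),
]

for name, radical, behavioural, psychological in configs:
    model.RADICAL_LANGUAGE_ENABLED = radical
    model.BEHAVIOURAL_FEATURES_ENABLED = behavioural
    model.PSYCHOLOGICAL_SIGNALS_ENABLED = psychological
    model.train_model(features_file=r"outputs/training_features.json",
                      force_new_dataset=True,
                      model_location=r"outputs/far-right-{}.model".format(name))
    model.create_model_info_output_file(
        location_of_output_file="outputs/far-right-{}-output.txt".format(name),
        training_data_csv_location=r"outputs/training_features.json.csv")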
app.py ADDED
@@ -0,0 +1,356 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ import json
4
+ import os
5
+ import re
6
+ import time
7
+ from random import random
8
+ import socket
9
+
10
+ from threading import Thread
11
+ from time import sleep
12
+
13
+ test_html = '''
14
+ <!-- Header -->
15
+ <header class="w3-display-container w3-content w3-wide" style="max-width:1500px;" id="home">
16
+ <img class="w3-image" src="https://cdn.pixabay.com/photo/2018/12/10/16/22/city-3867295_960_720.png" alt="Architecture" width="1500" height="800">
17
+ <div class="w3-display-middle w3-margin-top w3-center">
18
+ <h1 class="w3-xxlarge w3-text-white"><span class="w3-padding w3-black w3-opacity-min"><b>WATCH</b></span> <span class="w3-hide-small w3-text-dark-grey">Tower</span></h1>
19
+ </div>
20
+ </header>
21
+
22
+ <!-- Container (About Section) -->
23
+ <div class="w3-content w3-container w3-padding-64" id="about">
24
+ <h3 class="w3-center">Block Violent Content Before It Reaches Your Feed</h3>
25
+ <p class="w3-center"><em>WatchTower identifies, blocks, and filters out violent and radical content before it reaches your Twitter feed.
26
+ </em></p>
27
+ <br>
28
+ <p>WatchTower works to protect you from violent content, misinformation, hate speech, and other malicious communication by using a suite of machine learning models to identify user accounts that commonly post such content. WatchTower is broken down into two components: the first utilises the Twitter streaming API and applies a suite of machine learning models to identify users that commonly post malicious information, while the second provides a web UI where users can authenticate with Twitter and tailor the types and thresholds for the accounts they block.</p>
29
+ <br>
30
+ <p> WatchTower was developed solely by James Stevenson and primarily uses Pinpoint, a machine learning model also developed by James. The future roadmap sees WatchTower incorporate other models for identifying content such as misinformation and hate speech. More on Pinpoint and the model WatchTower uses to identify violent extremism can be seen below.</p>
31
+
32
+ <p class="w3-large w3-center w3-padding-16">Model Accuracy:</p>
33
+ <p class="w3-center"><em>Machine learning models can be validated based on several statistics. These statistics for Pinpoint, the main ML model used by WatchTower, can be seen below.</em></p>
34
+ <br>
35
+ <p class="w3-wide"><i class="fa fa-camera"></i>Accuracy</p>
36
+ <div class="w3-light-grey">
37
+ <div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:73%">73%</div>
38
+ </div>
39
+ <p class="w3-wide"><i class="fa fa-laptop"></i>Recall</p>
40
+ <div class="w3-light-grey">
41
+ <div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:62%">62%</div>
42
+ </div>
43
+ <p class="w3-wide"><i class="fa fa-photo"></i>Precision</p>
44
+ <div class="w3-light-grey">
45
+ <div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:78%">78%</div>
46
+ </div>
47
+ <p class="w3-wide"><i class="fa fa-photo"></i>F-Measure</p>
48
+ <div class="w3-light-grey">
49
+ <div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:69%">69%</div>
50
+ </div>
51
+ </div>
52
+
53
+ <div class="w3-row w3-center w3-dark-grey w3-padding-16">
54
+ <div class="w3-quarter w3-section">
55
+ <span class="w3-xlarge">14+</span><br>
56
+ Partners
57
+ </div>
58
+ <div class="w3-quarter w3-section">
59
+ <span class="w3-xlarge">55+</span><br>
60
+ Projects Done
61
+ </div>
62
+ <div class="w3-quarter w3-section">
63
+ <span class="w3-xlarge">89+</span><br>
64
+ Happy Clients
65
+ </div>
66
+ <div class="w3-quarter w3-section">
67
+ <span class="w3-xlarge">150+</span><br>
68
+ Meetings
69
+ </div>
70
+ </div>
71
+ <br>
72
+ <!-- Container (Portfolio Section) -->
73
+ <div class="w3-content w3-container w3-padding-64" id="portfolio">
74
+ <h3 class="w3-center">Chirp Development Challenge 2022</h3>
75
+ <p class="w3-center"><em>WatchTower was developed for the Chirp 2022 Twitter API Developer Challenge</em></p>
76
+ </div><p> Watchtower was developed solely by James Stevenson for the Chirp 2022 Twitter API Developer Challenge. More information on this can be found below.</p>
77
+ <br>
78
+ <img class="w3-image" src="https://cdn.cms-twdigitalassets.com/content/dam/developer-twitter/redesign-2021-images/blog2022/chirp/Chirp-Hero-Banner.jpg.twimg.1920.jpg" alt="Architecture" width="1500" height="800">
79
+ <br>
80
+ <!-- Modal for full size images on click-->
81
+ <div id="modal01" class="w3-modal w3-black" onclick="this.style.display='none'">
82
+ <span class="w3-button w3-large w3-black w3-display-topright" title="Close Modal Image"><i class="fa fa-remove"></i></span>
83
+ <div class="w3-modal-content w3-animate-zoom w3-center w3-transparent w3-padding-64">
84
+ <img id="img01" class="w3-image">
85
+ <p id="caption" class="w3-opacity w3-large"></p>
86
+ </div>
87
+ </div>
88
+
89
+ <script>
90
+ // Modal Image Gallery
91
+ function onClick(element) {
92
+ document.getElementById("img01").src = element.src;
93
+ document.getElementById("modal01").style.display = "block";
94
+ var captionText = document.getElementById("caption");
95
+ captionText.innerHTML = element.alt;
96
+ }
97
+
98
+ // Change style of navbar on scroll
99
+ window.onscroll = function() {myFunction()};
100
+ function myFunction() {
101
+ var navbar = document.getElementById("myNavbar");
102
+ if (document.body.scrollTop > 100 || document.documentElement.scrollTop > 100) {
103
+ navbar.className = "w3-bar" + " w3-card" + " w3-animate-top" + " w3-white";
104
+ } else {
105
+ navbar.className = navbar.className.replace(" w3-card w3-animate-top w3-white", "");
106
+ }
107
+ }
108
+
109
+ // Used to toggle the menu on small screens when clicking on the menu button
110
+ function toggleFunction() {
111
+ var x = document.getElementById("navDemo");
112
+ if (x.className.indexOf("w3-show") == -1) {
113
+ x.className += " w3-show";
114
+ } else {
115
+ x.className = x.className.replace(" w3-show", "");
116
+ }
117
+ }
118
+ </script>
119
+
120
+ </body>
121
+ </html>
122
+
123
+
124
+
125
+ '''
126
+
127
+ import gradio as gr
128
+ import tweepy
129
+ from fastapi import FastAPI, Request
130
+
131
+ consumer_token = os.getenv('CONSUMER_TOKEN')
132
+ consumer_secret = os.getenv('CONSUMER_SECRET')
133
+ my_access_token = os.getenv('ACCESS_TOKEN')
134
+ my_access_secret = os.getenv('ACCESS_SECRET')
135
+ global_oauth1_user_handler = None
136
+ bearer = os.getenv('BEARER')
137
+
138
+ oauth1_user_handler = tweepy.OAuth1UserHandler(
139
+ consumer_token, consumer_secret,
140
+ callback="http://127.0.0.1:7860/"
141
+ )
142
+ target_website = oauth1_user_handler.get_authorization_url(signin_with_twitter=True)
143
+
144
+ block = gr.Blocks(css=".container { max-width: 800px; margin: auto; }")
145
+
146
+ chat_history = []
147
+
148
+ def get_client_from_tokens(oauth_verifier, oauth_token):
149
+ new_oauth1_user_handler = tweepy.OAuth1UserHandler(
150
+ consumer_token, consumer_secret,
151
+ callback="http://127.0.0.1:7860/"
152
+ )
153
+ new_oauth1_user_handler.request_token = {
154
+ "oauth_token": oauth_token,
155
+ "oauth_token_secret": consumer_secret
156
+ }
157
+
158
+ access_token, access_token_secret = new_oauth1_user_handler.get_access_token(
159
+ oauth_verifier
160
+ )
161
+
162
+ their_client = tweepy.Client(
163
+ bearer_token=bearer,
164
+ consumer_key=consumer_token,
165
+ consumer_secret=consumer_secret,
166
+ access_token=access_token,
167
+ access_token_secret=access_token_secret
168
+ )
169
+
170
+ return their_client
171
+
172
+ def get_oath_headers():
173
+ oauth_verifier = None
174
+ oauth_token = None
175
+ did_find = False
176
+ if hasattr(block, "server"):
177
+ for connection in block.server.server_state.connections:
178
+ # connection_app_id = connection.app.app.blocks.app_id
179
+ # if active_app_id == connection_app_id:
180
+ # print("Its a match")
181
+ if connection.headers != None:
182
+ for header in connection.headers:
183
+ header = header[1].decode()
184
+ if "oauth_verifier" in header:
185
+ oauth_verifier = re.search(r"oauth_verifier=(.+)", header).group(1)
186
+ oauth_token = re.search(r"oauth_token=(.+)&", header).group(1)
187
+ if oauth_token and oauth_verifier:
188
+ did_find = True
189
+ break
190
+ if did_find:
191
+ break
192
+ return oauth_verifier, oauth_token
193
+
194
+ def block_users(client, threshold, dataset):
195
+ num_users_blocked = 0
196
+
197
+ for filename in os.listdir("users"):
198
+ filename = os.path.join("users", filename)
199
+
200
+ user_file = open(filename, "r")
201
+ users = json.load(user_file)
202
+
203
+ for user in users:
204
+ if threshold >= user["threshold"]:
205
+
206
+ user = user["username"].strip()
207
+ user_id = client.get_user(username=user)
208
+
209
+ finished = False
210
+ while not finished:
211
+ try:
212
+ client.block(target_user_id=user_id.data.id)
213
+ except tweepy.errors.TooManyRequests as e:
214
+ print(e)
215
+ time.sleep(240)
216
+ continue
217
+ finished = True
218
+ me = client.get_me()
219
+ print("{} blocked {}".format(me.data["username"], user))
220
+ num_users_blocked = num_users_blocked + 1
221
+
222
+ return num_users_blocked
223
+
224
+ def has_oath_header():
225
+ headers = get_oath_headers()
226
+ if headers[0] == None:
227
+ return False
228
+ else:
229
+ return True
230
+
231
+ username_populated = False
232
+ def chat(radio_score = None, selected_option = None):
233
+ global client
234
+ history = []
235
+
236
+ # app id
237
+
238
+ if radio_score != None and selected_option != None:
239
+ response = "no blocking"
240
+ if client != None:
241
+ chat_history.append(["Model tuned to a '{}%' threshold and is using the '{}' dataset.".format(radio_score, selected_option),
242
+ "{} Account blocking initialised".format(selected_option.capitalize())])
243
+ num_users_blocked = block_users(client,radio_score,selected_option)
244
+ chat_history.append(["Blocked {} user account(s).".format(num_users_blocked), "Thank you for using Watchtower."])
245
+ elif radio_score != None or selected_option != None:
246
+ chat_history.append(["Initialisation error!","Please tune the model by using the above options"])
247
+
248
+ return chat_history
249
+
250
+ def infer(prompt):
251
+ pass
252
+
253
+ have_initialised = False
254
+ client = None
255
+ name = None
256
+
257
+ def changed_tab():
258
+ global have_initialised
259
+ global chatbot
260
+ global chat_history
261
+ global client
262
+ global name
263
+
264
+ name = "no username"
265
+
266
+ chat_history = [["Welcome to Watchtower.", "Log in via Twitter and configure your blocking options above."]]
267
+
268
+ if client != None and name != "no username":
269
+ chat_history = [["Welcome {}".format(name), "Initialising WatchTower"]]
270
+
271
+ print("changed tabs - {}".format(name))
272
+ chatbot.value = chat_history
273
+ chatbot.update(value=chat_history)
274
+ elif has_oath_header() and client==None:
275
+
276
+ tokens = get_oath_headers()
277
+ if tokens[0] and client==None:
278
+ client = get_client_from_tokens(tokens[0],tokens[1])
279
+ name = client.get_me().data.name
280
+ have_initialised = True
281
+ chat_history = [["Welcome {}".format(name), "Initialising WatchTower"]]
282
+
283
+ chatbot.value = chat_history
284
+ chatbot.update(value=chat_history)
285
+
286
+ elif not has_oath_header() and not have_initialised:
287
+ chatbot.value = chat_history
288
+ chatbot.update(value=chat_history)
289
+
290
+ with block:
291
+ gr.HTML('''
292
+
293
+ <meta name="viewport" content="width=device-width, initial-scale=1">
294
+ <link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
295
+
296
+ <!-- Navbar (sit on top) -->
297
+ <div class="w3-top">
298
+ <div class="w3-bar w3-white w3-wide w3-padding w3-card">
299
+ <p class="w3-bar-item w3-button"><b>WATCH</b> Tower</p>
300
+ </div>
301
+ </div>
302
+ ''')
303
+ gr.HTML("<center><p><br></p></center>")
304
+
305
+
306
+ #todo check if user signed in
307
+
308
+ user_message = "Log in via Twitter and configure your blocking options above."
309
+
310
+ chat_history.append(["Welcome to Watchtower.",user_message])
311
+ tabs = gr.Tabs()
312
+ with tabs:
313
+ intro_tab = gr.TabItem("Introduction")
314
+ with intro_tab:
315
+ gr.HTML(test_html)
316
+
317
+ prediction_tab = gr.TabItem("Getting Started")
318
+ with prediction_tab:
319
+ gr.HTML('''
320
+ <header class="w3-display-container w3-content w3-wide" style="max-height:250px;" id="home">
321
+ <img class="w3-image" src="https://cdn.pixabay.com/photo/2018/12/10/16/22/city-3867295_960_720.png" alt="Architecture" width="1500" height="800">
322
+ <div class="w3-display-middle w3-margin-top w3-center">
323
+ <h1 class="w3-xxlarge w3-text-white"><span class="w3-padding w3-black w3-opacity-min"><b>WATCH</b></span> <span class="w3-hide-small w3-text-dark-grey">Tower</span></h1>
324
+ </div>
325
+ </header>
326
+ ''')
327
+ with gr.Group():
328
+ with gr.Box():
329
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
330
+ gr.HTML(
331
+ value='<a href={}><img src="https://cdn.cms-twdigitalassets.com/content/dam/developer-twitter/auth-docs/sign-in-with-twitter-gray.png.twimg.1920.png" alt="Log In With Twitter"></a><br>'.format(
332
+ target_website))
333
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
334
+ radio = gr.CheckboxGroup(value="Violent", choices=["Violent", "Hate Speech", "Misinformation"],
335
+ interactive=False, label="Behaviour To Block")
336
+
337
+ slider = gr.Slider(value=80, label="Threshold Certainty Tolerance")
338
+
339
+ chatbot = gr.Chatbot(value=chat_history, label="Watchtower Output").style()
340
+ btn = gr.Button("Run WatchTower").style(full_width=True)
341
+ #radio.change(fn=chat, inputs=[radio], outputs=chatbot)
342
+ #slider.change(fn=chat, inputs=[slider], outputs=chatbot)
343
+ #text.submit(fn=chat, inputs=[text,text], outputs=chatbot)
344
+ btn.click(fn=chat, inputs=[slider,radio], outputs=chatbot)
345
+ tabs.change(fn=changed_tab, inputs=None, outputs=None)
346
+
347
+ gr.Markdown(
348
+ """___
349
+ <p style='text-align: center'>
350
+ Created by <a href="https://twitter.com/borisdayma" target="_blank">Boris Dayma</a> et al. 2021-2022
351
+ <br/>
352
+ <a href="https://github.com/borisdayma/dalle-mini" target="_blank">GitHub</a> | <a href="https://wandb.ai/dalle-mini/dalle-mini/reports/DALL-E-mini-Generate-images-from-any-text-prompt--VmlldzoyMDE4NDAy" target="_blank">Project Report</a>
353
+ </p>"""
354
+ )
355
+
356
+ block.launch(enable_queue=False)
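For reference, get_oath_headers and get_client_from_tokens together drive the standard tweepy OAuth1 flow, reimplemented above because Gradio does not expose the callback request directly. In outline (a sketch; the verifier value is a placeholder supplied by Twitter's redirect):

import tweepy

handler = tweepy.OAuth1UserHandler(consumer_token, consumer_secret,
                                   callback="http://127.0.0.1:7860/")
url = handler.get_authorization_url(signin_with_twitter=True)  # send the user here
# ...Twitter redirects back to the callback with oauth_token and oauth_verifier...
access_token, access_token_secret = handler.get_access_token("<oauth_verifier>")
client = tweepy.Client(consumer_key=consumer_token, consumer_secret=consumer_secret,
                       access_token=access_token, access_token_secret=access_token_secret)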
outputs/sanitized_text.txt ADDED
The diff for this file is too large to render. See raw diff
outputs/users.json ADDED
@@ -0,0 +1 @@
1
+ {"unknown": "aa60c20c4b0742069665b5c7d6bbff82"}
predictor.py ADDED
@@ -0,0 +1,78 @@
1
+ import csv
2
+ import time
3
+ import uuid
4
+ from pprint import pprint
5
+
6
+ import Pinpoint.FeatureExtraction
7
+ from Pinpoint.RandomForest import *
8
+
9
+ class predictor():
10
+
11
+ def __init__(self):
12
+ self.model = random_forest()
13
+ self.model.PSYCHOLOGICAL_SIGNALS_ENABLED = False # Needs LIWC markup
14
+ self.model.BEHAVIOURAL_FEATURES_ENABLED = False
15
+ self.model.train_model(features_file=None, force_new_dataset=False,
16
+ model_location=r"far-right-radical-language.model")
17
+ self.dict_of_users_all = {}
18
+ self.feature_extractor = Pinpoint.FeatureExtraction.feature_extraction(
19
+ violent_words_dataset_location="swears",
20
+ baseline_training_dataset_location="LIWC2015 Results (Storm_Front_Posts).csv")
21
+
22
+ def predict(self, string_to_predict = None, username = "unknown"):
23
+
24
+ if string_to_predict == None:
25
+ raise Exception("No prediction material given...")
26
+
27
+ extended_prediction_uuid = str(uuid.uuid1())+"-"+str(uuid.uuid1())
28
+ self.model.model_folder = "{}-output".format(extended_prediction_uuid)
29
+ self.feature_extractor.MESSAGE_TMP_CACHE_LOCATION = "{}-message-cache".format(extended_prediction_uuid)
30
+ print("Starting prediction for {}".format(extended_prediction_uuid))
31
+
32
+ if string_to_predict != None:
33
+ users_posts = [{"username": "{}".format(username), "timestamp": "tmp", "message": "{}".format(string_to_predict)}]
34
+
35
+ try:
36
+ os.remove("./{}-messages.json".format(extended_prediction_uuid))
37
+ except:
38
+ pass
39
+
40
+ with open('{}-all-messages.csv'.format(extended_prediction_uuid), 'w', encoding='utf8', newline='') as output_file:
41
+ writer = csv.DictWriter(output_file, fieldnames=["username", "timestamp", "message"])
42
+ for users_post in users_posts:
43
+ writer.writerow(users_post)
44
+
45
+ try:
46
+ self.feature_extractor._get_standard_tweets("{}-all-messages.csv".format(extended_prediction_uuid))
47
+ except FileNotFoundError:
48
+ return False
49
+
50
+ with open("./{}-messages.json".format(extended_prediction_uuid), 'w') as outfile:
51
+ features = self.feature_extractor.completed_tweet_user_features
52
+
53
+ json.dump(features, outfile, indent=4)
54
+
55
+ rows = self.model.get_features_as_df("./{}-messages.json".format(extended_prediction_uuid), True)
56
+ rows.pop("is_extremist")
57
+
58
+ try:
59
+ features = rows.loc[0]
60
+ is_extremist = self.model.model.predict([features])
61
+ except FileNotFoundError as e:
62
+ is_extremist = False
63
+ print("Message cache error, next - {}".format(e))
64
+
65
+ print("Ending prediction for {}".format(extended_prediction_uuid))
66
+
67
+ dir_name = "."
68
+ test = os.listdir(dir_name)
69
+
70
+ os.remove("{}-all-messages.csv".format(extended_prediction_uuid))
71
+ os.remove("{}-messages.json.csv".format(extended_prediction_uuid))
72
+ os.remove("{}-messages.json".format(extended_prediction_uuid))
73
+
74
+ if is_extremist == True:
75
+ return True
76
+ else:
77
+ return False
78
+
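A minimal usage sketch for the wrapper above (not part of this commit); the message and username are placeholders, and the model and dataset files referenced in __init__ must exist:

from predictor import predictor

p = predictor()
is_extremist = p.predict("an example message to classify", username="example_user")
print(is_extremist)  # True or False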
python-streamer.py ADDED
@@ -0,0 +1,173 @@
1
+ import gc
2
+ import json
3
+ import os
4
+ from datetime import date
5
+ from pathlib import Path
+ from igraph import Graph # used by the grapher class below but missing from the original imports
6
+
7
+ import unicodedata
8
+
9
+ consumer_token = os.getenv('CONSUMER_TOKEN')
10
+ consumer_secret = os.getenv('CONSUMER_SECRET')
11
+ my_access_token = os.getenv('ACCESS_TOKEN')
12
+ my_access_secret = os.getenv('ACCESS_SECRET')
13
+ bearer = os.getenv('BEARER')
14
+
15
+ import time
16
+ import tweepy
17
+ from googletrans import Translator
18
+
19
+ from predictor import predictor
20
+
21
+ class grapher():
22
+ """
23
+ A wrapper class used for generating a graph for interactions between users
24
+ """
25
+ graph = None
26
+
27
+ def __init__(self):
28
+ """
29
+ Constructor.
30
+ """
31
+ self.graph = Graph()
32
+
33
+ def add_edge_wrapper(self, node_1_name, node_2_name, weight=1, relationship=None):
34
+ """
35
+ A wrapper function used to add an edge connection or node.
36
+ :param node_1_name: from
37
+ :param node_2_name: to
38
+ :param weight:
39
+ :param relationship:
40
+ :return:
41
+ """
42
+
43
+ # get node one ID
44
+
45
+ node_1 = None
46
+ for node in self.graph.vs:
47
+ if node["label"] == node_1_name.capitalize():
48
+ node_1 = node
49
+
50
+ if node_1 == None:
51
+ self.graph.add_vertices(1)
52
+ node_count = self.graph.vcount()
53
+ self.graph.vs[node_count-1]["id"] = node_count-1
54
+ self.graph.vs[node_count-1]["label"] = node_1_name.capitalize()
55
+ node_1 = self.graph.vs[node_count-1]
56
+
57
+ # get node two id
58
+ node_2 = None
59
+ for node in self.graph.vs:
60
+ if node["label"] == node_2_name.capitalize():
61
+ node_2 = node
62
+
63
+ if node_2 == None:
64
+ self.graph.add_vertices(1)
65
+ node_count = self.graph.vcount()
66
+ self.graph.vs[node_count - 1]["id"] = node_count - 1
67
+ self.graph.vs[node_count - 1]["label"] = node_2_name.capitalize()
68
+ node_2 = self.graph.vs[node_count - 1]
69
+
70
+
71
+
72
+ #print("User one {} - {}, user two {} - {}".format(node_1["label"], str(node_1["id"]),
73
+ # node_2["label"], str(node_2["id"])))
74
+ self.graph.add_edges([(node_1["id"], node_2["id"])])
75
+ #self.graph.add_edge(node_1_name, node_2_name, weight=weight, relation=relationship) # , attr={""}
76
+
77
+ def add_node(self, node_name):
78
+ """
79
+ A wrapper function that adds a node with no edges to the graph
80
+ :param node_name:
81
+ """
82
+
83
+ node_1 = None
84
+ for node in self.graph.vs:
85
+ if node["label"] == node_name.capitalize():
86
+ node_1 = node["id"]
87
+
88
+ if node_1 == None:
89
+ self.graph.add_vertices(1)
90
+ node_count = self.graph.vcount()
91
+ self.graph.vs[node_count-1]["id"] = node_count-1
92
+ self.graph.vs[node_count-1]["label"] = node_name.capitalize()
93
+ node_1 = self.graph.vs[node_count-1]
94
+
95
+ global_oauth1_user_handler = None
96
+
97
+ auth = tweepy.OAuth1UserHandler(
98
+ consumer_token, consumer_secret,
99
+ my_access_token, my_access_secret
100
+ )
101
+ api = tweepy.API(auth)
102
+
103
+ client = tweepy.Client(
104
+ bearer_token= bearer,
105
+ consumer_key=consumer_token,
106
+ consumer_secret=consumer_secret,
107
+ access_token=my_access_token,
108
+ access_token_secret=my_access_secret
109
+ )
110
+
111
+
112
+
113
+
114
+ class IDPrinter(tweepy.StreamingClient):
115
+
116
+ def on_tweet(self, tweet):
117
+ self.translator = Translator()
118
+ gc.collect()
119
+ if len(tweet.data["text"]) > 100:
120
+ #tweet = client.get_tweet(id=tweet.id)
121
+ if tweet and tweet.data:
122
+
123
+ if tweet.data["author_id"]:
124
+ tweet_data = tweet.data["text"].strip().replace("@", "").replace("\n","")
125
+ if tweet_data is not None and tweet_data != "": # both checks apply to tweet_data; the original 'or tweet != ""' was always true
126
+ username = client.get_user(id=tweet.author_id).data
127
+ lang = self.translator.detect(tweet_data).lang
128
+
129
+ if lang == "en":
130
+ tweet_data = unicodedata.normalize('NFKD', tweet_data).encode('ascii', 'ignore').decode()
131
+ if tweet_data != None:
132
+ is_extremist = predictor().predict(tweet_data)
133
+ print("user {} post extremist {} - message: {}".format(username, is_extremist, str(tweet_data)))
134
+ if is_extremist != None and is_extremist == 1:
135
+ tweets = client.get_users_tweets(id=tweet.author_id, max_results=10)
136
+
137
+ number_extreme = 0
138
+ tweets = tweets[0]
139
+ for users_tweet in tweets:
140
+ if users_tweet.text != None:
141
+ is_extremist = predictor().predict(users_tweet.text)
142
+ if is_extremist != None:
143
+ if is_extremist == True:
144
+ number_extreme = number_extreme + 1
145
+
146
+ print(number_extreme)
147
+ threshold = number_extreme / len(tweets) * 100 # 'tweets' was rebound to the tweet list above, so index [0] would be a single tweet
148
+ print("Threshold {}".format(threshold))
149
+ if threshold > 1: #
150
+
151
+ file_name = os.path.join("users","{}-radical_users.txt".format(date.today().strftime("%b-%d-%Y")))
152
+ print("User {} was found to be extremist".format(username))
153
+ file_path = Path(file_name)
154
+ file_path.touch(exist_ok=True)
155
+
156
+ with open(file_name, 'a+') as outfile:
157
+ json_to_dump = [{"username":username.id,"threshold":threshold,"date":date.today().strftime("%b-%d-%Y")}]
158
+ json.dump(json_to_dump, outfile, indent=4)
159
+ print("Got user {}".format(username))
160
+
161
+ gc.collect()
162
+ # calling the api
163
+
164
+
165
+ while True:
166
+ try:
167
+ printer = IDPrinter(bearer_token=bearer,wait_on_rate_limit =True,chunk_size=10000)
168
+ printer.add_rules(tweepy.StreamRule(value="en",tag="lang",id="lang-rule"))
169
+ printer.sample(expansions=["author_id", "geo.place_id"],threaded=False)
170
+ print("-"*20)
171
+ gc.collect()
172
+ except Exception:
173
+ time.sleep(900) # wait 15 minutes after a stream error (e.g. rate limiting) before reconnecting
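The restart loop above swallows every error and always waits 15 minutes. A sketch of the same loop with the error surfaced and an incremental backoff (an alternative, not the committed behaviour):

backoff = 60
while True:
    try:
        printer = IDPrinter(bearer_token=bearer, wait_on_rate_limit=True, chunk_size=10000)
        printer.add_rules(tweepy.StreamRule(value="en", tag="lang", id="lang-rule"))
        printer.sample(expansions=["author_id", "geo.place_id"], threaded=False)
        backoff = 60  # reset after a clean run
    except Exception as e:
        print("Stream error: {} - sleeping for {}s".format(e, backoff))
        time.sleep(backoff)
        backoff = min(backoff * 2, 900)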
sign-in.png ADDED
swears/VIOLENT_TERRORIST_WORDS.txt ADDED
@@ -0,0 +1 @@
1
+ ["Alert","Aim","Automatic","Anguish","Agitator","Apartheid","Agency","Aircraft","Airplane","Acid","Airport","Aerial","Assassinate","Account","Arms","Assault","Ambush","Anarchy","Authority","Aggressor","Allies","Alarm","Ashore","Atrocity","Artillery","Airfield","Annihilate","Appeasement","Arsenal","Attrition","Aggression","Armory","Ammunition","Advance","Assassin","Armedforces","Alliance","Attack","Armament","Bloodletting","Bulletproof","Brutal","Betray","Betrayal","Blood(y)","Boobytrap","Bombardment","Battalion","Bullet","Brute","Burn","Brutality","Bully","Blowup","Bunker","Booby trap","Blast","Bomb","Breach","Belligerent","Battle","Bury","Bloody","Blood","Blindside","Burning","Barrage","Barricade","Battlefield","Break","Conspiracy","Clash","Conspire","Coordinate","Civilian","Cautionary","Chief","Coalition","Camouflage","Captive","Coordinates","Corps","Carrier","Control","Concentration","Carnage","Conquer","Clamor","Compassion","Compliance","Crash","Checkpoint","Clandestine","Chopper","Confrontation","Causes","Countermand","Conflict","Crime","Counterattack","Courageous","Chaos","Commandos","Casualties","Confrontation(al)","Cautious","Consequences","Consolidate","Convoy","Checking","Crisis","Confusion","Cataclysm","Careen","Command(or)","Combat","Charred","Collapse","Cross-hairs","Capture","Culpability","Corpse","Cargo","Cadaver","Charge","Concussion","Campaign","Conflagration","Deliberate","Devastation","Discipline","Disperse","Dispatch","Dead","Death","Defensive","Dominate","Drone","Detect","Danger","Detection","Deploy","Detonate","Destruction","Demolish","Demoralize","Damage","Defend","Deception","Drama","Disaster","Dictator","Despot","Disease","Device","Domination","Duck","Duty","Debris","Dash","Decline","Defiant","Dictatorship","Defect","Doom","Disastrous","Division","Die","Downfall","Dispute","Desert","Disruption","Disarray","Dissonance","Dread","Defense","Dismantle","Dangerous","Deadly","Destroy","Demoralization","Debacle","Disarmament","Enemy","Expunge","Evacuate","Escalate","Explosion","Execute","Excess","Extremism","Evacuee","Explosive","Execution","Epithet","Exploitation","Enforce","Exercise","Explode","Expectations","Encounter","Engagement","Escape","Escalation","Enforcement","Endurance","Force(s)","Faction","Force","Fierce","Flight","Fortification","Flank","Ferment","Frenzy","Feud","Front lines","Fray","Fear","Fearless","Felon","Fugitive","Fright","Forceful","Furtive","Fuel","Fighter","Fanatic","Fiery","Fearful","Forces","Flee","Fatal","Frontlines","Foxhole","Ferocious","Fight","Gas","Germ warfare","Grenade","Guided bombs","Grave","Gang up 
on","Garrison","Guard","Generator","Germwarfare","Groans","Gunship","Government","Gang","Genocide","Grievous","Guerrillas","Guidedbombs","Guns","Hazard","Harass","Heroic","Hide","Hostility","Horses","Horror","Horrific","Harsh","Hit","Hiding","Helicopter","Heroism","Hijack","Hostile","Hijacker","Hatred","Hit-and-run","Howitzer","Hurt","Hatch","Holocaust","Hammering","Hate","Involvement","International","Interdiction","Infanticide","Ire","Invasion","Incident","Interrogation","Ignite","Instructions","Intimidate","Insurrection","Inflame","Inferred","Intense","Incontrovertible","Impact","Informant","Investigate","Intelligence","Improvise","Incite","Intercept","Infantry","Investigations","Infiltrate","Injuries","Inmate","Intervene","Insurgent","Jail","Join","Jets","Jeer","Knock-out","Keening","Knife","Kamikaze","Kidnap","Knives","Keen","Kill","Killing","Lamentation","Legacy","Liaison","Loathsome","Loyalty","Landmines","Laser-activated","Liberation","Linksto","Launcher","Liberators","Launch","Method","Militaristic","Mobile","Militant","Massacre","Menace","Malicious","Military","Momentum","Mines","Militancy","Maim","Militia","Mob","Mobilization","Machines","Mortars","Machineguns","March","Megalomania","Mission","Mayhem","Muscle","Murder","Missile","Mistreatment","Malevolent","Munitions","Maraud","Notorious","Nationalist","Negotiation","Nightmare","Nitrate","Neutralize","Overthrow","Onerous","Out of control","Operation","Officials","Offensive","Order","Overrun","Opposition","Outbreak","Planes","Prisoner","Pilot","Prowl","Post-traumatic","Pugnacious","Partisan","Premeditate","Prey","Patriotism","Plunder","Paramedics","Platoon","Potent","Powder","Power","Pacify","Persecute","Penetration","Pound","Provocation","Pistol","Performance","Patriot","Proliferation","Penetrate","Pushing","Pulverize","Preemptive","Petrify","Prison","Perform","Position","Photos","Patrol","Powerful","Quarrel","Quail","Quiver","Quell","Rally","Refugee","Revenge","Radical","Reputation","Retreat","Ravish","Revolution","Retribution","Radiation","Relentless","Rift","Rule","Resistance","Rounds","Recovery","Rebellion","Reparation","Retaliation","Reaction","Readiness","Recruitment","Reconnaissance","Regiment","Rot","Recruit","Reinforcements","Reprisal","Rival","Ricochet","Ravage","Rocket","Ruthless","Rescue","Rage","Rebel","Rifle","Riot","Regime","Shot","Strategy","Smash","Survival","Survivor","Showdown","Supplies","Sacrifice","Stronghold","Surrender","Storage","Salvage","Sanction","Strength","Surprise","Security","Seize","Secrecy","Seizure","Strife","Siege","Sensor","Secret","Stash","Scramble","Storm","Shock","Shells","Sedition","Skirmish","Strip","Suppression","Strangle","Special-ops","Shoot","Smuggle","Slaughter","Score","Sabotage","Spokesman","Soldier","Savage","Superstition","Suffering","Squad","Strategist","Specialized","Stalk","Struggle","Straggler","Subversive","Support","Stealth","Spysatellite","Strategic","Shelling","Spy","Screening","Strike","Setback","Spotter","Scare","Spy 
satellite","Submarine","Tsunami","Tactics","Triumph","Training","Tragic","Trauma","Torch","Terrorism","Threat","Terrorize","Thug","Torpedo","Tension","Turbulent","Tornado","Trigger","Trench","Tank","Terror","Topple","Tourniquet","Target","Terrain","Thwart","Treachery","Transportation","Trample","Trap","Terrorist","Threaten","Uprising","Urgency","Unruly","Unite","Unleash","Unify","Unit","Unexpected","Unbelievable","Uniform","Unconventional","Vociferous","Virulence","Violence","Vulnerability","Vow","Venomous","Victory","Vanguard","Vehicular","Vital","Vicious","Violation","Vanish","Veteran","Vehicle","Void","Vile","Vitriol","Vagrant","Vilify","Vendetta","Watchful","Warnings","Weather","Watchlist","Wince","Warplane","Watchdog","Weapon","Well-trained","Worldwide","Wreckage","Wage","Wound","Warrior","Wounds","Whiz","Warrant","Warheads","War","Wisdom","X-ray","Yearn","Yelling","Zigzag","Zeal","Zealot","Zone","pedophile","child molester","demonic","scumbag","fucking","demon-god","daemon"]
swears/bad_Words_list.txt ADDED
@@ -0,0 +1,547 @@
1
+ buttmuch
2
+ snatch
3
+ titfuck
4
+ motherfucker
5
+ s.o.b.
6
+ knob end
7
+ clitty litter
8
+ nobhead
9
+ fags
10
+ booobs
11
+ cum
12
+ ejaculation
13
+ fook
14
+ damn
15
+ piss
16
+ motherfuckin
17
+ fingerfucked
18
+ fingerfuckers
19
+ beef curtain
20
+ xrated
21
+ a55
22
+ fatass
23
+ fcuking
24
+ pricks
25
+ nob
26
+ mothafucka
27
+ blowjobs
28
+ shitings
29
+ t1tt1e5
30
+ b!tch
31
+ pimpis
32
+ wtf
33
+ boner
34
+ gangbang
35
+ numbnuts
36
+ need the dick
37
+ testicle
38
+ 50 yard cunt punt
39
+ booooooobs
40
+ shittings
41
+ fist fuck
42
+ cuntlick
43
+ ass-fucker
44
+ muthafuckker
45
+ sh1t
46
+ fistfucker
47
+ goddamn
48
+ porn
49
+ bang (one's) box
50
+ pisses
51
+ cop some wood
52
+ dinks
53
+ master-bate
54
+ son-of-a-bitch
55
+ pussies
56
+ f u c k e r
57
+ bum
58
+ cum dumpster
59
+ cunts
60
+ niggers
61
+ carpetmuncher
62
+ coksucka
63
+ cyberfuck
64
+ fuckme
65
+ masterb8
66
+ nigga
67
+ fucks
68
+ fuckhead
69
+ fag
70
+ mof0
71
+ birdlock
72
+ clit licker
73
+ niggaz
74
+ fuckwhit
75
+ shitey
76
+ m0fo
77
+ fukwit
78
+ fanyy
79
+ autoerotic
80
+ cocksucking
81
+ mothafucker
82
+ lusting
83
+ vagina
84
+ tits
85
+ ejaculates
86
+ arsehole
87
+ cocksuka
88
+ fux0r
89
+ cunt
90
+ facial
91
+ w00se
92
+ phuking
93
+ pussy fart
94
+ cumshot
95
+ jiz
96
+ nobjokey
97
+ bellend
98
+ motherfuckings
99
+ scroat
100
+ assfucker
101
+ heshe
102
+ rectum
103
+ knob
104
+ phukking
105
+ knobhead
106
+ fcuk
107
+ queaf
108
+ fucka
109
+ donkeyribber
110
+ nazi
111
+ sadism
112
+ cum freak
113
+ lust
114
+ mafugly
115
+ kondum
116
+ amateur
117
+ carpet muncher
118
+ nigg4h
119
+ tw4t
120
+ asses
121
+ mothafuckings
122
+ kums
123
+ shite
124
+ duche
125
+ cockmunch
126
+ anilingus
127
+ shitted
128
+ shitty
129
+ masterbations
130
+ dink
131
+ cummer
132
+ jism
133
+ bastard
134
+ fuckheads
135
+ shagger
136
+ coon
137
+ feck
138
+ scrotum
139
+ cyberfucked
140
+ kawk
141
+ v1gra
142
+ muthafecker
143
+ fudge packer
144
+ twat
145
+ a_s_s
146
+ how to kill
147
+ kwif
148
+ jack-off
149
+ fagots
150
+ kinky jesus
151
+ horniest
152
+ jerk-off
153
+ mo-fo
154
+ phuk
155
+ pissin
156
+ god damn
157
+ fukkin
158
+ cock pocket
159
+ schlong
160
+ ejaculatings
161
+ nutsack
162
+ bitch tit
163
+ cocks
164
+ c0cksucker
165
+ cuntlicker
166
+ 4r5e
167
+ dick
168
+ jap
169
+ cyberfucker
170
+ cock snot
171
+ cyalis
172
+ knobend
173
+ cox
174
+ fuck yo mama
175
+ gangbangs
176
+ crap
177
+ mother fucker
178
+ retard
179
+ hell
180
+ whoar
181
+ gang-bang
182
+ cunilingus
183
+ slut bucket
184
+ muther
185
+ fukker
186
+ d1ck
187
+ dick shy
188
+ fellate
189
+ fuk
190
+ shitfuck
191
+ phukked
192
+ clits
193
+ fooker
194
+ ham flap
195
+ p0rn
196
+ a2m
197
+ fuck hole
198
+ jizz
199
+ pissers
200
+ fuck puppet
201
+ orgasms
202
+ titties
203
+ cornhole
204
+ bugger
205
+ sh!t
206
+ bollock
207
+ wanky
208
+ nobjocky
209
+ twunt
210
+ cum guzzler
211
+ cl1t
212
+ felching
213
+ dlck
214
+ bunny fucker
215
+ spunk
216
+ fukwhit
217
+ tittywank
218
+ hoer
219
+ masterbat3
220
+ bitching
221
+ nigger
222
+ shaggin
223
+ god-dam
224
+ sluts
225
+ arse
226
+ biatch
227
+ fellatio
228
+ boiolas
229
+ mutha
230
+ fanny
231
+ ar5e
232
+ nob jokey
233
+ hoare
234
+ dyke
235
+ tittyfuck
236
+ buttplug
237
+ doggin
238
+ twunter
239
+ niggah
240
+ motherfucked
241
+ masterbation
242
+ fucker
243
+ mothafucking
244
+ skank
245
+ pissoff
246
+ sandbar
247
+ flange
248
+ dildos
249
+ choade
250
+ pawn
251
+ buceta
252
+ cocksucker
253
+ ass
254
+ dick hole
255
+ fingerfucks
256
+ wank
257
+ butt
258
+ bitcher
259
+ cockface
260
+ shi+
261
+ m0f0
262
+ pissing
263
+ motherfucking
264
+ bestiality
265
+ pissed
266
+ slut
267
+ blumpkin
268
+ shemale
269
+ niggas
270
+ asshole
271
+ xxx
272
+ mothafuck
273
+ mothafuckin
274
+ teez
275
+ fecker
276
+ lmfao
277
+ fistfuckers
278
+ clit
279
+ c0ck
280
+ shitter
281
+ fingerfucker
282
+ fuckwit
283
+ boobs
284
+ bestial
285
+ adult
286
+ masturbate
287
+ gaylord
288
+ b1tch
289
+ mothafuckers
290
+ sh!+
291
+ cokmuncher
292
+ tittiefucker
293
+ pigfucker
294
+ cockhead
295
+ vulva
296
+ shitfull
297
+ turd
298
+ shag
299
+ dog-fucker
300
+ fucktoy
301
+ kunilingus
302
+ l3itch
303
+ fuckingshitmotherfucker
304
+ f u c k
305
+ mothafucked
306
+ bi+ch
307
+ fuckings
308
+ blow job
309
+ willies
310
+ god
311
+ bitches
312
+ phuck
313
+ cuntlicking
314
+ knobead
315
+ jizm
316
+ penis
317
+ shit
318
+ bareback
319
+ breasts
320
+ balls
321
+ fingerfuck
322
+ erotic
323
+ motherfuckers
324
+ mutherfucker
325
+ phonesex
326
+ screwing
327
+ assmucus
328
+ bangbros
329
+ cocksucks
330
+ chink
331
+ ejakulate
332
+ gassy ass
333
+ tosser
334
+ fucking
335
+ m45terbate
336
+ horny
337
+ assholes
338
+ fuckmeat
339
+ fux
340
+ hardcoresex
341
+ pussy
342
+ anus
343
+ mothafucks
344
+ dickhead
345
+ t1tties
346
+ cunillingus
347
+ cuntbag
348
+ bitchers
349
+ boooobs
350
+ pube
351
+ hoar
352
+ n1gger
353
+ phuks
354
+ pecker
355
+ hotsex
356
+ cum chugger
357
+ scrote
358
+ rimjaw
359
+ pisser
360
+ homo
361
+ fagot
362
+ goatse
363
+ phuq
364
+ tit wank
365
+ testical
366
+ busty
367
+ blow me
368
+ bitchin
369
+ how to murdep
370
+ ma5terb8
371
+ 5hit
372
+ cocksukka
373
+ tittie5
374
+ faggs
375
+ eat hair pie
376
+ fuker
377
+ blowjob
378
+ b17ch
379
+ cok
380
+ shagging
381
+ doggie style
382
+ prick
383
+ goddamned
384
+ labia
385
+ eat a dick
386
+ kummer
387
+ pusse
388
+ fucked
389
+ smegma
390
+ anal leakage
391
+ cocksucked
392
+ teets
393
+ penisfucker
394
+ cawk
395
+ knobjokey
396
+ l3i+ch
397
+ arrse
398
+ jerk
399
+ beastial
400
+ muff
401
+ pussi
402
+ cums
403
+ shitters
404
+ knobed
405
+ v14gra
406
+ cunt-struck
407
+ fingerfucking
408
+ anal impaler
409
+ len
410
+ blue waffle
411
+ kumming
412
+ doosh
413
+ fagging
414
+ fuck-bitch
415
+ pussys
416
+ fuck-ass
417
+ f4nny
418
+ cyberfucking
419
+ shitting
420
+ faggot
421
+ hore
422
+ cumming
423
+ assfukka
424
+ asswhole
425
+ fannyflaps
426
+ orgasim
427
+ fuck
428
+ n1gga
429
+ pornography
430
+ shits
431
+ poop
432
+ masochist
433
+ ejaculate
434
+ s hit
435
+ ass fuck
436
+ cyberfuc
437
+ motherfucks
438
+ cock
439
+ dirsa
440
+ whore
441
+ willy
442
+ dirty sanchez
443
+ god-damned
444
+ cunnilingus
445
+ fistfucked
446
+ mofo
447
+ clitoris
448
+ dildo
449
+ twathead
450
+ sex
451
+ homoerotic
452
+ cyberfuckers
453
+ sausage queen
454
+ titt
455
+ boob
456
+ cipa
457
+ tit
458
+ queer
459
+ kock
460
+ mothafuckas
461
+ mothafuckaz
462
+ gaysex
463
+ motherfuck
464
+ beastiality
465
+ ma5terbate
466
+ clusterfuck
467
+ muff puff
468
+ kum
469
+ dogging
470
+ cut rope
471
+ smut
472
+ b00bs
473
+ ballsack
474
+ chota bags
475
+ 5h1t
476
+ bloody
477
+ slope
478
+ masterbate
479
+ fistfuckings
480
+ semen
481
+ cnut
482
+ wang
483
+ cockmuncher
484
+ masterbat*
485
+ lmao
486
+ bust a load
487
+ fuckers
488
+ cuntsicle
489
+ fistfuck
490
+ fuck trophy
491
+ pornos
492
+ sadist
493
+ bollok
494
+ cocksuck
495
+ flog the log
496
+ fistfucks
497
+ ejaculated
498
+ f_u_c_k
499
+ porno
500
+ kondums
501
+ booooobs
502
+ fannyfucker
503
+ phuked
504
+ fuckin
505
+ shithead
506
+ fcuker
507
+ motherfuckka
508
+ pron
509
+ s_h_i_t
510
+ knobjocky
511
+ shiting
512
+ ejaculating
513
+ cock-sucker
514
+ cunt hair
515
+ viagra
516
+ bimbos
517
+ shit fucker
518
+ ballbag
519
+ assmunch
520
+ shited
521
+ doggiestyle
522
+ wanker
523
+ orgasims
524
+ twatty
525
+ titwank
526
+ omg
527
+ butt fuck
528
+ fudgepacker
529
+ nut butter
530
+ shitdick
531
+ pissflaps
532
+ fistfucking
533
+ blow mud
534
+ rimming
535
+ orgasm
536
+ corp whore
537
+ faggitt
538
+ cumdump
539
+ butthole
540
+ jackoff
541
+ nigg3r
542
+ spac
543
+ fuks
544
+ pussy palace
545
+ gangbanged
546
+ anal
547
+ bitch
swears/badwords.txt ADDED
@@ -0,0 +1,451 @@
+ 4r5e
+ 5h1t
+ 5hit
+ a55
+ anal
+ anus
+ ar5e
+ arrse
+ arse
+ ass
+ ass-fucker
+ asses
+ assfucker
+ assfukka
+ asshole
+ assholes
+ asswhole
+ a_s_s
+ b!tch
+ b00bs
+ b17ch
+ b1tch
+ ballbag
+ balls
+ ballsack
+ bastard
+ beastial
+ beastiality
+ bellend
+ bestial
+ bestiality
+ bi+ch
+ biatch
+ bitch
+ bitcher
+ bitchers
+ bitches
+ bitchin
+ bitching
+ bloody
+ blow job
+ blowjob
+ blowjobs
+ boiolas
+ bollock
+ bollok
+ boner
+ boob
+ boobs
+ booobs
+ boooobs
+ booooobs
+ booooooobs
+ breasts
+ buceta
+ bugger
+ bum
+ bunny fucker
+ butt
+ butthole
+ buttmuch
+ buttplug
+ c0ck
+ c0cksucker
+ carpet muncher
+ cawk
+ chink
+ cipa
+ cl1t
+ clit
+ clitoris
+ clits
+ cnut
+ cock
+ cock-sucker
+ cockface
+ cockhead
+ cockmunch
+ cockmuncher
+ cocks
+ cocksuck
+ cocksucked
+ cocksucker
+ cocksucking
+ cocksucks
+ cocksuka
+ cocksukka
+ cok
+ cokmuncher
+ coksucka
+ coon
+ cox
+ crap
+ cum
+ cummer
+ cumming
+ cums
+ cumshot
+ cunilingus
+ cunillingus
+ cunnilingus
+ cunt
+ cuntlick
+ cuntlicker
+ cuntlicking
+ cunts
+ cyalis
+ cyberfuc
+ cyberfuck
+ cyberfucked
+ cyberfucker
+ cyberfuckers
+ cyberfucking
+ d1ck
+ damn
+ dick
+ dickhead
+ dildo
+ dildos
+ dink
+ dinks
+ dirsa
+ dlck
+ dog-fucker
+ doggin
+ dogging
+ donkeyribber
+ doosh
+ duche
+ dyke
+ ejaculate
+ ejaculated
+ ejaculates
+ ejaculating
+ ejaculatings
+ ejaculation
+ ejakulate
+ f u c k
+ f u c k e r
+ f4nny
+ fag
+ fagging
+ faggitt
+ faggot
+ faggs
+ fagot
+ fagots
+ fags
+ fanny
+ fannyflaps
+ fannyfucker
+ fanyy
+ fatass
+ fcuk
+ fcuker
+ fcuking
+ feck
+ fecker
+ felching
+ fellate
+ fellatio
+ fingerfuck
+ fingerfucked
+ fingerfucker
+ fingerfuckers
+ fingerfucking
+ fingerfucks
+ fistfuck
+ fistfucked
+ fistfucker
+ fistfuckers
+ fistfucking
+ fistfuckings
+ fistfucks
+ flange
+ fook
+ fooker
+ fuck
+ fucka
+ fucked
+ fucker
+ fuckers
+ fuckhead
+ fuckheads
+ fuckin
+ fucking
+ fuckings
+ fuckingshitmotherfucker
+ fuckme
+ fucks
+ fuckwhit
+ fuckwit
+ fudge packer
+ fudgepacker
+ fuk
+ fuker
+ fukker
+ fukkin
+ fuks
+ fukwhit
+ fukwit
+ fux
+ fux0r
+ f_u_c_k
+ gangbang
+ gangbanged
+ gangbangs
+ gaylord
+ gaysex
+ goatse
+ God
+ god-dam
+ god-damned
+ goddamn
+ goddamned
+ hardcoresex
+ hell
+ heshe
+ hoar
+ hoare
+ hoer
+ homo
+ hore
+ horniest
+ horny
+ hotsex
+ jack-off
+ jackoff
+ jap
+ jerk-off
+ jism
+ jiz
+ jizm
+ jizz
+ kawk
+ knob
+ knobead
+ knobed
+ knobend
+ knobhead
+ knobjocky
+ knobjokey
+ kock
+ kondum
+ kondums
+ kum
+ kummer
+ kumming
+ kums
+ kunilingus
+ l3i+ch
+ l3itch
+ labia
+ lmfao
+ lust
+ lusting
+ m0f0
+ m0fo
+ m45terbate
+ ma5terb8
+ ma5terbate
+ masochist
+ master-bate
+ masterb8
+ masterbat*
+ masterbat3
+ masterbate
+ masterbation
+ masterbations
+ masturbate
+ mo-fo
+ mof0
+ mofo
+ mothafuck
+ mothafucka
+ mothafuckas
+ mothafuckaz
+ mothafucked
+ mothafucker
+ mothafuckers
+ mothafuckin
+ mothafucking
+ mothafuckings
+ mothafucks
+ mother fucker
+ motherfuck
+ motherfucked
+ motherfucker
+ motherfuckers
+ motherfuckin
+ motherfucking
+ motherfuckings
+ motherfuckka
+ motherfucks
+ muff
+ mutha
+ muthafecker
+ muthafuckker
+ muther
+ mutherfucker
+ n1gga
+ n1gger
+ nazi
+ nigg3r
+ nigg4h
+ nigga
+ niggah
+ niggas
+ niggaz
+ nigger
+ niggers
+ nob
+ nob jokey
+ nobhead
+ nobjocky
+ nobjokey
+ numbnuts
+ nutsack
+ orgasim
+ orgasims
+ orgasm
+ orgasms
+ p0rn
+ pawn
+ pecker
+ penis
+ penisfucker
+ phonesex
+ phuck
+ phuk
+ phuked
+ phuking
+ phukked
+ phukking
+ phuks
+ phuq
+ pigfucker
+ pimpis
+ piss
+ pissed
+ pisser
+ pissers
+ pisses
+ pissflaps
+ pissin
+ pissing
+ pissoff
+ poop
+ porn
+ porno
+ pornography
+ pornos
+ prick
+ pricks
+ pron
+ pube
+ pusse
+ pussi
+ pussies
+ pussy
+ pussys
+ rectum
+ retard
+ rimjaw
+ rimming
+ s hit
+ s.o.b.
+ sadist
+ schlong
+ screwing
+ scroat
+ scrote
+ scrotum
+ semen
+ sex
+ sh!+
+ sh!t
+ sh1t
+ shag
+ shagger
+ shaggin
+ shagging
+ shemale
+ shi+
+ shit
+ shitdick
+ shite
+ shited
+ shitey
+ shitfuck
+ shitfull
+ shithead
+ shiting
+ shitings
+ shits
+ shitted
+ shitter
+ shitters
+ shitting
+ shittings
+ shitty
+ skank
+ slut
+ sluts
+ smegma
+ smut
+ snatch
+ son-of-a-bitch
+ spac
+ spunk
+ s_h_i_t
+ t1tt1e5
+ t1tties
+ teets
+ teez
+ testical
+ testicle
+ tit
+ titfuck
+ tits
+ titt
+ tittie5
+ tittiefucker
+ titties
+ tittyfuck
+ tittywank
+ titwank
+ tosser
+ turd
+ tw4t
+ twat
+ twathead
+ twatty
+ twunt
+ twunter
+ v14gra
+ v1gra
+ vagina
+ viagra
+ vulva
+ w00se
+ wang
+ wank
+ wanker
+ wanky
+ whoar
+ whore
+ willies
+ willy
+ xrated
+ xxx
swears/cmu-bad-words.txt ADDED
@@ -0,0 +1,1383 @@
+ abbo
+ abo
+ abortion
+ abuse
+ addict
+ addicts
+ adult
+ africa
+ african
+ alla
+ allah
+ alligatorbait
+ amateur
+ american
+ anal
+ analannie
+ analsex
+ angie
+ angry
+ anus
+ arab
+ arabs
+ areola
+ argie
+ aroused
+ arse
+ arsehole
+ asian
+ ass
+ assassin
+ assassinate
+ assassination
+ assault
+ assbagger
+ assblaster
+ assclown
+ asscowboy
+ asses
+ assfuck
+ assfucker
+ asshat
+ asshole
+ assholes
+ asshore
+ assjockey
+ asskiss
+ asskisser
+ assklown
+ asslick
+ asslicker
+ asslover
+ assman
+ assmonkey
+ assmunch
+ assmuncher
+ asspacker
+ asspirate
+ asspuppies
+ assranger
+ asswhore
+ asswipe
+ athletesfoot
+ attack
+ australian
+ babe
+ babies
+ backdoor
+ backdoorman
+ backseat
+ badfuck
+ balllicker
+ balls
+ ballsack
+ banging
+ baptist
+ barelylegal
+ barf
+ barface
+ barfface
+ bast
+ bastard
+ bazongas
+ bazooms
+ beaner
+ beast
+ beastality
+ beastial
+ beastiality
+ beatoff
+ beat-off
+ beatyourmeat
+ beaver
+ bestial
+ bestiality
+ bi
+ biatch
+ bible
+ bicurious
+ bigass
+ bigbastard
+ bigbutt
+ bigger
+ bisexual
+ bi-sexual
+ bitch
+ bitcher
+ bitches
+ bitchez
+ bitchin
+ bitching
+ bitchslap
+ bitchy
+ biteme
+ black
+ blackman
+ blackout
+ blacks
+ blind
+ blow
+ blowjob
+ boang
+ bogan
+ bohunk
+ bollick
+ bollock
+ bomb
+ bombers
+ bombing
+ bombs
+ bomd
+ bondage
+ boner
+ bong
+ boob
+ boobies
+ boobs
+ booby
+ boody
+ boom
+ boong
+ boonga
+ boonie
+ booty
+ bootycall
+ bountybar
+ bra
+ brea5t
+ breast
+ breastjob
+ breastlover
+ breastman
+ brothel
+ bugger
+ buggered
+ buggery
+ bullcrap
+ bulldike
+ bulldyke
+ bullshit
+ bumblefuck
+ bumfuck
+ bunga
+ bunghole
+ buried
+ burn
+ butchbabes
+ butchdike
+ butchdyke
+ butt
+ buttbang
+ butt-bang
+ buttface
+ buttfuck
+ butt-fuck
+ buttfucker
+ butt-fucker
+ buttfuckers
+ butt-fuckers
+ butthead
+ buttman
+ buttmunch
+ buttmuncher
+ buttpirate
+ buttplug
+ buttstain
+ byatch
+ cacker
+ cameljockey
+ cameltoe
+ canadian
+ cancer
+ carpetmuncher
+ carruth
+ catholic
+ catholics
+ cemetery
+ chav
+ cherrypopper
+ chickslick
+ children's
+ chin
+ chinaman
+ chinamen
+ chinese
+ chink
+ chinky
+ choad
+ chode
+ christ
+ christian
+ church
+ cigarette
+ cigs
+ clamdigger
+ clamdiver
+ clit
+ clitoris
+ clogwog
+ cocaine
+ cock
+ cockblock
+ cockblocker
+ cockcowboy
+ cockfight
+ cockhead
+ cockknob
+ cocklicker
+ cocklover
+ cocknob
+ cockqueen
+ cockrider
+ cocksman
+ cocksmith
+ cocksmoker
+ cocksucer
+ cocksuck
+ cocksucked
+ cocksucker
+ cocksucking
+ cocktail
+ cocktease
+ cocky
+ cohee
+ coitus
+ color
+ colored
+ coloured
+ commie
+ communist
+ condom
+ conservative
+ conspiracy
+ coolie
+ cooly
+ coon
+ coondog
+ copulate
+ cornhole
+ corruption
+ cra5h
+ crabs
+ crack
+ crackpipe
+ crackwhore
+ crack-whore
+ crap
+ crapola
+ crapper
+ crappy
+ crash
+ creamy
+ crime
+ crimes
+ criminal
+ criminals
+ crotch
+ crotchjockey
+ crotchmonkey
+ crotchrot
+ cum
+ cumbubble
+ cumfest
+ cumjockey
+ cumm
+ cummer
+ cumming
+ cumquat
+ cumqueen
+ cumshot
+ cunilingus
+ cunillingus
+ cunn
+ cunnilingus
+ cunntt
+ cunt
+ cunteyed
+ cuntfuck
+ cuntfucker
+ cuntlick
+ cuntlicker
+ cuntlicking
+ cuntsucker
+ cybersex
+ cyberslimer
+ dago
+ dahmer
+ dammit
+ damn
+ damnation
+ damnit
+ darkie
+ darky
+ datnigga
+ dead
+ deapthroat
+ death
+ deepthroat
+ defecate
+ dego
+ demon
+ deposit
+ desire
+ destroy
+ deth
+ devil
+ devilworshipper
+ dick
+ dickbrain
+ dickforbrains
+ dickhead
+ dickless
+ dicklick
+ dicklicker
+ dickman
+ dickwad
+ dickweed
+ diddle
+ die
+ died
+ dies
+ dike
+ dildo
+ dingleberry
+ dink
+ dipshit
+ dipstick
+ dirty
+ disease
+ diseases
+ disturbed
+ dive
+ dix
+ dixiedike
+ dixiedyke
+ doggiestyle
+ doggystyle
+ dong
+ doodoo
+ doo-doo
+ doom
+ dope
+ dragqueen
+ dragqween
+ dripdick
+ drug
+ drunk
+ drunken
+ dumb
+ dumbass
+ dumbbitch
+ dumbfuck
+ dyefly
+ dyke
+ easyslut
+ eatballs
+ eatme
+ eatpussy
+ ecstacy
+ ejaculate
+ ejaculated
+ ejaculating
+ ejaculation
+ enema
+ enemy
+ erect
+ erection
+ ero
+ escort
+ ethiopian
+ ethnic
+ european
+ evl
+ excrement
+ execute
+ executed
+ execution
+ executioner
+ explosion
+ facefucker
+ faeces
+ fag
+ fagging
+ faggot
+ fagot
+ failed
+ failure
+ fairies
+ fairy
+ faith
+ fannyfucker
+ fart
+ farted
+ farting
+ farty
+ fastfuck
+ fat
+ fatah
+ fatass
+ fatfuck
+ fatfucker
+ fatso
+ fckcum
+ fear
+ feces
+ felatio
+ felch
+ felcher
+ felching
+ fellatio
+ feltch
+ feltcher
+ feltching
+ fetish
+ fight
+ filipina
+ filipino
+ fingerfood
+ fingerfuck
+ fingerfucked
+ fingerfucker
+ fingerfuckers
+ fingerfucking
+ fire
+ firing
+ fister
+ fistfuck
+ fistfucked
+ fistfucker
+ fistfucking
+ fisting
+ flange
+ flasher
+ flatulence
+ floo
+ flydie
+ flydye
+ fok
+ fondle
+ footaction
+ footfuck
+ footfucker
+ footlicker
+ footstar
+ fore
+ foreskin
+ forni
+ fornicate
+ foursome
+ fourtwenty
+ fraud
+ freakfuck
+ freakyfucker
+ freefuck
+ fu
+ fubar
+ fuc
+ fucck
+ fuck
+ fucka
+ fuckable
+ fuckbag
+ fuckbuddy
+ fucked
+ fuckedup
+ fucker
+ fuckers
+ fuckface
+ fuckfest
+ fuckfreak
+ fuckfriend
+ fuckhead
+ fuckher
+ fuckin
+ fuckina
+ fucking
+ fuckingbitch
+ fuckinnuts
+ fuckinright
+ fuckit
+ fuckknob
+ fuckme
+ fuckmehard
+ fuckmonkey
+ fuckoff
+ fuckpig
+ fucks
+ fucktard
+ fuckwhore
+ fuckyou
+ fudgepacker
+ fugly
+ fuk
+ fuks
+ funeral
+ funfuck
+ fungus
+ fuuck
+ gangbang
+ gangbanged
+ gangbanger
+ gangsta
+ gatorbait
+ gay
+ gaymuthafuckinwhore
+ gaysex
+ geez
+ geezer
+ geni
+ genital
+ german
+ getiton
+ gin
+ ginzo
+ gipp
+ girls
+ givehead
+ glazeddonut
+ gob
+ god
+ godammit
+ goddamit
+ goddammit
+ goddamn
+ goddamned
+ goddamnes
+ goddamnit
+ goddamnmuthafucker
+ goldenshower
+ gonorrehea
+ gonzagas
+ gook
+ gotohell
+ goy
+ goyim
+ greaseball
+ gringo
+ groe
+ gross
+ grostulation
+ gubba
+ gummer
+ gun
+ gyp
+ gypo
+ gypp
+ gyppie
+ gyppo
+ gyppy
+ hamas
+ handjob
+ hapa
+ harder
+ hardon
+ harem
+ headfuck
+ headlights
+ hebe
+ heeb
+ hell
+ henhouse
+ heroin
+ herpes
+ heterosexual
+ hijack
+ hijacker
+ hijacking
+ hillbillies
+ hindoo
+ hiscock
+ hitler
+ hitlerism
+ hitlerist
+ hiv
+ ho
+ hobo
+ hodgie
+ hoes
+ hole
+ holestuffer
+ homicide
+ homo
+ homobangers
+ homosexual
+ honger
+ honk
+ honkers
+ honkey
+ honky
+ hook
+ hooker
+ hookers
+ hooters
+ hore
+ hork
+ horn
+ horney
+ horniest
+ horny
+ horseshit
+ hosejob
+ hoser
+ hostage
+ hotdamn
+ hotpussy
+ hottotrot
+ hummer
+ husky
+ hussy
+ hustler
+ hymen
+ hymie
+ iblowu
+ idiot
+ ikey
+ illegal
+ incest
+ insest
+ intercourse
+ interracial
+ intheass
+ inthebuff
+ israel
+ israeli
+ israel's
+ italiano
+ itch
+ jackass
+ jackoff
+ jackshit
+ jacktheripper
+ jade
+ jap
+ japanese
+ japcrap
+ jebus
+ jeez
+ jerkoff
+ jesus
+ jesuschrist
+ jew
+ jewish
+ jiga
+ jigaboo
+ jigg
+ jigga
+ jiggabo
+ jigger
+ jiggy
+ jihad
+ jijjiboo
+ jimfish
+ jism
+ jiz
+ jizim
+ jizjuice
+ jizm
+ jizz
+ jizzim
+ jizzum
+ joint
+ juggalo
+ jugs
+ junglebunny
+ kaffer
+ kaffir
+ kaffre
+ kafir
+ kanake
+ kid
+ kigger
+ kike
+ kill
+ killed
+ killer
+ killing
+ kills
+ kink
+ kinky
+ kissass
+ kkk
+ knife
+ knockers
+ kock
+ kondum
+ koon
+ kotex
+ krap
+ krappy
+ kraut
+ kum
+ kumbubble
+ kumbullbe
+ kummer
+ kumming
+ kumquat
+ kums
+ kunilingus
+ kunnilingus
+ kunt
+ ky
+ kyke
+ lactate
+ laid
+ lapdance
+ latin
+ lesbain
+ lesbayn
+ lesbian
+ lesbin
+ lesbo
+ lez
+ lezbe
+ lezbefriends
+ lezbo
+ lezz
+ lezzo
+ liberal
+ libido
+ licker
+ lickme
+ lies
+ limey
+ limpdick
+ limy
+ lingerie
+ liquor
+ livesex
+ loadedgun
+ lolita
+ looser
+ loser
+ lotion
+ lovebone
+ lovegoo
+ lovegun
+ lovejuice
+ lovemuscle
+ lovepistol
+ loverocket
+ lowlife
+ lsd
+ lubejob
+ lucifer
+ luckycammeltoe
+ lugan
+ lynch
+ macaca
+ mad
+ mafia
+ magicwand
+ mams
+ manhater
+ manpaste
+ marijuana
+ mastabate
+ mastabater
+ masterbate
+ masterblaster
+ mastrabator
+ masturbate
+ masturbating
+ mattressprincess
+ meatbeatter
+ meatrack
+ meth
+ mexican
+ mgger
+ mggor
+ mickeyfinn
+ mideast
+ milf
+ minority
+ mockey
+ mockie
+ mocky
+ mofo
+ moky
+ moles
+ molest
+ molestation
+ molester
+ molestor
+ moneyshot
+ mooncricket
+ mormon
+ moron
+ moslem
+ mosshead
+ mothafuck
+ mothafucka
+ mothafuckaz
+ mothafucked
+ mothafucker
+ mothafuckin
+ mothafucking
+ mothafuckings
+ motherfuck
+ motherfucked
+ motherfucker
+ motherfuckin
+ motherfucking
+ motherfuckings
+ motherlovebone
+ muff
+ muffdive
+ muffdiver
+ muffindiver
+ mufflikcer
+ mulatto
+ muncher
+ munt
+ murder
+ murderer
+ muslim
+ naked
+ narcotic
+ nasty
+ nastybitch
+ nastyho
+ nastyslut
+ nastywhore
+ nazi
+ necro
+ negro
+ negroes
+ negroid
+ negro's
+ nig
+ niger
+ nigerian
+ nigerians
+ nigg
+ nigga
+ niggah
+ niggaracci
+ niggard
+ niggarded
+ niggarding
+ niggardliness
+ niggardliness's
+ niggardly
+ niggards
+ niggard's
+ niggaz
+ nigger
+ niggerhead
+ niggerhole
+ niggers
+ nigger's
+ niggle
+ niggled
+ niggles
+ niggling
+ nigglings
+ niggor
+ niggur
+ niglet
+ nignog
+ nigr
+ nigra
+ nigre
+ nip
+ nipple
+ nipplering
+ nittit
+ nlgger
+ nlggor
+ nofuckingway
+ nook
+ nookey
+ nookie
+ noonan
+ nooner
+ nude
+ nudger
+ nuke
+ nutfucker
+ nymph
+ ontherag
+ oral
+ orga
+ orgasim
+ orgasm
+ orgies
+ orgy
+ osama
+ paki
+ palesimian
+ palestinian
+ pansies
+ pansy
+ panti
+ panties
+ payo
+ pearlnecklace
+ peck
+ pecker
+ peckerwood
+ pee
+ peehole
+ pee-pee
+ peepshow
+ peepshpw
+ pendy
+ penetration
+ peni5
+ penile
+ penis
+ penises
+ penthouse
+ period
+ perv
+ phonesex
+ phuk
+ phuked
+ phuking
+ phukked
+ phukking
+ phungky
+ phuq
+ pi55
+ picaninny
+ piccaninny
+ pickaninny
+ piker
+ pikey
+ piky
+ pimp
+ pimped
+ pimper
+ pimpjuic
+ pimpjuice
+ pimpsimp
+ pindick
+ piss
+ pissed
+ pisser
+ pisses
+ pisshead
+ pissin
+ pissing
+ pissoff
+ pistol
+ pixie
+ pixy
+ playboy
+ playgirl
+ pocha
+ pocho
+ pocketpool
+ pohm
+ polack
+ pom
+ pommie
+ pommy
+ poo
+ poon
+ poontang
+ poop
+ pooper
+ pooperscooper
+ pooping
+ poorwhitetrash
+ popimp
+ porchmonkey
+ porn
+ pornflick
+ pornking
+ porno
+ pornography
+ pornprincess
+ pot
+ poverty
+ premature
+ pric
+ prick
+ prickhead
+ primetime
+ propaganda
+ pros
+ prostitute
+ protestant
+ pu55i
+ pu55y
+ pube
+ pubic
+ pubiclice
+ pud
+ pudboy
+ pudd
+ puddboy
+ puke
+ puntang
+ purinapricness
+ puss
+ pussie
+ pussies
+ pussy
+ pussycat
+ pussyeater
+ pussyfucker
+ pussylicker
+ pussylips
+ pussylover
+ pussypounder
+ pusy
+ quashie
+ queef
+ queer
+ quickie
+ quim
+ ra8s
+ rabbi
+ racial
+ racist
+ radical
+ radicals
+ raghead
+ randy
+ rape
+ raped
+ raper
+ rapist
+ rearend
+ rearentry
+ rectum
+ redlight
+ redneck
+ reefer
+ reestie
+ refugee
+ reject
+ remains
+ rentafuck
+ republican
+ rere
+ retard
+ retarded
+ ribbed
+ rigger
+ rimjob
+ rimming
+ roach
+ robber
+ roundeye
+ rump
+ russki
+ russkie
+ sadis
+ sadom
+ samckdaddy
+ sandm
+ sandnigger
+ satan
+ scag
+ scallywag
+ scat
+ schlong
+ screw
+ screwyou
+ scrotum
+ scum
+ semen
+ seppo
+ servant
+ sex
+ sexed
+ sexfarm
+ sexhound
+ sexhouse
+ sexing
+ sexkitten
+ sexpot
+ sexslave
+ sextogo
+ sextoy
+ sextoys
+ sexual
+ sexually
+ sexwhore
+ sexy
+ sexymoma
+ sexy-slim
+ shag
+ shaggin
+ shagging
+ shat
+ shav
+ shawtypimp
+ sheeney
+ shhit
+ shinola
+ shit
+ shitcan
+ shitdick
+ shite
+ shiteater
+ shited
+ shitface
+ shitfaced
+ shitfit
+ shitforbrains
+ shitfuck
+ shitfucker
+ shitfull
+ shithapens
+ shithappens
+ shithead
+ shithouse
+ shiting
+ shitlist
+ shitola
+ shitoutofluck
+ shits
+ shitstain
+ shitted
+ shitter
+ shitting
+ shitty
+ shoot
+ shooting
+ shortfuck
+ showtime
+ sick
+ sissy
+ sixsixsix
+ sixtynine
+ sixtyniner
+ skank
+ skankbitch
+ skankfuck
+ skankwhore
+ skanky
+ skankybitch
+ skankywhore
+ skinflute
+ skum
+ skumbag
+ slant
+ slanteye
+ slapper
+ slaughter
+ slav
+ slave
+ slavedriver
+ sleezebag
+ sleezeball
+ slideitin
+ slime
+ slimeball
+ slimebucket
+ slopehead
+ slopey
+ slopy
+ slut
+ sluts
+ slutt
+ slutting
+ slutty
+ slutwear
+ slutwhore
+ smack
+ smackthemonkey
+ smut
+ snatch
+ snatchpatch
+ snigger
+ sniggered
+ sniggering
+ sniggers
+ snigger's
+ sniper
+ snot
+ snowback
+ snownigger
+ sob
+ sodom
+ sodomise
+ sodomite
+ sodomize
+ sodomy
+ sonofabitch
+ sonofbitch
+ sooty
+ sos
+ soviet
+ spaghettibender
+ spaghettinigger
+ spank
+ spankthemonkey
+ sperm
+ spermacide
+ spermbag
+ spermhearder
+ spermherder
+ spic
+ spick
+ spig
+ spigotty
+ spik
+ spit
+ spitter
+ splittail
+ spooge
+ spreadeagle
+ spunk
+ spunky
+ squaw
+ stagg
+ stiffy
+ strapon
+ stringer
+ stripclub
+ stroke
+ stroking
+ stupid
+ stupidfuck
+ stupidfucker
+ suck
+ suckdick
+ sucker
+ suckme
+ suckmyass
+ suckmydick
+ suckmytit
+ suckoff
+ suicide
+ swallow
+ swallower
+ swalow
+ swastika
+ sweetness
+ syphilis
+ taboo
+ taff
+ tampon
+ tang
+ tantra
+ tarbaby
+ tard
+ teat
+ terror
+ terrorist
+ teste
+ testicle
+ testicles
+ thicklips
+ thirdeye
+ thirdleg
+ threesome
+ threeway
+ timbernigger
+ tinkle
+ tit
+ titbitnipply
+ titfuck
+ titfucker
+ titfuckin
+ titjob
+ titlicker
+ titlover
+ tits
+ tittie
+ titties
+ titty
+ tnt
+ toilet
+ tongethruster
+ tongue
+ tonguethrust
+ tonguetramp
+ tortur
+ torture
+ tosser
+ towelhead
+ trailertrash
+ tramp
+ trannie
+ tranny
+ transexual
+ transsexual
+ transvestite
+ triplex
+ trisexual
+ trojan
+ trots
+ tuckahoe
+ tunneloflove
+ turd
+ turnon
+ twat
+ twink
+ twinkie
+ twobitwhore
+ uck
+ uk
+ unfuckable
+ upskirt
+ uptheass
+ upthebutt
+ urinary
+ urinate
+ urine
+ usama
+ uterus
+ vagina
+ vaginal
+ vatican
+ vibr
+ vibrater
+ vibrator
+ vietcong
+ violence
+ virgin
+ virginbreaker
+ vomit
+ vulva
+ wab
+ wank
+ wanker
+ wanking
+ waysted
+ weapon
+ weenie
+ weewee
+ welcher
+ welfare
+ wetb
+ wetback
+ wetspot
+ whacker
+ whash
+ whigger
+ whiskey
+ whiskeydick
+ whiskydick
+ whit
+ whitenigger
+ whites
+ whitetrash
+ whitey
+ whiz
+ whop
+ whore
+ whorefucker
+ whorehouse
+ wigger
+ willie
+ williewanker
+ willy
+ wn
+ wog
+ women's
+ wop
+ wtf
+ wuss
+ wuzzie
+ xtc
+ xxx
+ yankee
+ yellowman
+ zigabo
+ zipperhead