import csv
import json
import os
import time
from pprint import pprint

import Pinpoint_Internal.FeatureExtraction
from Pinpoint_Internal.RandomForest import *

# Scratch files exchanged with the feature extractor and the model wrapper.
# They are created relative to the current working directory.
_MESSAGES_CSV = "all-messages.csv"
_FEATURES_JSON = "./messages.json"

# Number of "message_vector_<i>" columns read back from the feature frame
# (columns are numbered 1.._MESSAGE_VECTOR_SIZE inclusive).
_MESSAGE_VECTOR_SIZE = 200


class predictor():
    """Classify a single message with the project's random-forest model.

    The constructor builds a ``random_forest`` wrapper (with the psychological
    and behavioural feature groups switched off) and a ``feature_extraction``
    helper. ``predict`` then pushes one message through both.
    """

    def __init__(self):
        """Create the model wrapper and the feature extractor."""
        self.model = random_forest()
        self.model.PSYCHOLOGICAL_SIGNALS_ENABLED = False  # Needs LIWC markup
        self.model.BEHAVIOURAL_FEATURES_ENABLED = False
        self.model.train_model(features_file=None, force_new_dataset=False,
                               model_location=r"far-right-radical-language.model")

        self.dict_of_users_all = {}
        self.feature_extractor = Pinpoint_Internal.FeatureExtraction.feature_extraction(
            violent_words_dataset_location="swears",
            baseline_training_dataset_location="LIWC2015 Results (Storm_Front_Posts).csv")

    def predict(self, string_to_predict):
        """Return whether ``string_to_predict`` is classified as extremist.

        The message is written to a one-row CSV, run through the feature
        extractor, dumped to JSON, loaded back as a feature frame and finally
        handed to the underlying model.

        Side effects: re-runs ``__init__`` and (re)writes ``all-messages.csv``
        and ``messages.json`` in the current working directory.

        :param string_to_predict: the message text to classify.
        :return: ``True`` if the model predicts class ``1``, ``False`` for any
            other prediction, and ``None`` if no feature row matches the
            extracted message vector.
        """
        # The model and feature extractor are rebuilt on every call.
        # NOTE(review): presumably this resets state accumulated by the
        # feature extractor between predictions - confirm before removing.
        self.__init__()

        self._remove_stale_files()

        users_posts = [{"username": "tmp", "timestamp": "tmp",
                        "message": f"{string_to_predict}"}]

        # No header row is written; only the data rows.
        with open(_MESSAGES_CSV, 'w', encoding='utf8', newline='') as output_file:
            writer = csv.DictWriter(output_file,
                                    fieldnames=["username", "timestamp", "message"])
            writer.writerows(users_posts)

        self.feature_extractor._get_standard_tweets(_MESSAGES_CSV)

        with open(_FEATURES_JSON, 'w') as outfile:
            json.dump(self.feature_extractor.completed_tweet_user_features,
                      outfile, indent=4)

        rows = self.model.get_features_as_df(_FEATURES_JSON, True)
        rows.pop("is_extremist")

        # One message vector per post, read back through a JSON round-trip of
        # the corresponding feature row.
        message_vector_list = []
        for row_index in range(len(users_posts)):
            row_as_json = json.loads(rows.iloc[row_index].to_json())
            message_vector_list.append(
                [row_as_json[f"message_vector_{i}"]
                 for i in range(1, _MESSAGE_VECTOR_SIZE + 1)])

        for post in users_posts:
            user_unique_id = str(
                self.feature_extractor._get_unique_id_from_username(post["username"]))
            user_features = self._find_user_features(user_unique_id)

            # Round to 10 decimal places so the values can be compared with
            # the ones that went through the JSON round-trip above.
            formatted_vectors = [float('%.10f' % elem)
                                 for elem in user_features["message_vector"]]

            # Locate the feature row belonging to this post and classify it.
            for row_index, candidate_vector in enumerate(message_vector_list):
                if candidate_vector == formatted_vectors:
                    is_extremist = self.model.model.predict([rows.iloc[row_index]])
                    return bool(is_extremist == 1)

        # No feature row matched the extracted message vector.
        return None

    @staticmethod
    def _remove_stale_files():
        """Best-effort removal of scratch files left by a previous run."""
        for stale_path in (_FEATURES_JSON, "./" + _MESSAGES_CSV):
            try:
                os.remove(stale_path)
            except OSError:
                # Missing or undeletable file: the files are overwritten
                # later anyway, so this is deliberately not fatal.
                pass

    def _find_user_features(self, user_unique_id):
        """Return the extracted features stored under ``user_unique_id``.

        Entries of ``completed_tweet_user_features`` are probed by increasing
        integer index until one contains the id. If none does, the error
        raised by indexing past the end propagates to the caller.
        """
        index = 0
        while True:
            try:
                return self.feature_extractor.completed_tweet_user_features[index][user_unique_id]
            except KeyError:
                index += 1