Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

13macattack37 committed on Jul 6, 2022

Commit

dc67c78

•

1 Parent(s): 205426f

Added the text classifier class to the repo

Browse files

Files changed (1) hide show

text-classifier/text_classifier.py +168 -0

text-classifier/text_classifier.py ADDED Viewed

	@@ -0,0 +1,168 @@

import os

import openai
import regex as re

# SECURITY: the API key must never be committed to the repository. It is read
# from the OPENAI_API_KEY environment variable instead. The key that was
# previously hard-coded on this line has been published and should be revoked.
# If the variable is unset the key is None; API calls are then presumably
# rejected by the openai client (confirm against the installed version).
openai.api_key = os.environ.get("OPENAI_API_KEY")
class text_classifier:
    """Classify tweets through the OpenAI completion API and summarise them.

    The class holds no state and is used as a namespace: every method is a
    static method, so it can be called either on the class or on an instance.

    ``tweet_dict`` arguments are dictionaries keyed by the tweet text.  The
    statistics helpers additionally expect each value to be a dictionary with
    the keys ``'nlikes'``, ``'nreplies'``, ``'nretweets'`` and ``'topic'``.
    """

    @staticmethod
    def _complete(prompt, max_tokens):
        """Send one prompt to ``text-davinci-002`` and return the raw text.

        Temperature is 0 and no penalties are applied; only the first choice
        of the response is used.
        """
        response = openai.Completion.create(
            model="text-davinci-002",
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response.choices[0]['text']

    @staticmethod
    def classify_topics(tweet_dict):
        """Ask the model for one general topic and two sub-topics per tweet.

        Args:
            tweet_dict: dictionary whose keys are the tweet texts.

        Returns:
            A dictionary mapping each tweet text to its list of cleaned topic
            strings, as produced by ``cleanup_results``.
        """
        prediction_list = []
        for tweet in tweet_dict:
            prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
            prompt_string += tweet
            prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
            prediction_list.append(text_classifier._complete(prompt_string, 892))
        return text_classifier.cleanup_results(prediction_list, tweet_dict)

    @staticmethod
    def classify_sentiments(tweet_dict):
        """Ask the model for a single sentiment label per tweet.

        Args:
            tweet_dict: dictionary whose keys are the tweet texts.

        Returns:
            A list with the raw (uncleaned) completion text for every tweet,
            in the iteration order of ``tweet_dict``.
        """
        prediction_list = []
        for tweet in tweet_dict:
            prompt_string = "Classify one sentiment for this tweet:\n \""
            prompt_string += tweet
            prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
            prediction_list.append(text_classifier._complete(prompt_string, 256))
        return prediction_list

    @staticmethod
    def cleanup_results(prediction_list, tweet_dict):
        """Turn raw numbered completions into lists of topic strings.

        Each completion is expected to look like ``"1. A\\n2. B\\n3. C"``,
        optionally surrounded by whitespace.  Whitespace is collapsed, the
        leading enumeration marker is removed, all remaining digits are
        dropped and the text is split on ``"."``.  Empty fragments (for
        example after a trailing period) are discarded.

        Args:
            prediction_list: raw completion strings, one per tweet, in the
                iteration order of ``tweet_dict``.
            tweet_dict: dictionary whose keys are the tweet texts.

        Returns:
            A dictionary mapping each tweet text to its list of topics.
            Surplus predictions are ignored.

        Raises:
            IndexError: if there are fewer predictions than tweets.
        """
        predictions_cleaned = []
        for raw in prediction_list:
            # Collapse newlines and repeated blanks into single spaces.
            text = " ".join(raw.split())
            # Remove the leading "1." marker explicitly instead of blindly
            # cutting four characters, which ate real text whenever the
            # completion did not start with whitespace.
            text = re.sub(r"^\d+\.\s*", "", text)
            # The remaining digits belong to the "2." / "3." markers; once
            # they are gone the periods act as the topic separators.
            text = re.sub(r"\d", "", text)
            fragments = (fragment.strip() for fragment in text.split("."))
            predictions_cleaned.append([topic for topic in fragments if topic])

        tweet_list = list(tweet_dict)
        if len(predictions_cleaned) < len(tweet_list):
            raise IndexError(
                "got {} predictions for {} tweets".format(
                    len(predictions_cleaned), len(tweet_list)
                )
            )
        return dict(zip(tweet_list, predictions_cleaned))

    @staticmethod
    def insert_predictions(tweet_dict, results):
        """Store each tweet's topics under the ``'topic'`` key of its entry.

        ``tweet_dict`` is modified in place and also returned.

        Raises:
            KeyError: if ``results`` contains a tweet missing from
                ``tweet_dict``.
        """
        for key, topics in results.items():
            tweet_dict[key]['topic'] = topics
        return tweet_dict

    @staticmethod
    def print_results(results_dict):
        """Print every tweet followed by its predictions and a separator."""
        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
        for key, predictions in results_dict.items():
            print("\"" + key + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")

    @staticmethod
    def print_stats(result_dict, user=""):
        """Print per-topic frequency and mean engagement figures.

        For every topic the table shows how many tweets mention it, the mean
        number of likes, retweets and replies of those tweets, and the
        "reach average", which is the mean of those three means.

        Args:
            result_dict: dictionary keyed by tweet text whose values provide
                ``'nlikes'``, ``'nreplies'``, ``'nretweets'`` and ``'topic'``
                (an iterable of topic strings).
            user: name printed in the header; defaults to an empty string.

        Raises:
            KeyError: if a value lacks one of the required keys.
        """
        freq_dict = {}
        total_likes = {}
        total_retweets = {}
        total_replies = {}
        for value in result_dict.values():
            nlikes = value['nlikes']
            nreplies = value['nreplies']
            nretweets = value['nretweets']
            # A tweet contributes its counters once to each of its topics.
            for topic in value['topic']:
                freq_dict[topic] = freq_dict.get(topic, 0) + 1
                total_likes[topic] = total_likes.get(topic, 0) + nlikes
                total_retweets[topic] = total_retweets.get(topic, 0) + nretweets
                total_replies[topic] = total_replies.get(topic, 0) + nreplies

        row_format = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
        print('\033[1m' + "USER: " + '\033[0m', user)
        print('\033[1m' + "NBR OF TWEETS SCRAPED: " + '\033[0m', len(result_dict))
        print('\033[1m' + "NBR OF DIFFERENT TOPICS: " + '\033[0m', len(freq_dict), "\n", "\n")
        print(row_format.format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
        for topic, count in freq_dict.items():
            avg_likes = total_likes[topic] / count
            avg_retweets = total_retweets[topic] / count
            # Replies are averaged like the other counters so the column
            # matches its "AVERAGE" heading and the reach average is built
            # from three comparable means.
            avg_replies = total_replies[topic] / count
            reach_avg = (avg_likes + avg_retweets + avg_replies) / 3
            print(row_format.format(
                topic,
                count,
                "{:.2f}".format(avg_likes),
                "{:.2f}".format(avg_retweets),
                "{:.2f}".format(avg_replies),
                "{:.2f}".format(reach_avg),
            ))