Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

13macattack37 committed on Jul 6, 2022

Commit

4406f1d

•

1 Parent(s): dc67c78

Functions for inserting sentiments have been implemented. The sentiments are now also part of the statistics printed by the print_results() function.

Browse files

Files changed (1) hide show

text-classifier/text_classifier.py +41 -43

text-classifier/text_classifier.py CHANGED Viewed

@@ -3,19 +3,13 @@ import openai
 import regex as re
 openai.api_key = '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'
-class text_classifier:
-    '''def __init__(self, user, from_date, to_date):
-        self.user = "Janne"
-        self.from_date = "2022-01-05"
-        self.to_date = "2022-07-05"'''
     def classify_topics(tweet_dict):
         tweet_list = list(tweet_dict.keys())
-        prediction_list = []
-        for tweet in tweet_list:
-            #prompt_string = ""
             prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
             prompt_string += tweet
             prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
@@ -30,20 +24,19 @@ class text_classifier:
             )
             classifications_unclean = response.choices[0]['text']
-            prediction_list.append(classifications_unclean)
-        return text_classifier.cleanup_results(prediction_list, tweet_dict)
     def classify_sentiments(tweet_dict):
-        tweet_list = list(tweet_dict.keys())
-        prediction_list = []
         for tweet in tweet_list:
             prompt_string = "Classify one sentiment for this tweet:\n \""
             prompt_string += tweet
             prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
             response = openai.Completion.create(
             model="text-davinci-002",
             prompt=prompt_string,
@@ -53,20 +46,16 @@ class text_classifier:
             frequency_penalty=0,
             presence_penalty=0
             )
-            classifications_unclean = response.choices[0]['text']
-            prediction_list.append(classifications_unclean)
-        return prediction_list
-    def cleanup_results(prediction_list, tweet_dict):
-        predictions_cleaned = []
-        temp_list = []
-        pred_dict = {}
-        i = 0
-        tweet_list = list(tweet_dict.keys())
-        for item in prediction_list:
             temp_list = []
             new_item = item.replace("\n", " ")
             new_item = new_item.replace("  ", " ")
@@ -79,19 +68,10 @@ class text_classifier:
                     item = item[1:]
                 if item.endswith(' '):
                     item = item[:-1]
-                temp_list.append(item)
-            predictions_cleaned.append(temp_list)
-        for tweet in tweet_list:
-            pred_dict[tweet] = predictions_cleaned[i]
-            i += 1
-        return pred_dict
-    def insert_predictions(tweet_dict, results):
-        for key in results:
-            tweet_dict[key]['topic'] = results[key]
-        return tweet_dict
     def print_results(results_dict):
@@ -107,14 +87,24 @@ class text_classifier:
         mean_likes = {}
         mean_retweets = {}
         mean_replies = {}
         nbr_topics = 0
-        for value in result_dict.values():
             nlikes = value['nlikes']
             nreplies = value['nreplies']
             nretweets = value['nretweets']
             topic_list = value['topic']
         # Count topic frequency
             for topic in topic_list:
@@ -151,11 +141,11 @@ class text_classifier:
         for key in mean_retweets.keys():
             mean_retweets[key] = mean_retweets[key] / freq_dict[key]
         # Print the names of the columns.
         print('\033[1m' + "USER: " + '\033[0m', user)
         print('\033[1m' + "NBR OF TWEETS SCRAPED: "+ '\033[0m', len(list(result_dict.keys())))
-        print('\033[1m' + "NBR OF DIFFERENT TOPICS: "+ '\033[0m', nbr_topics, "\n", "\n")
         print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
         # print each data item.
@@ -165,4 +155,12 @@ class text_classifier:
             reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic] ) / 3
             print ("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes), "{:.2f}".format(mean_retweets[topic]), mean_replies[topic], "{:.2f}".format(reach_avg)))

 import regex as re
 openai.api_key = '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'
+class text_classifier:
     def classify_topics(tweet_dict):
         tweet_list = list(tweet_dict.keys())
+        prediction_dict = {}
+        for tweet in tweet_list:
             prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
             prompt_string += tweet
             prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
             )
             classifications_unclean = response.choices[0]['text']
+            prediction_dict[tweet] = classifications_unclean
+        return text_classifier.cleanup_topic_results(prediction_dict, tweet_dict)
     def classify_sentiments(tweet_dict):
+        tweet_list = list(tweet_dict.keys())
         for tweet in tweet_list:
             prompt_string = "Classify one sentiment for this tweet:\n \""
             prompt_string += tweet
             prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
             response = openai.Completion.create(
             model="text-davinci-002",
             prompt=prompt_string,
             frequency_penalty=0,
             presence_penalty=0
             )
+            classifications_unclean = response.choices[0]['text']
+            tweet_dict[tweet]['sentiment'] = classifications_unclean
+        return tweet_dict
+    def cleanup_topic_results(prediction_dict, tweet_dict):
+        temp_list = []
+        for tweet, item in prediction_dict.items():
             temp_list = []
             new_item = item.replace("\n", " ")
             new_item = new_item.replace("  ", " ")
                     item = item[1:]
                 if item.endswith(' '):
                     item = item[:-1]
+                temp_list.append(item)
+            tweet_dict[tweet]['topic'] = temp_list
+        return tweet_dict
     def print_results(results_dict):
         mean_likes = {}
         mean_retweets = {}
         mean_replies = {}
+        sentiment_dict = {}
+        nbr_sentiment = 0
         nbr_topics = 0
+        for key, value in result_dict.items():
             nlikes = value['nlikes']
             nreplies = value['nreplies']
             nretweets = value['nretweets']
             topic_list = value['topic']
+            sentiment = value['sentiment']
+        # Count sentiment frequency
+            if sentiment in sentiment_dict.keys():
+                sentiment_dict[sentiment] += 1
+            else:
+                sentiment_dict[sentiment] = 1
+                nbr_sentiment += 1
         # Count topic frequency
             for topic in topic_list:
         for key in mean_retweets.keys():
             mean_retweets[key] = mean_retweets[key] / freq_dict[key]
         # Print the names of the columns.
         print('\033[1m' + "USER: " + '\033[0m', user)
         print('\033[1m' + "NBR OF TWEETS SCRAPED: "+ '\033[0m', len(list(result_dict.keys())))
+        print('\033[1m' + "NBR OF DIFFERENT TOPICS: "+ '\033[0m', nbr_topics, "\n")
         print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
         # print each data item.
             reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic] ) / 3
             print ("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes), "{:.2f}".format(mean_retweets[topic]), mean_replies[topic], "{:.2f}".format(reach_avg)))
+        print("\n")
+        print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: "+ '\033[0m', nbr_sentiment, "\n")
+        print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
+        for key, value in sentiment_dict.items():
+            sentiment = key
+            mean_sentiment = value
+            print ("{:<60} {:<20}".format(sentiment, sentiment_dict[sentiment], "{:.2f}".format(mean_sentiment)))