Spaces:

politweet-sh
/

politweet

Runtime error

App Files Community

Demea9000 committed on Jul 6, 2022

Commit

eceff29

•

1 Parent(s): 13599ad

prettier

Browse files

Files changed (2) hide show

text-classifier/TextClassifier.py +86 -84
text-classifier/main.py +48 -38

text-classifier/TextClassifier.py CHANGED Viewed

@@ -1,87 +1,87 @@
-import openai
 import regex as re
-openai.api_key = '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'
 class TextClassifier:
-    def classify_topics(tweet_dict):
-        tweet_list = list(tweet_dict.keys())
-        prediction_dict = {}
-        for tweet in tweet_list:
-            prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
             prompt_string += tweet
-            prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
             response = openai.Completion.create(
-            model="text-davinci-002",
-            prompt= prompt_string,
-            temperature=0,
-            max_tokens=892,
-            top_p=1,
-            frequency_penalty=0,
-            presence_penalty=0
             )
-            classifications_unclean = response.choices[0]['text']
-            prediction_dict[tweet] = classifications_unclean
         return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)
     def classify_sentiments(tweet_dict):
-        tweet_list = list(tweet_dict.keys())
         for tweet in tweet_list:
             prompt_string = "Classify one sentiment for this tweet:\n \""
             prompt_string += tweet
-            prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
             response = openai.Completion.create(
-            model="text-davinci-002",
-            prompt=prompt_string,
-            temperature=0,
-            max_tokens=256,
-            top_p=1,
-            frequency_penalty=0,
-            presence_penalty=0
             )
-            classifications_unclean = response.choices[0]['text']
             tweet_dict[tweet]['sentiment'] = classifications_unclean
-        return tweet_dict
-    def cleanup_topic_results(prediction_dict, tweet_dict):
-        temp_list = []
         for tweet, item in prediction_dict.items():
-            temp_list = []
             new_item = item.replace("\n", " ")
             new_item = new_item.replace("  ", " ")
             new_item = new_item[4:]
-            new_item = re.sub('\d', '', new_item)
             sub_list = new_item.split(".")
-            for item in sub_list:
                 if item.startswith(' '):
                     item = item[1:]
                 if item.endswith(' '):
-                    item = item[:-1]
-                temp_list.append(item)
-            tweet_dict[tweet]['topic'] = temp_list
-        return tweet_dict
     def print_results(results_dict):
-        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
         for key in results_dict.keys():
             predictions = results_dict[key]
-            print("\"" + key + "\"" + "\n"+ str(predictions),"\n" + "---------------------------------")
-    def print_stats(result_dict):
         user = ""
         freq_dict = {}
         mean_likes = {}
@@ -93,74 +93,76 @@ class TextClassifier:
         for key, value in result_dict.items():
-            nlikes = value['nlikes']
             nreplies = value['nreplies']
             nretweets = value['nretweets']
-            topic_list = value['topic']
             sentiment = value['sentiment']
-        # Count sentiment frequency
             if sentiment in sentiment_dict.keys():
                 sentiment_dict[sentiment] += 1
             else:
-                sentiment_dict[sentiment] = 1
-                nbr_sentiment += 1
-        # Count topic frequency
             for topic in topic_list:
                 if topic in freq_dict.keys():
                     freq_dict[topic] += 1
                 else:
-                    freq_dict[topic] = 1
                     nbr_topics += 1
                 # Count total likes per topic
                 if topic in mean_likes.keys():
-                    mean_likes[topic] += nlikes
                 else:
-                    mean_likes[topic] = nlikes
-                # Count total retweets per topic
                 if topic in mean_retweets.keys():
-                    mean_retweets[topic] += nretweets
                 else:
-                    mean_retweets[topic] = nretweets
-                # Count total replies per topic
                 if topic in mean_replies.keys():
-                    mean_replies[topic] += nreplies
                 else:
-                    mean_replies[topic] = nreplies
-        # Count mean of likes
-        for key in mean_likes.keys():
             mean_likes[key] = mean_likes[key] / freq_dict[key]
         # Count mean of retweets
-        for key in mean_retweets.keys():
             mean_retweets[key] = mean_retweets[key] / freq_dict[key]
         # Print the names of the columns.
-        print('\033[1m' + "USER: " + '\033[0m', user)
-        print('\033[1m' + "NBR OF TWEETS SCRAPED: "+ '\033[0m', len(list(result_dict.keys())))
-        print('\033[1m' + "NBR OF DIFFERENT TOPICS: "+ '\033[0m', nbr_topics, "\n")
-        print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
         # print each data item.
         for key, value in mean_likes.items():
             topic = key
             mean_likes = value
-            reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic] ) / 3
-            print ("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes), "{:.2f}".format(mean_retweets[topic]), mean_replies[topic], "{:.2f}".format(reach_avg)))
         print("\n")
-        print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: "+ '\033[0m', nbr_sentiment, "\n")
         print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
         for key, value in sentiment_dict.items():
             sentiment = key
-            mean_sentiment = value
-            print ("{:<60} {:<20}".format(sentiment, sentiment_dict[sentiment], "{:.2f}".format(mean_sentiment)))

+import openai
 import regex as re
+openai.api_key = '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'
 class TextClassifier:
+    def classify_topics(tweet_dict):
+        tweet_list = list(tweet_dict.keys())
+        prediction_dict = {}
+        for tweet in tweet_list:
+            prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
             prompt_string += tweet
+            prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be " \
+                             "more than 5 words. Numerate each topic in the output. END "
             response = openai.Completion.create(
+                model="text-davinci-002",
+                prompt=prompt_string,
+                temperature=0,
+                max_tokens=892,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0
             )
+            classifications_unclean = response.choices[0]['text']
+            prediction_dict[tweet] = classifications_unclean
         return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)
     def classify_sentiments(tweet_dict):
+        tweet_list = list(tweet_dict.keys())
         for tweet in tweet_list:
             prompt_string = "Classify one sentiment for this tweet:\n \""
             prompt_string += tweet
+            prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement," \
+                             "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire," \
+                             "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
             response = openai.Completion.create(
+                model="text-davinci-002",
+                prompt=prompt_string,
+                temperature=0,
+                max_tokens=256,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0
             )
+            classifications_unclean = response.choices[0]['text']
             tweet_dict[tweet]['sentiment'] = classifications_unclean
+        return tweet_dict
+    def cleanup_topic_results(prediction_dict, tweet_dict):
+        temp_list = []
         for tweet, item in prediction_dict.items():
+            temp_list = []
             new_item = item.replace("\n", " ")
             new_item = new_item.replace("  ", " ")
             new_item = new_item[4:]
+            new_item = re.sub('\d', '', new_item)
             sub_list = new_item.split(".")
+            for item in sub_list:
                 if item.startswith(' '):
                     item = item[1:]
                 if item.endswith(' '):
+                    item = item[:-1]
+                temp_list.append(item)
+            tweet_dict[tweet]['topic'] = temp_list
+        return tweet_dict
     def print_results(results_dict):
+        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
         for key in results_dict.keys():
             predictions = results_dict[key]
+            print("\"" + key + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")
+    def print_stats(result_dict):
         user = ""
         freq_dict = {}
         mean_likes = {}
         for key, value in result_dict.items():
+            nlikes = value['nlikes']
             nreplies = value['nreplies']
             nretweets = value['nretweets']
+            topic_list = value['topic']
             sentiment = value['sentiment']
+            # Count sentiment frequency
             if sentiment in sentiment_dict.keys():
                 sentiment_dict[sentiment] += 1
             else:
+                sentiment_dict[sentiment] = 1
+                nbr_sentiment += 1
+                # Count topic frequency
             for topic in topic_list:
                 if topic in freq_dict.keys():
                     freq_dict[topic] += 1
                 else:
+                    freq_dict[topic] = 1
                     nbr_topics += 1
                 # Count total likes per topic
                 if topic in mean_likes.keys():
+                    mean_likes[topic] += nlikes
                 else:
+                    mean_likes[topic] = nlikes
+                    # Count total retweets per topic
                 if topic in mean_retweets.keys():
+                    mean_retweets[topic] += nretweets
                 else:
+                    mean_retweets[topic] = nretweets
+                    # Count total replies per topic
                 if topic in mean_replies.keys():
+                    mean_replies[topic] += nreplies
                 else:
+                    mean_replies[topic] = nreplies
+                    # Count mean of likes
+        for key in mean_likes.keys():
             mean_likes[key] = mean_likes[key] / freq_dict[key]
         # Count mean of retweets
+        for key in mean_retweets.keys():
             mean_retweets[key] = mean_retweets[key] / freq_dict[key]
         # Print the names of the columns.
+        print('\033[1m' + "USER: " + '\033[0m', user)
+        print('\033[1m' + "NBR OF TWEETS SCRAPED: " + '\033[0m', len(list(result_dict.keys())))
+        print('\033[1m' + "NBR OF DIFFERENT TOPICS: " + '\033[0m', nbr_topics, "\n")
+        print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY',
+                                                                 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS',
+                                                                 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
         # print each data item.
         for key, value in mean_likes.items():
             topic = key
             mean_likes = value
+            reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic]) / 3
+            print(
+                "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes),
+                                                                   "{:.2f}".format(mean_retweets[topic]),
+                                                                   mean_replies[topic], "{:.2f}".format(reach_avg)))
         print("\n")
+        print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: " + '\033[0m', nbr_sentiment, "\n")
         print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
         for key, value in sentiment_dict.items():
             sentiment = key
+            mean_sentiment = value
+            print("{:<60} {:<20}".format(sentiment, sentiment_dict[sentiment], "{:.2f}".format(mean_sentiment)))

text-classifier/main.py CHANGED Viewed

@@ -1,43 +1,54 @@
 from TextClassifier import TextClassifier
 # Some examples of tweets:
-data_dict = {'25 years ago we made a promise to the people of Hong Kong.   We intend to keep it.  https://t.co/nIN96ZydgV': {'hour': '17',
-  'nlikes': 7878,
-  'nreplies': 2999,
-  'nretweets': 1993,
-  'topic': '',
-  'sentiment': ''},
- 'A huge delight to meet @SwedishPM Magdalena Andersson and President @niinisto again.   The accession of Finland and Sweden to @NATO will permanently strengthen our defensive Alliance, helping to keep us all safe.  #WeAreNATO  https://t.co/pArvdWHr2F': {'hour': '16',
-  'nlikes': 3468,
-  'nreplies': 686,
-  'nretweets': 435,
-  'topic': '',
-  'sentiment': ''},
- 'At this @NATO Leaders’ Summit, I’ll be urging fellow nations to continue to do everything they can to support Ukraine.   The UK has always played a historic role in the @NATO alliance, working to address the biggest global threats and build a more secure world.': {'hour': '07',
-  'nlikes': 7742,
-  'nreplies': 1838,
-  'nretweets': 1112,
-  'topic': '',
-  'sentiment': ''},
- 'Morgan Johansson måste avgå som minister.  Otryggheten biter sig fast och gängkriminaliteten är allt annat än knäckt. Antalet skjutningar ökar och sätter skräck i varje del av vårt land.  Sverige har förvandlats till ett gangsterland.': {'hour': '16',
-  'nlikes': 3468,
-  'nreplies': 686,
-  'nretweets': 435,
-  'topic': '',
-  'sentiment': ''},
- 'Döms man för brott, särskilt våldsbrott, ska man vara inlåst från det att domen faller tills straffet är avtjänat. Allt annat är vansinne.': {'hour': '16',
-  'nlikes': 3468,
-  'nreplies': 686,
-  'nretweets': 435,
-  'topic': '',
-  'sentiment': ''},
- 'Motionerna: ' + '\n' + 'K339 avslogs av enig riksdag (inkl KD).' + '\n' + 'K220 avslogs av enig riksdag (inkl KD).' + '\n' + '1601 avslogs av enig riksdag (inkl KD).' + '\n' +  'K281 avslogs av enig riksdag (inkl KD).' + '\n' + '\n' +  '¯\_(ツ)_/¯': {'hour': '16',
-  'nlikes': 3468,
-  'nreplies': 686,
-  'nretweets': 435,
-  'topic': '',
-  'sentiment': ''}
-  }
 # Classify the TOPICS and insert the results into the data dictionary found above
 topic_results = TextClassifier.classify_topics(data_dict)
@@ -45,4 +56,3 @@ topic_results = TextClassifier.classify_topics(data_dict)
 sentiment_results = TextClassifier.classify_sentiments(data_dict)
 # Print simple statistics related to TOPICS and SENTIMENTS
 TextClassifier.print_stats(sentiment_results)

 from TextClassifier import TextClassifier
 # Some examples of tweets:
+data_dict = {
+    '25 years ago we made a promise to the people of Hong Kong.   We intend to keep it.  https://t.co/nIN96ZydgV': {
+        'hour': '17',
+        'nlikes': 7878,
+        'nreplies': 2999,
+        'nretweets': 1993,
+        'topic': '',
+        'sentiment': ''},
+    'A huge delight to meet @SwedishPM Magdalena Andersson and President @niinisto again.   The accession of Finland '
+    'and Sweden to @NATO will permanently strengthen our defensive Alliance, helping to keep us all safe.  #WeAreNATO '
+    ' https://t.co/pArvdWHr2F': {
+        'hour': '16',
+        'nlikes': 3468,
+        'nreplies': 686,
+        'nretweets': 435,
+        'topic': '',
+        'sentiment': ''},
+    'At this @NATO Leaders’ Summit, I’ll be urging fellow nations to continue to do everything they can to support '
+    'Ukraine.   The UK has always played a historic role in the @NATO alliance, working to address the biggest global '
+    'threats and build a more secure world.': {
+        'hour': '07',
+        'nlikes': 7742,
+        'nreplies': 1838,
+        'nretweets': 1112,
+        'topic': '',
+        'sentiment': ''},
+    'Morgan Johansson måste avgå som minister.  Otryggheten biter sig fast och gängkriminaliteten är allt annat än knäckt. Antalet skjutningar ökar och sätter skräck i varje del av vårt land.  Sverige har förvandlats till ett gangsterland.': {
+        'hour': '16',
+        'nlikes': 3468,
+        'nreplies': 686,
+        'nretweets': 435,
+        'topic': '',
+        'sentiment': ''},
+    'Döms man för brott, särskilt våldsbrott, ska man vara inlåst från det att domen faller tills straffet är avtjänat. Allt annat är vansinne.': {
+        'hour': '16',
+        'nlikes': 3468,
+        'nreplies': 686,
+        'nretweets': 435,
+        'topic': '',
+        'sentiment': ''},
+    'Motionerna: ' + '\n' + 'K339 avslogs av enig riksdag (inkl KD).' + '\n' + 'K220 avslogs av enig riksdag (inkl KD).' + '\n' + '1601 avslogs av enig riksdag (inkl KD).' + '\n' + 'K281 avslogs av enig riksdag (inkl KD).' + '\n' + '\n' + '¯\_(ツ)_/¯': {
+        'hour': '16',
+        'nlikes': 3468,
+        'nreplies': 686,
+        'nretweets': 435,
+        'topic': '',
+        'sentiment': ''}
+}
 # Classify the TOPICS and insert the results into the data dictionary found above
 topic_results = TextClassifier.classify_topics(data_dict)
 sentiment_results = TextClassifier.classify_sentiments(data_dict)
 # Print simple statistics related to TOPICS and SENTIMENTS
 TextClassifier.print_stats(sentiment_results)