Spaces:
Runtime error
Runtime error
added skeleton to TextClassifier
Browse files — textclassifier/TextClassifier.py (+27 −171)
textclassifier/TextClassifier.py
CHANGED
@@ -1,176 +1,32 @@
|
|
1 |
import openai
|
2 |
import regex as re
|
3 |
-
from twitterscraper import TwitterScraper
|
4 |
-
|
5 |
-
import os

# SECURITY: a live OpenAI secret key was hard-coded on this line and is now
# exposed in version-control history — it must be revoked. Read the key from
# the environment instead of embedding it in source.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
6 |
|
7 |
|
8 |
class TextClassifier:
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
def classify_sentiments(tweet_dict):
    """Label every tweet in *tweet_dict* with one sentiment via the OpenAI API.

    Mutates tweet_dict in place — the raw completion text is stored under the
    'sentiment' key of each tweet's entry — and returns the same dict.
    """
    # Tail of the prompt: the candidate sentiment labels shown to the model.
    label_examples = (
        "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
        "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
        "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
    )

    for tweet in list(tweet_dict):
        prompt = "Classify one sentiment for this tweet:\n \"" + tweet + label_examples

        completion = openai.Completion.create(
            model="text-davinci-002",
            prompt=prompt,
            temperature=0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        # Keep the uncleaned text of the first choice as-is.
        tweet_dict[tweet]['sentiment'] = completion.choices[0]['text']

    return tweet_dict
|
57 |
-
|
58 |
-
def cleanup_topic_results(prediction_dict, tweet_dict):
    """Parse raw topic-prediction strings into per-tweet topic lists.

    :param prediction_dict: maps tweet text -> raw model output string.
    :param tweet_dict: maps tweet text -> per-tweet info dict; updated in
        place with a 'topic' key holding the parsed list, then returned.
    """
    for tweet, raw in prediction_dict.items():
        topics = []  # fresh list per tweet (old code also dead-initialized one outside the loop)
        cleaned = raw.replace("\n", " ")
        cleaned = cleaned.replace("  ", " ")
        # Drop the first 4 characters — looks like a fixed prefix in the
        # model output; TODO confirm against an actual completion.
        cleaned = cleaned[4:]
        # BUG FIX: '\d' is an invalid escape in a non-raw string literal;
        # use a raw pattern.
        cleaned = re.sub(r'\d', '', cleaned)

        for part in cleaned.split("."):
            # Trim at most ONE leading and ONE trailing space — preserves
            # the original behaviour (not a full strip).
            if part.startswith(' '):
                part = part[1:]
            if part.endswith(' '):
                part = part[:-1]
            topics.append(part)

        tweet_dict[tweet]['topic'] = topics

    return tweet_dict
|
78 |
-
|
79 |
-
def print_results(results_dict):
    """Print each tweet followed by its predictions, one ruled section per tweet."""
    print('\033[1m' + "RESULTS" + '\033[0m', "\n")
    for tweet, predictions in results_dict.items():
        print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")
|
84 |
-
|
85 |
-
def print_stats(result_dict):
    """Aggregate and print per-topic and per-sentiment statistics.

    For every topic across the scraped tweets, prints its frequency and the
    average number of likes, retweets and replies, plus a "reach average"
    (mean of those three averages); then prints each sentiment's frequency.

    :param result_dict: maps tweet text -> dict with keys 'nlikes',
        'nreplies', 'nretweets', 'topic' (list of str) and 'sentiment' (str).
    """
    user = ""  # NOTE(review): never populated — the USER line always prints empty; confirm intent.
    freq_dict = {}
    mean_likes = {}
    mean_retweets = {}
    mean_replies = {}
    sentiment_dict = {}
    nbr_sentiment = 0
    nbr_topics = 0

    for value in result_dict.values():
        nlikes = value['nlikes']
        nreplies = value['nreplies']
        nretweets = value['nretweets']
        topic_list = value['topic']
        sentiment = value['sentiment']

        # Count sentiment frequency; nbr_sentiment counts DISTINCT labels
        # (matches the "NBR OF DIFFERENT SENTIMENTS" header below).
        if sentiment in sentiment_dict:
            sentiment_dict[sentiment] += 1
        else:
            sentiment_dict[sentiment] = 1
            nbr_sentiment += 1

        for topic in topic_list:
            # Count topic frequency; nbr_topics counts DISTINCT topics.
            if topic in freq_dict:
                freq_dict[topic] += 1
            else:
                freq_dict[topic] = 1
                nbr_topics += 1

            # Accumulate per-topic totals; averaged below.
            mean_likes[topic] = mean_likes.get(topic, 0) + nlikes
            mean_retweets[topic] = mean_retweets.get(topic, 0) + nretweets
            mean_replies[topic] = mean_replies.get(topic, 0) + nreplies

    # Turn totals into means.
    for topic in freq_dict:
        mean_likes[topic] = mean_likes[topic] / freq_dict[topic]
        mean_retweets[topic] = mean_retweets[topic] / freq_dict[topic]
        # BUG FIX: replies were printed as raw totals under the label
        # "AVERAGE NBR OF REPLIES"; average them like likes/retweets.
        mean_replies[topic] = mean_replies[topic] / freq_dict[topic]

    # Header.
    print('\033[1m' + "USER: " + '\033[0m', user)
    print('\033[1m' + "NBR OF TWEETS SCRAPED: " + '\033[0m', len(result_dict))
    print('\033[1m' + "NBR OF DIFFERENT TOPICS: " + '\033[0m', nbr_topics, "\n")
    print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY',
                                                             'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS',
                                                             'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))

    # One row per topic. BUG FIX: the old loop rebound the name `mean_likes`
    # to a float while iterating that very dict; use distinct local names.
    for topic, avg_likes in mean_likes.items():
        reach_avg = (avg_likes + mean_retweets[topic] + mean_replies[topic]) / 3
        print(
            "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(avg_likes),
                                                               "{:.2f}".format(mean_retweets[topic]),
                                                               "{:.2f}".format(mean_replies[topic]),
                                                               "{:.2f}".format(reach_avg)))

    print("\n")
    print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: " + '\033[0m', nbr_sentiment, "\n")
    print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
    # BUG FIX: the old row passed three arguments to a two-slot format string
    # (the extra "mean" was silently ignored); print sentiment and count only.
    for sentiment, count in sentiment_dict.items():
        print("{:<60} {:<20}".format(sentiment, count))
|
170 |
-
|
171 |
-
|
172 |
-
if __name__ == '__main__':
    # Smoke test: scrape a small sample of tweets and inspect the frame.
    # BUG FIX: the old code called `tf.TwitterScraper(...)` but `tf` was never
    # defined (NameError); use the imported `TwitterScraper` module, matching
    # the `TwitterScraper.TwitterScraper(...)` usage elsewhere in this commit.
    sc = TwitterScraper.TwitterScraper(num_tweets=40)
    dc = sc.scrape_by_user("jimmieakesson")
    print(dc.head())
    print(dc.shape)
|
|
|
1 |
import os
from datetime import date

import openai
import regex as re

from twitterscraper import TwitterScraper
|
|
|
5 |
|
6 |
|
7 |
class TextClassifier:
    """Skeleton classifier that labels scraped tweets via the OpenAI API."""

    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=None, num_tweets=100):
        """
        Initializes the TextClassifier.

        :param model_name: name of the model from openai.
        :param from_date: string of the format 'YYYY-MM-DD'.
        :param to_date: string of the format 'YYYY-MM-DD'; defaults to today.
        :param num_tweets: integer value of the maximum number of tweets to be scraped.
        """
        # BUG FIX: the old default `to_date=str(date.today())` was evaluated
        # once at import time, so a long-running process kept a stale date.
        # Resolve "today" at call time instead (backward-compatible).
        if to_date is None:
            to_date = str(date.today())

        self.model_name = model_name
        self.df = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
        # SECURITY: a live OpenAI secret key was hard-coded here (and is now
        # burned into git history — revoke it). Load from the environment.
        self.api_key = os.environ.get("OPENAI_API_KEY", "")

    def classify_sentiment(self, text: str):
        """
        Classifies the sentiment of a text.
        """
        # TODO: skeleton — not yet implemented.

    def classify_topics(self, text: str):
        """
        Classifies the topics of a text.
        """
        # TODO: skeleton — not yet implemented.

    def __repr__(self):
        # BUG FIX: the old repr interpolated `self.col`, an attribute that is
        # never assigned anywhere in the class, so repr() raised AttributeError.
        return f"TextClassifier(df={self.df}, model_name={self.model_name})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|