Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Demea9000 committed on Jul 12, 2022

Commit

1ace546

•

1 Parent(s): c91c79b

utökade sentiment metoderna, la till placeholder för att spara redan promptade tweets

Browse files

Files changed (1) hide show

textclassifier/TextClassifier.py +55 -11

textclassifier/TextClassifier.py CHANGED Viewed

@@ -6,6 +6,7 @@ from datetime import date
 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
                  num_tweets=20):
         """
         Initializes the TextClassifier.
@@ -19,12 +20,29 @@ class TextClassifier:
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
         # self.api_key = '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'
         openai.api_key = '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'
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         classification_clean = classification_unclean.replace('\n\n', "")
         classification_clean = classification_clean.replace('\n', "")
         if classification_clean.startswith(" "):
@@ -45,7 +63,7 @@ class TextClassifier:
                          "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
         response = openai.Completion.create(
-            model="text-davinci-002",
             prompt=prompt_string,
             temperature=0.0,
             max_tokens=256,
@@ -59,26 +77,29 @@ class TextClassifier:
         return classification_clean.lower()
-    def classify_sentiment_of_tweets(self, user_name: str):
         """
         Classifies the sentiment of a user's tweets.
         :param user_name: string of the user name.
         """
-        df_sentiment = self.ts.scrape_by_user(user_name)
         df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
-        return df_sentiment
     def analyze_sentiment(self, text: str, sentiment: str):
         """
         Analyzes the sentiment of a text using OpenAI.
         :param text: string of the tweet text.
         :param sentiment:
         :return:
         """
         prompt_string = "Who is the TARGET of this "
         prompt_string += sentiment
         prompt_string += " TWEET?\\nTWEET=\""
-        prompt_string += tweet
         prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
         response = openai.Completion.create(
@@ -92,7 +113,7 @@ class TextClassifier:
         )
         analyzed_sentiment = response.choices[0]['text']
         # Remove spaces at the start/end of the response
         if analyzed_sentiment.startswith(' '):
             analyzed_sentiment = analyzed_sentiment[1:]
@@ -132,9 +153,20 @@ class TextClassifier:
         elif analyzed_sentiment.lower() == "the swedish government":
             analyzed_sentiment = "Regeringen"
-        tweet_dict[tweet]['target'] = analyzed_sentiment
-        return tweet_dict
     def classify_topics(self, text: str):
         """
@@ -142,11 +174,23 @@ class TextClassifier:
         """
     def __repr__(self):
-        return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date, self.num_tweets)
 if __name__ == "__main__":
-    tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=20)
     print(tc)
-    df = tc.classify_sentiment_of_tweets("jimmieakesson")
     print(df)

 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
+                 user_name='jimmieakesson',
                  num_tweets=20):
         """
         Initializes the TextClassifier.
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
+        self.user_name = user_name
         self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
+        self.df = self.ts.scrape_by_user(user_name)
         # self.api_key = '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'
         openai.api_key = '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'
+    def store_tweets(self, file_name: str):
+        """
+        Writes the contents of self.df to a CSV file.
+        :param file_name: target path without extension; '.csv' is appended.
+        """
+        # TODO: the author marked this method as not fully implemented yet.
+        csv_path = file_name + '.csv'
+        # index=False leaves the row index out of the written file.
+        self.df.to_csv(csv_path, index=False)
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
+        """
+        Cleans up the results of the sentiment classification.
+        :param classification_unclean: string of the classification result.
+        :return: cleaned up string.
+        """
         classification_clean = classification_unclean.replace('\n\n', "")
         classification_clean = classification_clean.replace('\n', "")
         if classification_clean.startswith(" "):
                          "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
         response = openai.Completion.create(
+            model=self.model_name,
             prompt=prompt_string,
             temperature=0.0,
             max_tokens=256,
         return classification_clean.lower()
+    def classify_sentiment_of_tweets(self):
+        """
+        Classifies the sentiment of every tweet in self.df.
+        Runs classify_sentiment on each entry of the 'tweet' column and
+        stores the results in a 'sentiment' column of self.df.
+        :return: self.df, including the 'sentiment' column.
+        """
+        tweets = self.df['tweet']
+        self.df['sentiment'] = tweets.apply(self.classify_sentiment)
+        return self.df
     def analyze_sentiment(self, text: str, sentiment: str):
+        # TODO: fix prompt before running this method
         """
         Analyzes the sentiment of a text using OpenAI.
         :param text: string of the tweet text.
         :param sentiment:
         :return:
         """
+        assert 1 == 2, "Måste fixa prompt innan denna metod körs"
         prompt_string = "Who is the TARGET of this "
         prompt_string += sentiment
         prompt_string += " TWEET?\\nTWEET=\""
+        prompt_string += text
         prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
         response = openai.Completion.create(
         )
         analyzed_sentiment = response.choices[0]['text']
+        print(analyzed_sentiment)
         # Remove spaces at the start/end of the response
         if analyzed_sentiment.startswith(' '):
             analyzed_sentiment = analyzed_sentiment[1:]
         elif analyzed_sentiment.lower() == "the swedish government":
             analyzed_sentiment = "Regeringen"
+        return analyzed_sentiment
+    def analyze_sentiment_of_tweets(self):
+        """
+        Determines the target of every tweet in self.df.
+        Calls analyze_sentiment once per row with that row's tweet text and
+        sentiment label, and stores the results in a 'target' column.
+        :return: self.df, including the 'target' column.
+        :raises AssertionError: if self.df has no 'sentiment' column.
+        """
+        # Raised explicitly instead of via an assert statement so the check
+        # still runs under `python -O`; the exception type stays the same.
+        if 'sentiment' not in self.df.columns:
+            raise AssertionError(
+                "'sentiment' column does not exist. Please run classify_sentiment_of_tweets first.")
+        # Pair each tweet with its own sentiment. Handing the whole column to
+        # apply(args=...) would give analyze_sentiment the entire Series
+        # rather than one label per row.
+        self.df['target'] = [
+            self.analyze_sentiment(text, sentiment)
+            for text, sentiment in zip(self.df['tweet'], self.df['sentiment'])
+        ]
+        return self.df
     def classify_topics(self, text: str):
         """
         """
     def __repr__(self):
+        """
+        Returns a short description of this object: its class name, date
+        range and number of tweets.
+        """
+        # Take the name from the instance's own class so the text names
+        # TextClassifier (or a subclass) rather than the scraper it wraps.
+        return "{}(from_date={}, to_date={}, num_tweets={})".format(
+            type(self).__name__, self.from_date, self.to_date, self.num_tweets)
 if __name__ == "__main__":
+    # Demo run: build a classifier for one account, classify its tweets
+    # and print the resulting table.
+    import warnings
+    import pandas as pd
+    # Hide FutureWarning output and print all columns of the table.
+    warnings.simplefilter('ignore', FutureWarning)
+    pd.set_option('display.max_columns', None)
+    today = str(date.today())
+    tc = TextClassifier(
+        model_name="text-davinci-002",
+        from_date='2022-07-01',
+        to_date=today,
+        user_name='jimmieakesson',
+        num_tweets=20,
+    )
     print(tc)
+    df = tc.classify_sentiment_of_tweets()
+    print(df.head())
+    # Target analysis of a single text, left disabled by the author:
+    # df = tc.analyze_sentiment("Nu har sd igen gjort fel", "critical")
     print(df)