Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Demea9000 committed on Jul 12, 2022

Commit

32119e0

•

1 Parent(s): 2a4df2c

added code to classify sentiment and analyze

Browse files

Files changed (2) hide show

textclassifier/TextClassifier.py +124 -5
twitterscraper/TwitterScraper.py +1 -0

textclassifier/TextClassifier.py CHANGED Viewed

@@ -5,7 +5,8 @@ from datetime import date
 class TextClassifier:
-    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=100):
         """
         Initializes the TextClassifier.
         :param model_name: name of the model from openai.
@@ -18,13 +19,122 @@ class TextClassifier:
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
-        self.df = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
-        self.api_key = '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'
     def classify_sentiment(self, text: str):
         """
         Classifies the sentiment of a text.
         """
     def classify_topics(self, text: str):
         """
@@ -32,5 +142,14 @@ class TextClassifier:
         """
     def __repr__(self):
-        return f"TextClassifier(model_name={self.model_name}, from_date={self.from_date}, to_date={self.to_date}, " \
-               f"num_tweets={self.num_tweets}) "

 class TextClassifier:
+    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
+                 num_tweets=20):
         """
         Initializes the TextClassifier.
         :param model_name: name of the model from openai.
         self.from_date = from_date
         self.to_date = to_date
         self.num_tweets = num_tweets
+        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
+        # self.api_key = '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'
+        openai.api_key = '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'
+    @staticmethod
+    def cleanup_sentiment_results(classification_unclean):
+        """
+        Normalises the raw completion text returned for a sentiment prompt.
+        :param classification_unclean: raw text of the model's completion.
+        :return: the text with newlines removed and surrounding whitespace stripped.
+        """
+        # Dropping every "\n" also covers the "\n\n" case.
+        classification_clean = classification_unclean.replace('\n', "")
+        # Only trim the ends: removing every space (as before) glued multi-word
+        # answers together, e.g. " mixed feelings" -> "mixedfeelings".
+        return classification_clean.strip()
     def classify_sentiment(self, text: str):
         """
         Classifies the sentiment of a text.
+        :param text: string of the tweet text.
+        :return: the model's answer, cleaned up and lower-cased.
         """
+        assert isinstance(text, str)
+        prompt_string = "Classify one sentiment for this tweet:\n \""
+        prompt_string += text
+        prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement," \
+                         "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire," \
+                         "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
+        response = openai.Completion.create(
+            # Use the model chosen at construction time (as analyze_sentiment does)
+            # instead of a hard-coded name that ignored the constructor argument.
+            model=self.model_name,
+            prompt=prompt_string,
+            temperature=0.0,
+            max_tokens=256,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+            logprobs=5
+        )
+        classification_unclean = response.choices[0]['text']
+        classification_clean = self.cleanup_sentiment_results(classification_unclean)
+        return classification_clean.lower()
+    def classify_sentiment_of_tweets(self, user_name: str):
+        """
+        Scrapes a user's tweets and labels each one with a sentiment.
+        The resulting dataframe (with a new 'sentiment' column) is stored in self.df.
+        :param user_name: string of the user name.
+        """
+        tweets = self.ts.scrape_by_user(user_name)
+        # One classify_sentiment call per entry of the 'tweet' column.
+        tweets['sentiment'] = tweets['tweet'].apply(self.classify_sentiment)
+        self.df = tweets
+    def analyze_sentiment(self, text: str, sentiment: str):
+        """
+        Asks OpenAI who the target of a tweet's sentiment is.
+        :param text: string of the tweet text.
+        :param sentiment: sentiment label to mention in the prompt.
+        :return: the normalised target name, or "N/A" when the answer is not a
+                 known alias and is longer than 10 characters.
+        """
+        # Real newlines are sent here; the previous "\\n" put a literal
+        # backslash followed by "n" into the prompt. The tweet text comes from
+        # the `text` parameter (the old code read an undefined name `tweet`).
+        prompt_string = (
+            "Who is the TARGET of this " + sentiment + " TWEET?\n"
+            "TWEET=\"" + text + "\"\n"
+            ".TARGET should consist of less than 5 words.\nTARGET="
+        )
+        response = openai.Completion.create(
+            model=self.model_name,
+            prompt=prompt_string,
+            temperature=0,
+            max_tokens=256,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0
+        )
+        # Remove whitespace at the start/end of the response and any parentheses.
+        target = response.choices[0]['text'].strip()
+        target = target.replace("(", "").replace(")", "")
+        # An attempt to merge target responses that should be the same.
+        aliases = {
+            "v": "Vänsterpartiet",
+            "mp": "Miljöpartiet",
+            "the swedish green party": "Miljöpartiet",
+            "s": "Socialdemokraterna",
+            "the swedish social democratic party": "Socialdemokraterna",
+            "c": "Centerpartiet",
+            "l": "Liberalerna",
+            "kd": "Kristdemokraterna",
+            "m": "Moderaterna",
+            "the swedish moderate party": "Moderaterna",
+            "the moderate party": "Moderaterna",
+            "sd": "Sverigedemokraterna",
+            "the swedish government": "Regeringen",
+        }
+        # Look up aliases BEFORE the length filter: the long aliases exceed
+        # 10 characters and could never match when the filter ran first.
+        canonical = aliases.get(target.lower())
+        if canonical is not None:
+            return canonical
+        # Sometimes GPT-3 gives faulty results, so a simple filter is introduced:
+        # an unrecognised, overly long answer becomes N/A (not applicable).
+        if len(target) > 10:
+            return "N/A"
+        return target
     def classify_topics(self, text: str):
         """
         """
     def __repr__(self):
+        # self.df is only assigned by classify_sentiment_of_tweets(), so it may
+        # not exist yet; read it defensively instead of raising AttributeError.
+        df = getattr(self, "df", None)
+        if df is None:
+            return "No dataframe available."
+        return df.to_string()
+if __name__ == "__main__":
+    # Demo: print the classifier before and after labelling one account's tweets.
+    classifier = TextClassifier(
+        model_name="text-davinci-002",
+        from_date='2022-01-01',
+        to_date=str(date.today()),
+        num_tweets=20,
+    )
+    print(classifier)
+    classifier.classify_sentiment_of_tweets("jimmieakesson")
+    print(classifier)

twitterscraper/TwitterScraper.py CHANGED Viewed

@@ -109,5 +109,6 @@ if __name__ == "__main__":
      dc = sc.scrape_by_user("jimmieakesson")
      print(dc.head())
      print(dc.shape)

      dc = sc.scrape_by_user("jimmieakesson")
      print(dc.head())
      print(dc.shape)
+     print(dc.columns)