Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Oresti Theodoridis committed on Jul 13, 2022

Commit

725b13f

unverified ·

2 Parent(s): c5f746c 11ba7a0

Merge branch 'develop' into 35-create-new-text-classifier-sentiment

Browse files

Files changed (3) hide show

.idea/misc.xml +1 -1
README.md +1 -0
textclassifier/TextClassifier.py +46 -17

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
 </project>

README.md CHANGED Viewed

@@ -22,6 +22,7 @@ För att få alla dependencies:
 2. Aktivera din virtual environment
 2. gå till projektets root path och skriv i terminalen:
       $ env2/bin/python -m pip install -r requirements.txt

 2. Aktivera din virtual environment
 2. gå till projektets root path och skriv i terminalen:
       $ env2/bin/python -m pip install -r requirements.txt
+3. I vissa fall funkar det inte att installera twint för Ubuntu. Efter att ha ställt in allt funkade det efter att ha kört "sudo apt-get install build-essential" i terminalen.

textclassifier/TextClassifier.py CHANGED Viewed

@@ -6,6 +6,7 @@ from datetime import date
 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
                  user_name='jimmieakesson',
                  num_tweets=20):
         """
@@ -26,6 +27,12 @@ class TextClassifier:
         # self.api_key = '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'
         openai.api_key = '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'
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         """
@@ -39,6 +46,7 @@ class TextClassifier:
             classification_clean = classification_clean.replace(" ", "")
         return classification_clean
     def classify_sentiment(self, text: str):
         """
@@ -70,9 +78,9 @@ class TextClassifier:
     def classify_sentiment_of_tweets(self):
         """
         Classifies the sentiment of a user's tweets.
-        :param user_name: string of the user name.
         """
         df_sentiment = self.df.copy()
         df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
         self.df = df_sentiment
         return self.df
@@ -158,29 +166,50 @@ class TextClassifier:
         self.df = df_sentiment
         return self.df
-    def classify_topics(self, text: str):
         """
         Classifies the topics of a text.
         """
     def __repr__(self):
         return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
                                                                                 self.num_tweets)
 if __name__ == "__main__":
-    import pandas as pd
-    import warnings
-    warnings.simplefilter(action='ignore', category=FutureWarning)
-    pd.set_option('display.max_columns', None)
-    tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01',
-                        to_date=str(date.today()), user_name='jimmieakesson', num_tweets=60)
-    print(tc)
-    # df = tc.classify_sentiment_of_tweets("jimmieakesson")
-    # print(df)
-    df = tc.classify_sentiment_of_tweets()
-    print(df.head())
-    df = tc.analyze_sentiment_of_tweets()
-    print(df.head())

 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
                  user_name='jimmieakesson',
                  num_tweets=20):
         """
         # self.api_key = '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'
         openai.api_key = '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'
+    def scrape_tweets(self):
+        """
+        Scrapes tweets from the given date range.
+        """
+        self.ts.scrape_tweets()
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         """
             classification_clean = classification_clean.replace(" ", "")
         return classification_clean
+        return response.choices[0]['text']
     def classify_sentiment(self, text: str):
         """
     def classify_sentiment_of_tweets(self):
         """
         Classifies the sentiment of a user's tweets.
         """
         df_sentiment = self.df.copy()
         df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
         self.df = df_sentiment
         return self.df
         self.df = df_sentiment
         return self.df
+    def classify_topic(self, text: str):
         """
         Classifies the topics of a text.
+        :param text: string of the tweet text.
+        """
+        assert isinstance(text, str)
+        prompt_string = "Classify one topic for this tweet:\n \""
+        prompt_string += text
+        prompt_string += "\" \nFor example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology," \
+                         "\nTransportation,\nWorld.\nTOPIC="
+        response = openai.Completion.create(
+                model=self.model_name,
+                prompt=prompt_string,
+                temperature=0,
+                max_tokens=892,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0,
+            )
+        classification_unclean = response.choices[0]['text']
+        classification_clean = self.cleanup_topic_results(classification_unclean)
+        return classification_clean.lower()
+    def classify_topics_of_tweets(self):
+        """
+        Classifies the topics of a user's tweets.
         """
+        df_topic = self.df
+        df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
+        return df_topic
     def __repr__(self):
         return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
                                                                                 self.num_tweets)
+    def cleanup_topic_results(prediction_dict, text):
+            new_item = text.replace("\n", " ")
+            new_item = new_item.replace("  ", " ")
+            return new_item
 if __name__ == "__main__":