Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

Nisse00 committed on Jul 13, 2022

Commit

5ce4dc5

•

1 Parent(s): c91c79b

Classify Topics done

Browse files

Files changed (3) hide show

.idea/misc.xml +1 -1
.idea/politweet.iml +1 -1
textclassifier/TextClassifier.py +55 -13

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet) (2)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
 </project>

.idea/politweet.iml CHANGED Viewed

@@ -5,7 +5,7 @@
       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.9 (politweet) (2)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">

       <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
+    <orderEntry type="jdk" jdkName="Python 3.10 (politweet)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">

textclassifier/TextClassifier.py CHANGED Viewed

@@ -6,7 +6,7 @@ from datetime import date
 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
-                 num_tweets=20):
         """
         Initializes the TextClassifier.
         :param model_name: name of the model from openai.
@@ -20,9 +20,17 @@ class TextClassifier:
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
         # self.api_key = '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'
         openai.api_key = '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'
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         classification_clean = classification_unclean.replace('\n\n', "")
@@ -31,6 +39,7 @@ class TextClassifier:
             classification_clean = classification_clean.replace(" ", "")
         return classification_clean
     def classify_sentiment(self, text: str):
         """
@@ -45,7 +54,7 @@ class TextClassifier:
                          "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
         response = openai.Completion.create(
-            model="text-davinci-002",
             prompt=prompt_string,
             temperature=0.0,
             max_tokens=256,
@@ -59,12 +68,11 @@ class TextClassifier:
         return classification_clean.lower()
-    def classify_sentiment_of_tweets(self, user_name: str):
         """
         Classifies the sentiment of a user's tweets.
-        :param user_name: string of the user name.
         """
-        df_sentiment = self.ts.scrape_by_user(user_name)
         df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
         return df_sentiment
@@ -78,7 +86,7 @@ class TextClassifier:
         prompt_string = "Who is the TARGET of this "
         prompt_string += sentiment
         prompt_string += " TWEET?\\nTWEET=\""
-        prompt_string += tweet
         prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
         response = openai.Completion.create(
@@ -136,17 +144,51 @@ class TextClassifier:
         return tweet_dict
-    def classify_topics(self, text: str):
         """
         Classifies the topics of a text.
         """
-    def __repr__(self):
-        return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date, self.num_tweets)
 if __name__ == "__main__":
-    tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=20)
-    print(tc)
-    df = tc.classify_sentiment_of_tweets("jimmieakesson")
-    print(df)

 class TextClassifier:
     def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
+                 num_tweets=10, user_name=None):
         """
         Initializes the TextClassifier.
         :param model_name: name of the model from openai.
         self.to_date = to_date
         self.num_tweets = num_tweets
         self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
+        self.user_name = user_name
+        self.df = self.ts.scrape_by_user(user_name)
         # self.api_key = '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'
         openai.api_key = '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'
+    def scrape_tweets(self):
+        """
+        Runs scrape_tweets() on the TwitterScraper held in self.ts.
+        Whatever that call returns is discarded; this method returns None.
+        """
+        scraper = self.ts
+        scraper.scrape_tweets()
     @staticmethod
     def cleanup_sentiment_results(classification_unclean):
         classification_clean = classification_unclean.replace('\n\n', "")
             classification_clean = classification_clean.replace(" ", "")
         return classification_clean
+        return response.choices[0]['text']
     def classify_sentiment(self, text: str):
         """
                          "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
         response = openai.Completion.create(
+            model=self.model_name,
             prompt=prompt_string,
             temperature=0.0,
             max_tokens=256,
         return classification_clean.lower()
+    def classify_sentiment_of_tweets(self):
         """
+        Adds a 'sentiment' column to self.df by applying classify_sentiment to each entry of its 'tweet' column.
+        :return: self.df itself (updated in place), now including the 'sentiment' column.
         """
+        tweets = self.df
+        sentiments = tweets['tweet'].apply(self.classify_sentiment)
+        tweets['sentiment'] = sentiments
+        return tweets
         prompt_string = "Who is the TARGET of this "
         prompt_string += sentiment
         prompt_string += " TWEET?\\nTWEET=\""
+        prompt_string += text
         prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
         response = openai.Completion.create(
         return tweet_dict
+    def classify_topic(self, text: str):
         """
         Classifies the topics of a text.
+        :param text: string of the tweet text.
+        :return: the completion text, passed through cleanup_topic_results and lower-cased.
+        :raises TypeError: if text is not a string.
+        """
+        # Raise explicitly rather than assert: assert statements are removed under "python -O".
+        if not isinstance(text, str):
+            raise TypeError("text must be a str, got {}".format(type(text).__name__))
+        prompt_string = "Classify one topic for this tweet:\n \""
+        prompt_string += text
+        prompt_string += "\" \nFor example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology," \
+                         "\nTransportation,\nWorld.\nTOPIC="
+        response = openai.Completion.create(
+            model=self.model_name,
+            prompt=prompt_string,
+            temperature=0,
+            max_tokens=892,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+        )
+        # Only the first returned choice is used.
+        classification_unclean = response.choices[0]['text']
+        classification_clean = self.cleanup_topic_results(classification_unclean)
+        return classification_clean.lower()
+    def classify_topics_of_tweets(self):
+        """
+        Adds a 'topic' column to self.df by applying classify_topic to each entry of its 'tweet' column.
+        :return: self.df itself (updated in place), now including the 'topic' column.
         """
+        tweets = self.df
+        topics = tweets['tweet'].apply(self.classify_topic)
+        tweets['topic'] = topics
+        return tweets
+    def cleanup_topic_results(self, text):
+        """
+        Flattens a raw topic completion onto a single line.
+        :param text: string of the raw completion text.
+        :return: the text with every newline replaced by a space and runs of spaces collapsed to one.
+        """
+        flattened = text.replace("\n", " ")
+        # One replace("  ", " ") pass only halves long runs of spaces, so repeat until none are left.
+        while "  " in flattened:
+            flattened = flattened.replace("  ", " ")
+        return flattened
 if __name__ == "__main__":
+    import pandas as pd
+    # Uncomment to show every column when the result is printed:
+    # pd.set_option('display.max_columns', None)
+    tc = TextClassifier(
+        model_name="text-davinci-002",
+        from_date='2022-01-01',
+        to_date=str(date.today()),
+        num_tweets=20,
+        user_name="jimmieakesson",
+    )
+    classified = tc.classify_topics_of_tweets()
+    print(classified)