import os
from datetime import date

import openai
import regex as re
from twitterscraper import TwitterScraper

# Placeholder stored when the model's answer is not usable as a target.
_NOT_APPLICABLE = "N/A"

# Answers longer than this many characters (and not a known alias) are
# treated as faulty model output.
_MAX_TARGET_LENGTH = 10

# The model sometimes wraps party abbreviations in parentheses, e.g. "(S)".
_PARENTHESES = re.compile(r"[()]")

# Lower-cased model answers mapped to one canonical name, so that different
# spellings of the same target end up with the same label.
_TARGET_ALIASES = {
    "v": "Vänsterpartiet",
    "mp": "Miljöpartiet",
    "the swedish green party": "Miljöpartiet",
    "s": "Socialdemokraterna",
    "the swedish social democratic party": "Socialdemokraterna",
    "c": "Centerpartiet",
    "l": "Liberalerna",
    "kd": "Kristdemokraterna",
    "m": "Moderaterna",
    "the swedish moderate party": "Moderaterna",
    "the moderate party": "Moderaterna",
    "sd": "Sverigedemokraterna",
    "the swedish government": "Regeringen",
}


class TextClassifier:
    """Classifies scraped tweets with OpenAI completion models."""

    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01',
                 to_date=None, num_tweets=20, api_key=None):
        """
        Initializes the TextClassifier.

        :param model_name: name of the model from openai.
        :param from_date: string of the format 'YYYY-MM-DD'.
        :param to_date: string of the format 'YYYY-MM-DD'. Defaults to today's
            date, resolved when the instance is created (not at import time).
        :param num_tweets: integer value of the maximum number of tweets to be
            scraped.
        :param api_key: optional OpenAI API key. When omitted, the
            OPENAI_API_KEY environment variable is used. Keys must never be
            committed to source control.
        """
        if to_date is None:
            to_date = str(date.today())

        self.model_name = model_name
        self.from_date = from_date
        self.to_date = to_date
        self.num_tweets = num_tweets
        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
        # Populated by classify_sentiment_of_tweets(); __repr__ relies on the
        # attribute existing before any classification has been run.
        self.df = None

        # Only override the library's configuration when a key is available,
        # so a key configured elsewhere is not clobbered with None.
        key = api_key or os.environ.get("OPENAI_API_KEY")
        if key:
            openai.api_key = key

    @staticmethod
    def cleanup_sentiment_results(classification_unclean):
        """
        Removes newlines and surrounding whitespace from a raw completion.

        Interior spaces are preserved, so multi-word answers stay readable.

        :param classification_unclean: raw completion text.
        :return: the cleaned completion text.
        """
        return classification_unclean.replace('\n', "").strip()

    @staticmethod
    def _normalize_target(raw_target):
        """
        Turns a raw TARGET completion into a canonical target label.

        Known aliases are mapped to their canonical name; any other answer
        longer than _MAX_TARGET_LENGTH characters becomes "N/A".

        :param raw_target: raw completion text.
        :return: canonical target name, the cleaned answer, or "N/A".
        """
        target = _PARENTHESES.sub("", raw_target.strip())

        # Alias lookup must run before the length filter: most of the
        # long-form aliases exceed the length limit themselves.
        canonical = _TARGET_ALIASES.get(target.lower())
        if canonical is not None:
            return canonical

        # Sometimes GPT-3 gives faulty (overly long) results.
        if len(target) > _MAX_TARGET_LENGTH:
            return _NOT_APPLICABLE
        return target

    def classify_sentiment(self, text: str):
        """
        Classifies the sentiment of a text.

        :param text: string of the tweet text.
        :return: lower-cased, cleaned sentiment label produced by the model.
        :raises TypeError: if text is not a string.
        """
        if not isinstance(text, str):
            raise TypeError(f"text must be a str, got {type(text).__name__}")

        prompt_string = (
            "Classify one sentiment for this tweet:\n \""
            + text
            + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
              "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
              "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
        )

        response = openai.Completion.create(
            model=self.model_name,
            prompt=prompt_string,
            temperature=0.0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            logprobs=5
        )
        classification_unclean = response.choices[0]['text']
        return self.cleanup_sentiment_results(classification_unclean).lower()

    def classify_sentiment_of_tweets(self, user_name: str):
        """
        Classifies the sentiment of a user's tweets.

        The scraped result gets a new 'sentiment' column and is stored in
        self.df.

        :param user_name: string of the user name.
        """
        # NOTE(review): assumes scrape_by_user returns a pandas DataFrame with
        # a 'tweet' column - confirm against TwitterScraper.
        df_sentiment = self.ts.scrape_by_user(user_name)
        df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
        self.df = df_sentiment

    def analyze_sentiment(self, text: str, sentiment: str):
        """
        Asks OpenAI who the target of a tweet's sentiment is.

        :param text: string of the tweet text.
        :param sentiment: sentiment label of the tweet, inserted into the
            prompt.
        :return: canonical target name, the model's short answer, or "N/A"
            when the answer is too long to be trusted.
        """
        prompt_string = (
            "Who is the TARGET of this " + sentiment + " TWEET?\n"
            "TWEET=\"" + text + "\"\n"
            ".TARGET should consist of less than 5 words.\n"
            "TARGET="
        )

        response = openai.Completion.create(
            model=self.model_name,
            prompt=prompt_string,
            temperature=0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        return self._normalize_target(response.choices[0]['text'])

    def classify_topics(self, text: str):
        """
        Classifies the topics of a text.

        Not implemented yet; currently returns None.
        """

    def __repr__(self):
        if self.df is None:
            return "No dataframe available."
        return self.df.to_string()


if __name__ == "__main__":
    tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
                        num_tweets=20)
    print(tc)
    tc.classify_sentiment_of_tweets("jimmieakesson")
    print(tc)