import os
from datetime import date

import openai
import regex as re
from twitterscraper import TwitterScraper


class TextClassifier:
    """
    Classifies tweets with an OpenAI completion model.

    Tweets are fetched through ``TwitterScraper`` and each tweet is sent to the
    configured OpenAI model, either to obtain a sentiment label or to find the
    target that a given sentiment is directed at.
    """

    # Placeholder returned when the model's target answer is considered faulty.
    NOT_APPLICABLE = "N/A"

    # Answers that are not a known alias and are longer than this many
    # characters are treated as faulty model output.
    MAX_TARGET_LENGTH = 10

    # Lower-cased model answers mapped to the canonical name used for grouping.
    _TARGET_ALIASES = {
        "v": "Vänsterpartiet",
        "mp": "Miljöpartiet",
        "the swedish green party": "Miljöpartiet",
        "s": "Socialdemokraterna",
        "the swedish social democratic party": "Socialdemokraterna",
        "c": "Centerpartiet",
        "l": "Liberalerna",
        "kd": "Kristdemokraterna",
        "m": "Moderaterna",
        "the swedish moderate party": "Moderaterna",
        "the moderate party": "Moderaterna",
        "sd": "Sverigedemokraterna",
        "the swedish government": "Regeringen",
    }

    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=None,
                 num_tweets=20, api_key=None):
        """
        Initializes the TextClassifier.
        :param model_name: name of the model from openai.
        :param from_date: string of the format 'YYYY-MM-DD'.
        :param to_date: string of the format 'YYYY-MM-DD'. Defaults to today's date,
            evaluated when the instance is created.
        :param num_tweets: integer value of the maximum number of tweets to be scraped.
        :param api_key: OpenAI API key. When omitted, the OPENAI_API_KEY environment
            variable is used; if neither is set, ``openai.api_key`` is left untouched.
        """
        # Resolve the default here: a default of str(date.today()) in the
        # signature would be frozen at class-definition time.
        if to_date is None:
            to_date = str(date.today())

        self.model_name = model_name
        self.from_date = from_date
        self.to_date = to_date
        self.num_tweets = num_tweets
        # Filled in by classify_sentiment_of_tweets(); None until then so that
        # __repr__ works on a fresh instance.
        self.df = None
        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)

        # Secrets must never live in source control: take the key from the
        # caller or from the environment.
        resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
        if resolved_key:
            openai.api_key = resolved_key

    @staticmethod
    def cleanup_sentiment_results(classification_unclean):
        """
        Cleans the raw completion text returned for a sentiment request.
        :param classification_unclean: raw text of the completion.
        :return: the text without newlines and without leading/trailing whitespace.
            Spaces inside the label are preserved.
        """
        return classification_unclean.replace('\n', "").strip()

    @classmethod
    def _normalize_target(cls, raw_target):
        """
        Normalizes the raw target answer of the model.
        :param raw_target: raw text of the completion.
        :return: the canonical name if the answer is a known alias, NOT_APPLICABLE if
            the answer is unknown and longer than MAX_TARGET_LENGTH characters,
            otherwise the cleaned answer itself.
        """
        target = raw_target.replace("(", "").replace(")", "").strip()

        # Aliases are resolved before the length filter; otherwise the long
        # aliases (e.g. "the swedish government") could never match.
        canonical = cls._TARGET_ALIASES.get(target.lower())
        if canonical is not None:
            return canonical

        # Sometimes GPT-3 gives faulty results, so a simple length filter is used.
        if len(target) > cls.MAX_TARGET_LENGTH:
            return cls.NOT_APPLICABLE

        return target

    def classify_sentiment(self, text: str):
        """
        Classifies the sentiment of a text.
        :param text: string of the tweet text.
        :return: lower-cased sentiment label as returned by the model.
        :raises TypeError: if text is not a string.
        """
        if not isinstance(text, str):
            raise TypeError("text must be a string, got " + type(text).__name__)

        prompt_string = (
            "Classify one sentiment for this tweet:\n \""
            + text
            + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
              "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
              "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
        )

        response = openai.Completion.create(
            model=self.model_name,
            prompt=prompt_string,
            temperature=0.0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            logprobs=5
        )
        classification_unclean = response.choices[0]['text']
        classification_clean = self.cleanup_sentiment_results(classification_unclean)

        return classification_clean.lower()

    def classify_sentiment_of_tweets(self, user_name: str):
        """
        Classifies the sentiment of a user's tweets.
        The scraped result, extended with a 'sentiment' column, is stored in self.df.
        :param user_name: string of the user name.
        """
        # NOTE(review): scrape_by_user presumably returns a pandas DataFrame with a
        # 'tweet' column - confirm against TwitterScraper.
        df_sentiment = self.ts.scrape_by_user(user_name)
        df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
        self.df = df_sentiment

    def analyze_sentiment(self, text: str, sentiment: str):
        """
        Asks OpenAI who the target of a tweet's sentiment is.
        :param text: string of the tweet text.
        :param sentiment: sentiment label of the tweet, inserted into the prompt.
        :return: string with the normalized target, or "N/A" if the answer is unusable.
        """
        prompt_string = (
            "Who is the TARGET of this "
            + sentiment
            + " TWEET?\nTWEET=\""
            + text
            + "\"\n.TARGET should consist of less than 5 words.\nTARGET="
        )

        response = openai.Completion.create(
            model=self.model_name,
            prompt=prompt_string,
            temperature=0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )

        return self._normalize_target(response.choices[0]['text'])

    def classify_topics(self, text: str):
        """
        Classifies the topics of a text.
        Not implemented yet: the method currently does nothing and returns None.
        """

    def __repr__(self):
        """
        Returns the classified tweets as a string, or a notice if nothing has been
        classified yet.
        """
        # getattr keeps this safe even if __init__ did not run.
        df = getattr(self, "df", None)
        if df is None:
            return "No dataframe available."
        return df.to_string()


if __name__ == "__main__":
    # Demo run: show the classifier before and after classifying one account's tweets.
    classifier = TextClassifier(
        model_name="text-davinci-002",
        from_date='2022-01-01',
        to_date=date.today().isoformat(),
        num_tweets=20,
    )
    print(classifier)
    classifier.classify_sentiment_of_tweets("jimmieakesson")
    print(classifier)