import os

import openai
import regex as re

# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret is committed to source control.
# NOTE(review): a key was previously hard-coded on this line; treat it as
# leaked and revoke it.
openai.api_key = os.environ.get("OPENAI_API_KEY")


class TextClassifier:
    """Classify tweets by topic and sentiment with an OpenAI completion model.

    The class keeps no state and only namespaces its static helpers. Tweets
    are passed around as a dict keyed by tweet text whose values are per-tweet
    dicts. The classifiers add ``'topic'`` and ``'sentiment'`` entries to
    those per-tweet dicts; ``print_stats`` additionally reads ``'nlikes'``,
    ``'nreplies'`` and ``'nretweets'``.
    """

    # ANSI escape sequences used to print bold headings.
    _BOLD = '\033[1m'
    _RESET = '\033[0m'

    _MODEL = "text-davinci-002"
    _TOPIC_ROW = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
    _SENTIMENT_ROW = "{:<60} {:<20}"

    @staticmethod
    def _complete(prompt, max_tokens):
        """Return the text of the first completion the model gives for *prompt*."""
        response = openai.Completion.create(
            model=TextClassifier._MODEL,
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response.choices[0]['text']

    @staticmethod
    def classify_topics(tweet_dict):
        """Ask the model for one general topic and two sub-topics per tweet.

        Args:
            tweet_dict: dict keyed by tweet text; each value is a per-tweet
                dict that receives a ``'topic'`` list.

        Returns:
            The same ``tweet_dict``, mutated in place by
            ``cleanup_topic_results``.
        """
        prediction_dict = {}

        for tweet in tweet_dict:
            prompt_string = (
                "Classify this tweet with a general topic and two sub-topics:\n\""
                + tweet
                + "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be "
                  "more than 5 words. Numerate each topic in the output. END "
            )
            prediction_dict[tweet] = TextClassifier._complete(prompt_string, 892)

        return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)

    @staticmethod
    def classify_sentiments(tweet_dict):
        """Ask the model for one sentiment label per tweet.

        The raw completion text is stored unmodified under ``'sentiment'`` in
        each per-tweet dict.

        Args:
            tweet_dict: dict keyed by tweet text; values are per-tweet dicts.

        Returns:
            The same ``tweet_dict``, mutated in place.
        """
        for tweet, info in tweet_dict.items():
            prompt_string = (
                "Classify one sentiment for this tweet:\n \""
                + tweet
                + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
                  "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
                  "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
            )
            info['sentiment'] = TextClassifier._complete(prompt_string, 256)

        return tweet_dict

    @staticmethod
    def cleanup_topic_results(prediction_dict, tweet_dict):
        """Turn raw topic completions into lists of topic strings.

        Args:
            prediction_dict: dict mapping tweet text to the raw completion.
            tweet_dict: dict keyed by the same tweet texts; each per-tweet
                dict receives the parsed list under ``'topic'``.

        Returns:
            The same ``tweet_dict``, mutated in place.

        Raises:
            KeyError: if a tweet in ``prediction_dict`` is not in
                ``tweet_dict``.
        """
        for tweet, raw_text in prediction_dict.items():
            text = raw_text.replace("\n", " ").replace("  ", " ")
            # Drop the first four characters -- presumably the " 1. " prefix of
            # the first numbered topic; TODO confirm against real model output.
            text = text[4:]
            # Remove the remaining enumerators' digits; their trailing "." is
            # what the split below uses as the separator.
            text = re.sub(r'\d', '', text)
            # A trailing "." would otherwise yield an empty topic that later
            # shows up in the statistics, so blank entries are skipped.
            tweet_dict[tweet]['topic'] = [
                part.strip() for part in text.split(".") if part.strip()
            ]

        return tweet_dict

    @staticmethod
    def print_results(results_dict):
        """Print every tweet followed by the data stored for it."""
        print(TextClassifier._BOLD + "RESULTS" + TextClassifier._RESET, "\n")
        for tweet, predictions in results_dict.items():
            print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")

    @staticmethod
    def _aggregate_stats(result_dict):
        """Compute per-topic frequencies/means and sentiment frequencies.

        Returns:
            A tuple ``(topic_freq, mean_likes, mean_retweets, mean_replies,
            sentiment_freq)`` of dicts. The topic dicts are keyed by topic in
            first-seen order; ``sentiment_freq`` is keyed by sentiment.
        """
        topic_freq = {}
        total_likes = {}
        total_retweets = {}
        total_replies = {}
        sentiment_freq = {}

        for value in result_dict.values():
            nlikes = value['nlikes']
            nreplies = value['nreplies']
            nretweets = value['nretweets']
            topic_list = value['topic']
            sentiment = value['sentiment']

            sentiment_freq[sentiment] = sentiment_freq.get(sentiment, 0) + 1

            for topic in topic_list:
                topic_freq[topic] = topic_freq.get(topic, 0) + 1
                total_likes[topic] = total_likes.get(topic, 0) + nlikes
                total_retweets[topic] = total_retweets.get(topic, 0) + nretweets
                total_replies[topic] = total_replies.get(topic, 0) + nreplies

        def mean_per_topic(totals):
            return {topic: total / topic_freq[topic] for topic, total in totals.items()}

        return (
            topic_freq,
            mean_per_topic(total_likes),
            mean_per_topic(total_retweets),
            # Replies are averaged like likes and retweets so the column
            # matches its "AVERAGE NBR OF REPLIES" heading.
            mean_per_topic(total_replies),
            sentiment_freq,
        )

    @staticmethod
    def print_stats(result_dict, user=""):
        """Print per-topic engagement averages and sentiment frequencies.

        Args:
            result_dict: dict keyed by tweet text; each value must provide
                ``'nlikes'``, ``'nreplies'``, ``'nretweets'``, ``'topic'``
                (an iterable of topics) and ``'sentiment'``.
            user: name printed on the ``USER:`` line; empty by default.

        Raises:
            KeyError: if a per-tweet dict lacks one of the required keys.
        """
        bold = TextClassifier._BOLD
        reset = TextClassifier._RESET
        (topic_freq, mean_likes, mean_retweets,
         mean_replies, sentiment_freq) = TextClassifier._aggregate_stats(result_dict)

        print(bold + "USER: " + reset, user)
        print(bold + "NBR OF TWEETS SCRAPED: " + reset, len(result_dict))
        print(bold + "NBR OF DIFFERENT TOPICS: " + reset, len(topic_freq), "\n")
        print(TextClassifier._TOPIC_ROW.format(
            bold + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES',
            'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES',
            'REACH AVERAGE' + reset))

        for topic, frequency in topic_freq.items():
            likes = mean_likes[topic]
            retweets = mean_retweets[topic]
            replies = mean_replies[topic]
            reach_avg = (likes + retweets + replies) / 3
            print(TextClassifier._TOPIC_ROW.format(
                topic, frequency, "{:.2f}".format(likes),
                "{:.2f}".format(retweets), "{:.2f}".format(replies),
                "{:.2f}".format(reach_avg)))

        print("\n")
        print(bold + "NBR OF DIFFERENT SENTIMENTS: " + reset, len(sentiment_freq), "\n")
        print(TextClassifier._SENTIMENT_ROW.format(bold + 'SENTIMENT', 'SENTIMENT FREQUENCY' + reset))
        for sentiment, frequency in sentiment_freq.items():
            print(TextClassifier._SENTIMENT_ROW.format(sentiment, frequency))