Spaces:

politweet-sh
/

politweet

Runtime error

File size: 6,919 Bytes

eceff29
dc67c78
03f299a
eceff29
 
 
dc67c78
c706f5e
dc67c78
eceff29
 
 
 
 
 
dc67c78
eceff29
 
dc67c78
eceff29
 
 
 
 
 
 
dc67c78
 
eceff29
 
4406f1d
c706f5e
eceff29
dc67c78
eceff29
dc67c78
 
 
 
eceff29
 
 
 
dc67c78
eceff29
 
 
 
 
 
 
dc67c78
eceff29
4406f1d
dc67c78
eceff29
dc67c78
eceff29
 
dc67c78
4406f1d
eceff29
dc67c78
 
 
eceff29
dc67c78
eceff29
 
dc67c78
 
 
eceff29
 
 
dc67c78
eceff29
dc67c78
 
eceff29
dc67c78
 
eceff29
dc67c78
eceff29
dc67c78
 
 
 
 
4406f1d
 
dc67c78
 
4406f1d
dc67c78
eceff29
dc67c78
 
eceff29
4406f1d
 
eceff29
4406f1d
 
 
eceff29
 
 
 
dc67c78
 
 
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
 
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
dc67c78
eceff29
 
dc67c78
eceff29
dc67c78
eceff29
dc67c78
 
 
eceff29
 
 
 
 
 
 
dc67c78
 
 
 
eceff29
 
 
 
 
dc67c78
4406f1d
eceff29
4406f1d
 
 
eceff29
 
03f299a

import os

import openai
import regex as re
from twitterscraper import TwitterScraper as tf

# Take the API key from the environment instead of committing a secret to the
# source tree. Any key that was previously checked in should be treated as
# leaked and revoked. The value is None when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")


class TextClassifier:
    """Classify tweets by topic and sentiment through the OpenAI completion API.

    The class keeps no state; it only groups related helpers, all of which
    are static methods and can be called on the class or on an instance.
    The dictionaries handled here are keyed by tweet text and map to a
    per-tweet dict of attributes.
    """

    @staticmethod
    def _complete(prompt, max_tokens):
        """Send ``prompt`` to the completion endpoint and return the raw text.

        :param prompt: full prompt string to send.
        :param max_tokens: upper bound on the number of generated tokens.
        :return: the ``'text'`` field of the first returned choice.
        """
        response = openai.Completion.create(
            model="text-davinci-002",
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        return response.choices[0]['text']

    @staticmethod
    def classify_topics(tweet_dict):
        """Ask the model for one general topic and two sub-topics per tweet.

        One completion request is made for every key of ``tweet_dict``. The
        raw answers are then parsed by :meth:`cleanup_topic_results`.

        :param tweet_dict: dict keyed by tweet text; each value is a dict
            that receives a ``'topic'`` entry.
        :return: ``tweet_dict`` itself, with ``'topic'`` set to a list of
            topic strings for every tweet.
        """
        prediction_dict = {}

        for tweet in tweet_dict:
            prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
            prompt_string += tweet
            prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be " \
                             "more than 5 words. Numerate each topic in the output. END "
            prediction_dict[tweet] = TextClassifier._complete(prompt_string, 892)

        return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)

    @staticmethod
    def classify_sentiments(tweet_dict):
        """Ask the model for one sentiment label per tweet.

        One completion request is made for every key of ``tweet_dict``. The
        completion text is stored exactly as returned (no trimming).

        :param tweet_dict: dict keyed by tweet text; each value is a dict
            that receives a ``'sentiment'`` entry.
        :return: ``tweet_dict`` itself, updated in place.
        """
        # Only the nested per-tweet dicts are modified, never the key set,
        # so iterating over the dict directly is safe.
        for tweet in tweet_dict:
            prompt_string = "Classify one sentiment for this tweet:\n \""
            prompt_string += tweet
            prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement," \
                             "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire," \
                             "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
            tweet_dict[tweet]['sentiment'] = TextClassifier._complete(prompt_string, 256)

        return tweet_dict

    @staticmethod
    def cleanup_topic_results(prediction_dict, tweet_dict):
        """Turn raw topic completions into lists of topic strings.

        :param prediction_dict: dict mapping tweet text to the raw completion
            text returned for the topic prompt.
        :param tweet_dict: dict keyed by the same tweet texts; each value is
            a dict that receives a ``'topic'`` entry.
        :return: ``tweet_dict`` itself, updated in place.
        """
        for tweet, raw_text in prediction_dict.items():
            flattened = raw_text.replace("\n", " ").replace("  ", " ")
            # Drop the first four characters. NOTE(review): this presumably
            # removes the leading whitespace plus the first enumeration
            # marker (e.g. " 1. ") of the model's answer -- confirm against
            # real completions.
            flattened = flattened[4:]
            # Remove the remaining enumeration digits; the dots that followed
            # them are then used as separators between topics.
            flattened = re.sub(r'\d', '', flattened)

            topics = []
            for part in flattened.split("."):
                # Trim at most one space on each side.
                if part.startswith(' '):
                    part = part[1:]
                if part.endswith(' '):
                    part = part[:-1]
                topics.append(part)
            tweet_dict[tweet]['topic'] = topics

        return tweet_dict

    @staticmethod
    def print_results(results_dict):
        """Print every tweet followed by its stored predictions.

        :param results_dict: dict mapping tweet text to its predictions.
        """
        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
        for tweet, predictions in results_dict.items():
            print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")

    @staticmethod
    def print_stats(result_dict):
        """Print per-topic engagement averages and sentiment frequencies.

        For every topic the table shows how many tweets carry it, the mean
        number of likes, retweets and replies of those tweets, and the mean
        of these three averages ("reach average"). A second table lists how
        often each sentiment occurs.

        :param result_dict: dict mapping tweet text to a dict with the keys
            ``'nlikes'``, ``'nreplies'``, ``'nretweets'``, ``'topic'``
            (iterable of topic strings) and ``'sentiment'``.
        """
        user = ""  # never populated here, so the USER header prints blank
        freq_dict = {}
        total_likes = {}
        total_retweets = {}
        total_replies = {}
        sentiment_dict = {}

        for value in result_dict.values():
            nlikes = value['nlikes']
            nreplies = value['nreplies']
            nretweets = value['nretweets']
            sentiment = value['sentiment']

            # Count sentiment frequency.
            sentiment_dict[sentiment] = sentiment_dict.get(sentiment, 0) + 1

            # Count topic frequency and accumulate engagement per topic.
            for topic in value['topic']:
                freq_dict[topic] = freq_dict.get(topic, 0) + 1
                total_likes[topic] = total_likes.get(topic, 0) + nlikes
                total_retweets[topic] = total_retweets.get(topic, 0) + nretweets
                total_replies[topic] = total_replies.get(topic, 0) + nreplies

        # Turn the totals into per-topic means. Replies are averaged like
        # likes and retweets so the column matches its "AVERAGE" header.
        mean_likes = {topic: total / freq_dict[topic] for topic, total in total_likes.items()}
        mean_retweets = {topic: total / freq_dict[topic] for topic, total in total_retweets.items()}
        mean_replies = {topic: total / freq_dict[topic] for topic, total in total_replies.items()}

        # Print the names of the columns.
        print('\033[1m' + "USER: " + '\033[0m', user)
        print('\033[1m' + "NBR OF TWEETS SCRAPED: " + '\033[0m', len(result_dict))
        print('\033[1m' + "NBR OF DIFFERENT TOPICS: " + '\033[0m', len(freq_dict), "\n")
        row_format = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
        print(row_format.format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY',
                                'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS',
                                'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))

        # Print one row per topic, in first-seen order.
        for topic, likes_avg in mean_likes.items():
            retweets_avg = mean_retweets[topic]
            replies_avg = mean_replies[topic]
            reach_avg = (likes_avg + retweets_avg + replies_avg) / 3
            print(row_format.format(topic, freq_dict[topic],
                                    "{:.2f}".format(likes_avg),
                                    "{:.2f}".format(retweets_avg),
                                    "{:.2f}".format(replies_avg),
                                    "{:.2f}".format(reach_avg)))

        print("\n")
        print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: " + '\033[0m', len(sentiment_dict), "\n")
        print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
        for sentiment, count in sentiment_dict.items():
            print("{:<60} {:<20}".format(sentiment, count))


if __name__ == '__main__':
    # Manual smoke run: scrape up to 40 tweets from one account and show a
    # preview of what came back.
    scraper = tf.TwitterScraper(num_tweets=40)
    scraped_tweets = scraper.scrape_by_user("jimmieakesson")
    # NOTE(review): the result exposes head() and shape, so it is presumably
    # a pandas DataFrame -- confirm against the scraper implementation.
    print(scraped_tweets.head())
    print(scraped_tweets.shape)