"""Classify tweets by topic and sentiment with the OpenAI completion API.

The module exposes ``TextClassifier``, a namespace of static helpers that
annotate a ``{tweet_text: record_dict}`` mapping with model-predicted topics
and a sentiment, and print per-topic engagement statistics.
"""

import os
from collections import Counter, defaultdict

import openai
import regex as re

# SECURITY: the API key used to be hard-coded on this line. A key that has
# been committed to source control must be treated as leaked and revoked;
# the replacement is supplied through the OPENAI_API_KEY environment variable.
openai.api_key = os.environ.get("OPENAI_API_KEY")

_DEFAULT_MODEL = "text-davinci-002"

# ANSI escape sequences used to render headings in bold.
_BOLD = '\033[1m'
_RESET = '\033[0m'

_TOPIC_PROMPT_HEAD = "Classify this tweet with a general topic and two sub-topics:\n\""
_TOPIC_PROMPT_TAIL = (
    "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be "
    "more than 5 words. Numerate each topic in the output. END "
)
_SENTIMENT_PROMPT_HEAD = "Classify one sentiment for this tweet:\n \""
_SENTIMENT_PROMPT_TAIL = (
    "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
    "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
    "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
)

# A topic boundary is either an enumeration marker ("1.", "2)") that starts
# a token, or a line break. The look-behind keeps digits that are glued to a
# word (e.g. "COVID-19.") from being mistaken for a marker.
_TOPIC_SEPARATOR = re.compile(r'(?<!\S)\d+\s*[.)]|[\r\n]+')

_TOPIC_ROW = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
_SENTIMENT_ROW = "{:<60} {:<20}"
_RULE = "---------------------------------"


def _complete(prompt, max_tokens, model):
    """Send *prompt* to the completion endpoint and return the raw text."""
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response.choices[0]['text']


def _parse_topics(raw_completion):
    """Split a numbered completion into a list of clean topic strings."""
    topics = []
    for piece in _TOPIC_SEPARATOR.split(raw_completion):
        # Collapse internal whitespace runs and trim stray periods/spaces.
        cleaned = " ".join(piece.split()).strip(" .")
        if cleaned:
            topics.append(cleaned)
    return topics


def _collect_stats(result_dict):
    """Return (sentiment counts, topic counts, per-topic engagement totals)."""
    sentiment_freq = Counter()
    topic_freq = Counter()
    totals = {
        'nlikes': defaultdict(int),
        'nretweets': defaultdict(int),
        'nreplies': defaultdict(int),
    }
    for record in result_dict.values():
        sentiment_freq[record['sentiment']] += 1
        for topic in record['topic']:
            topic_freq[topic] += 1
            for metric, per_topic in totals.items():
                per_topic[topic] += record[metric]
    return sentiment_freq, topic_freq, totals


class TextClassifier:
    """Static helpers that label tweets via OpenAI and report on the labels.

    Every method operates on a dict keyed by tweet text whose values are
    per-tweet record dicts; the classification methods mutate those records
    in place and also return the outer dict.
    """

    @staticmethod
    def classify_topics(tweet_dict, *, model=_DEFAULT_MODEL):
        """Ask the model for one general topic and two sub-topics per tweet.

        Args:
            tweet_dict: Mapping of tweet text to that tweet's record dict.
            model: Name of the completion model to query.

        Returns:
            ``tweet_dict`` itself, with a ``'topic'`` list stored on every
            record (see ``cleanup_topic_results``).
        """
        prediction_dict = {}
        for tweet in tweet_dict:
            prompt = _TOPIC_PROMPT_HEAD + tweet + _TOPIC_PROMPT_TAIL
            prediction_dict[tweet] = _complete(prompt, max_tokens=892, model=model)
        return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)

    @staticmethod
    def classify_sentiments(tweet_dict, *, model=_DEFAULT_MODEL):
        """Ask the model for a single sentiment label per tweet.

        Args:
            tweet_dict: Mapping of tweet text to that tweet's record dict.
            model: Name of the completion model to query.

        Returns:
            ``tweet_dict`` itself, with the label stored under
            ``'sentiment'`` on every record.
        """
        for tweet, record in tweet_dict.items():
            prompt = _SENTIMENT_PROMPT_HEAD + tweet + _SENTIMENT_PROMPT_TAIL
            # Strip surrounding whitespace so that identical labels are
            # counted together by print_stats.
            record['sentiment'] = _complete(prompt, max_tokens=256, model=model).strip()
        return tweet_dict

    @staticmethod
    def cleanup_topic_results(prediction_dict, tweet_dict):
        """Turn raw topic completions into lists and attach them to records.

        Args:
            prediction_dict: Mapping of tweet text to the raw completion
                text returned for it.
            tweet_dict: Mapping of tweet text to that tweet's record dict.

        Returns:
            ``tweet_dict`` itself, with ``record['topic']`` set to the list
            of parsed topic strings (empty entries are dropped).

        Raises:
            KeyError: If a tweet in ``prediction_dict`` is missing from
                ``tweet_dict``.
        """
        for tweet, raw_completion in prediction_dict.items():
            tweet_dict[tweet]['topic'] = _parse_topics(raw_completion)
        return tweet_dict

    @staticmethod
    def print_results(results_dict):
        """Print every tweet followed by its record, separated by rules."""
        print(_BOLD + "RESULTS" + _RESET, "\n")
        for tweet, predictions in results_dict.items():
            print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + _RULE)

    @staticmethod
    def print_stats(result_dict, user=""):
        """Print topic and sentiment frequency tables with engagement means.

        For each topic the table shows how many tweets carry it and the
        average likes, retweets and replies of those tweets; the reach
        average is the mean of those three averages.

        Args:
            result_dict: Mapping of tweet text to a record holding the keys
                ``'nlikes'``, ``'nreplies'``, ``'nretweets'``, ``'topic'``
                (list) and ``'sentiment'``.
            user: Label printed on the ``USER:`` line.

        Raises:
            KeyError: If a record lacks one of the keys listed above.
        """
        sentiment_freq, topic_freq, totals = _collect_stats(result_dict)

        print(_BOLD + "USER: " + _RESET, user)
        print(_BOLD + "NBR OF TWEETS SCRAPED: " + _RESET, len(result_dict))
        print(_BOLD + "NBR OF DIFFERENT TOPICS: " + _RESET, len(topic_freq), "\n")

        # The escape codes wrap the already-padded header so they do not
        # eat into the column widths and misalign it against the data rows.
        header = _TOPIC_ROW.format(
            'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES',
            'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE',
        )
        print(_BOLD + header + _RESET)

        for topic, count in topic_freq.items():
            avg_likes = totals['nlikes'][topic] / count
            avg_retweets = totals['nretweets'][topic] / count
            # Replies are averaged like the other two metrics (previously
            # the raw total was shown and fed into the reach average).
            avg_replies = totals['nreplies'][topic] / count
            reach_avg = (avg_likes + avg_retweets + avg_replies) / 3
            print(_TOPIC_ROW.format(
                topic,
                count,
                format(avg_likes, ".2f"),
                format(avg_retweets, ".2f"),
                format(avg_replies, ".2f"),
                format(reach_avg, ".2f"),
            ))

        print("\n")
        print(_BOLD + "NBR OF DIFFERENT SENTIMENTS: " + _RESET, len(sentiment_freq), "\n")
        print(_BOLD + _SENTIMENT_ROW.format('SENTIMENT', 'SENTIMENT FREQUENCY') + _RESET)
        for sentiment, count in sentiment_freq.items():
            print(_SENTIMENT_ROW.format(sentiment, count))