# Spaces: Runtime error  (status text captured alongside the source; not part of the program)
import os

import openai
import regex as re
# Read the API key from the environment instead of committing it to source control.
# NOTE(review): the literal key that used to be stored on this line was exposed in
# the source and should be treated as compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
class TextClassifier:
    """Classify tweets by topic and sentiment and report aggregate statistics.

    Every method is a static method, so the class works as a namespace:
    ``TextClassifier.classify_topics(...)``.  Calls on an instance work too.

    The ``tweet_dict`` / ``result_dict`` arguments map a tweet's text to a dict
    of per-tweet data.  ``print_stats`` reads the keys ``nlikes``, ``nreplies``,
    ``nretweets``, ``topic`` and ``sentiment`` from each of those dicts.
    """

    # NOTE(review): `openai.Completion` and this model name belong to the legacy
    # completions API -- confirm they are still available in the installed
    # `openai` package before relying on the two classify_* methods.
    _MODEL = "text-davinci-002"

    # An enumeration marker such as "1." or "2)" that starts a line or follows
    # whitespace.  Requiring that position keeps digits inside a topic name
    # (e.g. "COVID-19.") from being mistaken for a marker.
    _ENUMERATION_MARKER = re.compile(r"(?:^|(?<=\s))\d+[.)]\s*")

    @staticmethod
    def _complete(prompt, max_tokens):
        """Send ``prompt`` to the completion endpoint and return the raw text."""
        response = openai.Completion.create(
            model=TextClassifier._MODEL,
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        return response.choices[0]['text']

    @staticmethod
    def _parse_topics(raw_text):
        """Split a numbered completion such as ``"1. A\\n2. B"`` into topic names.

        Topics are separated by line breaks and/or enumeration markers.  Each
        topic has surrounding whitespace and trailing periods removed; empty
        fragments are dropped.  Digits and periods inside a topic are kept.
        """
        topics = []
        for line in raw_text.splitlines():
            for fragment in TextClassifier._ENUMERATION_MARKER.split(line):
                topic = fragment.strip().rstrip(".").strip()
                if topic:
                    topics.append(topic)
        return topics

    @staticmethod
    def classify_topics(tweet_dict):
        """Ask the model for a general topic and two sub-topics per tweet.

        Args:
            tweet_dict: mapping of tweet text -> dict of per-tweet data.

        Returns:
            ``tweet_dict`` itself, with a ``'topic'`` list stored on every entry
            (see ``cleanup_topic_results``).
        """
        prediction_dict = {}
        # Snapshot the keys so the loop is independent of later dict changes.
        for tweet in list(tweet_dict):
            prompt_string = (
                "Classify this tweet with a general topic and two sub-topics:\n\""
                + tweet
                + "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be "
                "more than 5 words. Numerate each topic in the output. END "
            )
            prediction_dict[tweet] = TextClassifier._complete(prompt_string, 892)
        return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)

    @staticmethod
    def classify_sentiments(tweet_dict):
        """Ask the model for one sentiment label per tweet.

        Args:
            tweet_dict: mapping of tweet text -> dict of per-tweet data.

        Returns:
            ``tweet_dict`` itself, with ``'sentiment'`` set on every entry.  The
            completion text is stripped of surrounding whitespace so that the
            same label always produces the same key in ``print_stats``.
        """
        for tweet in list(tweet_dict):
            prompt_string = (
                "Classify one sentiment for this tweet:\n \""
                + tweet
                + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
                "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
                "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
            )
            raw_sentiment = TextClassifier._complete(prompt_string, 256)
            tweet_dict[tweet]['sentiment'] = raw_sentiment.strip()
        return tweet_dict

    @staticmethod
    def cleanup_topic_results(prediction_dict, tweet_dict):
        """Parse raw topic completions and store them on ``tweet_dict``.

        Args:
            prediction_dict: mapping of tweet text -> raw completion text.
            tweet_dict: mapping of tweet text -> dict of per-tweet data; must
                contain every key of ``prediction_dict``.

        Returns:
            ``tweet_dict`` itself, with ``tweet_dict[tweet]['topic']`` set to the
            list of topic names parsed from the matching completion.

        Raises:
            KeyError: if a tweet in ``prediction_dict`` is missing from
                ``tweet_dict``.
        """
        for tweet, raw_text in prediction_dict.items():
            tweet_dict[tweet]['topic'] = TextClassifier._parse_topics(raw_text)
        return tweet_dict

    @staticmethod
    def print_results(results_dict):
        """Print every tweet (quoted) followed by its per-tweet data dict."""
        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
        for tweet, predictions in results_dict.items():
            print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")

    @staticmethod
    def _aggregate(result_dict):
        """Return ``(topic_rows, sentiment_counts)`` computed from ``result_dict``.

        ``topic_rows`` maps topic -> ``(frequency, mean likes, mean retweets,
        mean replies, reach average)``, where the reach average is the mean of
        the three per-topic means.  ``sentiment_counts`` maps sentiment ->
        number of tweets carrying it.
        """
        sentiment_counts = {}
        topic_counts = {}
        topic_totals = {}  # topic -> [likes, retweets, replies] running sums
        for stats in result_dict.values():
            likes = stats['nlikes']
            replies = stats['nreplies']
            retweets = stats['nretweets']
            topics = stats['topic']
            sentiment = stats['sentiment']

            sentiment_counts[sentiment] = sentiment_counts.get(sentiment, 0) + 1
            for topic in topics:
                topic_counts[topic] = topic_counts.get(topic, 0) + 1
                sums = topic_totals.setdefault(topic, [0, 0, 0])
                sums[0] += likes
                sums[1] += retweets
                sums[2] += replies

        topic_rows = {}
        for topic, count in topic_counts.items():
            total_likes, total_retweets, total_replies = topic_totals[topic]
            avg_likes = total_likes / count
            avg_retweets = total_retweets / count
            # Replies are averaged like the other metrics so the column matches
            # its "AVERAGE NBR OF REPLIES" heading.
            avg_replies = total_replies / count
            reach = (avg_likes + avg_retweets + avg_replies) / 3
            topic_rows[topic] = (count, avg_likes, avg_retweets, avg_replies, reach)
        return topic_rows, sentiment_counts

    @staticmethod
    def print_stats(result_dict, user=""):
        """Print per-topic engagement averages and sentiment frequencies.

        Args:
            result_dict: mapping of tweet text -> dict with the keys ``nlikes``,
                ``nreplies``, ``nretweets`` (numbers), ``topic`` (list of topic
                names) and ``sentiment``.
            user: optional label printed on the ``USER:`` line; defaults to an
                empty string.

        Raises:
            KeyError: if an entry lacks one of the keys listed above.
        """
        bold, reset = '\033[1m', '\033[0m'
        topic_rows, sentiment_counts = TextClassifier._aggregate(result_dict)

        print(bold + "USER: " + reset, user)
        print(bold + "NBR OF TWEETS SCRAPED: " + reset, len(result_dict))
        print(bold + "NBR OF DIFFERENT TOPICS: " + reset, len(topic_rows), "\n")

        # The escape codes wrap the already-padded header so they do not count
        # toward the column widths and the headers line up with the rows.
        topic_format = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
        print(bold + topic_format.format('TOPIC', 'TOPIC FREQUENCY',
                                         'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS',
                                         'AVERAGE NBR OF REPLIES', 'REACH AVERAGE') + reset)
        for topic, (count, avg_likes, avg_retweets, avg_replies, reach) in topic_rows.items():
            print(topic_format.format(topic, count,
                                      "{:.2f}".format(avg_likes),
                                      "{:.2f}".format(avg_retweets),
                                      "{:.2f}".format(avg_replies),
                                      "{:.2f}".format(reach)))

        print("\n")
        print(bold + "NBR OF DIFFERENT SENTIMENTS: " + reset, len(sentiment_counts), "\n")
        sentiment_format = "{:<60} {:<20}"
        print(bold + sentiment_format.format('SENTIMENT', 'SENTIMENT FREQUENCY') + reset)
        for sentiment, count in sentiment_counts.items():
            print(sentiment_format.format(sentiment, count))