Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

politweet / text-classifier /TextClassifier.py

Demea9000

prettier

eceff29 over 2 years ago

raw

history blame

6.72 kB

	import os

	import openai
	import regex as re

	# The API key is a secret and must never be committed to source control.
	# Read it from the environment instead; when the variable is unset the key
	# stays None and the OpenAI client reports the missing credential on first use.
	openai.api_key = os.environ.get("OPENAI_API_KEY")


	class TextClassifier:
	    """Classify tweets by topic and sentiment through the OpenAI completion API.

	    Every method is a static helper: none of them reads instance state, so
	    they can be called either on the class or on an instance.
	    """

	    @staticmethod
	    def _complete(prompt, max_tokens):
	        """Send ``prompt`` to the completion endpoint and return the raw text of the first choice."""
	        # NOTE(review): `openai.Completion` / "text-davinci-002" belong to the
	        # legacy client API - confirm the pinned openai version still serves them.
	        response = openai.Completion.create(
	            model="text-davinci-002",
	            prompt=prompt,
	            temperature=0,
	            max_tokens=max_tokens,
	            top_p=1,
	            frequency_penalty=0,
	            presence_penalty=0,
	        )
	        return response.choices[0]['text']

	    @staticmethod
	    def classify_topics(tweet_dict):
	        """Ask the model for one general topic and two sub-topics per tweet.

	        :param tweet_dict: mapping whose keys are the tweet texts and whose
	            values are per-tweet dicts (they receive a ``'topic'`` entry).
	        :return: the same ``tweet_dict``, with ``tweet_dict[tweet]['topic']``
	            set to the list of cleaned topic strings.
	        """
	        prediction_dict = {}

	        for tweet in tweet_dict:
	            prompt_string = (
	                "Classify this tweet with a general topic and two sub-topics:\n\""
	                + tweet
	                + "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be "
	                "more than 5 words. Numerate each topic in the output. END "
	            )
	            prediction_dict[tweet] = TextClassifier._complete(prompt_string, 892)

	        return TextClassifier.cleanup_topic_results(prediction_dict, tweet_dict)

	    @staticmethod
	    def classify_sentiments(tweet_dict):
	        """Ask the model for one sentiment label per tweet.

	        :param tweet_dict: mapping whose keys are the tweet texts and whose
	            values are per-tweet dicts.
	        :return: the same ``tweet_dict``, with the raw model answer stored
	            under ``tweet_dict[tweet]['sentiment']``.
	        """
	        for tweet in tweet_dict:
	            prompt_string = (
	                "Classify one sentiment for this tweet:\n \""
	                + tweet
	                + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
	                "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
	                "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT= "
	            )
	            tweet_dict[tweet]['sentiment'] = TextClassifier._complete(prompt_string, 256)

	        return tweet_dict

	    @staticmethod
	    def cleanup_topic_results(prediction_dict, tweet_dict):
	        """Turn the raw numbered model answers into lists of topic strings.

	        :param prediction_dict: mapping of tweet text to the raw completion text.
	        :param tweet_dict: mapping of tweet text to a per-tweet dict; it is
	            updated in place.
	        :return: ``tweet_dict`` with ``tweet_dict[tweet]['topic']`` set to the
	            list of topics parsed from the matching prediction.
	        """
	        for tweet, raw_answer in prediction_dict.items():
	            flattened = raw_answer.replace("\n", " ")
	            # Collapse the double spaces left behind by consecutive newlines.
	            # NOTE(review): the reviewed source showed a single-space to
	            # single-space replace (a no-op); presumably whitespace was lost
	            # in transit - confirm against the repository.
	            flattened = flattened.replace("  ", " ")
	            # Drop the first four characters; presumably the leading " 1. "
	            # enumeration marker of the model answer - confirm against real output.
	            flattened = flattened[4:]
	            # Remove the remaining enumeration digits so that only the "."
	            # separators are left between topics.
	            flattened = re.sub(r'\d', '', flattened)

	            tweet_dict[tweet]['topic'] = [part.strip() for part in flattened.split(".")]

	        return tweet_dict

	    @staticmethod
	    def print_results(results_dict):
	        """Print every tweet followed by its stored predictions.

	        :param results_dict: mapping of tweet text to its prediction data.
	        """
	        print('\033[1m' + "RESULTS" + '\033[0m', "\n")
	        for tweet, predictions in results_dict.items():
	            print("\"" + tweet + "\"" + "\n" + str(predictions), "\n" + "---------------------------------")

	    @staticmethod
	    def print_stats(result_dict):
	        """Print per-topic engagement averages and per-sentiment frequencies.

	        :param result_dict: mapping of tweet text to a dict providing the keys
	            ``'nlikes'``, ``'nreplies'``, ``'nretweets'``, ``'topic'`` (list of
	            topic strings) and ``'sentiment'``.
	        :raises KeyError: if one of those keys is missing from an entry.
	        """
	        bold, reset = '\033[1m', '\033[0m'
	        user = ""  # never populated here; kept so the "USER:" line is still printed

	        topic_freq = {}
	        likes_total = {}
	        retweets_total = {}
	        replies_total = {}
	        sentiment_freq = {}

	        for stats in result_dict.values():
	            nlikes = stats['nlikes']
	            nreplies = stats['nreplies']
	            nretweets = stats['nretweets']
	            topic_list = stats['topic']
	            sentiment = stats['sentiment']

	            sentiment_freq[sentiment] = sentiment_freq.get(sentiment, 0) + 1

	            # A tweet contributes its engagement numbers to each of its topics.
	            for topic in topic_list:
	                topic_freq[topic] = topic_freq.get(topic, 0) + 1
	                likes_total[topic] = likes_total.get(topic, 0) + nlikes
	                retweets_total[topic] = retweets_total.get(topic, 0) + nretweets
	                replies_total[topic] = replies_total.get(topic, 0) + nreplies

	        print(bold + "USER: " + reset, user)
	        print(bold + "NBR OF TWEETS SCRAPED: " + reset, len(result_dict))
	        print(bold + "NBR OF DIFFERENT TOPICS: " + reset, len(topic_freq), "\n")

	        row_format = "{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}"
	        print(row_format.format(bold + 'TOPIC', 'TOPIC FREQUENCY',
	                                'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS',
	                                'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + reset))

	        for topic, count in topic_freq.items():
	            avg_likes = likes_total[topic] / count
	            avg_retweets = retweets_total[topic] / count
	            # Replies are averaged like the other two metrics so that the
	            # "AVERAGE NBR OF REPLIES" column and the reach average are true means.
	            avg_replies = replies_total[topic] / count
	            reach_avg = (avg_likes + avg_retweets + avg_replies) / 3
	            print(row_format.format(topic, count,
	                                    "{:.2f}".format(avg_likes),
	                                    "{:.2f}".format(avg_retweets),
	                                    "{:.2f}".format(avg_replies),
	                                    "{:.2f}".format(reach_avg)))

	        print("\n")
	        print(bold + "NBR OF DIFFERENT SENTIMENTS: " + reset, len(sentiment_freq), "\n")
	        print("{:<60} {:<20}".format(bold + 'SENTIMENT', 'SENTIMENT FREQUENCY' + reset))
	        for sentiment, count in sentiment_freq.items():
	            print("{:<60} {:<20}".format(sentiment, count))