13macattack37 committed on
Commit
4406f1d
1 Parent(s): dc67c78

Functions for inserting sentiments have been implemented. The sentiments are now also part of the statistics in the print_stats() function.

Browse files
Files changed (1) hide show
  1. text-classifier/text_classifier.py +41 -43
text-classifier/text_classifier.py CHANGED
@@ -3,19 +3,13 @@ import openai
3
  import regex as re
4
  openai.api_key = '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'
5
 
6
- class text_classifier:
7
-
8
- '''def __init__(self, user, from_date, to_date):
9
- self.user = "Janne"
10
- self.from_date = "2022-01-05"
11
- self.to_date = "2022-07-05"'''
12
 
13
  def classify_topics(tweet_dict):
14
  tweet_list = list(tweet_dict.keys())
15
- prediction_list = []
16
 
17
- for tweet in tweet_list:
18
- #prompt_string = ""
19
  prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
20
  prompt_string += tweet
21
  prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
@@ -30,20 +24,19 @@ class text_classifier:
30
  )
31
 
32
  classifications_unclean = response.choices[0]['text']
33
- prediction_list.append(classifications_unclean)
34
-
35
- return text_classifier.cleanup_results(prediction_list, tweet_dict)
 
36
 
37
  def classify_sentiments(tweet_dict):
38
- tweet_list = list(tweet_dict.keys())
39
- prediction_list = []
40
 
41
  for tweet in tweet_list:
42
  prompt_string = "Classify one sentiment for this tweet:\n \""
43
  prompt_string += tweet
44
  prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
45
-
46
-
47
  response = openai.Completion.create(
48
  model="text-davinci-002",
49
  prompt=prompt_string,
@@ -53,20 +46,16 @@ class text_classifier:
53
  frequency_penalty=0,
54
  presence_penalty=0
55
  )
56
- classifications_unclean = response.choices[0]['text']
57
- prediction_list.append(classifications_unclean)
58
-
59
- return prediction_list
60
 
61
 
62
- def cleanup_results(prediction_list, tweet_dict):
63
- predictions_cleaned = []
64
- temp_list = []
65
- pred_dict = {}
66
- i = 0
67
- tweet_list = list(tweet_dict.keys())
68
 
69
- for item in prediction_list:
70
  temp_list = []
71
  new_item = item.replace("\n", " ")
72
  new_item = new_item.replace(" ", " ")
@@ -79,19 +68,10 @@ class text_classifier:
79
  item = item[1:]
80
  if item.endswith(' '):
81
  item = item[:-1]
82
- temp_list.append(item)
83
- predictions_cleaned.append(temp_list)
84
-
85
- for tweet in tweet_list:
86
- pred_dict[tweet] = predictions_cleaned[i]
87
- i += 1
88
- return pred_dict
89
 
90
-
91
- def insert_predictions(tweet_dict, results):
92
- for key in results:
93
- tweet_dict[key]['topic'] = results[key]
94
- return tweet_dict
95
 
96
 
97
  def print_results(results_dict):
@@ -107,14 +87,24 @@ class text_classifier:
107
  mean_likes = {}
108
  mean_retweets = {}
109
  mean_replies = {}
 
 
110
  nbr_topics = 0
111
 
112
- for value in result_dict.values():
113
 
114
  nlikes = value['nlikes']
115
  nreplies = value['nreplies']
116
  nretweets = value['nretweets']
117
  topic_list = value['topic']
 
 
 
 
 
 
 
 
118
 
119
  # Count topic frequency
120
  for topic in topic_list:
@@ -151,11 +141,11 @@ class text_classifier:
151
  for key in mean_retweets.keys():
152
  mean_retweets[key] = mean_retweets[key] / freq_dict[key]
153
 
154
-
155
  # Print the names of the columns.
156
  print('\033[1m' + "USER: " + '\033[0m', user)
157
  print('\033[1m' + "NBR OF TWEETS SCRAPED: "+ '\033[0m', len(list(result_dict.keys())))
158
- print('\033[1m' + "NBR OF DIFFERENT TOPICS: "+ '\033[0m', nbr_topics, "\n", "\n")
159
  print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
160
 
161
  # print each data item.
@@ -165,4 +155,12 @@ class text_classifier:
165
  reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic] ) / 3
166
  print ("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes), "{:.2f}".format(mean_retweets[topic]), mean_replies[topic], "{:.2f}".format(reach_avg)))
167
 
168
-
 
 
 
 
 
 
 
 
 
3
  import regex as re
4
  openai.api_key = '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'
5
 
6
+ class text_classifier:
 
 
 
 
 
7
 
8
  def classify_topics(tweet_dict):
9
  tweet_list = list(tweet_dict.keys())
10
+ prediction_dict = {}
11
 
12
+ for tweet in tweet_list:
 
13
  prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
14
  prompt_string += tweet
15
  prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be more than 5 words. Numerate each topic in the output. END"
 
24
  )
25
 
26
  classifications_unclean = response.choices[0]['text']
27
+ prediction_dict[tweet] = classifications_unclean
28
+
29
+ return text_classifier.cleanup_topic_results(prediction_dict, tweet_dict)
30
+
31
 
32
  def classify_sentiments(tweet_dict):
33
+ tweet_list = list(tweet_dict.keys())
 
34
 
35
  for tweet in tweet_list:
36
  prompt_string = "Classify one sentiment for this tweet:\n \""
37
  prompt_string += tweet
38
  prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
39
+
 
40
  response = openai.Completion.create(
41
  model="text-davinci-002",
42
  prompt=prompt_string,
 
46
  frequency_penalty=0,
47
  presence_penalty=0
48
  )
49
+ classifications_unclean = response.choices[0]['text']
50
+ tweet_dict[tweet]['sentiment'] = classifications_unclean
51
+
52
+ return tweet_dict
53
 
54
 
55
+ def cleanup_topic_results(prediction_dict, tweet_dict):
56
+ temp_list = []
 
 
 
 
57
 
58
+ for tweet, item in prediction_dict.items():
59
  temp_list = []
60
  new_item = item.replace("\n", " ")
61
  new_item = new_item.replace(" ", " ")
 
68
  item = item[1:]
69
  if item.endswith(' '):
70
  item = item[:-1]
71
+ temp_list.append(item)
72
+ tweet_dict[tweet]['topic'] = temp_list
 
 
 
 
 
73
 
74
+ return tweet_dict
 
 
 
 
75
 
76
 
77
  def print_results(results_dict):
 
87
  mean_likes = {}
88
  mean_retweets = {}
89
  mean_replies = {}
90
+ sentiment_dict = {}
91
+ nbr_sentiment = 0
92
  nbr_topics = 0
93
 
94
+ for key, value in result_dict.items():
95
 
96
  nlikes = value['nlikes']
97
  nreplies = value['nreplies']
98
  nretweets = value['nretweets']
99
  topic_list = value['topic']
100
+ sentiment = value['sentiment']
101
+
102
+ # Count sentiment frequency
103
+ if sentiment in sentiment_dict.keys():
104
+ sentiment_dict[sentiment] += 1
105
+ else:
106
+ sentiment_dict[sentiment] = 1
107
+ nbr_sentiment += 1
108
 
109
  # Count topic frequency
110
  for topic in topic_list:
 
141
  for key in mean_retweets.keys():
142
  mean_retweets[key] = mean_retweets[key] / freq_dict[key]
143
 
144
+
145
  # Print the names of the columns.
146
  print('\033[1m' + "USER: " + '\033[0m', user)
147
  print('\033[1m' + "NBR OF TWEETS SCRAPED: "+ '\033[0m', len(list(result_dict.keys())))
148
+ print('\033[1m' + "NBR OF DIFFERENT TOPICS: "+ '\033[0m', nbr_topics, "\n")
149
  print("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format('\033[1m' + 'TOPIC', 'TOPIC FREQUENCY', 'AVERAGE NBR OF LIKES', 'AVERAGE NBR OF RETWEETS', 'AVERAGE NBR OF REPLIES', 'REACH AVERAGE' + '\033[0m'))
150
 
151
  # print each data item.
 
155
  reach_avg = (mean_likes + mean_retweets[topic] + mean_replies[topic] ) / 3
156
  print ("{:<60} {:<20} {:<30} {:<30} {:<30} {:<30}".format(topic, freq_dict[topic], "{:.2f}".format(mean_likes), "{:.2f}".format(mean_retweets[topic]), mean_replies[topic], "{:.2f}".format(reach_avg)))
157
 
158
+ print("\n")
159
+ print('\033[1m' + "NBR OF DIFFERENT SENTIMENTS: "+ '\033[0m', nbr_sentiment, "\n")
160
+ print("{:<60} {:<20}".format('\033[1m' + 'SENTIMENT', 'SENTIMENT FREQUENCY' + '\033[0m'))
161
+ for key, value in sentiment_dict.items():
162
+ sentiment = key
163
+ mean_sentiment = value
164
+ print ("{:<60} {:<20}".format(sentiment, sentiment_dict[sentiment], "{:.2f}".format(mean_sentiment)))
165
+
166
+