Demea9000 committed on
Commit
e71b625
1 Parent(s): 297c37f

removed unnecessary functions

Browse files
Files changed (1) hide show
  1. textclassifier/TextClassifier.py +0 -155
textclassifier/TextClassifier.py CHANGED
@@ -73,149 +73,6 @@ class TextClassifier:
73
 
74
  return classification_clean
75
 
76
- def classify_sentiment(self, text: str):
77
- """
78
- Classifies the sentiment of a text.
79
- """
80
- assert isinstance(text, str)
81
-
82
- prompt_string = "Classify one sentiment for this tweet:\n \""
83
- prompt_string += text
84
- prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement," \
85
- "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire," \
86
- "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
87
-
88
- response = openai.Completion.create(
89
- model=self.model_name,
90
- prompt=prompt_string,
91
- temperature=0.0,
92
- max_tokens=256,
93
- top_p=1,
94
- frequency_penalty=0,
95
- presence_penalty=0,
96
- logprobs=5
97
- )
98
- classification_unclean = response.choices[0]['text']
99
- classification_clean = self.cleanup_sentiment_results(classification_unclean)
100
-
101
- return classification_clean.lower()
102
-
103
- def classify_sentiment_of_tweets(self):
104
- """
105
- Classifies the sentiment of a user's tweets.
106
- """
107
- df_sentiment = self.df.copy()
108
-
109
- df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
110
- self.df = df_sentiment
111
- return self.df
112
-
113
- def analyze_sentiment(self, text: str, sentiment: str):
114
- """
115
- Analyzes the sentiment of a text using OpenAI.
116
- :param text: string of the tweet text.
117
- :param sentiment: string of the sentiment.
118
- :return:
119
- """
120
- # assert 1 == 2, "Måste fixa prompt innan denna metod körs"
121
- prompt_string = "Who is the TARGET of this "
122
- prompt_string += sentiment
123
- prompt_string += " TWEET?\\nTWEET=\""
124
- prompt_string += text
125
- prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
126
-
127
- response = openai.Completion.create(
128
- model=self.model_name,
129
- prompt=prompt_string,
130
- temperature=0,
131
- max_tokens=256,
132
- top_p=1,
133
- frequency_penalty=0,
134
- presence_penalty=0
135
- )
136
-
137
- analyzed_sentiment = response.choices[0]['text']
138
- # Remove spaces at the start/end of the response
139
- if analyzed_sentiment.startswith(' '):
140
- analyzed_sentiment = analyzed_sentiment[1:]
141
- if analyzed_sentiment.endswith(' '):
142
- analyzed_sentiment = analyzed_sentiment[:-1]
143
-
144
- # Sometimes GPT-3 gives faulty results, so a simple filter is introduced
145
- # If the prediction is bad
146
- # -> set target value to N/A (not applicable)
147
- if len(analyzed_sentiment) > 50:
148
- analyzed_sentiment = "N/A"
149
-
150
- # An attempt to merge target responses that should be the same
151
- analyzed_sentiment = re.sub("\(", "", analyzed_sentiment)
152
- analyzed_sentiment = re.sub("\)", "", analyzed_sentiment)
153
-
154
- s_list = ["s", "the swedish social democratic party"]
155
- m_list = ["m", "the swedish moderate party", "the moderate party"]
156
- mp_list = ["mp", "the swedish green party"]
157
-
158
- if analyzed_sentiment.lower() == "v":
159
- analyzed_sentiment = "Vänsterpartiet"
160
- elif analyzed_sentiment.lower() == "mp":
161
- analyzed_sentiment = "Miljöpartiet"
162
- elif analyzed_sentiment.lower() in s_list:
163
- analyzed_sentiment = "Socialdemokraterna"
164
- elif analyzed_sentiment.lower() == "c":
165
- analyzed_sentiment = "Centerpartiet"
166
- elif analyzed_sentiment.lower() == "l":
167
- analyzed_sentiment = "Liberalerna"
168
- elif analyzed_sentiment.lower() == "kd":
169
- analyzed_sentiment = "Kristdemokraterna"
170
- elif analyzed_sentiment.lower() in m_list:
171
- analyzed_sentiment = "Moderaterna"
172
- elif analyzed_sentiment.lower() == "sd":
173
- analyzed_sentiment = "Sverigedemokraterna"
174
- elif analyzed_sentiment.lower() == "the swedish government":
175
- analyzed_sentiment = "Regeringen"
176
-
177
- analyzed_sentiment = self.cleanup_sentiment_results(analyzed_sentiment)
178
- return analyzed_sentiment
179
-
180
- def analyze_sentiment_of_tweets(self):
181
- """
182
- Analyzes the sentiment of a user's tweets.
183
- """
184
- # check if 'sentiment' column exists, raise exception if not
185
- assert 'sentiment' in self.df.columns, \
186
- "'sentiment' column does not exist. Please run classify_sentiment_of_tweets first."
187
-
188
- df_sentiment = self.df.copy()
189
- df_sentiment['target'] = df_sentiment.apply(lambda row: self.analyze_sentiment(row['tweet'], row['sentiment']),
190
- axis=1)
191
- self.df = df_sentiment
192
- return self.df
193
-
194
- def classify_topic(self, text: str):
195
- """
196
- Classifies the topics of a text.
197
- :param text: string of the tweet text.
198
- """
199
- assert isinstance(text, str)
200
-
201
- prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
202
- prompt_string += text
203
- prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be " \
204
- "more than 5 words. Numerate each topic in the output. END "
205
- response = openai.Completion.create(
206
- model="text-davinci-002",
207
- prompt=prompt_string,
208
- temperature=0,
209
- max_tokens=892,
210
- top_p=1,
211
- frequency_penalty=0,
212
- presence_penalty=0
213
- )
214
- classification_unclean = response.choices[0]['text']
215
- classification_clean = self.cleanup_topic_results(classification_unclean)
216
-
217
- return classification_clean.lower()
218
-
219
  def classify_topics_of_tweets(self):
220
  """
221
  Classifies the topics of a user's tweets.
@@ -308,18 +165,6 @@ class TextClassifier:
308
  self.df = df
309
  self.df_to_csv(filename)
310
 
311
- def split_topics_into_columns(self):
312
- """
313
- Splits the topics into columns.
314
- :return: None
315
- """
316
- df_topic = self.df.copy()
317
- df_topic['topics_temp'] = df_topic['topics'].apply(lambda x: f.separate_string(x))
318
- df_topic_split = pd.DataFrame(df_topic['topics_temp'].tolist(),
319
- columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
320
- self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
321
- self.df.drop(['topics_temp'], axis=1, inplace=True)
322
-
323
  def split_tuple_into_columns(self):
324
  """
325
  Splits the topics (topic, subtopic, sentiment, target) into columns.
 
73
 
74
  return classification_clean
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def classify_topics_of_tweets(self):
77
  """
78
  Classifies the topics of a user's tweets.
 
165
  self.df = df
166
  self.df_to_csv(filename)
167
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  def split_tuple_into_columns(self):
169
  """
170
  Splits the topics (topic, subtopic, sentiment, target) into columns.