Spaces:
Runtime error
Runtime error
removed unnecessary functions
Browse files- textclassifier/TextClassifier.py +0 -155
textclassifier/TextClassifier.py
CHANGED
@@ -73,149 +73,6 @@ class TextClassifier:
|
|
73 |
|
74 |
return classification_clean
|
75 |
|
76 |
-
def classify_sentiment(self, text: str):
|
77 |
-
"""
|
78 |
-
Classifies the sentiment of a text.
|
79 |
-
"""
|
80 |
-
assert isinstance(text, str)
|
81 |
-
|
82 |
-
prompt_string = "Classify one sentiment for this tweet:\n \""
|
83 |
-
prompt_string += text
|
84 |
-
prompt_string += "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement," \
|
85 |
-
"\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire," \
|
86 |
-
"\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
|
87 |
-
|
88 |
-
response = openai.Completion.create(
|
89 |
-
model=self.model_name,
|
90 |
-
prompt=prompt_string,
|
91 |
-
temperature=0.0,
|
92 |
-
max_tokens=256,
|
93 |
-
top_p=1,
|
94 |
-
frequency_penalty=0,
|
95 |
-
presence_penalty=0,
|
96 |
-
logprobs=5
|
97 |
-
)
|
98 |
-
classification_unclean = response.choices[0]['text']
|
99 |
-
classification_clean = self.cleanup_sentiment_results(classification_unclean)
|
100 |
-
|
101 |
-
return classification_clean.lower()
|
102 |
-
|
103 |
-
def classify_sentiment_of_tweets(self):
|
104 |
-
"""
|
105 |
-
Classifies the sentiment of a user's tweets.
|
106 |
-
"""
|
107 |
-
df_sentiment = self.df.copy()
|
108 |
-
|
109 |
-
df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
|
110 |
-
self.df = df_sentiment
|
111 |
-
return self.df
|
112 |
-
|
113 |
-
def analyze_sentiment(self, text: str, sentiment: str):
|
114 |
-
"""
|
115 |
-
Analyzes the sentiment of a text using OpenAI.
|
116 |
-
:param text: string of the tweet text.
|
117 |
-
:param sentiment: string of the sentiment.
|
118 |
-
:return:
|
119 |
-
"""
|
120 |
-
# assert 1 == 2, "Måste fixa prompt innan denna metod körs"
|
121 |
-
prompt_string = "Who is the TARGET of this "
|
122 |
-
prompt_string += sentiment
|
123 |
-
prompt_string += " TWEET?\\nTWEET=\""
|
124 |
-
prompt_string += text
|
125 |
-
prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
|
126 |
-
|
127 |
-
response = openai.Completion.create(
|
128 |
-
model=self.model_name,
|
129 |
-
prompt=prompt_string,
|
130 |
-
temperature=0,
|
131 |
-
max_tokens=256,
|
132 |
-
top_p=1,
|
133 |
-
frequency_penalty=0,
|
134 |
-
presence_penalty=0
|
135 |
-
)
|
136 |
-
|
137 |
-
analyzed_sentiment = response.choices[0]['text']
|
138 |
-
# Remove spaces at the start/end of the response
|
139 |
-
if analyzed_sentiment.startswith(' '):
|
140 |
-
analyzed_sentiment = analyzed_sentiment[1:]
|
141 |
-
if analyzed_sentiment.endswith(' '):
|
142 |
-
analyzed_sentiment = analyzed_sentiment[:-1]
|
143 |
-
|
144 |
-
# Sometimes GPT-3 gives faulty results, so a simple filter is introduced
|
145 |
-
# If the prediction is bad
|
146 |
-
# -> set target value to N/A (not applicable)
|
147 |
-
if len(analyzed_sentiment) > 50:
|
148 |
-
analyzed_sentiment = "N/A"
|
149 |
-
|
150 |
-
# An attempt to merge target responses that should be the same
|
151 |
-
analyzed_sentiment = re.sub("\(", "", analyzed_sentiment)
|
152 |
-
analyzed_sentiment = re.sub("\)", "", analyzed_sentiment)
|
153 |
-
|
154 |
-
s_list = ["s", "the swedish social democratic party"]
|
155 |
-
m_list = ["m", "the swedish moderate party", "the moderate party"]
|
156 |
-
mp_list = ["mp", "the swedish green party"]
|
157 |
-
|
158 |
-
if analyzed_sentiment.lower() == "v":
|
159 |
-
analyzed_sentiment = "Vänsterpartiet"
|
160 |
-
elif analyzed_sentiment.lower() == "mp":
|
161 |
-
analyzed_sentiment = "Miljöpartiet"
|
162 |
-
elif analyzed_sentiment.lower() in s_list:
|
163 |
-
analyzed_sentiment = "Socialdemokraterna"
|
164 |
-
elif analyzed_sentiment.lower() == "c":
|
165 |
-
analyzed_sentiment = "Centerpartiet"
|
166 |
-
elif analyzed_sentiment.lower() == "l":
|
167 |
-
analyzed_sentiment = "Liberalerna"
|
168 |
-
elif analyzed_sentiment.lower() == "kd":
|
169 |
-
analyzed_sentiment = "Kristdemokraterna"
|
170 |
-
elif analyzed_sentiment.lower() in m_list:
|
171 |
-
analyzed_sentiment = "Moderaterna"
|
172 |
-
elif analyzed_sentiment.lower() == "sd":
|
173 |
-
analyzed_sentiment = "Sverigedemokraterna"
|
174 |
-
elif analyzed_sentiment.lower() == "the swedish government":
|
175 |
-
analyzed_sentiment = "Regeringen"
|
176 |
-
|
177 |
-
analyzed_sentiment = self.cleanup_sentiment_results(analyzed_sentiment)
|
178 |
-
return analyzed_sentiment
|
179 |
-
|
180 |
-
def analyze_sentiment_of_tweets(self):
|
181 |
-
"""
|
182 |
-
Analyzes the sentiment of a user's tweets.
|
183 |
-
"""
|
184 |
-
# check if 'sentiment' column exists, raise exception if not
|
185 |
-
assert 'sentiment' in self.df.columns, \
|
186 |
-
"'sentiment' column does not exist. Please run classify_sentiment_of_tweets first."
|
187 |
-
|
188 |
-
df_sentiment = self.df.copy()
|
189 |
-
df_sentiment['target'] = df_sentiment.apply(lambda row: self.analyze_sentiment(row['tweet'], row['sentiment']),
|
190 |
-
axis=1)
|
191 |
-
self.df = df_sentiment
|
192 |
-
return self.df
|
193 |
-
|
194 |
-
def classify_topic(self, text: str):
|
195 |
-
"""
|
196 |
-
Classifies the topics of a text.
|
197 |
-
:param text: string of the tweet text.
|
198 |
-
"""
|
199 |
-
assert isinstance(text, str)
|
200 |
-
|
201 |
-
prompt_string = "Classify this tweet with a general topic and two sub-topics:\n\""
|
202 |
-
prompt_string += text
|
203 |
-
prompt_string += "\".\nGeneral topic: \nSub topic 1: \nSub topic 2:\n. The classifications should not be " \
|
204 |
-
"more than 5 words. Numerate each topic in the output. END "
|
205 |
-
response = openai.Completion.create(
|
206 |
-
model="text-davinci-002",
|
207 |
-
prompt=prompt_string,
|
208 |
-
temperature=0,
|
209 |
-
max_tokens=892,
|
210 |
-
top_p=1,
|
211 |
-
frequency_penalty=0,
|
212 |
-
presence_penalty=0
|
213 |
-
)
|
214 |
-
classification_unclean = response.choices[0]['text']
|
215 |
-
classification_clean = self.cleanup_topic_results(classification_unclean)
|
216 |
-
|
217 |
-
return classification_clean.lower()
|
218 |
-
|
219 |
def classify_topics_of_tweets(self):
|
220 |
"""
|
221 |
Classifies the topics of a user's tweets.
|
@@ -308,18 +165,6 @@ class TextClassifier:
|
|
308 |
self.df = df
|
309 |
self.df_to_csv(filename)
|
310 |
|
311 |
-
def split_topics_into_columns(self):
|
312 |
-
"""
|
313 |
-
Splits the topics into columns.
|
314 |
-
:return: None
|
315 |
-
"""
|
316 |
-
df_topic = self.df.copy()
|
317 |
-
df_topic['topics_temp'] = df_topic['topics'].apply(lambda x: f.separate_string(x))
|
318 |
-
df_topic_split = pd.DataFrame(df_topic['topics_temp'].tolist(),
|
319 |
-
columns=['main_topic', 'sub_topic_1', 'sub_topic_2'])
|
320 |
-
self.df = df_topic.merge(df_topic_split, how='left', left_index=True, right_index=True)
|
321 |
-
self.df.drop(['topics_temp'], axis=1, inplace=True)
|
322 |
-
|
323 |
def split_tuple_into_columns(self):
|
324 |
"""
|
325 |
Splits the topics (topic, subtopic, sentiment, target) into columns.
|
|
|
73 |
|
74 |
return classification_clean
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def classify_topics_of_tweets(self):
|
77 |
"""
|
78 |
Classifies the topics of a user's tweets.
|
|
|
165 |
self.df = df
|
166 |
self.df_to_csv(filename)
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
def split_tuple_into_columns(self):
|
169 |
"""
|
170 |
Splits the topics (topic, subtopic, sentiment, target) into columns.
|