Spaces:
Runtime error
Runtime error
lite clean-up i TextClassifier
Browse files- .idea/misc.xml +1 -1
- textclassifier/TextClassifier.py +21 -7
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
textclassifier/TextClassifier.py
CHANGED
@@ -46,7 +46,6 @@ class TextClassifier:
|
|
46 |
classification_clean = classification_clean.replace(" ", "")
|
47 |
|
48 |
return classification_clean
|
49 |
-
return response.choices[0]['text']
|
50 |
|
51 |
def classify_sentiment(self, text: str):
|
52 |
"""
|
@@ -200,16 +199,31 @@ class TextClassifier:
|
|
200 |
df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
|
201 |
return df_topic
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
self.num_tweets)
|
206 |
-
|
207 |
-
def cleanup_topic_results(prediction_dict, text):
|
208 |
new_item = text.replace("\n", " ")
|
209 |
new_item = new_item.replace(" ", " ")
|
210 |
return new_item
|
211 |
|
|
|
|
|
|
|
|
|
212 |
|
213 |
|
214 |
-
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
|
|
46 |
classification_clean = classification_clean.replace(" ", "")
|
47 |
|
48 |
return classification_clean
|
|
|
49 |
|
50 |
def classify_sentiment(self, text: str):
|
51 |
"""
|
|
|
199 |
df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
|
200 |
return df_topic
|
201 |
|
202 |
+
@staticmethod
|
203 |
+
def cleanup_topic_results(text):
|
|
|
|
|
|
|
204 |
new_item = text.replace("\n", " ")
|
205 |
new_item = new_item.replace(" ", " ")
|
206 |
return new_item
|
207 |
|
208 |
+
def __repr__(self):
|
209 |
+
return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
|
210 |
+
self.num_tweets)
|
211 |
+
|
212 |
|
213 |
|
214 |
+
# if __name__ == "__main__":
|
215 |
+
# import pandas as pd
|
216 |
+
# from datetime import datetime
|
217 |
+
# import os
|
218 |
+
# # show all columns
|
219 |
+
# pd.set_option('display.max_columns', None)
|
220 |
+
#
|
221 |
+
# tc = TextClassifier(from_date="2019-01-01", to_date="2019-05-31", user_name='jimmieakesson', num_tweets=20)
|
222 |
+
# tc.classify_sentiment_of_tweets()
|
223 |
+
# # df = tc.analyze_sentiment_of_tweets()
|
224 |
+
# # print(df)
|
225 |
+
# df = tc.classify_topics_of_tweets()
|
226 |
+
# print(df)
|
227 |
+
# # save to csv in a folder under politweet with timestamp in name
|
228 |
+
# df.to_csv(f"{datetime.now().strftime('%Y-%m-%d %H-%M-%S')}_tweets.csv")
|
229 |
|