Spaces:
Runtime error
Runtime error
ändringar i run_main_pipeline
Browse files- .idea/misc.xml +1 -1
- .idea/politweet.iml +1 -1
- textclassifier/TextClassifier.py +13 -16
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
.idea/politweet.iml
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
</content>
|
8 |
-
<orderEntry type="
|
9 |
<orderEntry type="sourceFolder" forTests="false" />
|
10 |
</component>
|
11 |
<component name="PyNamespacePackagesService">
|
|
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
</content>
|
8 |
+
<orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
|
9 |
<orderEntry type="sourceFolder" forTests="false" />
|
10 |
</component>
|
11 |
<component name="PyNamespacePackagesService">
|
textclassifier/TextClassifier.py
CHANGED
@@ -275,21 +275,22 @@ class TextClassifier:
|
|
275 |
#We presume that all tweets inside the twitterdata.csv file are already classified.
|
276 |
:return: Pandas dataframe
|
277 |
"""
|
278 |
-
temp_df = pd.DataFrame(
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
286 |
|
287 |
self.df = self.classify_topics_of_tweets()
|
288 |
self.df = self.classify_sentiment_of_tweets()
|
289 |
# self.df = self.analyze_sentiment_of_tweets()
|
|
|
290 |
self.df_to_csv(filename)
|
291 |
-
self.df = self.df.append(temp_df)
|
292 |
-
return self.df
|
293 |
|
294 |
def __repr__(self):
|
295 |
"""
|
@@ -299,10 +300,6 @@ class TextClassifier:
|
|
299 |
return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
|
300 |
|
301 |
|
302 |
-
|
303 |
if __name__ == "__main__":
|
304 |
-
tc = TextClassifier(from_date="
|
305 |
-
tc.
|
306 |
-
|
307 |
-
|
308 |
-
|
|
|
275 |
#We presume that all tweets inside the twitterdata.csv file are already classified.
|
276 |
:return: Pandas dataframe
|
277 |
"""
|
278 |
+
# temp_df = pd.DataFrame(
|
279 |
+
# columns=['id', 'tweet', 'date', 'user_id', 'username', 'urls', 'nlikes', 'nreplies', 'nretweets'])
|
280 |
+
if os.path.exists(filename):
|
281 |
+
already_classified_df = pd.read_csv(filename)
|
282 |
+
# Remove all entries in self.df that are already in already_classified_df
|
283 |
+
self.df = self.df.merge(already_classified_df, how='left', on='id')
|
284 |
+
# Create a new dataframe where entries in already_classified_df but not in self.df
|
285 |
+
temp_df = already_classified_df.merge(self.df, how='left', on='id')
|
286 |
+
else:
|
287 |
+
print("No csv file found. Continuing without removing already classified tweets.")
|
288 |
|
289 |
self.df = self.classify_topics_of_tweets()
|
290 |
self.df = self.classify_sentiment_of_tweets()
|
291 |
# self.df = self.analyze_sentiment_of_tweets()
|
292 |
+
|
293 |
self.df_to_csv(filename)
|
|
|
|
|
294 |
|
295 |
def __repr__(self):
|
296 |
"""
|
|
|
300 |
return "Classifier for user: " + self.user_name + " with model: " + self.model_name + "."
|
301 |
|
302 |
|
|
|
303 |
if __name__ == "__main__":
|
304 |
+
tc = TextClassifier(from_date="2019-12-01", to_date="2020-12-31", user_name='jimmieakesson', num_tweets=100)
|
305 |
+
tc.run_main_pipeline()
|
|
|
|
|
|