Spaces:
Runtime error
Runtime error
Classify Topics done
Browse files
- .idea/misc.xml +1 -1
- .idea/politweet.iml +1 -1
- textclassifier/TextClassifier.py +55 -13
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
.idea/politweet.iml
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
6 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
7 |
</content>
|
8 |
-
<orderEntry type="jdk" jdkName="Python 3.
|
9 |
<orderEntry type="sourceFolder" forTests="false" />
|
10 |
</component>
|
11 |
<component name="PyNamespacePackagesService">
|
|
|
5 |
<excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
|
6 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
7 |
</content>
|
8 |
+
<orderEntry type="jdk" jdkName="Python 3.10 (politweet)" jdkType="Python SDK" />
|
9 |
<orderEntry type="sourceFolder" forTests="false" />
|
10 |
</component>
|
11 |
<component name="PyNamespacePackagesService">
|
textclassifier/TextClassifier.py
CHANGED
@@ -6,7 +6,7 @@ from datetime import date
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
9 |
-
num_tweets=
|
10 |
"""
|
11 |
Initializes the TextClassifier.
|
12 |
:param model_name: name of the model from openai.
|
@@ -20,9 +20,17 @@ class TextClassifier:
|
|
20 |
self.to_date = to_date
|
21 |
self.num_tweets = num_tweets
|
22 |
self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
|
|
|
|
|
23 |
# self.api_key = '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'
|
24 |
openai.api_key = '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'
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
@staticmethod
|
27 |
def cleanup_sentiment_results(classification_unclean):
|
28 |
classification_clean = classification_unclean.replace('\n\n', "")
|
@@ -31,6 +39,7 @@ class TextClassifier:
|
|
31 |
classification_clean = classification_clean.replace(" ", "")
|
32 |
|
33 |
return classification_clean
|
|
|
34 |
|
35 |
def classify_sentiment(self, text: str):
|
36 |
"""
|
@@ -45,7 +54,7 @@ class TextClassifier:
|
|
45 |
"\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
|
46 |
|
47 |
response = openai.Completion.create(
|
48 |
-
model=
|
49 |
prompt=prompt_string,
|
50 |
temperature=0.0,
|
51 |
max_tokens=256,
|
@@ -59,12 +68,11 @@ class TextClassifier:
|
|
59 |
|
60 |
return classification_clean.lower()
|
61 |
|
62 |
-
def classify_sentiment_of_tweets(self
|
63 |
"""
|
64 |
Classifies the sentiment of a user's tweets.
|
65 |
-
:param user_name: string of the user name.
|
66 |
"""
|
67 |
-
df_sentiment = self.
|
68 |
df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
|
69 |
return df_sentiment
|
70 |
|
@@ -78,7 +86,7 @@ class TextClassifier:
|
|
78 |
prompt_string = "Who is the TARGET of this "
|
79 |
prompt_string += sentiment
|
80 |
prompt_string += " TWEET?\\nTWEET=\""
|
81 |
-
prompt_string +=
|
82 |
prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
|
83 |
|
84 |
response = openai.Completion.create(
|
@@ -136,17 +144,51 @@ class TextClassifier:
|
|
136 |
|
137 |
return tweet_dict
|
138 |
|
139 |
-
def
|
140 |
"""
|
141 |
Classifies the topics of a text.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
"""
|
|
|
|
|
|
|
143 |
|
144 |
-
def
|
145 |
-
|
|
|
|
|
146 |
|
147 |
|
148 |
if __name__ == "__main__":
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
print(
|
|
|
|
|
|
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
9 |
+
num_tweets=10, user_name=None):
|
10 |
"""
|
11 |
Initializes the TextClassifier.
|
12 |
:param model_name: name of the model from openai.
|
|
|
20 |
self.to_date = to_date
|
21 |
self.num_tweets = num_tweets
|
22 |
self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
|
23 |
+
self.user_name = user_name
|
24 |
+
self.df = self.ts.scrape_by_user(user_name)
|
25 |
# self.api_key = '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'
|
26 |
openai.api_key = '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'
|
27 |
|
28 |
+
def scrape_tweets(self):
    """
    Runs scrape_tweets() on the scraper object stored in self.ts.

    Whatever that call returns is neither stored nor returned by this method.
    """
    scraper = self.ts
    scraper.scrape_tweets()
|
33 |
+
|
34 |
@staticmethod
|
35 |
def cleanup_sentiment_results(classification_unclean):
|
36 |
classification_clean = classification_unclean.replace('\n\n', "")
|
|
|
39 |
classification_clean = classification_clean.replace(" ", "")
|
40 |
|
41 |
return classification_clean
|
42 |
+
return response.choices[0]['text']
|
43 |
|
44 |
def classify_sentiment(self, text: str):
|
45 |
"""
|
|
|
54 |
"\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
|
55 |
|
56 |
response = openai.Completion.create(
|
57 |
+
model=self.model_name,
|
58 |
prompt=prompt_string,
|
59 |
temperature=0.0,
|
60 |
max_tokens=256,
|
|
|
68 |
|
69 |
return classification_clean.lower()
|
70 |
|
71 |
+
def classify_sentiment_of_tweets(self):
    """
    Adds a 'sentiment' column to the tweets stored in self.df.

    Each entry of the 'tweet' column is passed to self.classify_sentiment and
    the results are written to a 'sentiment' column of the same object, so
    self.df itself is modified (no copy is made).
    :return: self.df, now carrying the 'sentiment' column
             (presumably a pandas DataFrame - confirm against TwitterScraper).
    """
    tweets = self.df
    sentiments = tweets['tweet'].apply(self.classify_sentiment)
    tweets['sentiment'] = sentiments
    return tweets
|
78 |
|
|
|
86 |
prompt_string = "Who is the TARGET of this "
|
87 |
prompt_string += sentiment
|
88 |
prompt_string += " TWEET?\\nTWEET=\""
|
89 |
+
prompt_string += text
|
90 |
prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
|
91 |
|
92 |
response = openai.Completion.create(
|
|
|
144 |
|
145 |
return tweet_dict
|
146 |
|
147 |
+
def classify_topic(self, text: str):
    """
    Asks the OpenAI completion model for one topic label for a text.
    :param text: string of the tweet text.
    :return: the completion text after self.cleanup_topic_results, lower-cased.
    """
    assert isinstance(text, str)

    # Prompt layout: the quoted tweet, a list of example topics, then the
    # "TOPIC=" cue that the model is expected to complete.
    prompt_string = (
        f"Classify one topic for this tweet:\n \"{text}\" \n"
        "For example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology,"
        "\nTransportation,\nWorld.\nTOPIC="
    )

    request_options = dict(
        model=self.model_name,
        prompt=prompt_string,
        temperature=0,
        max_tokens=892,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    response = openai.Completion.create(**request_options)

    # Only the first returned choice is used.
    raw_topic = response.choices[0]['text']
    return self.cleanup_topic_results(raw_topic).lower()
|
172 |
+
|
173 |
+
def classify_topics_of_tweets(self):
    """
    Adds a 'topic' column to the tweets stored in self.df.

    Each entry of the 'tweet' column is passed to self.classify_topic and the
    results are written to a 'topic' column of the same object, so self.df
    itself is modified (no copy is made).
    :return: self.df, now carrying the 'topic' column
             (presumably a pandas DataFrame - confirm against TwitterScraper).
    """
    tweets = self.df
    topics = tweets['tweet'].apply(self.classify_topic)
    tweets['topic'] = topics
    return tweets
|
180 |
|
181 |
+
def cleanup_topic_results(self, text):
    """
    Normalizes the whitespace of a raw topic completion.

    The method is invoked as self.cleanup_topic_results(raw_text), so the
    first parameter is the instance; it was previously misnamed
    'prediction_dict' and is not used.
    :param text: raw completion text returned by the model.
    :return: text with every run of whitespace (including newlines) collapsed
             to a single space and leading/trailing whitespace removed.
    """
    # str.split() without arguments splits on any whitespace run and drops
    # empty pieces at both ends, so the join yields clean single-spaced text
    # and no stray leading blank ends up in the topic label.
    return " ".join(text.split())
|
185 |
|
186 |
|
187 |
if __name__ == "__main__":
    import pandas as pd

    # Optional: show every column when the result is printed.
    # pd.set_option('display.max_columns', None)

    # Manual smoke run: classify the topics of one account's tweets and print the result.
    tc = TextClassifier(
        model_name="text-davinci-002",
        from_date='2022-01-01',
        to_date=str(date.today()),
        num_tweets=20,
        user_name="jimmieakesson",
    )
    print(tc.classify_topics_of_tweets())
|
192 |
+
|
193 |
+
|
194 |
+
|