Spaces:
Runtime error
Runtime error
Merge branch 'develop' into 35-create-new-text-classifier-sentiment
Browse files- .idea/misc.xml +1 -1
- README.md +1 -0
- textclassifier/TextClassifier.py +46 -17
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
README.md
CHANGED
@@ -22,6 +22,7 @@ För att få alla dependencies:
|
|
22 |
2. Aktivera din virtual environment
|
23 |
2. gå till projektets root path och skriv i terminalen:
|
24 |
$ env2/bin/python -m pip install -r requirements.txt
|
|
|
25 |
|
26 |
|
27 |
|
|
|
22 |
2. Aktivera din virtual environment
|
23 |
2. gå till projektets root path och skriv i terminalen:
|
24 |
$ env2/bin/python -m pip install -r requirements.txt
|
25 |
+
3. I vissa fall funkar det inte att installera twint för Ubuntu. Efter att ha ställt in allt funkade det efter att ha kört "sudo apt-get install build-essential" i terminalen.
|
26 |
|
27 |
|
28 |
|
textclassifier/TextClassifier.py
CHANGED
@@ -6,6 +6,7 @@ from datetime import date
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
|
|
9 |
user_name='jimmieakesson',
|
10 |
num_tweets=20):
|
11 |
"""
|
@@ -26,6 +27,12 @@ class TextClassifier:
|
|
26 |
# self.api_key = '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'
|
27 |
openai.api_key = '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'
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
@staticmethod
|
30 |
def cleanup_sentiment_results(classification_unclean):
|
31 |
"""
|
@@ -39,6 +46,7 @@ class TextClassifier:
|
|
39 |
classification_clean = classification_clean.replace(" ", "")
|
40 |
|
41 |
return classification_clean
|
|
|
42 |
|
43 |
def classify_sentiment(self, text: str):
|
44 |
"""
|
@@ -70,9 +78,9 @@ class TextClassifier:
|
|
70 |
def classify_sentiment_of_tweets(self):
|
71 |
"""
|
72 |
Classifies the sentiment of a user's tweets.
|
73 |
-
:param user_name: string of the user name.
|
74 |
"""
|
75 |
df_sentiment = self.df.copy()
|
|
|
76 |
df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
|
77 |
self.df = df_sentiment
|
78 |
return self.df
|
@@ -158,29 +166,50 @@ class TextClassifier:
|
|
158 |
self.df = df_sentiment
|
159 |
return self.df
|
160 |
|
161 |
-
def
|
162 |
"""
|
163 |
Classifies the topics of a text.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
"""
|
|
|
|
|
|
|
165 |
|
166 |
def __repr__(self):
|
167 |
return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
|
168 |
self.num_tweets)
|
169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
-
|
173 |
-
import warnings
|
174 |
-
|
175 |
-
warnings.simplefilter(action='ignore', category=FutureWarning)
|
176 |
-
pd.set_option('display.max_columns', None)
|
177 |
-
tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01',
|
178 |
-
to_date=str(date.today()), user_name='jimmieakesson', num_tweets=60)
|
179 |
-
print(tc)
|
180 |
-
# df = tc.classify_sentiment_of_tweets("jimmieakesson")
|
181 |
-
# print(df)
|
182 |
-
df = tc.classify_sentiment_of_tweets()
|
183 |
-
print(df.head())
|
184 |
-
df = tc.analyze_sentiment_of_tweets()
|
185 |
-
|
186 |
-
print(df.head())
|
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
9 |
+
|
10 |
user_name='jimmieakesson',
|
11 |
num_tweets=20):
|
12 |
"""
|
|
|
27 |
# self.api_key = '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'
|
28 |
openai.api_key = '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'
|
29 |
|
30 |
+
def scrape_tweets(self):
    """
    Scrape tweets by delegating to the scraper held in ``self.ts``.

    The call is forwarded unchanged to ``self.ts.scrape_tweets()``; nothing is
    returned from this method.
    """
    scraper = self.ts
    scraper.scrape_tweets()
|
35 |
+
|
36 |
@staticmethod
|
37 |
def cleanup_sentiment_results(classification_unclean):
|
38 |
"""
|
|
|
46 |
classification_clean = classification_clean.replace(" ", "")
|
47 |
|
48 |
return classification_clean
|
49 |
+
return response.choices[0]['text']
|
50 |
|
51 |
def classify_sentiment(self, text: str):
|
52 |
"""
|
|
|
78 |
def classify_sentiment_of_tweets(self):
    """
    Classifies the sentiment of a user's tweets.

    Works on a copy of ``self.df``: every value of the ``'tweet'`` column is
    passed to ``self.classify_sentiment`` and the results are stored in a new
    ``'sentiment'`` column. The copy then replaces ``self.df``.

    :return: the updated ``self.df`` including the ``'sentiment'`` column.
    """
    labelled = self.df.copy()
    sentiments = labelled['tweet'].apply(self.classify_sentiment)
    labelled['sentiment'] = sentiments

    self.df = labelled
    return self.df
|
|
|
166 |
self.df = df_sentiment
|
167 |
return self.df
|
168 |
|
169 |
+
def classify_topic(self, text: str):
    """
    Classifies the topic of a text.

    Builds a prompt that quotes the tweet and lists example topics, sends it to
    ``openai.Completion.create`` using ``self.model_name``, and post-processes
    the text of the first returned choice.

    :param text: string of the tweet text.
    :return: the first completion choice, passed through
        ``self.cleanup_topic_results`` and lower-cased.
    """
    assert isinstance(text, str)

    # Quote the tweet, then list the example topics; the prompt ends with "TOPIC=".
    prompt_string = "".join((
        "Classify one topic for this tweet:\n \"",
        text,
        "\" \nFor example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology,",
        "\nTransportation,\nWorld.\nTOPIC=",
    ))

    completion_args = {
        "model": self.model_name,
        "prompt": prompt_string,
        "temperature": 0,
        "max_tokens": 892,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    response = openai.Completion.create(**completion_args)

    raw_topic = response.choices[0]['text']
    return self.cleanup_topic_results(raw_topic).lower()
|
194 |
+
|
195 |
+
def classify_topics_of_tweets(self):
    """
    Classifies the topics of a user's tweets.

    Every value of the ``'tweet'`` column of ``self.df`` is passed to
    ``self.classify_topic`` and the results are written to a ``'topic'``
    column. No copy is taken, so ``self.df`` itself is modified.

    :return: ``self.df`` with the ``'topic'`` column added.
    """
    tweets = self.df
    topics = tweets['tweet'].apply(self.classify_topic)
    tweets['topic'] = topics
    return tweets
|
202 |
|
203 |
def __repr__(self):
|
204 |
return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
|
205 |
self.num_tweets)
|
206 |
|
207 |
+
def cleanup_topic_results(prediction_dict, text):
    """
    Flatten a raw topic prediction onto a single line.

    :param prediction_dict: not used in the body. NOTE(review): at the visible
        call site (``self.cleanup_topic_results(...)``) this slot appears to
        receive the instance - confirm.
    :param text: raw completion text.
    :return: ``text`` with every newline replaced by a space.
    """
    # NOTE(review): as rendered, the second replace swaps a single space for a
    # single space (a no-op); it may originally have collapsed double spaces - confirm.
    return text.replace("\n", " ").replace(" ", " ")
|
211 |
+
|
212 |
+
|
213 |
|
214 |
if __name__ == "__main__":
|
215 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|