Spaces:
Runtime error
Runtime error
utökade sentiment metoderna, la till placeholder för att spara redan promptade tweets
Browse files- textclassifier/TextClassifier.py +55 -11
textclassifier/TextClassifier.py
CHANGED
@@ -6,6 +6,7 @@ from datetime import date
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
|
|
9 |
num_tweets=20):
|
10 |
"""
|
11 |
Initializes the TextClassifier.
|
@@ -19,12 +20,29 @@ class TextClassifier:
|
|
19 |
self.from_date = from_date
|
20 |
self.to_date = to_date
|
21 |
self.num_tweets = num_tweets
|
|
|
22 |
self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
|
|
|
23 |
# self.api_key = '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'
|
24 |
openai.api_key = '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'
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
@staticmethod
|
27 |
def cleanup_sentiment_results(classification_unclean):
|
|
|
|
|
|
|
|
|
|
|
28 |
classification_clean = classification_unclean.replace('\n\n', "")
|
29 |
classification_clean = classification_clean.replace('\n', "")
|
30 |
if classification_clean.startswith(" "):
|
@@ -45,7 +63,7 @@ class TextClassifier:
|
|
45 |
"\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
|
46 |
|
47 |
response = openai.Completion.create(
|
48 |
-
model=
|
49 |
prompt=prompt_string,
|
50 |
temperature=0.0,
|
51 |
max_tokens=256,
|
@@ -59,26 +77,29 @@ class TextClassifier:
|
|
59 |
|
60 |
return classification_clean.lower()
|
61 |
|
62 |
-
def classify_sentiment_of_tweets(self
|
63 |
"""
|
64 |
Classifies the sentiment of a user's tweets.
|
65 |
:param user_name: string of the user name.
|
66 |
"""
|
67 |
-
df_sentiment = self.
|
68 |
df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
|
69 |
-
|
|
|
70 |
|
71 |
def analyze_sentiment(self, text: str, sentiment: str):
|
|
|
72 |
"""
|
73 |
Analyzes the sentiment of a text using OpenAI.
|
74 |
:param text: string of the tweet text.
|
75 |
:param sentiment:
|
76 |
:return:
|
77 |
"""
|
|
|
78 |
prompt_string = "Who is the TARGET of this "
|
79 |
prompt_string += sentiment
|
80 |
prompt_string += " TWEET?\\nTWEET=\""
|
81 |
-
prompt_string +=
|
82 |
prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
|
83 |
|
84 |
response = openai.Completion.create(
|
@@ -92,7 +113,7 @@ class TextClassifier:
|
|
92 |
)
|
93 |
|
94 |
analyzed_sentiment = response.choices[0]['text']
|
95 |
-
|
96 |
# Remove spaces at the start/end of the response
|
97 |
if analyzed_sentiment.startswith(' '):
|
98 |
analyzed_sentiment = analyzed_sentiment[1:]
|
@@ -132,9 +153,20 @@ class TextClassifier:
|
|
132 |
elif analyzed_sentiment.lower() == "the swedish government":
|
133 |
analyzed_sentiment = "Regeringen"
|
134 |
|
135 |
-
|
136 |
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
def classify_topics(self, text: str):
|
140 |
"""
|
@@ -142,11 +174,23 @@ class TextClassifier:
|
|
142 |
"""
|
143 |
|
144 |
def __repr__(self):
|
145 |
-
return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
|
|
|
146 |
|
147 |
|
148 |
if __name__ == "__main__":
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
print(tc)
|
151 |
-
df = tc.classify_sentiment_of_tweets("jimmieakesson")
|
|
|
|
|
|
|
|
|
|
|
152 |
print(df)
|
|
|
6 |
|
7 |
class TextClassifier:
|
8 |
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
9 |
+
user_name='jimmieakesson',
|
10 |
num_tweets=20):
|
11 |
"""
|
12 |
Initializes the TextClassifier.
|
|
|
20 |
self.from_date = from_date
|
21 |
self.to_date = to_date
|
22 |
self.num_tweets = num_tweets
|
23 |
+
self.user_name = user_name
|
24 |
self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
|
25 |
+
self.df = self.ts.scrape_by_user(user_name)
|
26 |
# self.api_key = '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'
|
27 |
openai.api_key = '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'
|
28 |
|
29 |
+
def store_tweets(self, file_name: str):
    """
    Writes the DataFrame held in self.df to a CSV file, without the index column.

    :param file_name: output path without extension; '.csv' is appended to it.
    :return: None.
    """
    # TODO: implement this method
    csv_path = ''.join((file_name, '.csv'))
    self.df.to_csv(csv_path, index=False)
|
38 |
+
|
39 |
@staticmethod
|
40 |
def cleanup_sentiment_results(classification_unclean):
|
41 |
+
"""
|
42 |
+
Cleans up the results of the sentiment classification.
|
43 |
+
:param classification_unclean: string of the classification result.
|
44 |
+
:return: cleaned up string.
|
45 |
+
"""
|
46 |
classification_clean = classification_unclean.replace('\n\n', "")
|
47 |
classification_clean = classification_clean.replace('\n', "")
|
48 |
if classification_clean.startswith(" "):
|
|
|
63 |
"\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
|
64 |
|
65 |
response = openai.Completion.create(
|
66 |
+
model=self.model_name,
|
67 |
prompt=prompt_string,
|
68 |
temperature=0.0,
|
69 |
max_tokens=256,
|
|
|
77 |
|
78 |
return classification_clean.lower()
|
79 |
|
80 |
+
def classify_sentiment_of_tweets(self):
    """
    Classifies the sentiment of every tweet stored in self.df.

    Applies self.classify_sentiment to each value of the 'tweet' column and
    stores the results in a 'sentiment' column. self.df is modified in place.

    :return: self.df, now including the 'sentiment' column.
    :raises KeyError: if self.df has no 'tweet' column.
    """
    # Work on self.df directly: the previous local alias pointed at the same
    # object, so assigning it back to self.df had no effect.
    self.df['sentiment'] = self.df['tweet'].apply(self.classify_sentiment)
    return self.df
|
89 |
|
90 |
def analyze_sentiment(self, text: str, sentiment: str):
|
91 |
+
# TODO: fix prompt before running this method
|
92 |
"""
|
93 |
Analyzes the sentiment of a text using OpenAI.
|
94 |
:param text: string of the tweet text.
|
95 |
:param sentiment:
|
96 |
:return:
|
97 |
"""
|
98 |
+
assert 1 == 2, "Måste fixa prompt innan denna metod körs"
|
99 |
prompt_string = "Who is the TARGET of this "
|
100 |
prompt_string += sentiment
|
101 |
prompt_string += " TWEET?\\nTWEET=\""
|
102 |
+
prompt_string += text
|
103 |
prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
|
104 |
|
105 |
response = openai.Completion.create(
|
|
|
113 |
)
|
114 |
|
115 |
analyzed_sentiment = response.choices[0]['text']
|
116 |
+
print(analyzed_sentiment)
|
117 |
# Remove spaces at the start/end of the response
|
118 |
if analyzed_sentiment.startswith(' '):
|
119 |
analyzed_sentiment = analyzed_sentiment[1:]
|
|
|
153 |
elif analyzed_sentiment.lower() == "the swedish government":
|
154 |
analyzed_sentiment = "Regeringen"
|
155 |
|
156 |
+
return analyzed_sentiment
|
157 |
|
158 |
+
def analyze_sentiment_of_tweets(self):
    """
    Determines the target of every tweet in self.df, given its sentiment.

    Calls self.analyze_sentiment(tweet, sentiment) once per row, pairing each
    tweet with the sentiment on the same row, and stores the results in a
    'target' column. self.df is modified in place.

    :return: self.df, now including the 'target' column.
    :raises AssertionError: if the 'sentiment' column is missing, i.e.
        classify_sentiment_of_tweets has not been run yet.
    """
    # Explicit raise instead of an assert statement so the precondition is
    # still enforced under `python -O`; the exception type is unchanged.
    if 'sentiment' not in self.df.columns:
        raise AssertionError(
            "'sentiment' column does not exist. Please run classify_sentiment_of_tweets first."
        )

    df_analyze = self.df
    # Pair each tweet with its own sentiment. The earlier Series.apply call
    # passed the whole sentiment column as extra arguments to every call.
    df_analyze['target'] = [
        self.analyze_sentiment(tweet, sentiment)
        for tweet, sentiment in zip(df_analyze['tweet'], df_analyze['sentiment'])
    ]
    self.df = df_analyze
    return self.df
|
170 |
|
171 |
def classify_topics(self, text: str):
|
172 |
"""
|
|
|
174 |
"""
|
175 |
|
176 |
def __repr__(self):
|
177 |
+
return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
|
178 |
+
self.num_tweets)
|
179 |
|
180 |
|
181 |
if __name__ == "__main__":
    import warnings

    import pandas as pd

    # Suppress FutureWarning output and let pandas print every column.
    warnings.simplefilter('ignore', FutureWarning)
    pd.set_option('display.max_columns', None)

    # Manual smoke run against one hard-coded account.
    tc = TextClassifier(
        user_name='jimmieakesson',
        model_name="text-davinci-002",
        num_tweets=20,
        from_date='2022-07-01',
        to_date=date.today().isoformat(),
    )
    print(tc)

    # Classify all tweets, then show a preview followed by the full frame.
    df = tc.classify_sentiment_of_tweets()
    print(df.head())
    print(df)
|