Spaces:
Runtime error
Runtime error
added code to classify sentiment and analyze
Browse files
textclassifier/TextClassifier.py
CHANGED
@@ -5,7 +5,8 @@ from datetime import date
|
|
5 |
|
6 |
|
7 |
class TextClassifier:
|
8 |
-
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
|
|
9 |
"""
|
10 |
Initializes the TextClassifier.
|
11 |
:param model_name: name of the model from openai.
|
@@ -18,13 +19,122 @@ class TextClassifier:
|
|
18 |
self.from_date = from_date
|
19 |
self.to_date = to_date
|
20 |
self.num_tweets = num_tweets
|
21 |
-
self.
|
22 |
-
self.api_key = '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'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
def classify_sentiment(self, text: str):
|
25 |
"""
|
26 |
Classifies the sentiment of a text.
|
27 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def classify_topics(self, text: str):
|
30 |
"""
|
@@ -32,5 +142,14 @@ class TextClassifier:
|
|
32 |
"""
|
33 |
|
34 |
def __repr__(self):
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
class TextClassifier:
|
8 |
+
def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
|
9 |
+
num_tweets=20):
|
10 |
"""
|
11 |
Initializes the TextClassifier.
|
12 |
:param model_name: name of the model from openai.
|
|
|
19 |
self.from_date = from_date
|
20 |
self.to_date = to_date
|
21 |
self.num_tweets = num_tweets
|
22 |
+
self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
|
23 |
+
# NOTE(review): a hard-coded API key was redacted from this comment - rotate it and load credentials from configuration instead of committing them.
|
24 |
+
openai.api_key = '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'
|
25 |
+
|
26 |
+
@staticmethod
|
27 |
+
def cleanup_sentiment_results(classification_unclean):
|
28 |
+
classification_clean = classification_unclean.replace('\n\n', "")
|
29 |
+
classification_clean = classification_clean.replace('\n', "")
|
30 |
+
if classification_clean.startswith(" "):
|
31 |
+
classification_clean = classification_clean.replace(" ", "")
|
32 |
+
|
33 |
+
return classification_clean
|
34 |
|
35 |
def classify_sentiment(self, text: str):
    """
    Classifies the sentiment of a text.

    Builds a prompt that quotes the tweet and lists example sentiment labels,
    sends it to the OpenAI completion endpoint using the model configured on
    this instance, and returns the cleaned, lower-cased answer.

    :param text: string of the tweet text.
    :return: the completion text after cleanup_sentiment_results, lower-cased.
    :raises TypeError: if text is not a string.
    """
    # Explicit check instead of assert, which is skipped when Python runs with -O.
    if not isinstance(text, str):
        raise TypeError(f"text must be a str, got {type(text).__name__}")

    example_labels = (
        "Support", "Opposition", "Criticism", "Praise", "Disagreement",
        "Agreement", "Skepticism", "Admiration", "Anecdotes", "Jokes",
        "Memes", "Sarcasm", "Satire", "Questions", "Statements", "Opinions",
        "Predictions",
    )
    prompt_string = (
        "Classify one sentiment for this tweet:\n \"" + text
        + "\" \nFor example:\n"
        + ",\n".join(example_labels)
        + ".\nSENTIMENT="
    )

    response = openai.Completion.create(
        # Use the configured model, as analyze_sentiment does, rather than a
        # hard-coded name that silently ignores the constructor argument.
        model=self.model_name,
        prompt=prompt_string,
        temperature=0.0,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        logprobs=5
    )
    classification_unclean = response.choices[0]['text']
    classification_clean = self.cleanup_sentiment_results(classification_unclean)

    return classification_clean.lower()
|
61 |
+
|
62 |
+
def classify_sentiment_of_tweets(self, user_name: str):
    """
    Scrapes a user's tweets and labels each one with a sentiment.

    The scraped dataframe gets a new 'sentiment' column, filled by running
    classify_sentiment on every entry of its 'tweet' column, and is then
    stored on the instance as self.df. Nothing is returned.

    :param user_name: string of the user name.
    """
    tweets = self.ts.scrape_by_user(user_name)
    sentiment_labels = tweets['tweet'].apply(self.classify_sentiment)
    tweets['sentiment'] = sentiment_labels
    self.df = tweets
|
70 |
+
|
71 |
+
def analyze_sentiment(self, text: str, sentiment: str):
    """
    Analyzes the sentiment of a text using OpenAI.

    Asks the configured model who the target of the given sentiment is and
    returns the normalized answer.

    :param text: string of the tweet text.
    :param sentiment: sentiment label of the tweet; it is inserted into the prompt.
    :return: the target as a string; "N/A" when the answer is rejected by
        the length filter in _normalize_target.
    """
    # Real newlines are used here (not the two characters backslash + n), in
    # line with the prompt built by classify_sentiment.
    prompt_string = (
        "Who is the TARGET of this " + sentiment
        + " TWEET?\nTWEET=\"" + text
        + "\"\n.TARGET should consist of less than 5 words.\nTARGET="
    )

    response = openai.Completion.create(
        model=self.model_name,
        prompt=prompt_string,
        temperature=0,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    return self._normalize_target(response.choices[0]['text'])

@staticmethod
def _normalize_target(raw_target):
    """
    Cleans a raw target answer and merges answers that should be the same.

    :param raw_target: raw completion text naming the target.
    :return: the canonical name for a known alias, "N/A" for an unknown
        answer longer than 10 characters, otherwise the cleaned answer.
    """
    # Lower-cased aliases mapped to the single name they should be merged into.
    aliases = {
        "v": "Vänsterpartiet",
        "mp": "Miljöpartiet",
        "the swedish green party": "Miljöpartiet",
        "s": "Socialdemokraterna",
        "the swedish social democratic party": "Socialdemokraterna",
        "c": "Centerpartiet",
        "l": "Liberalerna",
        "kd": "Kristdemokraterna",
        "m": "Moderaterna",
        "the swedish moderate party": "Moderaterna",
        "the moderate party": "Moderaterna",
        "sd": "Sverigedemokraterna",
        "the swedish government": "Regeringen",
    }

    # Drop parentheses and surrounding whitespace so "(S)" and " S" both match.
    target = raw_target.replace("(", "").replace(")", "").strip()

    # Aliases are checked before the length filter; otherwise every alias
    # longer than 10 characters would be turned into "N/A" and never match.
    canonical = aliases.get(target.lower())
    if canonical is not None:
        return canonical

    # Sometimes GPT-3 gives faulty results, so a simple filter is applied:
    # an unknown answer that is too long is set to N/A (not applicable).
    if len(target) > 10:
        return "N/A"

    return target
|
138 |
|
139 |
def classify_topics(self, text: str):
|
140 |
"""
|
|
|
142 |
"""
|
143 |
|
144 |
def __repr__(self):
|
145 |
+
if self.df is None:
|
146 |
+
return "No dataframe available."
|
147 |
+
else:
|
148 |
+
return self.df.to_string()
|
149 |
+
|
150 |
+
|
151 |
+
if __name__ == "__main__":
    # Demo run: print the classifier before and after classifying one
    # account's tweets.
    classifier = TextClassifier(
        model_name="text-davinci-002",
        from_date='2022-01-01',
        to_date=str(date.today()),
        num_tweets=20,
    )
    print(classifier)
    classifier.classify_sentiment_of_tweets("jimmieakesson")
    print(classifier)
|
twitterscraper/TwitterScraper.py
CHANGED
@@ -109,5 +109,6 @@ if __name__ == "__main__":
|
|
109 |
dc = sc.scrape_by_user("jimmieakesson")
|
110 |
print(dc.head())
|
111 |
print(dc.shape)
|
|
|
112 |
|
113 |
|
|
|
109 |
dc = sc.scrape_by_user("jimmieakesson")
|
110 |
print(dc.head())
|
111 |
print(dc.shape)
|
112 |
+
print(dc.columns)
|
113 |
|
114 |
|