Demea9000 committed on
Commit
32119e0
1 Parent(s): 2a4df2c

added code to classify sentiment and analyze

Browse files
textclassifier/TextClassifier.py CHANGED
@@ -5,7 +5,8 @@ from datetime import date
5
 
6
 
7
  class TextClassifier:
8
- def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=100):
 
9
  """
10
  Initializes the TextClassifier.
11
  :param model_name: name of the model from openai.
@@ -18,13 +19,122 @@ class TextClassifier:
18
  self.from_date = from_date
19
  self.to_date = to_date
20
  self.num_tweets = num_tweets
21
- self.df = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
22
- self.api_key = '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'
 
 
 
 
 
 
 
 
 
 
23
 
24
  def classify_sentiment(self, text: str):
25
  """
26
  Classifies the sentiment of a text.
27
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def classify_topics(self, text: str):
30
  """
@@ -32,5 +142,14 @@ class TextClassifier:
32
  """
33
 
34
  def __repr__(self):
35
- return f"TextClassifier(model_name={self.model_name}, from_date={self.from_date}, to_date={self.to_date}, " \
36
- f"num_tweets={self.num_tweets}) "
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
class TextClassifier:
    """
    Classifies scraped tweets with an OpenAI completion model.

    The classifier scrapes tweets through ``TwitterScraper``, asks the OpenAI
    completion endpoint for a sentiment label per tweet and can additionally
    ask for the target of a given sentiment.
    """

    # Lower-cased model answers mapped to the canonical name they are merged into.
    _TARGET_ALIASES = {
        "v": "Vänsterpartiet",
        "mp": "Miljöpartiet",
        "the swedish green party": "Miljöpartiet",
        "s": "Socialdemokraterna",
        "the swedish social democratic party": "Socialdemokraterna",
        "c": "Centerpartiet",
        "l": "Liberalerna",
        "kd": "Kristdemokraterna",
        "m": "Moderaterna",
        "the swedish moderate party": "Moderaterna",
        "the moderate party": "Moderaterna",
        "sd": "Sverigedemokraterna",
        "the swedish government": "Regeringen",
    }

    # Answers longer than this (in characters) that match no alias are treated as faulty.
    _MAX_TARGET_LENGTH = 10

    # NOTE(review): the ``to_date`` default is evaluated once, when the class is
    # defined, so a long-running process keeps the date of its start-up.
    def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
                 num_tweets=20):
        """
        Initializes the TextClassifier.
        :param model_name: name of the model from openai.
        :param from_date: forwarded to TwitterScraper as its first argument.
        :param to_date: forwarded to TwitterScraper as its second argument.
        :param num_tweets: forwarded to TwitterScraper as its third argument.
        """
        # Local import: the file's import block is not visible from this section.
        import os

        self.model_name = model_name
        self.from_date = from_date
        self.to_date = to_date
        self.num_tweets = num_tweets
        self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
        # Filled in by classify_sentiment_of_tweets(); __repr__ relies on it existing.
        self.df = None

        # Secrets must never be committed to source control: the key is read from
        # the environment instead of being hard-coded. When the variable is not
        # set, the openai library's own configuration is left untouched.
        api_key = os.environ.get("OPENAI_API_KEY")
        if api_key:
            openai.api_key = api_key

    @staticmethod
    def cleanup_sentiment_results(classification_unclean):
        """
        Cleans up a raw sentiment completion.
        :param classification_unclean: text of the completion returned by the model.
        :return: the text with newlines removed and surrounding whitespace trimmed.
        """
        # Only the surrounding whitespace is trimmed; spaces inside a multi-word
        # answer are kept.
        return classification_unclean.replace('\n', "").strip()

    @classmethod
    def _normalize_target(cls, raw_target):
        """
        Normalizes a raw target completion.
        :param raw_target: text of the completion returned by the model.
        :return: the canonical name for known aliases, "N/A" for answers that are
                 too long, otherwise the trimmed answer without parentheses.
        """
        target = raw_target.strip()
        too_long = len(target) > cls._MAX_TARGET_LENGTH
        target = target.replace("(", "").replace(")", "")

        # Aliases are checked before the length filter so that long, known names
        # ("the swedish moderate party", ...) are still merged.
        canonical = cls._TARGET_ALIASES.get(target.lower())
        if canonical is not None:
            return canonical

        # Sometimes GPT-3 gives faulty results, so a simple filter is applied:
        # an over-long answer is replaced by N/A (not applicable).
        return "N/A" if too_long else target

    def classify_sentiment(self, text: str):
        """
        Classifies the sentiment of a text.
        :param text: string of the tweet text.
        :return: the cleaned, lower-cased sentiment label returned by the model.
        """
        assert isinstance(text, str)

        prompt_string = (
            "Classify one sentiment for this tweet:\n \""
            + text
            + "\" \nFor example:\nSupport,\nOpposition,\nCriticism,\nPraise,\nDisagreement,"
            "\nAgreement,\nSkepticism,\nAdmiration,\nAnecdotes,\nJokes,\nMemes,\nSarcasm,\nSatire,"
            "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
        )

        response = openai.Completion.create(
            # Use the configured model, consistent with analyze_sentiment().
            model=self.model_name,
            prompt=prompt_string,
            temperature=0.0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            logprobs=5
        )
        classification_unclean = response.choices[0]['text']

        return self.cleanup_sentiment_results(classification_unclean).lower()

    def classify_sentiment_of_tweets(self, user_name: str):
        """
        Classifies the sentiment of a user's tweets.
        :param user_name: string of the user name.

        The scraped result (presumably a pandas DataFrame - confirm against
        TwitterScraper.scrape_by_user) gets a 'sentiment' column computed from
        its 'tweet' column and is stored in ``self.df``.
        """
        df_sentiment = self.ts.scrape_by_user(user_name)
        df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
        self.df = df_sentiment

    def analyze_sentiment(self, text: str, sentiment: str):
        """
        Analyzes the sentiment of a text using OpenAI.
        :param text: string of the tweet text.
        :param sentiment: sentiment label of the tweet, inserted into the prompt.
        :return: string naming the target of the sentiment; known aliases are merged
                 into one canonical name and faulty (over-long) answers become "N/A".
        """
        # NOTE(review): "\\n" sends a literal backslash followed by "n" to the
        # model, unlike the real newlines used in classify_sentiment() - confirm
        # whether that is intended before changing the prompt.
        prompt_string = (
            f"Who is the TARGET of this {sentiment} TWEET?\\nTWEET=\"{text}"
            "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
        )

        response = openai.Completion.create(
            model=self.model_name,
            prompt=prompt_string,
            temperature=0,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )

        return self._normalize_target(response.choices[0]['text'])

    def classify_topics(self, text: str):
        """
        Classifies the topics of a text.
        """

    def __repr__(self):
        # getattr keeps repr() usable even on an instance whose __init__ did not run.
        df = getattr(self, "df", None)
        if df is None:
            return "No dataframe available."
        return df.to_string()
149
+
150
+
151
if __name__ == "__main__":
    # Demo run: print the classifier, classify the tweets of one user, print it again.
    classifier = TextClassifier(
        model_name="text-davinci-002",
        from_date='2022-01-01',
        to_date=str(date.today()),
        num_tweets=20,
    )
    print(classifier)
    classifier.classify_sentiment_of_tweets("jimmieakesson")
    print(classifier)
twitterscraper/TwitterScraper.py CHANGED
@@ -109,5 +109,6 @@ if __name__ == "__main__":
109
  dc = sc.scrape_by_user("jimmieakesson")
110
  print(dc.head())
111
  print(dc.shape)
 
112
 
113
 
 
109
  dc = sc.scrape_by_user("jimmieakesson")
110
  print(dc.head())
111
  print(dc.shape)
112
+ print(dc.columns)
113
 
114