Demea9000 committed on
Commit
1ace546
1 Parent(s): c91c79b

utökade sentiment metoderna, la till placeholder för att spara redan promptade tweets

Browse files
Files changed (1) hide show
  1. textclassifier/TextClassifier.py +55 -11
textclassifier/TextClassifier.py CHANGED
@@ -6,6 +6,7 @@ from datetime import date
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
 
9
  num_tweets=20):
10
  """
11
  Initializes the TextClassifier.
@@ -19,12 +20,29 @@ class TextClassifier:
19
  self.from_date = from_date
20
  self.to_date = to_date
21
  self.num_tweets = num_tweets
 
22
  self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
 
23
  # self.api_key = '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'
24
  openai.api_key = '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'
25
 
 
 
 
 
 
 
 
 
 
 
26
  @staticmethod
27
  def cleanup_sentiment_results(classification_unclean):
 
 
 
 
 
28
  classification_clean = classification_unclean.replace('\n\n', "")
29
  classification_clean = classification_clean.replace('\n', "")
30
  if classification_clean.startswith(" "):
@@ -45,7 +63,7 @@ class TextClassifier:
45
  "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
46
 
47
  response = openai.Completion.create(
48
- model="text-davinci-002",
49
  prompt=prompt_string,
50
  temperature=0.0,
51
  max_tokens=256,
@@ -59,26 +77,29 @@ class TextClassifier:
59
 
60
  return classification_clean.lower()
61
 
62
- def classify_sentiment_of_tweets(self, user_name: str):
63
  """
64
  Classifies the sentiment of a user's tweets.
65
  :param user_name: string of the user name.
66
  """
67
- df_sentiment = self.ts.scrape_by_user(user_name)
68
  df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
69
- return df_sentiment
 
70
 
71
  def analyze_sentiment(self, text: str, sentiment: str):
 
72
  """
73
  Analyzes the sentiment of a text using OpenAI.
74
  :param text: string of the tweet text.
75
  :param sentiment:
76
  :return:
77
  """
 
78
  prompt_string = "Who is the TARGET of this "
79
  prompt_string += sentiment
80
  prompt_string += " TWEET?\\nTWEET=\""
81
- prompt_string += tweet
82
  prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
83
 
84
  response = openai.Completion.create(
@@ -92,7 +113,7 @@ class TextClassifier:
92
  )
93
 
94
  analyzed_sentiment = response.choices[0]['text']
95
-
96
  # Remove spaces at the start/end of the response
97
  if analyzed_sentiment.startswith(' '):
98
  analyzed_sentiment = analyzed_sentiment[1:]
@@ -132,9 +153,20 @@ class TextClassifier:
132
  elif analyzed_sentiment.lower() == "the swedish government":
133
  analyzed_sentiment = "Regeringen"
134
 
135
- tweet_dict[tweet]['target'] = analyzed_sentiment
136
 
137
- return tweet_dict
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  def classify_topics(self, text: str):
140
  """
@@ -142,11 +174,23 @@ class TextClassifier:
142
  """
143
 
144
  def __repr__(self):
145
- return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date, self.num_tweets)
 
146
 
147
 
148
  if __name__ == "__main__":
149
- tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=20)
 
 
 
 
 
 
150
  print(tc)
151
- df = tc.classify_sentiment_of_tweets("jimmieakesson")
 
 
 
 
 
152
  print(df)
 
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
9
+ user_name='jimmieakesson',
10
  num_tweets=20):
11
  """
12
  Initializes the TextClassifier.
 
20
  self.from_date = from_date
21
  self.to_date = to_date
22
  self.num_tweets = num_tweets
23
+ self.user_name = user_name
24
  self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
25
+ self.df = self.ts.scrape_by_user(user_name)
26
  # self.api_key = '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'
27
  openai.api_key = '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'
28
 
29
def store_tweets(self, file_name: str):
    """
    Writes the tweets held in ``self.df`` to a CSV file.

    The DataFrame index is not written. The ``.csv`` suffix is always
    appended, so pass the file name without an extension.

    :param file_name: path of the output file, without the ``.csv`` suffix.
    """
    csv_path = file_name + '.csv'
    self.df.to_csv(csv_path, index=False)
38
+
39
  @staticmethod
40
  def cleanup_sentiment_results(classification_unclean):
41
+ """
42
+ Cleans up the results of the sentiment classification.
43
+ :param classification_unclean: string of the classification result.
44
+ :return: cleaned up string.
45
+ """
46
  classification_clean = classification_unclean.replace('\n\n', "")
47
  classification_clean = classification_clean.replace('\n', "")
48
  if classification_clean.startswith(" "):
 
63
  "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
64
 
65
  response = openai.Completion.create(
66
+ model=self.model_name,
67
  prompt=prompt_string,
68
  temperature=0.0,
69
  max_tokens=256,
 
77
 
78
  return classification_clean.lower()
79
 
80
def classify_sentiment_of_tweets(self):
    """
    Classifies the sentiment of every tweet held in ``self.df``.

    Applies ``self.classify_sentiment`` to each value of the 'tweet' column
    and stores the results in a 'sentiment' column. ``self.df`` is modified
    in place.

    :return: ``self.df`` with the 'sentiment' column added or overwritten.
    """
    self.df['sentiment'] = self.df['tweet'].apply(self.classify_sentiment)
    return self.df
89
 
90
  def analyze_sentiment(self, text: str, sentiment: str):
91
+ # TODO: fix prompt before running this method
92
  """
93
  Analyzes the sentiment of a text using OpenAI.
94
  :param text: string of the tweet text.
95
  :param sentiment:
96
  :return:
97
  """
98
+ assert 1 == 2, "Måste fixa prompt innan denna metod körs"
99
  prompt_string = "Who is the TARGET of this "
100
  prompt_string += sentiment
101
  prompt_string += " TWEET?\\nTWEET=\""
102
+ prompt_string += text
103
  prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
104
 
105
  response = openai.Completion.create(
 
113
  )
114
 
115
  analyzed_sentiment = response.choices[0]['text']
116
+ print(analyzed_sentiment)
117
  # Remove spaces at the start/end of the response
118
  if analyzed_sentiment.startswith(' '):
119
  analyzed_sentiment = analyzed_sentiment[1:]
 
153
  elif analyzed_sentiment.lower() == "the swedish government":
154
  analyzed_sentiment = "Regeringen"
155
 
156
+ return analyzed_sentiment
157
 
158
def analyze_sentiment_of_tweets(self):
    """
    Determines the target of every tweet held in ``self.df``.

    Calls ``self.analyze_sentiment(tweet, sentiment)`` once per row, pairing
    each tweet with its own 'sentiment' value, and stores the results in a
    'target' column. ``self.df`` is modified in place.

    :return: ``self.df`` with the 'target' column added or overwritten.
    :raises AssertionError: if the 'sentiment' column is missing, i.e.
        classify_sentiment_of_tweets has not been run yet.
    """
    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; the exception type is kept for existing callers.
    if 'sentiment' not in self.df.columns:
        raise AssertionError(
            "'sentiment' column does not exist. Please run classify_sentiment_of_tweets first."
        )

    # Pair each tweet with the sentiment on the same row. Passing the whole
    # 'sentiment' Series through ``apply(..., args=...)`` does not do that.
    self.df['target'] = [
        self.analyze_sentiment(tweet, sentiment)
        for tweet, sentiment in zip(self.df['tweet'], self.df['sentiment'])
    ]
    return self.df
170
 
171
  def classify_topics(self, text: str):
172
  """
 
174
  """
175
 
176
  def __repr__(self):
177
+ return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
178
+ self.num_tweets)
179
 
180
 
181
  if __name__ == "__main__":
182
+ import pandas as pd
183
+ import warnings
184
+
185
+ warnings.simplefilter(action='ignore', category=FutureWarning)
186
+ pd.set_option('display.max_columns', None)
187
+ tc = TextClassifier(model_name="text-davinci-002", from_date='2022-07-01',
188
+ to_date=str(date.today()), user_name='jimmieakesson', num_tweets=20)
189
  print(tc)
190
+ # df = tc.classify_sentiment_of_tweets("jimmieakesson")
191
+ # print(df)
192
+ df = tc.classify_sentiment_of_tweets()
193
+ print(df.head())
194
+ # df = tc.analyze_sentiment("Nu har sd igen gjort fel", "critical")
195
+
196
  print(df)