Oresti Theodoridis committed on
Commit
725b13f
2 Parent(s): c5f746c 11ba7a0

Merge branch 'develop' into 35-create-new-text-classifier-sentiment

Browse files
Files changed (3) hide show
  1. .idea/misc.xml +1 -1
  2. README.md +1 -0
  3. textclassifier/TextClassifier.py +46 -17
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
README.md CHANGED
@@ -22,6 +22,7 @@ För att få alla dependencies:
22
  2. Aktivera din virtual environment
23
  2. gå till projektets root path och skriv i terminalen:
24
  $ env2/bin/python -m pip install -r requirements.txt
 
25
 
26
 
27
 
 
22
  2. Aktivera din virtual environment
23
  2. gå till projektets root path och skriv i terminalen:
24
  $ env2/bin/python -m pip install -r requirements.txt
25
+ 3. I vissa fall funkar det inte att installera twint för Ubuntu. Efter att ha ställt in allt funkade det efter att ha kört "sudo apt-get install build-essential" i terminalen.
26
 
27
 
28
 
textclassifier/TextClassifier.py CHANGED
@@ -6,6 +6,7 @@ from datetime import date
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
 
9
  user_name='jimmieakesson',
10
  num_tweets=20):
11
  """
@@ -26,6 +27,12 @@ class TextClassifier:
26
  # self.api_key = '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'
27
  openai.api_key = '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'
28
 
 
 
 
 
 
 
29
  @staticmethod
30
  def cleanup_sentiment_results(classification_unclean):
31
  """
@@ -39,6 +46,7 @@ class TextClassifier:
39
  classification_clean = classification_clean.replace(" ", "")
40
 
41
  return classification_clean
 
42
 
43
  def classify_sentiment(self, text: str):
44
  """
@@ -70,9 +78,9 @@ class TextClassifier:
70
  def classify_sentiment_of_tweets(self):
71
  """
72
  Classifies the sentiment of a user's tweets.
73
- :param user_name: string of the user name.
74
  """
75
  df_sentiment = self.df.copy()
 
76
  df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
77
  self.df = df_sentiment
78
  return self.df
@@ -158,29 +166,50 @@ class TextClassifier:
158
  self.df = df_sentiment
159
  return self.df
160
 
161
- def classify_topics(self, text: str):
162
  """
163
  Classifies the topics of a text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  """
 
 
 
165
 
166
  def __repr__(self):
167
  return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
168
  self.num_tweets)
169
 
 
 
 
 
 
 
170
 
171
  if __name__ == "__main__":
172
- import pandas as pd
173
- import warnings
174
-
175
- warnings.simplefilter(action='ignore', category=FutureWarning)
176
- pd.set_option('display.max_columns', None)
177
- tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01',
178
- to_date=str(date.today()), user_name='jimmieakesson', num_tweets=60)
179
- print(tc)
180
- # df = tc.classify_sentiment_of_tweets("jimmieakesson")
181
- # print(df)
182
- df = tc.classify_sentiment_of_tweets()
183
- print(df.head())
184
- df = tc.analyze_sentiment_of_tweets()
185
-
186
- print(df.head())
 
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
9
+
10
  user_name='jimmieakesson',
11
  num_tweets=20):
12
  """
 
27
  # self.api_key = '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'
28
  openai.api_key = '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'
29
 
30
+ def scrape_tweets(self):
31
+ """
32
+ Scrapes tweets from the given date range.
33
+ """
34
+ self.ts.scrape_tweets()
35
+
36
  @staticmethod
37
  def cleanup_sentiment_results(classification_unclean):
38
  """
 
46
  classification_clean = classification_clean.replace(" ", "")
47
 
48
  return classification_clean
49
+ return response.choices[0]['text']
50
 
51
  def classify_sentiment(self, text: str):
52
  """
 
78
  def classify_sentiment_of_tweets(self):
79
  """
80
  Classifies the sentiment of a user's tweets.
 
81
  """
82
  df_sentiment = self.df.copy()
83
+
84
  df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
85
  self.df = df_sentiment
86
  return self.df
 
166
  self.df = df_sentiment
167
  return self.df
168
 
169
+ def classify_topic(self, text: str):
170
  """
171
  Classifies the topics of a text.
172
+ :param text: string of the tweet text.
173
+ """
174
+ assert isinstance(text, str)
175
+
176
+ prompt_string = "Classify one topic for this tweet:\n \""
177
+ prompt_string += text
178
+ prompt_string += "\" \nFor example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology," \
179
+ "\nTransportation,\nWorld.\nTOPIC="
180
+
181
+ response = openai.Completion.create(
182
+ model=self.model_name,
183
+ prompt=prompt_string,
184
+ temperature=0,
185
+ max_tokens=892,
186
+ top_p=1,
187
+ frequency_penalty=0,
188
+ presence_penalty=0,
189
+ )
190
+ classification_unclean = response.choices[0]['text']
191
+ classification_clean = self.cleanup_topic_results(classification_unclean)
192
+
193
+ return classification_clean.lower()
194
+
195
+ def classify_topics_of_tweets(self):
196
+ """
197
+ Classifies the topics of a user's tweets.
198
  """
199
+ df_topic = self.df
200
+ df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
201
+ return df_topic
202
 
203
  def __repr__(self):
204
  return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date,
205
  self.num_tweets)
206
 
207
+ def cleanup_topic_results(prediction_dict, text):
208
+ new_item = text.replace("\n", " ")
209
+ new_item = new_item.replace(" ", " ")
210
+ return new_item
211
+
212
+
213
 
214
  if __name__ == "__main__":
215
+