Nisse00 committed on
Commit
5ce4dc5
1 Parent(s): c91c79b

Classify Topics done

Browse files
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet) (2)" project-jdk-type="Python SDK" />
4
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
.idea/politweet.iml CHANGED
@@ -5,7 +5,7 @@
5
  <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
6
  <excludeFolder url="file://$MODULE_DIR$/venv" />
7
  </content>
8
- <orderEntry type="jdk" jdkName="Python 3.9 (politweet) (2)" jdkType="Python SDK" />
9
  <orderEntry type="sourceFolder" forTests="false" />
10
  </component>
11
  <component name="PyNamespacePackagesService">
 
5
  <excludeFolder url="file://$MODULE_DIR$/politweet-environment" />
6
  <excludeFolder url="file://$MODULE_DIR$/venv" />
7
  </content>
8
+ <orderEntry type="jdk" jdkName="Python 3.10 (politweet)" jdkType="Python SDK" />
9
  <orderEntry type="sourceFolder" forTests="false" />
10
  </component>
11
  <component name="PyNamespacePackagesService">
textclassifier/TextClassifier.py CHANGED
@@ -6,7 +6,7 @@ from datetime import date
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
9
- num_tweets=20):
10
  """
11
  Initializes the TextClassifier.
12
  :param model_name: name of the model from openai.
@@ -20,9 +20,17 @@ class TextClassifier:
20
  self.to_date = to_date
21
  self.num_tweets = num_tweets
22
  self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
 
 
23
  # self.api_key = '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'
24
  openai.api_key = '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'
25
 
 
 
 
 
 
 
26
  @staticmethod
27
  def cleanup_sentiment_results(classification_unclean):
28
  classification_clean = classification_unclean.replace('\n\n', "")
@@ -31,6 +39,7 @@ class TextClassifier:
31
  classification_clean = classification_clean.replace(" ", "")
32
 
33
  return classification_clean
 
34
 
35
  def classify_sentiment(self, text: str):
36
  """
@@ -45,7 +54,7 @@ class TextClassifier:
45
  "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
46
 
47
  response = openai.Completion.create(
48
- model="text-davinci-002",
49
  prompt=prompt_string,
50
  temperature=0.0,
51
  max_tokens=256,
@@ -59,12 +68,11 @@ class TextClassifier:
59
 
60
  return classification_clean.lower()
61
 
62
- def classify_sentiment_of_tweets(self, user_name: str):
63
  """
64
  Classifies the sentiment of a user's tweets.
65
- :param user_name: string of the user name.
66
  """
67
- df_sentiment = self.ts.scrape_by_user(user_name)
68
  df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
69
  return df_sentiment
70
 
@@ -78,7 +86,7 @@ class TextClassifier:
78
  prompt_string = "Who is the TARGET of this "
79
  prompt_string += sentiment
80
  prompt_string += " TWEET?\\nTWEET=\""
81
- prompt_string += tweet
82
  prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
83
 
84
  response = openai.Completion.create(
@@ -136,17 +144,51 @@ class TextClassifier:
136
 
137
  return tweet_dict
138
 
139
- def classify_topics(self, text: str):
140
  """
141
  Classifies the topics of a text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  """
 
 
 
143
 
144
- def __repr__(self):
145
- return "TwitterScraper(from_date={}, to_date={}, num_tweets={})".format(self.from_date, self.to_date, self.num_tweets)
 
 
146
 
147
 
148
  if __name__ == "__main__":
149
- tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=20)
150
- print(tc)
151
- df = tc.classify_sentiment_of_tweets("jimmieakesson")
152
- print(df)
 
 
 
 
6
 
7
  class TextClassifier:
8
  def __init__(self, model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()),
9
+ num_tweets=10, user_name=None):
10
  """
11
  Initializes the TextClassifier.
12
  :param model_name: name of the model from openai.
 
20
  self.to_date = to_date
21
  self.num_tweets = num_tweets
22
  self.ts = TwitterScraper.TwitterScraper(from_date, to_date, num_tweets)
23
+ self.user_name = user_name
24
+ self.df = self.ts.scrape_by_user(user_name)
25
  # self.api_key = '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'
26
  openai.api_key = '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'
27
 
28
+ def scrape_tweets(self):
29
+ """
30
+ Scrapes tweets from the given date range.
31
+ """
32
+ self.ts.scrape_tweets()
33
+
34
  @staticmethod
35
  def cleanup_sentiment_results(classification_unclean):
36
  classification_clean = classification_unclean.replace('\n\n', "")
 
39
  classification_clean = classification_clean.replace(" ", "")
40
 
41
  return classification_clean
42
+ return response.choices[0]['text']
43
 
44
  def classify_sentiment(self, text: str):
45
  """
 
54
  "\nQuestions,\nStatements,\nOpinions,\nPredictions.\nSENTIMENT="
55
 
56
  response = openai.Completion.create(
57
+ model=self.model_name,
58
  prompt=prompt_string,
59
  temperature=0.0,
60
  max_tokens=256,
 
68
 
69
  return classification_clean.lower()
70
 
71
+ def classify_sentiment_of_tweets(self):
72
  """
73
  Classifies the sentiment of a user's tweets.
 
74
  """
75
+ df_sentiment = self.df
76
  df_sentiment['sentiment'] = df_sentiment['tweet'].apply(self.classify_sentiment)
77
  return df_sentiment
78
 
 
86
  prompt_string = "Who is the TARGET of this "
87
  prompt_string += sentiment
88
  prompt_string += " TWEET?\\nTWEET=\""
89
+ prompt_string += text
90
  prompt_string += "\"\\n.TARGET should consist of less than 5 words.\\nTARGET="
91
 
92
  response = openai.Completion.create(
 
144
 
145
  return tweet_dict
146
 
147
+ def classify_topic(self, text: str):
148
  """
149
  Classifies the topics of a text.
150
+ :param text: string of the tweet text.
151
+ """
152
+ assert isinstance(text, str)
153
+
154
+ prompt_string = "Classify one topic for this tweet:\n \""
155
+ prompt_string += text
156
+ prompt_string += "\" \nFor example:\nEconomy,\nEnvironment,\nHealth,\nPolitics,\nScience,\nSports,\nTechnology," \
157
+ "\nTransportation,\nWorld.\nTOPIC="
158
+
159
+ response = openai.Completion.create(
160
+ model=self.model_name,
161
+ prompt=prompt_string,
162
+ temperature=0,
163
+ max_tokens=892,
164
+ top_p=1,
165
+ frequency_penalty=0,
166
+ presence_penalty=0,
167
+ )
168
+ classification_unclean = response.choices[0]['text']
169
+ classification_clean = self.cleanup_topic_results(classification_unclean)
170
+
171
+ return classification_clean.lower()
172
+
173
+ def classify_topics_of_tweets(self):
174
+ """
175
+ Classifies the topics of a user's tweets.
176
  """
177
+ df_topic = self.df
178
+ df_topic['topic'] = df_topic['tweet'].apply(self.classify_topic)
179
+ return df_topic
180
 
181
+ def cleanup_topic_results(prediction_dict, text):
182
+ new_item = text.replace("\n", " ")
183
+ new_item = new_item.replace(" ", " ")
184
+ return new_item
185
 
186
 
187
  if __name__ == "__main__":
188
+ import pandas as pd
189
+ #pd.set_option('display.max_columns', None)
190
+ tc = TextClassifier(model_name="text-davinci-002", from_date='2022-01-01', to_date=str(date.today()), num_tweets=20, user_name="jimmieakesson")
191
+ print(tc.classify_topics_of_tweets())
192
+
193
+
194
+