Demea9000 committed on
Commit
bbd2927
·
1 Parent(s): 5c4ad0b

some more changes, ready to run

Browse files
Files changed (3) hide show
  1. app.py +22 -15
  2. data/topic_synonyms.txt +0 -43
  3. data/topic_synonyms_copy.txt +347 -0
app.py CHANGED
@@ -1,19 +1,20 @@
 
 
 
1
  import numpy as np
2
- from matplotlib import pyplot as plt, use as plt_use
3
- from textclassifier import TextClassifier as tc
4
  import pandas as pd
5
  import regex as re
6
- from pathlib import Path
7
- import glob
8
- import os
9
 
10
  plt_use('Agg')
11
 
12
  # from functions import functions as f
13
  # import time
14
-
15
  SELECTED_COLUMN_DICT = {
16
- 'merged_topic': ['tweet', 'main_topic', 'sub_topic', 'synonym_topic', 'cos_sim_topic', 'merged_topic'],
17
  'sentiment': ['tweet', 'sentiment'],
18
  'merged_target': ['tweet', 'target', 'synonym_target', 'cos_sim_target', 'merged_target']
19
  }
@@ -28,16 +29,16 @@ CHOICE_LIST = ['Topic', 'Sentiment', 'Target']
28
 
29
  # PLOT_CHOICES_DICT = {'Topic': 'sub_topic', 'Sentiment': 'sentiment', 'Target': 'target'} I just changed its value
30
  # to merged target and merged topic
31
- PLOT_CHOICES_DICT = {'Topic': 'merged_topic', 'Sentiment': 'sentiment', 'Target': 'merged_target'}
32
- PLOT_CHOICES_REVERSE_DICT = {'merged_topic': 'Topic', 'sentiment': 'Sentiment', 'merged_target': 'Target'}
33
  # PLOT_CHOICES_REVERSE_DICT= {'sub_topic':'Topic', 'sentiment':'Sentiment' , 'target':'Target'}
34
  UserNameDict = dict(zip(['Jimmie Åkesson', 'Ebba Busch', 'Annie Lööf', 'Johan Pehrson', 'Per Bolund',
35
  'Märta Stenevi', 'Magdalena Andersson', 'Nooshi Dadgostar'], USER_LIST))
36
 
37
  Columns = ['username', 'nlikes', 'nreplies', 'nretweets', 'main_topic', 'sub_topic', 'sentiment', 'target', 'tweet',
38
  'date', 'urls', 'id', 'class_tuple', 'user_id']
39
- NUM_TWEETS = 1000
40
- LIMIT = 0.05
41
 
42
 
43
  def show_all_stats(see_full_stats):
@@ -79,6 +80,7 @@ def main(from_date,
79
 
80
  ):
81
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
 
82
  # Describe what save_file_bool is for: if you want to save the dataframe to a file, this is the boolean for that
83
 
84
  def add_pie_chart(df, leaders, plot_choices):
@@ -122,7 +124,6 @@ def main(from_date,
122
  all_targets = ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
123
  db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
124
  percent_target = (len(db_new) / len(db)) * 100
125
- targets = db_new["merged_target"].value_counts().keys().to_list()
126
  positive = [0] * len(all_targets)
127
  negative = [0] * len(all_targets)
128
  neutral = [0] * len(all_targets)
@@ -181,9 +182,15 @@ def main(from_date,
181
  dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
182
  # choose subset between from_date and to_date and username is in usr_name_choices
183
  df = dataframe.loc[(dataframe['date'] >= from_date) & (dataframe['date'] <= to_date) & \
184
- (dataframe['username'].isin(match_name_lower_case(usr_name_choices)))].copy()
185
  # Sort df by date
186
  df.sort_values(by=['date'], inplace=True)
 
 
 
 
 
 
187
  if save_selected:
188
  user_list = match_name_lower_case(usr_name_choices)
189
  df_l = []
@@ -393,8 +400,8 @@ if __name__ == "__main__":
393
  with gr.Row():
394
  with gr.Column():
395
  with gr.Row():
396
- date1 = gr.Textbox(label="from_date", value='2022-05-10')
397
- date2 = gr.Textbox(label="to_date", value='2022-05-30')
398
  leaders = gr.Checkboxgroup(choices=USER_NAMES,
399
  label="")
400
  plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
 
1
+ import glob
2
+ from pathlib import Path
3
+
4
  import numpy as np
 
 
5
  import pandas as pd
6
  import regex as re
7
+ from matplotlib import pyplot as plt, use as plt_use
8
+
9
+ from textclassifier import TextClassifier as tc
10
 
11
  plt_use('Agg')
12
 
13
  # from functions import functions as f
14
  # import time
15
+ TOPIC = "merged_topic"
16
  SELECTED_COLUMN_DICT = {
17
+ TOPIC: ['tweet', 'main_topic', 'sub_topic', 'synonym_topic', 'cos_sim_topic', 'merged_topic'],
18
  'sentiment': ['tweet', 'sentiment'],
19
  'merged_target': ['tweet', 'target', 'synonym_target', 'cos_sim_target', 'merged_target']
20
  }
 
29
 
30
  # PLOT_CHOICES_DICT = {'Topic': 'sub_topic', 'Sentiment': 'sentiment', 'Target': 'target'} I just changed its value
31
  # to merged target and merged topic
32
+ PLOT_CHOICES_DICT = {'Topic': TOPIC, 'Sentiment': 'sentiment', 'Target': 'merged_target'}
33
+ PLOT_CHOICES_REVERSE_DICT = {TOPIC: 'Topic', 'sentiment': 'Sentiment', 'merged_target': 'Target'}
34
  # PLOT_CHOICES_REVERSE_DICT= {'sub_topic':'Topic', 'sentiment':'Sentiment' , 'target':'Target'}
35
  UserNameDict = dict(zip(['Jimmie Åkesson', 'Ebba Busch', 'Annie Lööf', 'Johan Pehrson', 'Per Bolund',
36
  'Märta Stenevi', 'Magdalena Andersson', 'Nooshi Dadgostar'], USER_LIST))
37
 
38
  Columns = ['username', 'nlikes', 'nreplies', 'nretweets', 'main_topic', 'sub_topic', 'sentiment', 'target', 'tweet',
39
  'date', 'urls', 'id', 'class_tuple', 'user_id']
40
+ # NUM_TWEETS = 1000
41
+ LIMIT = 0.04
42
 
43
 
44
  def show_all_stats(see_full_stats):
 
80
 
81
  ):
82
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
83
+
84
  # Describe what save_file_bool is for: if you want to save the dataframe to a file, this is the boolean for that
85
 
86
  def add_pie_chart(df, leaders, plot_choices):
 
124
  all_targets = ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
125
  db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
126
  percent_target = (len(db_new) / len(db)) * 100
 
127
  positive = [0] * len(all_targets)
128
  negative = [0] * len(all_targets)
129
  neutral = [0] * len(all_targets)
 
182
  dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
183
  # choose subset between from_date and to_date and username is in usr_name_choices
184
  df = dataframe.loc[(dataframe['date'] >= from_date) & (dataframe['date'] <= to_date) & \
185
+ (dataframe['username'].isin(match_name_lower_case(usr_name_choices)))].copy()
186
  # Sort df by date
187
  df.sort_values(by=['date'], inplace=True)
188
+ # Remove entries from df where 'tweet' starts with '@'
189
+ df = df[df['tweet'].str.startswith('@') == False]
190
+ # change 'merged_topic' to 'other' if it is 'ERROR_9000' or 'ERROR_496'
191
+ df['merged_topic'] = df['merged_topic'].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x)
192
+ # change 'merged_topic' to 'The Government' if it is 's'
193
+ df['merged_topic'] = df['merged_topic'].apply(lambda x: "The Government" if x == "s" else x)
194
  if save_selected:
195
  user_list = match_name_lower_case(usr_name_choices)
196
  df_l = []
 
400
  with gr.Row():
401
  with gr.Column():
402
  with gr.Row():
403
+ date1 = gr.Textbox(label="from_date", value='2022-01-01')
404
+ date2 = gr.Textbox(label="to_date", value='2022-05-31')
405
  leaders = gr.Checkboxgroup(choices=USER_NAMES,
406
  label="")
407
  plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
data/topic_synonyms.txt CHANGED
@@ -297,49 +297,6 @@ Valet 2022
297
  Almedalen
298
  Järvaveckan
299
  Partiledardebatt####
300
- v
301
- Nooshi Dadgostar
302
- Jonas Sjöstedt
303
- rooftop cats play physics with cardboard fire####
304
- mp
305
- Per Bolund
306
- Märta Stenevi
307
- ice piano flies with pencil as direction####
308
- s
309
- Magdalena Andersson
310
- Stefan Löfven
311
- Morgan Johansson
312
- Mikael Damberg
313
- Peter Hultqvist
314
- Khashayar Farmanbar
315
- Anders Ygeman
316
- Annika Strandhäll
317
- Lena Hallengren
318
- Ardalan Shekarabi
319
- Ann Linde
320
- The government
321
- the swedish government
322
- Regeringen
323
- Statsministern
324
- Prime minister
325
- lamp of fire walks bird gladly tomorrow####
326
- c
327
- Annie Lööf
328
- differential donuts program sunny waters####
329
- l
330
- Johan Pehrson
331
- Nyamko Sabuni
332
- Jan Björklund
333
- red weather jokes with music and the mathematician####
334
- kd
335
- Ebba Busch
336
- cauchy-riemann met sunglasses after rolling yellow####
337
- m
338
- Ulf Kristersson
339
- parrot computer is swimming as screen time####
340
- sd
341
- Jimmie Åkesson
342
- keyboard can hire the yellow elephant in cosmos####
343
  ERROR_9000
344
  sweden####
345
  ERROR_496
 
297
  Almedalen
298
  Järvaveckan
299
  Partiledardebatt####
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  ERROR_9000
301
  sweden####
302
  ERROR_496
data/topic_synonyms_copy.txt ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Act of Crime
2
+ Gang violence
3
+ Robbing
4
+ Riots
5
+ Murder
6
+ Misshandel
7
+ Assault
8
+ Drug trafficking####
9
+ Central government administration
10
+ development of services and efficiency
11
+ regional coordination in the counties
12
+ the responsibility of the state as an employer
13
+ maintenance of state properties
14
+ administration of state properties####
15
+ Children’s rights
16
+ child protection
17
+ interests of children
18
+ Care of Young Persons Act
19
+ Lagen om vård av unga
20
+ LVU####
21
+ Civil society and sport
22
+ non-profit organisations
23
+ football
24
+ soccer
25
+ hockey
26
+ tennis
27
+ skiing
28
+ registered faith communities####
29
+ Combating terrorism
30
+ money laundering
31
+ Counterterrorism
32
+ Isis
33
+ Bombs
34
+ mass shootings####
35
+ Consumer affairs
36
+ Consumers right####
37
+ Culture
38
+ Dance
39
+ Music
40
+ Art
41
+ Jul
42
+ Midsummer
43
+ Carnival
44
+ Food
45
+ Celebration
46
+ Tradition
47
+ Påsk
48
+ Easter
49
+ Halloween
50
+ Valborg
51
+ Festivity
52
+ Christmas
53
+ New years eve####
54
+ Defense and military
55
+ Försvarsmakten
56
+ Lumpen
57
+ Värnplikt
58
+ Artillery
59
+ Airstrike
60
+ Offensive front
61
+ Coast Guard
62
+ Ground Troops
63
+ Drone strike
64
+ Security Service
65
+ Navy
66
+ Säpo####
67
+ Democracy and human rights
68
+ Freedom of speech
69
+ Freedom of press
70
+ Humanity
71
+ Abortion
72
+ Mänskliga rättigheter####
73
+ Regulation of media and electronics and communication
74
+ Digital policy
75
+ Digitalisation
76
+ Digital infrastructure
77
+ Network and information security
78
+ Broadband access
79
+ Electronic identification
80
+ Technology####
81
+ Economic policy
82
+ Inflation
83
+ Fuel cost
84
+ Cost of living
85
+ Budget
86
+ Rate of interest
87
+ Ränta
88
+ Debt
89
+ Skuld
90
+ Income
91
+ Expenditures
92
+ Taxes and tariffs
93
+ carbon tax
94
+ VAT and excise duties
95
+ Distribution of wealth
96
+ Tax control
97
+ Audit####
98
+ Education and research
99
+ University
100
+ Utbildning
101
+ Skola
102
+ School
103
+ Dagis
104
+ Förskola
105
+ Lågstadiet
106
+ Mellanstadiet
107
+ Högstadiet
108
+ Undervisning
109
+ Högskola
110
+ CSN
111
+ Centrala Studienämnden
112
+ Doktoranden####
113
+ Emergency preparedness
114
+ Wildfire
115
+ Crises
116
+ Naturkatastrofer
117
+ Underground shelters####
118
+ Energy
119
+ Nuclear Power
120
+ Solar
121
+ Wind
122
+ Hydropower
123
+ Turbine
124
+ Kraftvärme
125
+ Kärnkraft
126
+ Solkraft
127
+ Vindkraft
128
+ Vattenkraft
129
+ Electricity production
130
+ Energy production
131
+ Thermal Power####
132
+ Enterprise and industry####
133
+ Environment and climate
134
+ Climate change
135
+ Global warming
136
+ Emission
137
+ Hazardous toxins
138
+ Animals and nature
139
+ Sustainability
140
+ Waste
141
+ Paris agreement
142
+ Extreme weather
143
+ Heatwaves
144
+ Droughts
145
+ Natural disaster
146
+ Pollution####
147
+ Family law
148
+ Gode man
149
+ Inheritance####
150
+ Foreign policy
151
+ eu
152
+ European Union
153
+ Nato
154
+ Un
155
+ United Nations
156
+ foreign affairs####
157
+ Gambling policy####
158
+ Gender equality
159
+ women's rights
160
+ Equal pay
161
+ Equal salaries
162
+ Equal opportunities####
163
+ Housing and community planning
164
+ Property information
165
+ Housing Market
166
+ Community planning####
167
+ Innovation
168
+ improved solutions####
169
+ Integration
170
+ Anti-segregation policy
171
+ Nyanländas etablering
172
+ Utanförskapsområde
173
+ Introduction of new arrivals
174
+ SFI####
175
+ International development cooperation
176
+ Improvement of life of poor people####
177
+ International law####
178
+ Judicial system
179
+ legal security
180
+ Court
181
+ Law and legal security
182
+ Prosecutor
183
+ Judge
184
+ Correctional treatment
185
+ Custody
186
+ Prison
187
+ Police
188
+ Enforcing sentences
189
+ punishment####
190
+ Labour
191
+ Work environment
192
+ Conditions in working life
193
+ Fair working conditions
194
+ Mediation in labour disputes
195
+ working environment
196
+ Labour market
197
+ Matching of jobseekers and vacancies
198
+ Unemployment
199
+ Arbetsförmedlingen####
200
+ LGBTQ
201
+ non-binary####
202
+ Media
203
+ SVT
204
+ TV5
205
+ TV4
206
+ Sveriges radio
207
+ Television
208
+ The daily press
209
+ Film
210
+ Twitter
211
+ Facebook
212
+ Instagram####
213
+ Migration and asylum
214
+ Refugee
215
+ Citizenship
216
+ Immigrants
217
+ Residence permit####
218
+ Municipalities and regions
219
+ County councils
220
+ County administrative boards
221
+ Municipal council
222
+ local government
223
+ Municipal matters####
224
+ Public health and medical care
225
+ Hospitals
226
+ Treatment
227
+ Alcohol and Tobacco consumption
228
+ Mortality
229
+ Diseases
230
+ Pandemic
231
+ Covid-19
232
+ Vaccines####
233
+ Public procurement
234
+ Offentlig upphandling####
235
+ Racial discrimination
236
+ Racist
237
+ Nazi
238
+ Fascist
239
+ Hatred against ethnic groups
240
+ Xenophobia
241
+ Anti-Semitism
242
+ Brunskjorta####
243
+ Regional development####
244
+ Rural affairs
245
+ Livestock
246
+ Fisheries
247
+ Hunting and game management
248
+ Agriculture
249
+ Forestry
250
+ Food access and security####
251
+ Social insurance
252
+ Financial security
253
+ Sickness insurance
254
+ Pensions
255
+ Parental insurance
256
+ Benefits for parents####
257
+ Social services including care for older people
258
+ Individual care
259
+ Family care
260
+ Support for people with disabilities
261
+ Elderly care####
262
+ The Constitution and personal privacy
263
+ Instrument of Government
264
+ Act of Succession
265
+ Freedom of the Press Act
266
+ Freedom of Expression
267
+ Official documents
268
+ Personal privacy of individuals####
269
+ Transport and infrastructure
270
+ transport services
271
+ Railways
272
+ Roads
273
+ shipping and aviation
274
+ transport and infrastructure research
275
+ Vehicle
276
+ Car
277
+ Trains
278
+ Airport
279
+ Transit
280
+ Highways
281
+ Aviation
282
+ Maritime
283
+ Tunnels
284
+ Water management
285
+ Bridges
286
+ Urban planning
287
+ Coastal engineering
288
+ Road safety####
289
+ Youth policy
290
+ Young people conditions
291
+ Young people
292
+ Youth issues
293
+ Opportunities for young people####
294
+ Election
295
+ Val
296
+ Valet 2022
297
+ Almedalen
298
+ Järvaveckan
299
+ Partiledardebatt####
300
+ v
301
+ Nooshi Dadgostar
302
+ Jonas Sjöstedt
303
+ rooftop cats play physics with cardboard fire####
304
+ mp
305
+ Per Bolund
306
+ Märta Stenevi
307
+ ice piano flies with pencil as direction####
308
+ s
309
+ Magdalena Andersson
310
+ Stefan Löfven
311
+ Morgan Johansson
312
+ Mikael Damberg
313
+ Peter Hultqvist
314
+ Khashayar Farmanbar
315
+ Anders Ygeman
316
+ Annika Strandhäll
317
+ Lena Hallengren
318
+ Ardalan Shekarabi
319
+ Ann Linde
320
+ The government
321
+ the swedish government
322
+ Regeringen
323
+ Statsministern
324
+ Prime minister
325
+ lamp of fire walks bird gladly tomorrow####
326
+ c
327
+ Annie Lööf
328
+ differential donuts program sunny waters####
329
+ l
330
+ Johan Pehrson
331
+ Nyamko Sabuni
332
+ Jan Björklund
333
+ red weather jokes with music and the mathematician####
334
+ kd
335
+ Ebba Busch
336
+ cauchy-riemann met sunglasses after rolling yellow####
337
+ m
338
+ Ulf Kristersson
339
+ parrot computer is swimming as screen time####
340
+ sd
341
+ Jimmie Åkesson
342
+ keyboard can hire the yellow elephant in cosmos####
343
+ ERROR_9000
344
+ sweden####
345
+ ERROR_496
346
+ n/a
347
+ none####