Demea9000 committed on
Commit
34e5763
1 Parent(s): 46b8217

fixed minor bugs

Browse files
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
4
  </project>
.idea/politweet.iml CHANGED
@@ -6,7 +6,7 @@
6
  <excludeFolder url="file://$MODULE_DIR$/env" />
7
  <excludeFolder url="file://$MODULE_DIR$/venv" />
8
  </content>
9
- <orderEntry type="jdk" jdkName="Python 3.10 (politweet)" jdkType="Python SDK" />
10
  <orderEntry type="sourceFolder" forTests="false" />
11
  </component>
12
  <component name="PyNamespacePackagesService">
 
6
  <excludeFolder url="file://$MODULE_DIR$/env" />
7
  <excludeFolder url="file://$MODULE_DIR$/venv" />
8
  </content>
9
+ <orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
10
  <orderEntry type="sourceFolder" forTests="false" />
11
  </component>
12
  <component name="PyNamespacePackagesService">
app.py CHANGED
@@ -15,11 +15,11 @@ import matplotlib.pyplot as plt
15
  from functions import functions as f
16
  import time
17
 
18
-
19
- SELECTED_COLUMN_DICT = { 'merged_topic': ['tweet', 'main_topic' , 'sub_topic' ,'synonym_topic' , 'cos_sim_topic', 'merged_topic' ],
20
- 'sentiment':['tweet', 'sentiment'],
21
- 'merged_target': ['tweet', 'target','synonym_target', 'cos_sim_target' , 'merged_target']
22
- }
23
 
24
  USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
25
  'dadgostarnooshi']
@@ -42,16 +42,16 @@ num_tweet = 1000
42
  LIMIT = 0.05
43
 
44
 
45
- def show_all_stats(SeeFullStats):
46
  dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
47
- if SeeFullStats:
48
  return dataframe
49
  else:
50
  return pd.DataFrame()
51
 
52
 
53
- def fixChoicesCorrectOrder(Choices):
54
- ListChoices = [x for x in Columns if x in Choices]
55
  return ListChoices
56
 
57
 
@@ -74,14 +74,12 @@ def main(From,
74
  To,
75
  UserNameChoices,
76
  plot_choice,
77
- save_selected ,
78
  rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8,
79
- v1, v2, v3, v4, v5, v6, v7, v8 ,
80
  s1, s2, s3, s4, s5, s6, s7, s8
81
 
82
  ):
83
-
84
-
85
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
86
 
87
  def Add_Pychart(df, leaders, plot_choices):
@@ -102,7 +100,7 @@ def main(From,
102
 
103
  for db in df_list:
104
  for col in PLOT_CHOICES_REVERSE_DICT: # plot_choices:
105
- if col=='merged_target':
106
  pie_charts.append(bar(db[0], col + ": " + db[1]))
107
  else:
108
  pie_charts.append(pie_chart(db[0], col, col + ": " + db[1]))
@@ -116,43 +114,47 @@ def main(From,
116
  if db.empty:
117
  return None
118
  else:
119
- db['merged_target']= db["merged_target"].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x) # replacing Different Error type with string "other"
120
- db['sentiment'] = db['sentiment'].apply(lambda x: re.sub('\s+', "", x)) # removing extra spaces in at the end and beginning of the sentiments.
 
 
121
  # This can be removed after we remove all unnessary spaces from twitter data
122
- all_targets= ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
123
  db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
124
- percent_target = (len(db_new) / len(db))*100
125
- targets= db_new["merged_target"].value_counts().keys().to_list()
126
- positive=[0]*len(all_targets)
127
- negative=[0]*len(all_targets)
128
- neutral=[0]*len(all_targets)
129
- other =[0]*len(all_targets)
130
- for i,target in enumerate(all_targets):
131
- temp_db= db_new.loc[db_new["merged_target"] == target]
132
  if temp_db.empty:
133
  pass
134
  else:
135
  sent = temp_db['sentiment'].to_list()
136
- positive[i] +=sent.count('positive')
137
  negative[i] += sent.count('negative')
138
  neutral[i] += sent.count('neutral')
139
- other[i] += sent.count('other')
140
  font1 = {'family': 'serif', 'color': 'blue', 'size': 10}
141
  fig = plt.figure()
142
- y1 = np.array(positive)/len(db_new)
143
- y2 = np.array(negative)/len(db_new)
144
- y3 = np.array(neutral)/len(db_new)
145
- y4 = np.array(other)/len(db_new)
146
- plt.bar(all_targets, y1 , color='g')
147
- plt.bar(all_targets, y2 , bottom=y1, color='r')
148
- plt.bar(all_targets, y3 , bottom=(y1+y2), color ='yellow')
149
- plt.bar(all_targets, y4 , bottom=(y1+y2+y3) , color= 'b')
150
  plt.xticks(rotation=15)
151
  plt.ylim(0, 1)
152
- plt.title( str(percent_target)[0:4] + "% "+ " of tweets have target. "+ "Number of tweets with target:" +str(len(db_new)),loc='right',fontdict=font1)
153
- #plt.xlabel("Targets")
 
 
154
  plt.ylabel("Procent")
155
- plt.legend(["positive","negative", "neutral","other"])
156
  return fig
157
 
158
  def pie_chart(db, col_name, title):
@@ -178,46 +180,50 @@ def main(From,
178
  # os.path.dirname(os.path.abspath(__file__))) + "/politweet/data/twitterdata.csv") #
179
  df = dataframe
180
 
181
-
182
  if save_selected:
183
  user_list = MatchNameToUser(UserNameChoices)
184
- df_l=[]
185
  for user in user_list:
186
- df_l.append( pd.DataFrame(df.loc[df['username']== user]) )
187
 
188
- selected_df= pd.concat(df_l).reset_index(drop=True)
189
- export_to_download(selected_df,"selected_leaders")
190
- save_selected_checkbox= [gr.Checkbox.update(interactive=False)]
191
 
 
 
192
 
193
  pycharts = Add_Pychart(df, UserNameChoices, convert_plot_choices(plot_choice))
194
 
195
- rb_components = [rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8] #radio_buttons
196
- df_visibility_check = [v1,v2,v3,v4,v5,v6,v7,v8]
197
- def get_selected_df_list(d_frame,save_or_no ,selected_users, radio, visiblity):
 
198
 
199
  leader_bool_list = [True if leader in selected_users else False for leader in USER_NAMES]
200
- df_list=[]
201
  number_tweets = []
202
- save_file_components_list =[]
203
- for i , u_bool in enumerate(leader_bool_list):
204
  user_df = d_frame.loc[d_frame['username'] == USER_LIST[i]]
205
- number_tweets.append(gr.Number.update(value=len(user_df),visible=u_bool))
206
 
207
  if save_or_no[i]:
208
- export_to_download(pd.DataFrame(user_df) ,"one_leader" )
209
- save_file_components_list.append( gr.Checkbox.update(visible=u_bool , interactive=False) )
210
  else:
211
- save_file_components_list.append( gr.Checkbox.update(visible=u_bool) )
212
 
213
- if u_bool and visiblity[i]:
214
- df_list.append( get_exemple_df(user_df,PLOT_CHOICES_DICT[radio[i]]) )
215
  else:
216
  df_list.append(None)
217
 
218
- return df_list +number_tweets+save_file_components_list
 
 
 
219
 
220
- return pycharts + save_selected_checkbox +get_selected_df_list(df,save_file_bool,list(UserNameChoices), rb_components, df_visibility_check)
221
 
222
  ''' END OF MAIN
223
  ####
@@ -227,35 +233,31 @@ def main(From,
227
  '''
228
 
229
 
230
-
231
-
232
-
233
- def get_exemple_df(df:pd.DataFrame, column:str):
234
  print(column)
235
- df=df[SELECTED_COLUMN_DICT[column] ]
236
  unique_labels = df[column].value_counts().keys()
237
- stat=[]
238
  for label in unique_labels:
239
  df_temp = df.loc[df[column] == label]
240
  if len(df_temp) > 5:
241
- df_temp =df_temp[0:5]
242
  stat.append(df_temp)
243
 
244
- exemple_df= pd.concat(stat)
245
 
246
- #stat =stat.reset_index(drop=True) just in case u want to reset indexing
247
 
248
  return exemple_df
249
 
250
 
251
- def export_to_download(_data_frame,_type:str ):
252
-
253
  downloads_path = str(Path.home()) + "/Downloads/"
254
  if _type == "one_leader":
255
- file_name = _data_frame['username'].to_list()[0] #df['username'][0] + "_data"
256
  else:
257
  file_name = "selected_leaders"
258
- full_path = downloads_path + file_name+".csv"
259
 
260
  while full_path in glob.glob(downloads_path + "*"):
261
  search_list = re.findall('\p{N}+', full_path)
@@ -268,24 +270,22 @@ def export_to_download(_data_frame,_type:str ):
268
 
269
  _data_frame.to_csv(full_path, index=False)
270
 
 
271
 
272
 
273
-
274
-
275
-
276
- # , pie_chart(df, "main_topic"), pie_chart("target")
277
  def piechart_input(df, column, limit):
278
  df_len = len(df)
279
  df_v = df[column].value_counts()
280
  df_len = len(df)
281
  if column == "sentiment":
282
  ds_sentiment = df[column].apply(lambda x: re.sub("\s+", "", str(x)))
283
- df_v = ds_sentiment.apply(lambda x: x if str(x).lower() == "positive" or str(x).lower() == "negative" or str(x).lower() == "neutral" else "other").value_counts()
 
284
  elif column == "merged_target":
285
  ds_target = df[column].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x)
286
  df_v = ds_target.value_counts()
287
  freq = df_v.to_list()
288
- labels= df_v.keys().to_list
289
  freq_dict = {}
290
  freq_dict[column] = labels
291
  freq_dict["frequency"] = freq
@@ -321,50 +321,34 @@ def convert_to_boolean(leaders, plot_choices):
321
  return leaders_converted, plot_converted
322
 
323
 
324
-
325
-
326
-
327
-
328
-
329
-
330
-
331
-
332
-
333
-
334
-
335
-
336
  def update_window(leaders: list, plot_choices: list,
337
  v1, v2, v3, v4, v5, v6, v7, v8
338
  ):
339
-
340
-
341
  leader_bool_list, plot_bool_list = convert_to_boolean(leaders, plot_choices)
342
 
343
  bool_list = []
344
  df_visiblity_bool = [v1, v2, v3, v4, v5, v6, v7, v8]
345
 
346
-
347
- #this loop sets boolean for plots
348
  for leader in leader_bool_list:
349
  if leader:
350
  for choice in plot_bool_list:
351
  bool_list.append(choice)
352
- #bool_list.append(True) ## this is for radio component
353
  else:
354
  for i in range(len(plot_bool_list)):
355
  bool_list.append(False)
356
- #bool_list.append(False)
357
 
358
  update_blocks = []
359
  update_plots = []
360
  update_radio = []
361
- update_nr_tweet =[]
362
  update_checkbox = []
363
- update_save_file_checkboxes =[]
364
  update_df = []
365
 
366
- #all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list
367
-
368
 
369
  for i, vis_or_not in enumerate(leader_bool_list):
370
  update_blocks.append(gr.Row.update(visible=vis_or_not))
@@ -374,10 +358,10 @@ def update_window(leaders: list, plot_choices: list,
374
  update_df.append(gr.DataFrame.update(visible=df_visiblity_bool[i]))
375
  else:
376
 
377
- update_blocks.append(gr.Row.update(visible=False ))
378
- update_df.append(gr.DataFrame.update(visible= False ))
379
 
380
- update_nr_tweet.append( gr.Number.update(visible = vis_or_not) )
381
  update_radio.append(gr.Radio.update(visible=vis_or_not))
382
  update_checkbox.append(gr.Checkbox.update(visible=vis_or_not))
383
  update_save_file_checkboxes.append(gr.Checkbox.update(visible=vis_or_not))
@@ -387,34 +371,20 @@ def update_window(leaders: list, plot_choices: list,
387
  return update_blocks + update_plots + update_radio + update_nr_tweet + update_checkbox + update_save_file_checkboxes + update_df
388
 
389
 
390
-
391
-
392
-
393
-
394
-
395
-
396
-
397
-
398
-
399
-
400
-
401
-
402
  def add_plots(user):
403
  plot_list = []
404
  for plot_type in PLOT_CHOICES_DICT:
405
- plot_list.append(gr.Plot(label=plot_type+ " for " + user, visible=False))
406
  return plot_list
407
 
408
 
409
-
410
-
411
-
412
  def add_nbr_boxes():
413
  return [gr.Number(value=0, label='Tweets by ' + user, visible=False) for user in USER_NAMES]
414
 
415
 
416
  if __name__ == "__main__":
417
  import gradio as gr
 
418
  demo = gr.Blocks(title='Politweet')
419
  with demo:
420
  with gr.Column():
@@ -427,7 +397,7 @@ if __name__ == "__main__":
427
  label="")
428
  plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
429
 
430
- save_selected_data_checkbox= gr.Checkbox(label="Export selected data")
431
  with gr.Row():
432
  update = gr.Button('Apply')
433
  btn = gr.Button("Run")
@@ -436,52 +406,49 @@ if __name__ == "__main__":
436
  # show_plots = gr.components.Checkbox(label='Show topics', value=True)
437
  with gr.Column():
438
  selected = gr.DataFrame(label="Summary statistics for the selected choices",
439
- max_rows=None, visible=False)
440
- # all_data = gr.components.DataFrame(label="Summary statistics of the total database", max_rows=None)
 
441
 
442
  plots = []
443
  radio_list = []
444
  checkbox_list = []
445
  df_list = []
446
  block_list = []
447
- saving_file_checkboxes =[]
448
  nr_tweet_list = []
449
  with gr.Column():
450
  for i in range(len(USER_NAMES)):
451
- block_list +=[gr.Row()] * 3
452
  for i, leader in enumerate(USER_NAMES):
453
  with gr.Row():
454
  plots += add_plots(leader)
455
  with gr.Row():
456
- radio_list.append(gr.Radio(list(PLOT_CHOICES_DICT.keys()), visible=False ,interactive=True))
457
- nr_tweet_list.append( gr.Number(visible=False) )
458
- checkbox_list.append(gr.Checkbox(label="Show stats ",value=False,visible=False))
459
- saving_file_checkboxes.append( gr.Checkbox(label= "Export file" , value=False , visible= False) )
460
 
461
  with gr.Row():
462
  df_list.append(gr.DataFrame(visible=False))
463
 
464
-
465
  inp = [date1,
466
  date2,
467
  leaders,
468
- plot_choices , save_selected_data_checkbox] + radio_list + checkbox_list + saving_file_checkboxes
469
 
470
- output = plots + [save_selected_data_checkbox]+ df_list + nr_tweet_list + saving_file_checkboxes
471
 
472
-
473
- all_visual = block_list + plots + radio_list + nr_tweet_list +checkbox_list + saving_file_checkboxes +df_list #+ df_list # df_comps
474
 
475
  update_inp = [leaders, plot_choices] + checkbox_list
476
 
477
-
478
  update.click(fn=update_window, inputs=update_inp, outputs=all_visual)
479
 
480
  btn.click(fn=main, inputs=inp, outputs=output)
481
  # input.change(fn=main, inputs=input, outputs=output)
482
  demo.launch(share=False)
483
 
 
484
 
485
- #df= pd.read_csv(os.getcwd()+"/data/twitterdata.csv")
486
-
487
- #https://51285.gradio.app
 
15
  from functions import functions as f
16
  import time
17
 
18
+ SELECTED_COLUMN_DICT = {
19
+ 'merged_topic': ['tweet', 'main_topic', 'sub_topic', 'synonym_topic', 'cos_sim_topic', 'merged_topic'],
20
+ 'sentiment': ['tweet', 'sentiment'],
21
+ 'merged_target': ['tweet', 'target', 'synonym_target', 'cos_sim_target', 'merged_target']
22
+ }
23
 
24
  USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
25
  'dadgostarnooshi']
 
42
  LIMIT = 0.05
43
 
44
 
45
+ def show_all_stats(see_full_stats):
46
  dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
47
+ if see_full_stats:
48
  return dataframe
49
  else:
50
  return pd.DataFrame()
51
 
52
 
53
+ def fixChoicesCorrectOrder(choices):
54
+ ListChoices = [x for x in Columns if x in choices]
55
  return ListChoices
56
 
57
 
 
74
  To,
75
  UserNameChoices,
76
  plot_choice,
77
+ save_selected,
78
  rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8,
79
+ v1, v2, v3, v4, v5, v6, v7, v8,
80
  s1, s2, s3, s4, s5, s6, s7, s8
81
 
82
  ):
 
 
83
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
84
 
85
  def Add_Pychart(df, leaders, plot_choices):
 
100
 
101
  for db in df_list:
102
  for col in PLOT_CHOICES_REVERSE_DICT: # plot_choices:
103
+ if col == 'merged_target':
104
  pie_charts.append(bar(db[0], col + ": " + db[1]))
105
  else:
106
  pie_charts.append(pie_chart(db[0], col, col + ": " + db[1]))
 
114
  if db.empty:
115
  return None
116
  else:
117
+ db['merged_target'] = db["merged_target"].apply(lambda
118
+ x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x) # replacing Different Error type with string "other"
119
+ db['sentiment'] = db['sentiment'].apply(
120
+ lambda x: re.sub('\s+', "", x)) # removing extra spaces in at the end and beginning of the sentiments.
121
  # This can be removed after we remove all unnessary spaces from twitter data
122
+ all_targets = ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
123
  db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
124
+ percent_target = (len(db_new) / len(db)) * 100
125
+ targets = db_new["merged_target"].value_counts().keys().to_list()
126
+ positive = [0] * len(all_targets)
127
+ negative = [0] * len(all_targets)
128
+ neutral = [0] * len(all_targets)
129
+ other = [0] * len(all_targets)
130
+ for i, target in enumerate(all_targets):
131
+ temp_db = db_new.loc[db_new["merged_target"] == target]
132
  if temp_db.empty:
133
  pass
134
  else:
135
  sent = temp_db['sentiment'].to_list()
136
+ positive[i] += sent.count('positive')
137
  negative[i] += sent.count('negative')
138
  neutral[i] += sent.count('neutral')
139
+ other[i] += sent.count('other')
140
  font1 = {'family': 'serif', 'color': 'blue', 'size': 10}
141
  fig = plt.figure()
142
+ y1 = np.array(positive) / len(db_new) if len(db_new) > 0 else np.array(positive)
143
+ y2 = np.array(negative) / len(db_new) if len(db_new) > 0 else np.array(negative)
144
+ y3 = np.array(neutral) / len(db_new) if len(db_new) > 0 else np.array(neutral)
145
+ y4 = np.array(other) / len(db_new) if len(db_new) > 0 else np.array(other)
146
+ plt.bar(all_targets, y1, color='g')
147
+ plt.bar(all_targets, y2, bottom=y1, color='r')
148
+ plt.bar(all_targets, y3, bottom=(y1 + y2), color='yellow')
149
+ plt.bar(all_targets, y4, bottom=(y1 + y2 + y3), color='b')
150
  plt.xticks(rotation=15)
151
  plt.ylim(0, 1)
152
+ plt.title(
153
+ str(percent_target)[0:4] + "% " + " of tweets have target. " + "Number of tweets with target:" + str(
154
+ len(db_new)), loc='right', fontdict=font1)
155
+ # plt.xlabel("Targets")
156
  plt.ylabel("Procent")
157
+ plt.legend(["positive", "negative", "neutral", "other"])
158
  return fig
159
 
160
  def pie_chart(db, col_name, title):
 
180
  # os.path.dirname(os.path.abspath(__file__))) + "/politweet/data/twitterdata.csv") #
181
  df = dataframe
182
 
 
183
  if save_selected:
184
  user_list = MatchNameToUser(UserNameChoices)
185
+ df_l = []
186
  for user in user_list:
187
+ df_l.append(pd.DataFrame(df.loc[df['username'] == user]))
188
 
189
+ selected_df = pd.concat(df_l).reset_index(drop=True)
190
+ export_to_download(selected_df, "selected_leaders")
191
+ save_selected_checkbox = [gr.Checkbox.update(interactive=False)]
192
 
193
+ else:
194
+ save_selected_checkbox = [gr.Checkbox.update(interactive=True)]
195
 
196
  pycharts = Add_Pychart(df, UserNameChoices, convert_plot_choices(plot_choice))
197
 
198
+ rb_components = [rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8] # radio_buttons
199
+ df_visibility_check = [v1, v2, v3, v4, v5, v6, v7, v8]
200
+
201
+ def get_selected_df_list(d_frame, save_or_no, selected_users, radio, visibility):
202
 
203
  leader_bool_list = [True if leader in selected_users else False for leader in USER_NAMES]
204
+ df_list = []
205
  number_tweets = []
206
+ save_file_components_list = []
207
+ for i, u_bool in enumerate(leader_bool_list):
208
  user_df = d_frame.loc[d_frame['username'] == USER_LIST[i]]
209
+ number_tweets.append(gr.Number.update(value=len(user_df), visible=u_bool))
210
 
211
  if save_or_no[i]:
212
+ export_to_download(pd.DataFrame(user_df), "one_leader")
213
+ save_file_components_list.append(gr.Checkbox.update(visible=u_bool, interactive=False))
214
  else:
215
+ save_file_components_list.append(gr.Checkbox.update(visible=u_bool))
216
 
217
+ if u_bool and visibility[i]:
218
+ df_list.append(get_exemple_df(user_df, PLOT_CHOICES_DICT[radio[i]]))
219
  else:
220
  df_list.append(None)
221
 
222
+ return df_list + number_tweets + save_file_components_list
223
+
224
+ return pycharts + save_selected_checkbox + get_selected_df_list(df, save_file_bool, list(UserNameChoices),
225
+ rb_components, df_visibility_check)
226
 
 
227
 
228
  ''' END OF MAIN
229
  ####
 
233
  '''
234
 
235
 
236
+ def get_exemple_df(df: pd.DataFrame, column: str):
 
 
 
237
  print(column)
238
+ df = df[SELECTED_COLUMN_DICT[column]]
239
  unique_labels = df[column].value_counts().keys()
240
+ stat = []
241
  for label in unique_labels:
242
  df_temp = df.loc[df[column] == label]
243
  if len(df_temp) > 5:
244
+ df_temp = df_temp[0:5]
245
  stat.append(df_temp)
246
 
247
+ exemple_df = pd.concat(stat)
248
 
249
+ # stat =stat.reset_index(drop=True) just in case u want to reset indexing
250
 
251
  return exemple_df
252
 
253
 
254
+ def export_to_download(_data_frame, _type: str):
 
255
  downloads_path = str(Path.home()) + "/Downloads/"
256
  if _type == "one_leader":
257
+ file_name = _data_frame['username'].to_list()[0] # df['username'][0] + "_data"
258
  else:
259
  file_name = "selected_leaders"
260
+ full_path = downloads_path + file_name + ".csv"
261
 
262
  while full_path in glob.glob(downloads_path + "*"):
263
  search_list = re.findall('\p{N}+', full_path)
 
270
 
271
  _data_frame.to_csv(full_path, index=False)
272
 
273
+ # , pie_chart(df, "main_topic"), pie_chart("target")
274
 
275
 
 
 
 
 
276
  def piechart_input(df, column, limit):
277
  df_len = len(df)
278
  df_v = df[column].value_counts()
279
  df_len = len(df)
280
  if column == "sentiment":
281
  ds_sentiment = df[column].apply(lambda x: re.sub("\s+", "", str(x)))
282
+ df_v = ds_sentiment.apply(lambda x: x if str(x).lower() == "positive" or str(x).lower() == "negative" or str(
283
+ x).lower() == "neutral" else "other").value_counts()
284
  elif column == "merged_target":
285
  ds_target = df[column].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x)
286
  df_v = ds_target.value_counts()
287
  freq = df_v.to_list()
288
+ labels = df_v.keys().to_list
289
  freq_dict = {}
290
  freq_dict[column] = labels
291
  freq_dict["frequency"] = freq
 
321
  return leaders_converted, plot_converted
322
 
323
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  def update_window(leaders: list, plot_choices: list,
325
  v1, v2, v3, v4, v5, v6, v7, v8
326
  ):
 
 
327
  leader_bool_list, plot_bool_list = convert_to_boolean(leaders, plot_choices)
328
 
329
  bool_list = []
330
  df_visiblity_bool = [v1, v2, v3, v4, v5, v6, v7, v8]
331
 
332
+ # this loop sets boolean for plots
 
333
  for leader in leader_bool_list:
334
  if leader:
335
  for choice in plot_bool_list:
336
  bool_list.append(choice)
337
+ # bool_list.append(True) ## this is for radio component
338
  else:
339
  for i in range(len(plot_bool_list)):
340
  bool_list.append(False)
341
+ # bool_list.append(False)
342
 
343
  update_blocks = []
344
  update_plots = []
345
  update_radio = []
346
+ update_nr_tweet = []
347
  update_checkbox = []
348
+ update_save_file_checkboxes = []
349
  update_df = []
350
 
351
+ # all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list
 
352
 
353
  for i, vis_or_not in enumerate(leader_bool_list):
354
  update_blocks.append(gr.Row.update(visible=vis_or_not))
 
358
  update_df.append(gr.DataFrame.update(visible=df_visiblity_bool[i]))
359
  else:
360
 
361
+ update_blocks.append(gr.Row.update(visible=False))
362
+ update_df.append(gr.DataFrame.update(visible=False))
363
 
364
+ update_nr_tweet.append(gr.Number.update(visible=vis_or_not))
365
  update_radio.append(gr.Radio.update(visible=vis_or_not))
366
  update_checkbox.append(gr.Checkbox.update(visible=vis_or_not))
367
  update_save_file_checkboxes.append(gr.Checkbox.update(visible=vis_or_not))
 
371
  return update_blocks + update_plots + update_radio + update_nr_tweet + update_checkbox + update_save_file_checkboxes + update_df
372
 
373
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  def add_plots(user):
375
  plot_list = []
376
  for plot_type in PLOT_CHOICES_DICT:
377
+ plot_list.append(gr.Plot(label=plot_type + " for " + user, visible=False))
378
  return plot_list
379
 
380
 
 
 
 
381
  def add_nbr_boxes():
382
  return [gr.Number(value=0, label='Tweets by ' + user, visible=False) for user in USER_NAMES]
383
 
384
 
385
  if __name__ == "__main__":
386
  import gradio as gr
387
+
388
  demo = gr.Blocks(title='Politweet')
389
  with demo:
390
  with gr.Column():
 
397
  label="")
398
  plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
399
 
400
+ save_selected_data_checkbox = gr.Checkbox(label="Export selected data")
401
  with gr.Row():
402
  update = gr.Button('Apply')
403
  btn = gr.Button("Run")
 
406
  # show_plots = gr.components.Checkbox(label='Show topics', value=True)
407
  with gr.Column():
408
  selected = gr.DataFrame(label="Summary statistics for the selected choices",
409
+ max_rows=None, visible=False)
410
+ # all_data = gr.components.DataFrame(label="Summary statistics of the total database",
411
+ # max_rows=None)
412
 
413
  plots = []
414
  radio_list = []
415
  checkbox_list = []
416
  df_list = []
417
  block_list = []
418
+ saving_file_checkboxes = []
419
  nr_tweet_list = []
420
  with gr.Column():
421
  for i in range(len(USER_NAMES)):
422
+ block_list += [gr.Row()] * 3
423
  for i, leader in enumerate(USER_NAMES):
424
  with gr.Row():
425
  plots += add_plots(leader)
426
  with gr.Row():
427
+ radio_list.append(gr.Radio(list(PLOT_CHOICES_DICT.keys()), visible=False, interactive=True))
428
+ nr_tweet_list.append(gr.Number(visible=False))
429
+ checkbox_list.append(gr.Checkbox(label="Show stats ", value=False, visible=False))
430
+ saving_file_checkboxes.append(gr.Checkbox(label="Export file", value=False, visible=False))
431
 
432
  with gr.Row():
433
  df_list.append(gr.DataFrame(visible=False))
434
 
 
435
  inp = [date1,
436
  date2,
437
  leaders,
438
+ plot_choices, save_selected_data_checkbox] + radio_list + checkbox_list + saving_file_checkboxes
439
 
440
+ output = plots + [save_selected_data_checkbox] + df_list + nr_tweet_list + saving_file_checkboxes
441
 
442
+ all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list # + df_list # df_comps
 
443
 
444
  update_inp = [leaders, plot_choices] + checkbox_list
445
 
 
446
  update.click(fn=update_window, inputs=update_inp, outputs=all_visual)
447
 
448
  btn.click(fn=main, inputs=inp, outputs=output)
449
  # input.change(fn=main, inputs=input, outputs=output)
450
  demo.launch(share=False)
451
 
452
+ # df= pd.read_csv(os.getcwd()+"/data/twitterdata.csv")
453
 
454
+ # https://51285.gradio.app
 
 
textclassifier/TextClassifier.py CHANGED
@@ -44,8 +44,14 @@ class TextClassifier:
44
  # add timer in time-loop and stop after 10 seconds
45
  # self.df = self.ts.scrape_by_user(user_name)
46
  self.df = self.ts.scrape_by_several_users(user_list)
47
- # Make id as type int64
48
- self.df.loc[:, 'id'] = self.df.id.copy().apply(lambda x: int(x))
 
 
 
 
 
 
49
  openai.api_key = OPENAI_API_KEY
50
 
51
  def classify_all(self, tweet: str):
 
44
  # add timer in time-loop and stop after 10 seconds
45
  # self.df = self.ts.scrape_by_user(user_name)
46
  self.df = self.ts.scrape_by_several_users(user_list)
47
+
48
+ # Check if 'id' is in self.df
49
+ if 'id' in self.df.columns:
50
+ # Make id as type int64
51
+ self.df.loc[:, 'id'] = self.df.id.copy().apply(lambda x: int(x))
52
+ else:
53
+ # If not do nothing
54
+ pass
55
  openai.api_key = OPENAI_API_KEY
56
 
57
  def classify_all(self, tweet: str):
twitterscraper/TwitterScraper.py CHANGED
@@ -27,7 +27,7 @@ class TwitterScraper(object):
27
  # Make sure to_date is later than from_date
28
  assert from_date < to_date, "from_date must be earlier than to_date"
29
  # Make sure num_tweets is a positive integer
30
- assert 0 < num_tweets <= 60, "num_tweets must be a positive integer and at most 60"
31
 
32
  self.from_date = from_date
33
  self.to_date = to_date
 
27
  # Make sure to_date is later than from_date
28
  assert from_date < to_date, "from_date must be earlier than to_date"
29
  # Make sure num_tweets is a positive integer
30
+ # assert 0 < num_tweets <= 60, "num_tweets must be a positive integer and at most 60"
31
 
32
  self.from_date = from_date
33
  self.to_date = to_date