Spaces:
Runtime error
Runtime error
fixed minor bugs
Browse files- .idea/misc.xml +1 -1
- .idea/politweet.iml +1 -1
- app.py +103 -136
- textclassifier/TextClassifier.py +8 -2
- twitterscraper/TwitterScraper.py +1 -1
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (politweet)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
.idea/politweet.iml
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
8 |
</content>
|
9 |
-
<orderEntry type="jdk" jdkName="Python 3.
|
10 |
<orderEntry type="sourceFolder" forTests="false" />
|
11 |
</component>
|
12 |
<component name="PyNamespacePackagesService">
|
|
|
6 |
<excludeFolder url="file://$MODULE_DIR$/env" />
|
7 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
8 |
</content>
|
9 |
+
<orderEntry type="jdk" jdkName="Python 3.9 (politweet)" jdkType="Python SDK" />
|
10 |
<orderEntry type="sourceFolder" forTests="false" />
|
11 |
</component>
|
12 |
<component name="PyNamespacePackagesService">
|
app.py
CHANGED
@@ -15,11 +15,11 @@ import matplotlib.pyplot as plt
|
|
15 |
from functions import functions as f
|
16 |
import time
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
|
24 |
USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
|
25 |
'dadgostarnooshi']
|
@@ -42,16 +42,16 @@ num_tweet = 1000
|
|
42 |
LIMIT = 0.05
|
43 |
|
44 |
|
45 |
-
def show_all_stats(
|
46 |
dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
|
47 |
-
if
|
48 |
return dataframe
|
49 |
else:
|
50 |
return pd.DataFrame()
|
51 |
|
52 |
|
53 |
-
def fixChoicesCorrectOrder(
|
54 |
-
ListChoices = [x for x in Columns if x in
|
55 |
return ListChoices
|
56 |
|
57 |
|
@@ -74,14 +74,12 @@ def main(From,
|
|
74 |
To,
|
75 |
UserNameChoices,
|
76 |
plot_choice,
|
77 |
-
save_selected
|
78 |
rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8,
|
79 |
-
v1, v2, v3, v4, v5, v6, v7, v8
|
80 |
s1, s2, s3, s4, s5, s6, s7, s8
|
81 |
|
82 |
):
|
83 |
-
|
84 |
-
|
85 |
save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
|
86 |
|
87 |
def Add_Pychart(df, leaders, plot_choices):
|
@@ -102,7 +100,7 @@ def main(From,
|
|
102 |
|
103 |
for db in df_list:
|
104 |
for col in PLOT_CHOICES_REVERSE_DICT: # plot_choices:
|
105 |
-
if col=='merged_target':
|
106 |
pie_charts.append(bar(db[0], col + ": " + db[1]))
|
107 |
else:
|
108 |
pie_charts.append(pie_chart(db[0], col, col + ": " + db[1]))
|
@@ -116,43 +114,47 @@ def main(From,
|
|
116 |
if db.empty:
|
117 |
return None
|
118 |
else:
|
119 |
-
db['merged_target']= db["merged_target"].apply(lambda
|
120 |
-
|
|
|
|
|
121 |
# This can be removed after we remove all unnessary spaces from twitter data
|
122 |
-
all_targets= ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
|
123 |
db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
|
124 |
-
percent_target = (len(db_new) / len(db))*100
|
125 |
-
targets= db_new["merged_target"].value_counts().keys().to_list()
|
126 |
-
positive=[0]*len(all_targets)
|
127 |
-
negative=[0]*len(all_targets)
|
128 |
-
neutral=[0]*len(all_targets)
|
129 |
-
other =[0]*len(all_targets)
|
130 |
-
for i,target in enumerate(all_targets):
|
131 |
-
temp_db= db_new.loc[db_new["merged_target"] == target]
|
132 |
if temp_db.empty:
|
133 |
pass
|
134 |
else:
|
135 |
sent = temp_db['sentiment'].to_list()
|
136 |
-
positive[i] +=sent.count('positive')
|
137 |
negative[i] += sent.count('negative')
|
138 |
neutral[i] += sent.count('neutral')
|
139 |
-
other[i] +=
|
140 |
font1 = {'family': 'serif', 'color': 'blue', 'size': 10}
|
141 |
fig = plt.figure()
|
142 |
-
y1 = np.array(positive)/len(db_new)
|
143 |
-
y2 = np.array(negative)/len(db_new)
|
144 |
-
y3 = np.array(neutral)/len(db_new)
|
145 |
-
y4 = np.array(other)/len(db_new)
|
146 |
-
plt.bar(all_targets, y1
|
147 |
-
plt.bar(all_targets, y2
|
148 |
-
plt.bar(all_targets, y3
|
149 |
-
plt.bar(all_targets, y4
|
150 |
plt.xticks(rotation=15)
|
151 |
plt.ylim(0, 1)
|
152 |
-
plt.title(
|
153 |
-
|
|
|
|
|
154 |
plt.ylabel("Procent")
|
155 |
-
plt.legend(["positive","negative", "neutral","other"])
|
156 |
return fig
|
157 |
|
158 |
def pie_chart(db, col_name, title):
|
@@ -178,46 +180,50 @@ def main(From,
|
|
178 |
# os.path.dirname(os.path.abspath(__file__))) + "/politweet/data/twitterdata.csv") #
|
179 |
df = dataframe
|
180 |
|
181 |
-
|
182 |
if save_selected:
|
183 |
user_list = MatchNameToUser(UserNameChoices)
|
184 |
-
df_l=[]
|
185 |
for user in user_list:
|
186 |
-
df_l.append(
|
187 |
|
188 |
-
selected_df= pd.concat(df_l).reset_index(drop=True)
|
189 |
-
export_to_download(selected_df,"selected_leaders")
|
190 |
-
save_selected_checkbox= [gr.Checkbox.update(interactive=False)]
|
191 |
|
|
|
|
|
192 |
|
193 |
pycharts = Add_Pychart(df, UserNameChoices, convert_plot_choices(plot_choice))
|
194 |
|
195 |
-
rb_components = [rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8] #radio_buttons
|
196 |
-
df_visibility_check = [v1,v2,v3,v4,v5,v6,v7,v8]
|
197 |
-
|
|
|
198 |
|
199 |
leader_bool_list = [True if leader in selected_users else False for leader in USER_NAMES]
|
200 |
-
df_list=[]
|
201 |
number_tweets = []
|
202 |
-
save_file_components_list =[]
|
203 |
-
for i
|
204 |
user_df = d_frame.loc[d_frame['username'] == USER_LIST[i]]
|
205 |
-
number_tweets.append(gr.Number.update(value=len(user_df),visible=u_bool))
|
206 |
|
207 |
if save_or_no[i]:
|
208 |
-
export_to_download(pd.DataFrame(user_df)
|
209 |
-
save_file_components_list.append(
|
210 |
else:
|
211 |
-
save_file_components_list.append(
|
212 |
|
213 |
-
if u_bool and
|
214 |
-
df_list.append(
|
215 |
else:
|
216 |
df_list.append(None)
|
217 |
|
218 |
-
return
|
|
|
|
|
|
|
219 |
|
220 |
-
return pycharts + save_selected_checkbox +get_selected_df_list(df,save_file_bool,list(UserNameChoices), rb_components, df_visibility_check)
|
221 |
|
222 |
''' END OF MAIN
|
223 |
####
|
@@ -227,35 +233,31 @@ def main(From,
|
|
227 |
'''
|
228 |
|
229 |
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
def get_exemple_df(df:pd.DataFrame, column:str):
|
234 |
print(column)
|
235 |
-
df=df[SELECTED_COLUMN_DICT[column]
|
236 |
unique_labels = df[column].value_counts().keys()
|
237 |
-
stat=[]
|
238 |
for label in unique_labels:
|
239 |
df_temp = df.loc[df[column] == label]
|
240 |
if len(df_temp) > 5:
|
241 |
-
df_temp =df_temp[0:5]
|
242 |
stat.append(df_temp)
|
243 |
|
244 |
-
exemple_df= pd.concat(stat)
|
245 |
|
246 |
-
#stat =stat.reset_index(drop=True) just in case u want to reset indexing
|
247 |
|
248 |
return exemple_df
|
249 |
|
250 |
|
251 |
-
def export_to_download(_data_frame,_type:str
|
252 |
-
|
253 |
downloads_path = str(Path.home()) + "/Downloads/"
|
254 |
if _type == "one_leader":
|
255 |
-
file_name = _data_frame['username'].to_list()[0]
|
256 |
else:
|
257 |
file_name = "selected_leaders"
|
258 |
-
full_path = downloads_path + file_name+".csv"
|
259 |
|
260 |
while full_path in glob.glob(downloads_path + "*"):
|
261 |
search_list = re.findall('\p{N}+', full_path)
|
@@ -268,24 +270,22 @@ def export_to_download(_data_frame,_type:str ):
|
|
268 |
|
269 |
_data_frame.to_csv(full_path, index=False)
|
270 |
|
|
|
271 |
|
272 |
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
# , pie_chart(df, "main_topic"), pie_chart("target")
|
277 |
def piechart_input(df, column, limit):
|
278 |
df_len = len(df)
|
279 |
df_v = df[column].value_counts()
|
280 |
df_len = len(df)
|
281 |
if column == "sentiment":
|
282 |
ds_sentiment = df[column].apply(lambda x: re.sub("\s+", "", str(x)))
|
283 |
-
df_v = ds_sentiment.apply(lambda x: x if str(x).lower() == "positive" or str(x).lower() == "negative" or str(
|
|
|
284 |
elif column == "merged_target":
|
285 |
ds_target = df[column].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x)
|
286 |
df_v = ds_target.value_counts()
|
287 |
freq = df_v.to_list()
|
288 |
-
labels= df_v.keys().to_list
|
289 |
freq_dict = {}
|
290 |
freq_dict[column] = labels
|
291 |
freq_dict["frequency"] = freq
|
@@ -321,50 +321,34 @@ def convert_to_boolean(leaders, plot_choices):
|
|
321 |
return leaders_converted, plot_converted
|
322 |
|
323 |
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
def update_window(leaders: list, plot_choices: list,
|
337 |
v1, v2, v3, v4, v5, v6, v7, v8
|
338 |
):
|
339 |
-
|
340 |
-
|
341 |
leader_bool_list, plot_bool_list = convert_to_boolean(leaders, plot_choices)
|
342 |
|
343 |
bool_list = []
|
344 |
df_visiblity_bool = [v1, v2, v3, v4, v5, v6, v7, v8]
|
345 |
|
346 |
-
|
347 |
-
#this loop sets boolean for plots
|
348 |
for leader in leader_bool_list:
|
349 |
if leader:
|
350 |
for choice in plot_bool_list:
|
351 |
bool_list.append(choice)
|
352 |
-
|
353 |
else:
|
354 |
for i in range(len(plot_bool_list)):
|
355 |
bool_list.append(False)
|
356 |
-
#bool_list.append(False)
|
357 |
|
358 |
update_blocks = []
|
359 |
update_plots = []
|
360 |
update_radio = []
|
361 |
-
update_nr_tweet =[]
|
362 |
update_checkbox = []
|
363 |
-
update_save_file_checkboxes =[]
|
364 |
update_df = []
|
365 |
|
366 |
-
#all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list
|
367 |
-
|
368 |
|
369 |
for i, vis_or_not in enumerate(leader_bool_list):
|
370 |
update_blocks.append(gr.Row.update(visible=vis_or_not))
|
@@ -374,10 +358,10 @@ def update_window(leaders: list, plot_choices: list,
|
|
374 |
update_df.append(gr.DataFrame.update(visible=df_visiblity_bool[i]))
|
375 |
else:
|
376 |
|
377 |
-
update_blocks.append(gr.Row.update(visible=False
|
378 |
-
update_df.append(gr.DataFrame.update(visible=
|
379 |
|
380 |
-
update_nr_tweet.append(
|
381 |
update_radio.append(gr.Radio.update(visible=vis_or_not))
|
382 |
update_checkbox.append(gr.Checkbox.update(visible=vis_or_not))
|
383 |
update_save_file_checkboxes.append(gr.Checkbox.update(visible=vis_or_not))
|
@@ -387,34 +371,20 @@ def update_window(leaders: list, plot_choices: list,
|
|
387 |
return update_blocks + update_plots + update_radio + update_nr_tweet + update_checkbox + update_save_file_checkboxes + update_df
|
388 |
|
389 |
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
def add_plots(user):
|
403 |
plot_list = []
|
404 |
for plot_type in PLOT_CHOICES_DICT:
|
405 |
-
plot_list.append(gr.Plot(label=plot_type+ " for " + user, visible=False))
|
406 |
return plot_list
|
407 |
|
408 |
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
def add_nbr_boxes():
|
413 |
return [gr.Number(value=0, label='Tweets by ' + user, visible=False) for user in USER_NAMES]
|
414 |
|
415 |
|
416 |
if __name__ == "__main__":
|
417 |
import gradio as gr
|
|
|
418 |
demo = gr.Blocks(title='Politweet')
|
419 |
with demo:
|
420 |
with gr.Column():
|
@@ -427,7 +397,7 @@ if __name__ == "__main__":
|
|
427 |
label="")
|
428 |
plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
|
429 |
|
430 |
-
save_selected_data_checkbox= gr.Checkbox(label="Export selected data")
|
431 |
with gr.Row():
|
432 |
update = gr.Button('Apply')
|
433 |
btn = gr.Button("Run")
|
@@ -436,52 +406,49 @@ if __name__ == "__main__":
|
|
436 |
# show_plots = gr.components.Checkbox(label='Show topics', value=True)
|
437 |
with gr.Column():
|
438 |
selected = gr.DataFrame(label="Summary statistics for the selected choices",
|
439 |
-
|
440 |
-
# all_data = gr.components.DataFrame(label="Summary statistics of the total database",
|
|
|
441 |
|
442 |
plots = []
|
443 |
radio_list = []
|
444 |
checkbox_list = []
|
445 |
df_list = []
|
446 |
block_list = []
|
447 |
-
saving_file_checkboxes =[]
|
448 |
nr_tweet_list = []
|
449 |
with gr.Column():
|
450 |
for i in range(len(USER_NAMES)):
|
451 |
-
block_list +=[gr.Row()] * 3
|
452 |
for i, leader in enumerate(USER_NAMES):
|
453 |
with gr.Row():
|
454 |
plots += add_plots(leader)
|
455 |
with gr.Row():
|
456 |
-
radio_list.append(gr.Radio(list(PLOT_CHOICES_DICT.keys()), visible=False
|
457 |
-
nr_tweet_list.append(
|
458 |
-
checkbox_list.append(gr.Checkbox(label="Show stats ",value=False,visible=False))
|
459 |
-
saving_file_checkboxes.append(
|
460 |
|
461 |
with gr.Row():
|
462 |
df_list.append(gr.DataFrame(visible=False))
|
463 |
|
464 |
-
|
465 |
inp = [date1,
|
466 |
date2,
|
467 |
leaders,
|
468 |
-
plot_choices
|
469 |
|
470 |
-
output = plots + [save_selected_data_checkbox]+ df_list + nr_tweet_list + saving_file_checkboxes
|
471 |
|
472 |
-
|
473 |
-
all_visual = block_list + plots + radio_list + nr_tweet_list +checkbox_list + saving_file_checkboxes +df_list #+ df_list # df_comps
|
474 |
|
475 |
update_inp = [leaders, plot_choices] + checkbox_list
|
476 |
|
477 |
-
|
478 |
update.click(fn=update_window, inputs=update_inp, outputs=all_visual)
|
479 |
|
480 |
btn.click(fn=main, inputs=inp, outputs=output)
|
481 |
# input.change(fn=main, inputs=input, outputs=output)
|
482 |
demo.launch(share=False)
|
483 |
|
|
|
484 |
|
485 |
-
#
|
486 |
-
|
487 |
-
#https://51285.gradio.app
|
|
|
15 |
from functions import functions as f
|
16 |
import time
|
17 |
|
18 |
+
SELECTED_COLUMN_DICT = {
|
19 |
+
'merged_topic': ['tweet', 'main_topic', 'sub_topic', 'synonym_topic', 'cos_sim_topic', 'merged_topic'],
|
20 |
+
'sentiment': ['tweet', 'sentiment'],
|
21 |
+
'merged_target': ['tweet', 'target', 'synonym_target', 'cos_sim_target', 'merged_target']
|
22 |
+
}
|
23 |
|
24 |
USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
|
25 |
'dadgostarnooshi']
|
|
|
42 |
LIMIT = 0.05
|
43 |
|
44 |
|
45 |
+
def show_all_stats(see_full_stats):
|
46 |
dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
|
47 |
+
if see_full_stats:
|
48 |
return dataframe
|
49 |
else:
|
50 |
return pd.DataFrame()
|
51 |
|
52 |
|
53 |
+
def fixChoicesCorrectOrder(choices):
|
54 |
+
ListChoices = [x for x in Columns if x in choices]
|
55 |
return ListChoices
|
56 |
|
57 |
|
|
|
74 |
To,
|
75 |
UserNameChoices,
|
76 |
plot_choice,
|
77 |
+
save_selected,
|
78 |
rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8,
|
79 |
+
v1, v2, v3, v4, v5, v6, v7, v8,
|
80 |
s1, s2, s3, s4, s5, s6, s7, s8
|
81 |
|
82 |
):
|
|
|
|
|
83 |
save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
|
84 |
|
85 |
def Add_Pychart(df, leaders, plot_choices):
|
|
|
100 |
|
101 |
for db in df_list:
|
102 |
for col in PLOT_CHOICES_REVERSE_DICT: # plot_choices:
|
103 |
+
if col == 'merged_target':
|
104 |
pie_charts.append(bar(db[0], col + ": " + db[1]))
|
105 |
else:
|
106 |
pie_charts.append(pie_chart(db[0], col, col + ": " + db[1]))
|
|
|
114 |
if db.empty:
|
115 |
return None
|
116 |
else:
|
117 |
+
db['merged_target'] = db["merged_target"].apply(lambda
|
118 |
+
x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x) # replacing Different Error type with string "other"
|
119 |
+
db['sentiment'] = db['sentiment'].apply(
|
120 |
+
lambda x: re.sub('\s+', "", x)) # removing extra spaces in at the end and beginning of the sentiments.
|
121 |
# This can be removed after we remove all unnessary spaces from twitter data
|
122 |
+
all_targets = ['v', 'mp', 's', 'c', 'l', 'kd', 'm', 'sd', 'Red-Greens', 'The opposition']
|
123 |
db_new = db.loc[db["merged_target"] != "other"] # dataframe with other category removed
|
124 |
+
percent_target = (len(db_new) / len(db)) * 100
|
125 |
+
targets = db_new["merged_target"].value_counts().keys().to_list()
|
126 |
+
positive = [0] * len(all_targets)
|
127 |
+
negative = [0] * len(all_targets)
|
128 |
+
neutral = [0] * len(all_targets)
|
129 |
+
other = [0] * len(all_targets)
|
130 |
+
for i, target in enumerate(all_targets):
|
131 |
+
temp_db = db_new.loc[db_new["merged_target"] == target]
|
132 |
if temp_db.empty:
|
133 |
pass
|
134 |
else:
|
135 |
sent = temp_db['sentiment'].to_list()
|
136 |
+
positive[i] += sent.count('positive')
|
137 |
negative[i] += sent.count('negative')
|
138 |
neutral[i] += sent.count('neutral')
|
139 |
+
other[i] += sent.count('other')
|
140 |
font1 = {'family': 'serif', 'color': 'blue', 'size': 10}
|
141 |
fig = plt.figure()
|
142 |
+
y1 = np.array(positive) / len(db_new) if len(db_new) > 0 else np.array(positive)
|
143 |
+
y2 = np.array(negative) / len(db_new) if len(db_new) > 0 else np.array(negative)
|
144 |
+
y3 = np.array(neutral) / len(db_new) if len(db_new) > 0 else np.array(neutral)
|
145 |
+
y4 = np.array(other) / len(db_new) if len(db_new) > 0 else np.array(other)
|
146 |
+
plt.bar(all_targets, y1, color='g')
|
147 |
+
plt.bar(all_targets, y2, bottom=y1, color='r')
|
148 |
+
plt.bar(all_targets, y3, bottom=(y1 + y2), color='yellow')
|
149 |
+
plt.bar(all_targets, y4, bottom=(y1 + y2 + y3), color='b')
|
150 |
plt.xticks(rotation=15)
|
151 |
plt.ylim(0, 1)
|
152 |
+
plt.title(
|
153 |
+
str(percent_target)[0:4] + "% " + " of tweets have target. " + "Number of tweets with target:" + str(
|
154 |
+
len(db_new)), loc='right', fontdict=font1)
|
155 |
+
# plt.xlabel("Targets")
|
156 |
plt.ylabel("Procent")
|
157 |
+
plt.legend(["positive", "negative", "neutral", "other"])
|
158 |
return fig
|
159 |
|
160 |
def pie_chart(db, col_name, title):
|
|
|
180 |
# os.path.dirname(os.path.abspath(__file__))) + "/politweet/data/twitterdata.csv") #
|
181 |
df = dataframe
|
182 |
|
|
|
183 |
if save_selected:
|
184 |
user_list = MatchNameToUser(UserNameChoices)
|
185 |
+
df_l = []
|
186 |
for user in user_list:
|
187 |
+
df_l.append(pd.DataFrame(df.loc[df['username'] == user]))
|
188 |
|
189 |
+
selected_df = pd.concat(df_l).reset_index(drop=True)
|
190 |
+
export_to_download(selected_df, "selected_leaders")
|
191 |
+
save_selected_checkbox = [gr.Checkbox.update(interactive=False)]
|
192 |
|
193 |
+
else:
|
194 |
+
save_selected_checkbox = [gr.Checkbox.update(interactive=True)]
|
195 |
|
196 |
pycharts = Add_Pychart(df, UserNameChoices, convert_plot_choices(plot_choice))
|
197 |
|
198 |
+
rb_components = [rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8] # radio_buttons
|
199 |
+
df_visibility_check = [v1, v2, v3, v4, v5, v6, v7, v8]
|
200 |
+
|
201 |
+
def get_selected_df_list(d_frame, save_or_no, selected_users, radio, visibility):
|
202 |
|
203 |
leader_bool_list = [True if leader in selected_users else False for leader in USER_NAMES]
|
204 |
+
df_list = []
|
205 |
number_tweets = []
|
206 |
+
save_file_components_list = []
|
207 |
+
for i, u_bool in enumerate(leader_bool_list):
|
208 |
user_df = d_frame.loc[d_frame['username'] == USER_LIST[i]]
|
209 |
+
number_tweets.append(gr.Number.update(value=len(user_df), visible=u_bool))
|
210 |
|
211 |
if save_or_no[i]:
|
212 |
+
export_to_download(pd.DataFrame(user_df), "one_leader")
|
213 |
+
save_file_components_list.append(gr.Checkbox.update(visible=u_bool, interactive=False))
|
214 |
else:
|
215 |
+
save_file_components_list.append(gr.Checkbox.update(visible=u_bool))
|
216 |
|
217 |
+
if u_bool and visibility[i]:
|
218 |
+
df_list.append(get_exemple_df(user_df, PLOT_CHOICES_DICT[radio[i]]))
|
219 |
else:
|
220 |
df_list.append(None)
|
221 |
|
222 |
+
return df_list + number_tweets + save_file_components_list
|
223 |
+
|
224 |
+
return pycharts + save_selected_checkbox + get_selected_df_list(df, save_file_bool, list(UserNameChoices),
|
225 |
+
rb_components, df_visibility_check)
|
226 |
|
|
|
227 |
|
228 |
''' END OF MAIN
|
229 |
####
|
|
|
233 |
'''
|
234 |
|
235 |
|
236 |
+
def get_exemple_df(df: pd.DataFrame, column: str):
|
|
|
|
|
|
|
237 |
print(column)
|
238 |
+
df = df[SELECTED_COLUMN_DICT[column]]
|
239 |
unique_labels = df[column].value_counts().keys()
|
240 |
+
stat = []
|
241 |
for label in unique_labels:
|
242 |
df_temp = df.loc[df[column] == label]
|
243 |
if len(df_temp) > 5:
|
244 |
+
df_temp = df_temp[0:5]
|
245 |
stat.append(df_temp)
|
246 |
|
247 |
+
exemple_df = pd.concat(stat)
|
248 |
|
249 |
+
# stat =stat.reset_index(drop=True) just in case u want to reset indexing
|
250 |
|
251 |
return exemple_df
|
252 |
|
253 |
|
254 |
+
def export_to_download(_data_frame, _type: str):
|
|
|
255 |
downloads_path = str(Path.home()) + "/Downloads/"
|
256 |
if _type == "one_leader":
|
257 |
+
file_name = _data_frame['username'].to_list()[0] # df['username'][0] + "_data"
|
258 |
else:
|
259 |
file_name = "selected_leaders"
|
260 |
+
full_path = downloads_path + file_name + ".csv"
|
261 |
|
262 |
while full_path in glob.glob(downloads_path + "*"):
|
263 |
search_list = re.findall('\p{N}+', full_path)
|
|
|
270 |
|
271 |
_data_frame.to_csv(full_path, index=False)
|
272 |
|
273 |
+
# , pie_chart(df, "main_topic"), pie_chart("target")
|
274 |
|
275 |
|
|
|
|
|
|
|
|
|
276 |
def piechart_input(df, column, limit):
|
277 |
df_len = len(df)
|
278 |
df_v = df[column].value_counts()
|
279 |
df_len = len(df)
|
280 |
if column == "sentiment":
|
281 |
ds_sentiment = df[column].apply(lambda x: re.sub("\s+", "", str(x)))
|
282 |
+
df_v = ds_sentiment.apply(lambda x: x if str(x).lower() == "positive" or str(x).lower() == "negative" or str(
|
283 |
+
x).lower() == "neutral" else "other").value_counts()
|
284 |
elif column == "merged_target":
|
285 |
ds_target = df[column].apply(lambda x: "other" if x == "ERROR_9000" or x == "ERROR_496" else x)
|
286 |
df_v = ds_target.value_counts()
|
287 |
freq = df_v.to_list()
|
288 |
+
labels = df_v.keys().to_list
|
289 |
freq_dict = {}
|
290 |
freq_dict[column] = labels
|
291 |
freq_dict["frequency"] = freq
|
|
|
321 |
return leaders_converted, plot_converted
|
322 |
|
323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
def update_window(leaders: list, plot_choices: list,
|
325 |
v1, v2, v3, v4, v5, v6, v7, v8
|
326 |
):
|
|
|
|
|
327 |
leader_bool_list, plot_bool_list = convert_to_boolean(leaders, plot_choices)
|
328 |
|
329 |
bool_list = []
|
330 |
df_visiblity_bool = [v1, v2, v3, v4, v5, v6, v7, v8]
|
331 |
|
332 |
+
# this loop sets boolean for plots
|
|
|
333 |
for leader in leader_bool_list:
|
334 |
if leader:
|
335 |
for choice in plot_bool_list:
|
336 |
bool_list.append(choice)
|
337 |
+
# bool_list.append(True) ## this is for radio component
|
338 |
else:
|
339 |
for i in range(len(plot_bool_list)):
|
340 |
bool_list.append(False)
|
341 |
+
# bool_list.append(False)
|
342 |
|
343 |
update_blocks = []
|
344 |
update_plots = []
|
345 |
update_radio = []
|
346 |
+
update_nr_tweet = []
|
347 |
update_checkbox = []
|
348 |
+
update_save_file_checkboxes = []
|
349 |
update_df = []
|
350 |
|
351 |
+
# all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list
|
|
|
352 |
|
353 |
for i, vis_or_not in enumerate(leader_bool_list):
|
354 |
update_blocks.append(gr.Row.update(visible=vis_or_not))
|
|
|
358 |
update_df.append(gr.DataFrame.update(visible=df_visiblity_bool[i]))
|
359 |
else:
|
360 |
|
361 |
+
update_blocks.append(gr.Row.update(visible=False))
|
362 |
+
update_df.append(gr.DataFrame.update(visible=False))
|
363 |
|
364 |
+
update_nr_tweet.append(gr.Number.update(visible=vis_or_not))
|
365 |
update_radio.append(gr.Radio.update(visible=vis_or_not))
|
366 |
update_checkbox.append(gr.Checkbox.update(visible=vis_or_not))
|
367 |
update_save_file_checkboxes.append(gr.Checkbox.update(visible=vis_or_not))
|
|
|
371 |
return update_blocks + update_plots + update_radio + update_nr_tweet + update_checkbox + update_save_file_checkboxes + update_df
|
372 |
|
373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
def add_plots(user):
|
375 |
plot_list = []
|
376 |
for plot_type in PLOT_CHOICES_DICT:
|
377 |
+
plot_list.append(gr.Plot(label=plot_type + " for " + user, visible=False))
|
378 |
return plot_list
|
379 |
|
380 |
|
|
|
|
|
|
|
381 |
def add_nbr_boxes():
|
382 |
return [gr.Number(value=0, label='Tweets by ' + user, visible=False) for user in USER_NAMES]
|
383 |
|
384 |
|
385 |
if __name__ == "__main__":
|
386 |
import gradio as gr
|
387 |
+
|
388 |
demo = gr.Blocks(title='Politweet')
|
389 |
with demo:
|
390 |
with gr.Column():
|
|
|
397 |
label="")
|
398 |
plot_choices = gr.CheckboxGroup(choices=CHOICE_LIST, label='Choose what to show')
|
399 |
|
400 |
+
save_selected_data_checkbox = gr.Checkbox(label="Export selected data")
|
401 |
with gr.Row():
|
402 |
update = gr.Button('Apply')
|
403 |
btn = gr.Button("Run")
|
|
|
406 |
# show_plots = gr.components.Checkbox(label='Show topics', value=True)
|
407 |
with gr.Column():
|
408 |
selected = gr.DataFrame(label="Summary statistics for the selected choices",
|
409 |
+
max_rows=None, visible=False)
|
410 |
+
# all_data = gr.components.DataFrame(label="Summary statistics of the total database",
|
411 |
+
# max_rows=None)
|
412 |
|
413 |
plots = []
|
414 |
radio_list = []
|
415 |
checkbox_list = []
|
416 |
df_list = []
|
417 |
block_list = []
|
418 |
+
saving_file_checkboxes = []
|
419 |
nr_tweet_list = []
|
420 |
with gr.Column():
|
421 |
for i in range(len(USER_NAMES)):
|
422 |
+
block_list += [gr.Row()] * 3
|
423 |
for i, leader in enumerate(USER_NAMES):
|
424 |
with gr.Row():
|
425 |
plots += add_plots(leader)
|
426 |
with gr.Row():
|
427 |
+
radio_list.append(gr.Radio(list(PLOT_CHOICES_DICT.keys()), visible=False, interactive=True))
|
428 |
+
nr_tweet_list.append(gr.Number(visible=False))
|
429 |
+
checkbox_list.append(gr.Checkbox(label="Show stats ", value=False, visible=False))
|
430 |
+
saving_file_checkboxes.append(gr.Checkbox(label="Export file", value=False, visible=False))
|
431 |
|
432 |
with gr.Row():
|
433 |
df_list.append(gr.DataFrame(visible=False))
|
434 |
|
|
|
435 |
inp = [date1,
|
436 |
date2,
|
437 |
leaders,
|
438 |
+
plot_choices, save_selected_data_checkbox] + radio_list + checkbox_list + saving_file_checkboxes
|
439 |
|
440 |
+
output = plots + [save_selected_data_checkbox] + df_list + nr_tweet_list + saving_file_checkboxes
|
441 |
|
442 |
+
all_visual = block_list + plots + radio_list + nr_tweet_list + checkbox_list + saving_file_checkboxes + df_list # + df_list # df_comps
|
|
|
443 |
|
444 |
update_inp = [leaders, plot_choices] + checkbox_list
|
445 |
|
|
|
446 |
update.click(fn=update_window, inputs=update_inp, outputs=all_visual)
|
447 |
|
448 |
btn.click(fn=main, inputs=inp, outputs=output)
|
449 |
# input.change(fn=main, inputs=input, outputs=output)
|
450 |
demo.launch(share=False)
|
451 |
|
452 |
+
# df= pd.read_csv(os.getcwd()+"/data/twitterdata.csv")
|
453 |
|
454 |
+
# https://51285.gradio.app
|
|
|
|
textclassifier/TextClassifier.py
CHANGED
@@ -44,8 +44,14 @@ class TextClassifier:
|
|
44 |
# add timer in time-loop and stop after 10 seconds
|
45 |
# self.df = self.ts.scrape_by_user(user_name)
|
46 |
self.df = self.ts.scrape_by_several_users(user_list)
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
openai.api_key = OPENAI_API_KEY
|
50 |
|
51 |
def classify_all(self, tweet: str):
|
|
|
44 |
# add timer in time-loop and stop after 10 seconds
|
45 |
# self.df = self.ts.scrape_by_user(user_name)
|
46 |
self.df = self.ts.scrape_by_several_users(user_list)
|
47 |
+
|
48 |
+
# Check if 'id' is in self.df
|
49 |
+
if 'id' in self.df.columns:
|
50 |
+
# Make id as type int64
|
51 |
+
self.df.loc[:, 'id'] = self.df.id.copy().apply(lambda x: int(x))
|
52 |
+
else:
|
53 |
+
# If not do nothing
|
54 |
+
pass
|
55 |
openai.api_key = OPENAI_API_KEY
|
56 |
|
57 |
def classify_all(self, tweet: str):
|
twitterscraper/TwitterScraper.py
CHANGED
@@ -27,7 +27,7 @@ class TwitterScraper(object):
|
|
27 |
# Make sure to_date is later than from_date
|
28 |
assert from_date < to_date, "from_date must be earlier than to_date"
|
29 |
# Make sure num_tweets is a positive integer
|
30 |
-
assert 0 < num_tweets <= 60, "num_tweets must be a positive integer and at most 60"
|
31 |
|
32 |
self.from_date = from_date
|
33 |
self.to_date = to_date
|
|
|
27 |
# Make sure to_date is later than from_date
|
28 |
assert from_date < to_date, "from_date must be earlier than to_date"
|
29 |
# Make sure num_tweets is a positive integer
|
30 |
+
# assert 0 < num_tweets <= 60, "num_tweets must be a positive integer and at most 60"
|
31 |
|
32 |
self.from_date = from_date
|
33 |
self.to_date = to_date
|