Spaces:
Sleeping
Sleeping
Update index.py
Browse files
index.py
CHANGED
@@ -99,7 +99,13 @@ app.layout = dbc.Container([
|
|
99 |
dbc.Row([ # row 7 but needs to be updated
|
100 |
dbc.Col(dcc.Graph(id="bar-graph-1"))
|
101 |
]),
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
dbc.Row([ # row 7
|
104 |
dbc.Label("Escolha um site de notícias:", className="fw-bold")
|
105 |
]),
|
@@ -117,24 +123,54 @@ app.layout = dbc.Container([
|
|
117 |
|
118 |
)
|
119 |
]),
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
dbc.Row([ # row 9
|
122 |
-
dbc.Col(
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
-
dbc.Row([ # row 11
|
132 |
-
dbc.Col(dcc.Graph(id='line-graph-4'),
|
133 |
-
)
|
134 |
-
]),
|
135 |
-
|
136 |
-
html.Div(id='pie-container-1')
|
137 |
-
|
138 |
])
|
139 |
|
140 |
# # Create a function to generate pie charts
|
@@ -154,10 +190,9 @@ app.layout = dbc.Container([
|
|
154 |
@app.callback(
|
155 |
Output('line-graph-1', 'figure'),
|
156 |
Output('bar-graph-1','figure'),
|
|
|
157 |
Output('line-graph-2', 'figure'),
|
158 |
-
Output('
|
159 |
-
Output('line-graph-4', 'figure'),
|
160 |
-
Output('pie-container-1', 'children'),
|
161 |
Input("topic-selector", "value"),
|
162 |
Input ("domain-selector", "value"),
|
163 |
Input('date-range', 'start_date'),
|
@@ -210,44 +245,40 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
210 |
# Bar Graph ends
|
211 |
|
212 |
# filter dataframes based on updated data range
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
df4_filtered = df_neg.loc[mask_4]
|
219 |
|
220 |
-
#
|
221 |
-
|
222 |
-
title="Positive")
|
223 |
-
line_fig_3 = px.line(df3_filtered, x="date", y="rolling_mean_counts", line_group="FinBERT_label",
|
224 |
-
title="Neutral")
|
225 |
-
line_fig_4 = px.line(df4_filtered, x="date", y="rolling_mean_counts", line_group="FinBERT_label",
|
226 |
-
title="Negative")
|
227 |
|
228 |
-
#
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
line_fig_4.update_layout(
|
236 |
-
xaxis_title='Data',
|
237 |
-
yaxis_title='Número de notícias com sentimento negativo')
|
238 |
|
239 |
-
#
|
240 |
-
|
241 |
-
line_fig_3.update_xaxes(tickformat="%b %d<br>%Y")
|
242 |
-
line_fig_4.update_xaxes(tickformat="%b %d<br>%Y")
|
243 |
|
244 |
-
#
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
# Map original labels to their translated versions
|
252 |
label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
|
253 |
df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
|
@@ -259,49 +290,41 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
259 |
label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
|
260 |
|
261 |
# Plot general pie chart
|
262 |
-
|
263 |
values=label_percentages_all,
|
264 |
names=label_percentages_all.index,
|
265 |
title='Distribuição Geral',
|
266 |
color_discrete_sequence=['#039a4d', '#3c03f4', '#ca3919']
|
267 |
)
|
268 |
|
269 |
-
|
270 |
# Get unique media categories
|
271 |
media_categories = df_filtered['Veículos de notícias'].unique()
|
272 |
|
273 |
# Define colors for each label
|
274 |
label_colors = {'positivo': '#039a4d', 'neutro': '#3c03f4', 'negativo': '#ca3919'}
|
275 |
-
|
276 |
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
label_percentages
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
color_discrete_sequence=[label_colors[label] for label in label_percentages.index]
|
296 |
-
)
|
297 |
-
fig = dcc.Graph(figure=fig)
|
298 |
-
pie_chart = html.Div(fig,className='four columns')
|
299 |
-
row_content.append(pie_chart)
|
300 |
-
pie_container_1.append(html.Div(row_content, className='row'))
|
301 |
|
302 |
-
return line_fig_1, bar_fig_1,
|
303 |
else:
|
304 |
-
return {'data': []},{'data': []} ,{'data': []} ,{'data': []} , {'data': []}
|
305 |
|
306 |
# return line_fig_1
|
307 |
|
@@ -325,6 +348,30 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
325 |
# dff = df[df.country==value]
|
326 |
# return px.line(dff, x='year', y='pop')
|
327 |
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
if __name__ == '__main__':
|
330 |
app.run_server(debug=True)
|
|
|
99 |
dbc.Row([ # row 7 but needs to be updated
|
100 |
dbc.Col(dcc.Graph(id="bar-graph-1"))
|
101 |
]),
|
102 |
+
|
103 |
+
# html.Div(id='pie-container-1'),
|
104 |
+
dbc.Row([ # row 9
|
105 |
+
dbc.Col(dcc.Graph(id='pie-graph-1'),
|
106 |
+
)
|
107 |
+
]),
|
108 |
+
|
109 |
dbc.Row([ # row 7
|
110 |
dbc.Label("Escolha um site de notícias:", className="fw-bold")
|
111 |
]),
|
|
|
123 |
|
124 |
)
|
125 |
]),
|
126 |
+
|
127 |
+
dbc.Row([ # row 9
|
128 |
+
dbc.Col(dcc.Graph(id='line-graph-2'),
|
129 |
+
)
|
130 |
+
]),
|
131 |
+
|
132 |
+
# dbc.Row([ # row 9
|
133 |
+
# dbc.Col(dcc.Graph(id='line-graph-2'),
|
134 |
+
# )
|
135 |
+
# ]),
|
136 |
+
|
137 |
+
# dbc.Row([ # row 10
|
138 |
+
# dbc.Col(dcc.Graph(id='line-graph-3'),
|
139 |
+
# )
|
140 |
+
# ]),
|
141 |
+
|
142 |
+
# dbc.Row([ # row 11
|
143 |
+
# dbc.Col(dcc.Graph(id='line-graph-4'),
|
144 |
+
# )
|
145 |
+
# ]),
|
146 |
+
|
147 |
+
# html.Div(id='pie-container-2'),
|
148 |
+
dbc.Row([ # row 9
|
149 |
+
dbc.Col(dcc.Graph(id='pie-graph-2'),
|
150 |
+
)
|
151 |
+
]),
|
152 |
+
|
153 |
dbc.Row([ # row 9
|
154 |
+
dbc.Col(
|
155 |
+
dash_table.DataTable(
|
156 |
+
id='headlines-table',
|
157 |
+
columns=[
|
158 |
+
{"name": "Headline", "id": "Headline"},
|
159 |
+
{"name": "URL", "id": "url"},
|
160 |
+
{"name": "Date", "id": "date"},
|
161 |
+
{"name": "Sentiment Label", "id": "FinBERT_label"}
|
162 |
+
],
|
163 |
+
style_table={'overflowX': 'auto'},
|
164 |
+
style_cell={
|
165 |
+
'textAlign': 'left',
|
166 |
+
'whiteSpace': 'normal',
|
167 |
+
'height': 'auto',
|
168 |
+
'minWidth': '180px', 'width': '180px', 'maxWidth': '180px',
|
169 |
+
},
|
170 |
+
)
|
171 |
+
)
|
172 |
+
])
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
])
|
175 |
|
176 |
# # Create a function to generate pie charts
|
|
|
190 |
@app.callback(
|
191 |
Output('line-graph-1', 'figure'),
|
192 |
Output('bar-graph-1','figure'),
|
193 |
+
Output('pie-container-1', 'figure'),
|
194 |
Output('line-graph-2', 'figure'),
|
195 |
+
Output('pie-container-2', 'figure'),
|
|
|
|
|
196 |
Input("topic-selector", "value"),
|
197 |
Input ("domain-selector", "value"),
|
198 |
Input('date-range', 'start_date'),
|
|
|
245 |
# Bar Graph ends
|
246 |
|
247 |
# filter dataframes based on updated data range
|
248 |
+
# Filtering data...
|
249 |
+
df_filtered = counts[(counts['Topic'] == selected_topic) &
|
250 |
+
(counts['domain_folder_name'] == selected_domain) &
|
251 |
+
(counts['date'] >= start_date) &
|
252 |
+
(counts['date'] <= end_date)]
|
|
|
253 |
|
254 |
+
# Create a date range for the selected period
|
255 |
+
date_range = pd.date_range(start=start_date, end=end_date)
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
+
# Create a DataFrame with all possible combinations of classes, topics, and dates
|
258 |
+
all_combinations = pd.MultiIndex.from_product([['positive', 'neutral', 'negative'],
|
259 |
+
[selected_topic],
|
260 |
+
[selected_domain],
|
261 |
+
date_range],
|
262 |
+
names=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'])
|
263 |
+
df_all_combinations = pd.DataFrame(index=all_combinations).reset_index()
|
|
|
|
|
|
|
264 |
|
265 |
+
# Merge filtered DataFrame with DataFrame of all combinations
|
266 |
+
merged_df = pd.merge(df_all_combinations, df_filtered, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
|
|
|
|
|
267 |
|
268 |
+
# Fill missing values with zeros
|
269 |
+
merged_df['count'].fillna(0, inplace=True)
|
270 |
+
merged_df['rolling_mean_counts'].fillna(0, inplace=True)
|
271 |
+
|
272 |
+
# Create line graph...
|
273 |
+
line_fig_2 = px.line(merged_df, x="date", y="count", color="FinBERT_label",
|
274 |
+
line_group="FinBERT_label", title="Sentiment Over Time",
|
275 |
+
labels={"count": "Number of News Articles", "date": "Date"})
|
276 |
+
|
277 |
+
|
278 |
+
# Update layout...
|
279 |
+
line_fig_2.update_layout(xaxis_title='Date', yaxis_title='Number of News Articles',
|
280 |
+
xaxis=dict(tickformat="%b %d<br>%Y"))
|
281 |
+
|
282 |
# Map original labels to their translated versions
|
283 |
label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
|
284 |
df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
|
|
|
290 |
label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
|
291 |
|
292 |
# Plot general pie chart
|
293 |
+
pie_chart_1 = px.pie(
|
294 |
values=label_percentages_all,
|
295 |
names=label_percentages_all.index,
|
296 |
title='Distribuição Geral',
|
297 |
color_discrete_sequence=['#039a4d', '#3c03f4', '#ca3919']
|
298 |
)
|
299 |
|
|
|
300 |
# Get unique media categories
|
301 |
media_categories = df_filtered['Veículos de notícias'].unique()
|
302 |
|
303 |
# Define colors for each label
|
304 |
label_colors = {'positivo': '#039a4d', 'neutro': '#3c03f4', 'negativo': '#ca3919'}
|
|
|
305 |
|
306 |
+
# Filter DataFrame for current media category
|
307 |
+
media_df = df_filtered[df_filtered['Veículos de notícias'] == media]
|
308 |
+
|
309 |
+
# Group by FinBERT_label and count occurrences
|
310 |
+
label_counts = media_df['FinBERT_label_transformed'].value_counts()
|
311 |
+
|
312 |
+
# Calculate percentage of each label
|
313 |
+
label_percentages = (label_counts / label_counts.sum()) * 100
|
314 |
+
|
315 |
+
# Plot pie chart
|
316 |
+
pie_chart_2 = px.pie(
|
317 |
+
values=label_percentages,
|
318 |
+
names=label_percentages.index,
|
319 |
+
title=f'Distribuição para {media}',
|
320 |
+
color_discrete_sequence=[label_colors[label] for label in label_percentages.index]
|
321 |
+
)
|
322 |
+
# pie_chart_2 = dcc.Graph(figure=fig)
|
323 |
+
# pie_chart_2 = html.Div(fig,className='four columns')
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
|
325 |
+
return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2
|
326 |
else:
|
327 |
+
return {'data': []},{'data': []} ,{'data': []} ,{'data': []} , {'data': []}
|
328 |
|
329 |
# return line_fig_1
|
330 |
|
|
|
348 |
# dff = df[df.country==value]
|
349 |
# return px.line(dff, x='year', y='pop')
|
350 |
|
351 |
+
# Define callback function for updating the headlines table
|
352 |
+
@app.callback(
    Output('headlines-table', 'data'),
    Input("topic-selector", "value"),
    Input("domain-selector", "value"),
    Input('date-range', 'start_date'),
    Input('date-range', 'end_date')
)
def update_headlines_table(selected_topic, selected_domain, start_date, end_date):
    """Build the rows of the 'headlines-table' DataTable for the current filters.

    Args:
        selected_topic: Value of the "topic-selector" control; matched against
            the 'Topic' column of the module-level ``df``.
        selected_domain: Value of the "domain-selector" control; matched
            against the 'domain_folder_name' column.
        start_date: Lower bound (inclusive) for the 'date' column.
        end_date: Upper bound (inclusive) for the 'date' column.

    Returns:
        A list of row dicts (``DataFrame.to_dict('records')``) ordered by
        sentiment label (positive, neutral, negative) and then by date, or an
        empty list when any filter value is missing.
    """
    # A control with no value reaches the callback as None; comparing the date
    # column against None would raise, so show an empty table instead.
    if any(value is None
           for value in (selected_topic, selected_domain, start_date, end_date)):
        return []

    row_mask = (
        (df['Topic'] == selected_topic)
        & (df['domain_folder_name'] == selected_domain)
        & (df['date'] >= start_date)
        & (df['date'] <= end_date)
    )

    # Work on an explicit copy: the column assignment below must not be a
    # chained assignment on a slice of the shared, module-level df.
    df_filtered = df.loc[row_mask].copy()

    # An ordered categorical makes the sort follow positive < neutral < negative
    # rather than alphabetical order.
    df_filtered['FinBERT_label'] = pd.Categorical(
        df_filtered['FinBERT_label'],
        categories=['positive', 'neutral', 'negative'],
        ordered=True,
    )

    df_filtered = df_filtered.sort_values(by=['FinBERT_label', 'date'])

    # NOTE(review): every column of df is sent to the browser, although the
    # table declares only Headline, url, date and FinBERT_label -- consider
    # selecting just those columns if df carries large text fields.
    return df_filtered.to_dict('records')
|
375 |
+
|
376 |
if __name__ == '__main__':
|
377 |
app.run_server(debug=True)
|