thak123 committed on
Commit
d589298
1 Parent(s): 26b6248

Update index.py

Browse files
Files changed (1) hide show
  1. index.py +131 -84
index.py CHANGED
@@ -99,7 +99,13 @@ app.layout = dbc.Container([
99
  dbc.Row([ # row 7 but needs to be updated
100
  dbc.Col(dcc.Graph(id="bar-graph-1"))
101
  ]),
102
-
 
 
 
 
 
 
103
  dbc.Row([ # row 7
104
  dbc.Label("Escolha um site de notícias:", className="fw-bold")
105
  ]),
@@ -117,24 +123,54 @@ app.layout = dbc.Container([
117
 
118
  )
119
  ]),
120
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  dbc.Row([ # row 9
122
- dbc.Col(dcc.Graph(id='line-graph-2'),
123
- )
124
- ]),
125
-
126
- dbc.Row([ # row 10
127
- dbc.Col(dcc.Graph(id='line-graph-3'),
128
- )
129
- ]),
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- dbc.Row([ # row 11
132
- dbc.Col(dcc.Graph(id='line-graph-4'),
133
- )
134
- ]),
135
-
136
- html.Div(id='pie-container-1')
137
-
138
  ])
139
 
140
  # # Create a function to generate pie charts
@@ -154,10 +190,9 @@ app.layout = dbc.Container([
154
  @app.callback(
155
  Output('line-graph-1', 'figure'),
156
  Output('bar-graph-1','figure'),
 
157
  Output('line-graph-2', 'figure'),
158
- Output('line-graph-3', 'figure'),
159
- Output('line-graph-4', 'figure'),
160
- Output('pie-container-1', 'children'),
161
  Input("topic-selector", "value"),
162
  Input ("domain-selector", "value"),
163
  Input('date-range', 'start_date'),
@@ -210,44 +245,40 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
210
  # Bar Graph ends
211
 
212
  # filter dataframes based on updated data range
213
- mask_2 = ((df_pos["Topic"] == selected_topic) & (df_pos["domain_folder_name"] == selected_domain) & (df_pos['date'] >= start_date) & (df_pos['date'] <= end_date))
214
- mask_3 = ((df_neu["Topic"] == selected_topic) & (df_neu["domain_folder_name"] == selected_domain) & (df_neu['date'] >= start_date) & (df_neu['date'] <= end_date))
215
- mask_4 = ((df_neg["Topic"] == selected_topic) & (df_neg["domain_folder_name"] == selected_domain) & (df_neg['date'] >= start_date) & (df_neg['date'] <= end_date))
216
- df2_filtered = df_pos.loc[mask_2]
217
- df3_filtered = df_neu.loc[mask_3]
218
- df4_filtered = df_neg.loc[mask_4]
219
 
220
- #create line graphs based on filtered dataframes
221
- line_fig_2 = px.line(df2_filtered, x="date", y="rolling_mean_counts", line_group="FinBERT_label",
222
- title="Positive")
223
- line_fig_3 = px.line(df3_filtered, x="date", y="rolling_mean_counts", line_group="FinBERT_label",
224
- title="Neutral")
225
- line_fig_4 = px.line(df4_filtered, x="date", y="rolling_mean_counts", line_group="FinBERT_label",
226
- title="Negative")
227
 
228
- #set x-axis title and y-axis title in line graphs
229
- line_fig_2.update_layout(
230
- xaxis_title='Data',
231
- yaxis_title='Número de notícias com sentimento positivo')
232
- line_fig_3.update_layout(
233
- xaxis_title='Data',
234
- yaxis_title='Número de notícias com sentimento neutro')
235
- line_fig_4.update_layout(
236
- xaxis_title='Data',
237
- yaxis_title='Número de notícias com sentimento negativo')
238
 
239
- #set label format on y-axis in line graphs
240
- line_fig_2.update_xaxes(tickformat="%b %d<br>%Y")
241
- line_fig_3.update_xaxes(tickformat="%b %d<br>%Y")
242
- line_fig_4.update_xaxes(tickformat="%b %d<br>%Y")
243
 
244
- #set label format on y-axis in line graphs
245
- line_fig_2.update_traces(line_color='#1E88E5')
246
- line_fig_3.update_traces(line_color='#004D40')
247
- line_fig_4.update_traces(line_color='#D81B60')
248
-
249
- #
250
- # pie_container_1 = generate_pie_chart(category)
 
 
 
 
 
 
 
251
  # Map original labels to their translated versions
252
  label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
253
  df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
@@ -259,49 +290,41 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
259
  label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
260
 
261
  # Plot general pie chart
262
- fig_general = px.pie(
263
  values=label_percentages_all,
264
  names=label_percentages_all.index,
265
  title='Distribuição Geral',
266
  color_discrete_sequence=['#039a4d', '#3c03f4', '#ca3919']
267
  )
268
 
269
-
270
  # Get unique media categories
271
  media_categories = df_filtered['Veículos de notícias'].unique()
272
 
273
  # Define colors for each label
274
  label_colors = {'positivo': '#039a4d', 'neutro': '#3c03f4', 'negativo': '#ca3919'}
275
-
276
 
277
- pie_container_1 = []
278
- # Loop through each media category
279
- row_content = []
280
- for media in media_categories:
281
- # Filter DataFrame for current media category
282
- media_df = df_filtered[df_filtered['Veículos de notícias'] == media]
283
-
284
- # Group by FinBERT_label and count occurrences
285
- label_counts = media_df['FinBERT_label_transformed'].value_counts()
286
-
287
- # Calculate percentage of each label
288
- label_percentages = (label_counts / label_counts.sum()) * 100
289
-
290
- # Plot pie chart
291
- fig = px.pie(
292
- values=label_percentages,
293
- names=label_percentages.index,
294
- title=f'Distribuição para {media}',
295
- color_discrete_sequence=[label_colors[label] for label in label_percentages.index]
296
- )
297
- fig = dcc.Graph(figure=fig)
298
- pie_chart = html.Div(fig,className='four columns')
299
- row_content.append(pie_chart)
300
- pie_container_1.append(html.Div(row_content, className='row'))
301
 
302
- return line_fig_1, bar_fig_1, line_fig_2, line_fig_3, line_fig_4, pie_container_1
303
  else:
304
- return {'data': []},{'data': []} ,{'data': []} ,{'data': []} , {'data': []}, {'data':[]}
305
 
306
  # return line_fig_1
307
 
@@ -325,6 +348,30 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
325
  # dff = df[df.country==value]
326
  # return px.line(dff, x='year', y='pop')
327
 
328
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  if __name__ == '__main__':
330
  app.run_server(debug=True)
 
99
  dbc.Row([ # row 7 but needs to be updated
100
  dbc.Col(dcc.Graph(id="bar-graph-1"))
101
  ]),
102
+
103
+ # html.Div(id='pie-container-1'),
104
+ dbc.Row([ # row 9
105
+ dbc.Col(dcc.Graph(id='pie-graph-1'),
106
+ )
107
+ ]),
108
+
109
  dbc.Row([ # row 7
110
  dbc.Label("Escolha um site de notícias:", className="fw-bold")
111
  ]),
 
123
 
124
  )
125
  ]),
126
+
127
+ dbc.Row([ # row 9
128
+ dbc.Col(dcc.Graph(id='line-graph-2'),
129
+ )
130
+ ]),
131
+
132
+ # dbc.Row([ # row 9
133
+ # dbc.Col(dcc.Graph(id='line-graph-2'),
134
+ # )
135
+ # ]),
136
+
137
+ # dbc.Row([ # row 10
138
+ # dbc.Col(dcc.Graph(id='line-graph-3'),
139
+ # )
140
+ # ]),
141
+
142
+ # dbc.Row([ # row 11
143
+ # dbc.Col(dcc.Graph(id='line-graph-4'),
144
+ # )
145
+ # ]),
146
+
147
+ # html.Div(id='pie-container-2'),
148
+ dbc.Row([ # row 9
149
+ dbc.Col(dcc.Graph(id='pie-graph-2'),
150
+ )
151
+ ]),
152
+
153
  dbc.Row([ # row 9
154
+ dbc.Col(
155
+ dash_table.DataTable(
156
+ id='headlines-table',
157
+ columns=[
158
+ {"name": "Headline", "id": "Headline"},
159
+ {"name": "URL", "id": "url"},
160
+ {"name": "Date", "id": "date"},
161
+ {"name": "Sentiment Label", "id": "FinBERT_label"}
162
+ ],
163
+ style_table={'overflowX': 'auto'},
164
+ style_cell={
165
+ 'textAlign': 'left',
166
+ 'whiteSpace': 'normal',
167
+ 'height': 'auto',
168
+ 'minWidth': '180px', 'width': '180px', 'maxWidth': '180px',
169
+ },
170
+ )
171
+ )
172
+ ])
173
 
 
 
 
 
 
 
 
174
  ])
175
 
176
  # # Create a function to generate pie charts
 
190
  @app.callback(
191
  Output('line-graph-1', 'figure'),
192
  Output('bar-graph-1','figure'),
193
+ Output('pie-container-1', 'figure'),
194
  Output('line-graph-2', 'figure'),
195
+ Output('pie-container-2', 'figure'),
 
 
196
  Input("topic-selector", "value"),
197
  Input ("domain-selector", "value"),
198
  Input('date-range', 'start_date'),
 
245
  # Bar Graph ends
246
 
247
  # filter dataframes based on updated data range
248
+ # Filtering data...
249
+ df_filtered = counts[(counts['Topic'] == selected_topic) &
250
+ (counts['domain_folder_name'] == selected_domain) &
251
+ (counts['date'] >= start_date) &
252
+ (counts['date'] <= end_date)]
 
253
 
254
+ # Create a date range for the selected period
255
+ date_range = pd.date_range(start=start_date, end=end_date)
 
 
 
 
 
256
 
257
+ # Create a DataFrame with all possible combinations of classes, topics, and dates
258
+ all_combinations = pd.MultiIndex.from_product([['positive', 'neutral', 'negative'],
259
+ [selected_topic],
260
+ [selected_domain],
261
+ date_range],
262
+ names=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'])
263
+ df_all_combinations = pd.DataFrame(index=all_combinations).reset_index()
 
 
 
264
 
265
+ # Merge filtered DataFrame with DataFrame of all combinations
266
+ merged_df = pd.merge(df_all_combinations, df_filtered, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
 
 
267
 
268
+ # Fill missing values with zeros
269
+ merged_df['count'].fillna(0, inplace=True)
270
+ merged_df['rolling_mean_counts'].fillna(0, inplace=True)
271
+
272
+ # Create line graph...
273
+ line_fig_2 = px.line(merged_df, x="date", y="count", color="FinBERT_label",
274
+ line_group="FinBERT_label", title="Sentiment Over Time",
275
+ labels={"count": "Number of News Articles", "date": "Date"})
276
+
277
+
278
+ # Update layout...
279
+ line_fig_2.update_layout(xaxis_title='Date', yaxis_title='Number of News Articles',
280
+ xaxis=dict(tickformat="%b %d<br>%Y"))
281
+
282
  # Map original labels to their translated versions
283
  label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
284
  df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
 
290
  label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
291
 
292
  # Plot general pie chart
293
+ pie_chart_1 = px.pie(
294
  values=label_percentages_all,
295
  names=label_percentages_all.index,
296
  title='Distribuição Geral',
297
  color_discrete_sequence=['#039a4d', '#3c03f4', '#ca3919']
298
  )
299
 
 
300
  # Get unique media categories
301
  media_categories = df_filtered['Veículos de notícias'].unique()
302
 
303
  # Define colors for each label
304
  label_colors = {'positivo': '#039a4d', 'neutro': '#3c03f4', 'negativo': '#ca3919'}
 
305
 
306
+ # Filter DataFrame for current media category
307
+ media_df = df_filtered[df_filtered['Veículos de notícias'] == media]
308
+
309
+ # Group by FinBERT_label and count occurrences
310
+ label_counts = media_df['FinBERT_label_transformed'].value_counts()
311
+
312
+ # Calculate percentage of each label
313
+ label_percentages = (label_counts / label_counts.sum()) * 100
314
+
315
+ # Plot pie chart
316
+ pie_chart_2 = px.pie(
317
+ values=label_percentages,
318
+ names=label_percentages.index,
319
+ title=f'Distribuição para {media}',
320
+ color_discrete_sequence=[label_colors[label] for label in label_percentages.index]
321
+ )
322
+ # pie_chart_2 = dcc.Graph(figure=fig)
323
+ # pie_chart_2 = html.Div(fig,className='four columns')
 
 
 
 
 
 
324
 
325
+ return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2
326
  else:
327
+ return {'data': []},{'data': []} ,{'data': []} ,{'data': []} , {'data': []}
328
 
329
  # return line_fig_1
330
 
 
348
  # dff = df[df.country==value]
349
  # return px.line(dff, x='year', y='pop')
350
 
351
# Callback that refreshes the headlines table whenever a filter changes.
@app.callback(
    Output('headlines-table', 'data'),
    Input("topic-selector", "value"),
    Input("domain-selector", "value"),
    Input('date-range', 'start_date'),
    Input('date-range', 'end_date')
)
def update_headlines_table(selected_topic, selected_domain, start_date, end_date):
    """Return the rows of the module-level ``df`` that match the active filters.

    Args:
        selected_topic: Value compared for equality against ``df['Topic']``.
        selected_domain: Value compared for equality against
            ``df['domain_folder_name']``.
        start_date: Inclusive lower bound compared against ``df['date']``.
        end_date: Inclusive upper bound compared against ``df['date']``.

    Returns:
        A list of row dictionaries (``DataFrame.to_dict('records')``) sorted by
        sentiment label (positive, neutral, negative) and then by date, or an
        empty list when any filter has no value.
    """
    # Dash passes None for a property that currently has no value; comparing
    # the date column against None would raise inside the callback, so show an
    # empty table instead.
    filters = (selected_topic, selected_domain, start_date, end_date)
    if any(value is None for value in filters):
        return []

    mask = (
        (df['Topic'] == selected_topic)
        & (df['domain_folder_name'] == selected_domain)
        & (df['date'] >= start_date)
        & (df['date'] <= end_date)
    )

    # Work on an explicit copy: assigning a column on a boolean-mask slice of
    # the shared frame is a chained assignment (SettingWithCopyWarning).
    df_filtered = df.loc[mask].copy()

    # An ordered categorical makes the sort follow the listed sentiment order
    # rather than alphabetical order.
    df_filtered['FinBERT_label'] = pd.Categorical(
        df_filtered['FinBERT_label'],
        categories=['positive', 'neutral', 'negative'],
        ordered=True,
    )

    df_filtered = df_filtered.sort_values(by=['FinBERT_label', 'date'])

    return df_filtered.to_dict('records')
375
+
376
  if __name__ == '__main__':
377
  app.run_server(debug=True)