SamiTorjmen commited on
Commit
2e8087a
1 Parent(s): f061af2

Commit all files

Browse files
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["panel", "serve", "/code/dashboard.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "dashboard-students-panel.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
dashboard.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template_string
2
+
3
+ import warnings
4
+ warnings.filterwarnings('ignore')
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ import plotly.express as px
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib
11
+ matplotlib.use('Agg')
12
+
13
+ import seaborn as sns
14
+
15
+ sns.set_style('whitegrid')
16
+
17
+ import panel as pn
18
+ from panel.interact import interact
19
+ pn.extension('plotly') # Interactive tables
20
+
21
+ import hvplot.pandas # Interactive dataframes
22
+
23
+ import holoviews as hv
24
+ from bokeh.events import Event
25
+ hv.extension('bokeh')
26
+
27
+ import os
28
+ os.environ['BOKEH_ALLOW_WS_ORIGIN'] = 'localhost:5006'
29
+
30
+ from bokeh.embed import server_document
31
+ import subprocess
32
+
33
# Load the dataset. A forward slash is used so the path resolves on Linux
# (the Docker image is python:3.9) as well as on Windows; the previous
# backslash form only worked on Windows and relied on an invalid "\S" escape.
df = pd.read_csv("data/StudentsPerformance.csv")
numeric_features = ['math score', 'reading score', 'writing score']
categoric_features = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
# A student passes (1) only when every score is at least 60, otherwise 0.
df['pass'] = (df[numeric_features] >= 60).all(axis=1).astype(int)
37
+
38
+
39
+ from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
40
+ from sklearn.neighbors import KNeighborsClassifier
41
+ from sklearn.ensemble import RandomForestClassifier
42
+ from sklearn.svm import SVC
43
+
44
+ import dashboard
45
+ from dashboard.plots import table_plotly
46
+ from dashboard.plots import pie_quali
47
+ from dashboard.plots import histogram_quali
48
+ from dashboard.plots import boxplot_quali_quanti
49
+ from dashboard.plots import scatter_quanti_quanti
50
+ from dashboard.plots import plotting_target_feature
51
+ from dashboard.plots import corr_heatmap
52
+ from dashboard.plots import qqplot
53
+ from dashboard.plots import hist_residual
54
+ from dashboard.plots import qqplot_residual
55
+ from dashboard.plots import residual_fitted
56
+ from dashboard.plots import residual_leverage
57
+ from dashboard.plots import bivar_quanti_plot
58
+ from dashboard.plots import cross_heatmap
59
+ from dashboard.plots import ols_resid_plot
60
+ from dashboard.plots import confusion_matrix_heatmap
61
+ from dashboard.plots import plot_roc
62
+
63
+
64
+ from dashboard.tables import describe_quali_quanti
65
+ from dashboard.tables import filtered_dataframe
66
+ from dashboard.tables import evaluate_regression_model
67
+ from dashboard.tables import cross_tab
68
+ from dashboard.tables import chi2_tab
69
+ from dashboard.tables import report_to_df
70
+
71
+
72
+ from dashboard.model import model_history
73
+ from dashboard.model import model_cl_history
74
+
75
+ pn.config.sizing_mode = "stretch_width"
76
+
77
+
78
+ reg_list = [
79
+ LinearRegression,
80
+ Ridge,
81
+ Lasso,
82
+ ElasticNet
83
+ ]
84
+
85
+ cl_list= [
86
+ LogisticRegression,
87
+ RandomForestClassifier,
88
+ KNeighborsClassifier,
89
+ SVC
90
+ ]
91
+
92
+ ##### Create widgets
93
+
94
+ ### Exploration widgets (Page 1)
95
+
96
# Dataset
# Columns whose checkbox starts ticked. The names must match df.columns
# exactly, hence 'test preparation course' with spaces (not underscores).
checked_columns = ['lunch', 'race/ethnicity', 'test preparation course', 'math score', 'reading score', 'writing score', 'target_name']
# One checkbox per DataFrame column, keyed by column name.
checkboxes = {col: pn.widgets.Checkbox(name=col, value=col in checked_columns) for col in df.columns}
99
+
100
+ # Histogram
101
+ count = pn.widgets.Select(name='feature',options=[col for col in df.columns], value='parental level of education')
102
+
103
+ # Scatter plot
104
+ abscisse_scatter = pn.widgets.Select(name='x', options=numeric_features, value='reading score')
105
+ ordonnee_scatter = pn.widgets.Select(name='y', options=numeric_features, value='writing score')
106
+ dashboard_fit_line_checkbox = pn.widgets.Checkbox(name='fit line')
107
+
108
+ # Box plot
109
+ quanti = pn.widgets.Select(name='numeric feature', options=numeric_features)
110
+ quali = pn.widgets.Select(name='categorical feature', options=categoric_features, value='parental level of education')
111
+
112
+ # Target Plot
113
+ quali_target = pn.widgets.Select(name='categorical feature', options=categoric_features, value='parental level of education')
114
+
115
+ ### Modeling Widget (Page 2)
116
+
117
+ # Regression
118
+ target_widget = pn.widgets.Select(name='target', options=numeric_features, value='writing score')
119
+ model_name_widget = pn.widgets.Select(name='model', options=reg_list, value=LinearRegression)
120
+
121
+
122
+ # Classification
123
+ model_name_cl_widget = pn.widgets.Select(name='classification model', options=cl_list, value=LogisticRegression)
124
+ color_confusion = pn.widgets.Select(name='Matrix color', options=px.colors.named_colorscales(), value='bupu')
125
+
126
+ ### Analysis Widget (Page 3)
127
+
128
+ # Quanti/Quanti
129
+ color1 = pn.widgets.Select(name='color', options=px.colors.named_colorscales(), value='magma')
130
+ quanti1_corr = pn.widgets.Select(name='x',options=numeric_features, value = 'reading score')
131
+ quanti2_corr = pn.widgets.Select(name='y',options=numeric_features, value = 'writing score')
132
+
133
+ # Quali/Quali
134
+ color2 = pn.widgets.Select(name='color', options=px.colors.named_colorscales(), value='redor')
135
+ quali1_cross = pn.widgets.Select(name='quali 1',options=categoric_features, value = 'parental level of education')
136
+ quali2_cross = pn.widgets.Select(name='quali 2',options=categoric_features, value = 'lunch')
137
+
138
+
139
# Q-Q Plot
quanti_qq = pn.widgets.Select(name='numeric feature', options=numeric_features)
quali_qq = pn.widgets.Select(name='categorical feature', options=categoric_features, value='parental level of education')
# The modalities offered are the distinct values of the categorical feature
# currently selected in quali_qq; a widget's selection is read via `.value`.
modality_qq = pn.widgets.Select(name='modality', options=df[quali_qq.value].unique().tolist())
143
+
144
+
145
def update_modality_options(event):
    """
    Refresh the modality selector after the categorical feature changes.

    The options of ``modality_qq`` become the distinct values of the column
    selected in ``quali_qq``. The previous modality is kept when it is still
    among the new options; otherwise the first option is selected.

    Args:
        event: the watcher event (unused; the widgets are read directly).
    """
    previous = modality_qq.value
    modality_qq.options = df[quali_qq.value].unique().tolist()
    available = modality_qq.options
    modality_qq.value = previous if previous in available else available[0]
153
+
154
+ quali_qq.param.watch(update_modality_options, 'value')
155
+
156
+
157
+ ##### Define reactive elements
158
+
159
+ ### Reactive elements for Exploration (Page 1)
160
+
161
+ dataset = pn.bind(filtered_dataframe, df=df, **checkboxes)
162
+ histogram = pn.bind(histogram_quali,quali=count,df=df)
163
+ scatter_plot = pn.bind(scatter_quanti_quanti, x=abscisse_scatter, y=ordonnee_scatter, df=df, checkbox=dashboard_fit_line_checkbox)
164
+ box_plot = pn.bind(boxplot_quali_quanti, quanti=quanti, quali=quali, df=df)
165
+ describe_table = pn.bind(describe_quali_quanti, quali=quali, quanti=quanti, df=df)
166
+ target_plot = pn.bind(plotting_target_feature, quali=quali_target,df=df)
167
+
168
+
169
+ ### Reactive elements for Modeling (Page 2)
170
+
171
+ # Regression
172
+
173
def update_reg_history(target, model):
    """
    Fit the selected regression model on the module-level ``df``.

    Args:
        target: name of the column to predict.
        model: model class handed over to ``model_history``.

    Returns:
        Whatever ``model_history`` returns for these arguments.
    """
    history = model_history(df=df, target=target, model=model)
    return history
175
+
176
+ reg_history = pn.bind(update_reg_history, target=target_widget, model=model_name_widget)
177
+
178
+ evaluate_reg_table = pn.bind(evaluate_regression_model,history=reg_history)
179
+ residual_fitted_plot = pn.bind(residual_fitted, history=reg_history)
180
+ qqplot_residual_plot = pn.bind(qqplot_residual, history=reg_history)
181
+ scale_location_plot = pn.bind(residual_fitted, history=reg_history, root=True)
182
+ residual_leverage_plot = pn.bind(residual_leverage, history=reg_history)
183
+
184
+ # Classification
185
def update_cl_history(model_cl):
    """
    Fit the selected classification model on the module-level ``df``.

    Args:
        model_cl: model class handed over to ``model_cl_history``.

    Returns:
        Whatever ``model_cl_history`` returns for this model.
    """
    classification = model_cl_history(df=df, model_cl=model_cl)
    return classification
187
+
188
+ cl_classification = pn.bind(update_cl_history, model_cl=model_name_cl_widget)
189
+ evaluate_cl_table = pn.bind(report_to_df,classification=cl_classification)
190
+
191
+ confusion_plot = pn.bind(confusion_matrix_heatmap, classification=cl_classification,color=color_confusion)
192
+
193
+ roc = pn.bind(plot_roc, classification=cl_classification)
194
+
195
+ ### Reactive elements for Analysis (Page 3)
196
+ corr_plot = pn.bind(corr_heatmap, df=df, quanti1=quanti1_corr,quanti2=quanti2_corr, color=color1)
197
+ joint_plot = pn.bind(bivar_quanti_plot, df=df, quanti1=quanti1_corr, quanti2=quanti2_corr)
198
+
199
+ cross_table = pn.bind(cross_tab, df=df, quali1=quali1_cross, quali2=quali2_cross)
200
+ chi2_table = pn.bind(chi2_tab, df=df, quali1=quali1_cross, quali2=quali2_cross)
201
+ cross_heatmap_plot = pn.bind(cross_heatmap, df=df, quali1=quali1_cross, quali2=quali2_cross, color=color2)
202
+
203
+ box_plot2 = pn.bind(boxplot_quali_quanti, quanti=quanti_qq, quali=quali_qq, df=df)
204
+ qq_plot = pn.bind(qqplot, quali=quali_qq, quanti=quanti_qq, modality=modality_qq, df=df)
205
+ ols_plot = pn.bind(ols_resid_plot, df=df, quanti=quanti_qq, quali=quali_qq)
206
+
207
+ ##### Define Sidebar
208
+
209
+ ### Exploration Sidebar (Page 1)
210
+
211
+ # Cards
212
+ data_card = pn.Card(pn.Column(*checkboxes.values()), title='Data')
213
+ histogram_card = pn.Card(pn.Column(count), title='Histogram')
214
+ scatter_card = pn.Card(pn.Column(dashboard_fit_line_checkbox, abscisse_scatter, ordonnee_scatter), title='Scatter Plot')
215
+ box_card = pn.Card(pn.Column(quanti, quali), title='Box Plot')
216
+ target_card = pn.Card(pn.Column(quali_target), title='Target Plot')
217
+
218
+
219
+ # Sidebar
220
+ exploration_sidebar = pn.Column('# Parameters\n This section changes parameters for exploration plots',
221
+ data_card,
222
+ histogram_card,
223
+ scatter_card,
224
+ box_card,
225
+ target_card,
226
+ sizing_mode='stretch_width',
227
+ )
228
+
229
+ ### Modeling Sidebar (Page 2)
230
+
231
+ # Cards
232
+ regression_card = pn.Card(pn.Column(model_name_widget,target_widget), title='Regression',sizing_mode = "stretch_width")
233
+
234
+ classification_card = pn.Card(pn.Column(model_name_cl_widget, color_confusion), title='Classification',sizing_mode = "stretch_width")
235
+
236
+
237
+
238
+ # Sidebar
239
+ modeling_sidebar = pn.Column('# Parameters\n This section changes parameters for modeling plots',
240
+ regression_card,
241
+ classification_card,
242
+ sizing_mode='stretch_width'
243
+ )
244
+
245
+
246
+ ### Analysis Sidebar (Page 3)
247
+
248
+ # Cards
249
+ quanti_quanti_card = pn.Card(pn.Column(color1,quanti1_corr,quanti2_corr), title='Quantitative vs Quantitative')
250
+ quali_quali_card = pn.Card(pn.Column(color2,quali1_cross, quali2_cross), title='Qualitative vs Qualitative')
251
+ quali_quanti_card = pn.Card(pn.Column(quanti_qq,pn.Column(quali_qq, modality_qq)), title='Qualitative vs Quantitative')
252
+
253
+ # Sidebar
254
+ analysis_sidebar = pn.Column('# Parameters\n This section changes parameters for further analysis plots',
255
+ quanti_quanti_card,
256
+ quali_quali_card,
257
+ quali_quanti_card,
258
+ sizing_mode='stretch_width'
259
+ )
260
+
261
+ ##### Define Main
262
+
263
+ ### Main Exploration (Page 1)
264
+
265
+ # Cards
266
+ description = "This dataset contains information about the performance of students in various subjects. The data includes their scores in math, reading, and writing, as well as their gender, race/ethnicity, parental education, and whether they qualify for free/reduced lunch."
267
+ description_card = pn.Card(description, title='Description')
268
+
269
+ dataset_card = pn.Card(pn.Row(pn.Column('# Data ', description),
270
+ pn.Column(dataset)),
271
+ title='Description')
272
+
273
+ boxplot_card = pn.Row(pn.Card(describe_table, title='Describe Table'),
274
+ pn.Card(box_plot, title='Box Plot'))
275
+
276
+
277
+ scatter_hist_card = pn.Row(pn.Card(histogram, title='Histogram'),
278
+ pn.Card(scatter_plot, title='Scatter Plot'))
279
+ target_card = pn.Card(target_plot, title='Target Plot')
280
+
281
+ # Content
282
+ exploration_main_content = pn.Column(
283
+ pn.Row(dataset_card),
284
+ pn.Row(scatter_hist_card),
285
+ pn.Row(boxplot_card),
286
+ pn.Row(target_card),
287
+ sizing_mode='stretch_width')
288
+
289
+
290
+ ### Main Modeling (Page 2)
291
+
292
+ # Cards
293
+ evaluate_table_card = pn.Card(evaluate_reg_table, title="Evaluation")
294
+ residual_fitted_card = pn.Card(residual_fitted_plot ,title="Residual Plot")
295
+ qqplot_residual_card = pn.Card(qqplot_residual_plot,title="Normal Q-Q")
296
+ scale_location_card = pn.Card(scale_location_plot, title="Scale Location")
297
+ residual_leverage_card = pn.Card(residual_leverage_plot, title="Residuals vs Leverage")
298
+
299
+ # Regroup cards
300
+ regression_card = pn.Card(pn.Row(evaluate_table_card),
301
+ pn.Row(residual_fitted_card,qqplot_residual_card),
302
+ pn.Row(scale_location_card,residual_leverage_card),
303
+ title = 'Regression')
304
+
305
+ ## Classification
306
+
307
+ evaluate_cl_card = pn.Card(evaluate_cl_table, title="Evaluation Table")
308
+ confusion_card = pn.Card(confusion_plot, title="Confusion Matrix")
309
+ roc_card = pn.Card(roc, title='ROC')
310
+
311
+ classification_card = pn.Card(pn.Row(evaluate_cl_card),
312
+ pn.Row(confusion_card,roc_card),
313
+ title='Classification')
314
+
315
+
316
+ # Content
317
+ modeling_main_content = pn.Column(pn.Row(regression_card),
318
+ pn.Row(classification_card),
319
+ sizing_mode='stretch_width')
320
+
321
+
322
+ ### Main Analysis(Page 3)
323
+
324
# Cards
corr_card = pn.Card(corr_plot, title='Pearson Correlation Matrix')
joint_card = pn.Card(joint_plot, title='Bivariate Plot')

cross_card = pn.Card(cross_table, title='Contingency Table')
chi2_card = pn.Card(chi2_table, title='Chi2 Test')
cross_heatmap_card = pn.Card(cross_heatmap_plot, title='Contingency Heatmap')

boxplot_card = pn.Card(box_plot2, title='Box Plot')
qq_card = pn.Card(qq_plot, title='Q-Q Plot')
ols_card = pn.Card(ols_plot, title='OLS Residuals')

# Group the cards by kind of dependency. Each title is formatted once, here,
# from the widgets' current selection (read through `.value`).
quanti_quanti_card = pn.Card(pn.Row(corr_card, joint_card),
                             title=f'Statistic Dependency {quanti1_corr.value} vs {quanti2_corr.value} (quantitative/quantitative)')

quali_quali_card = pn.Card(pn.Row(pn.Column(cross_card, chi2_card),
                                  cross_heatmap_card),
                           title=f'Statistic Dependency {quali1_cross.value} vs {quali2_cross.value} (qualitative/qualitative)')

quali_quanti_card = pn.Card(pn.Row(boxplot_card),
                            pn.Row(ols_card, qq_card),
                            title=f'Statistic Dependency {quali_qq.value} vs {quanti_qq.value} (qualitative/quantitative)')
348
+
349
+
350
+ # Content
351
+ analysis_main_content = pn.Column(pn.Row(quanti_quanti_card),
352
+ pn.Row(quali_quali_card),
353
+ pn.Row(quali_quanti_card),
354
+ sizing_mode='stretch_width')
355
+
356
+
357
+ ##### Create Callback to change sidebar content
358
+
359
+ main_tabs = pn.Tabs(('Exploration', exploration_main_content),
360
+ ('Modeling', modeling_main_content),
361
+ ('Further Analysis', analysis_main_content))
362
+
363
def _show_sidebar_for_tab(active):
    """Show only the sidebar that belongs to the tab at index ``active``."""
    exploration_sidebar.visible = active == 0
    modeling_sidebar.visible = active == 1
    # Any index other than 0 or 1 selects the analysis sidebar.
    analysis_sidebar.visible = active not in (0, 1)


def on_tab_change(event):
    """
    Swap the visible sidebar when the active tab changes.

    Args:
        event: watcher event whose ``new`` attribute is the index of the
            newly active tab.
    """
    _show_sidebar_for_tab(event.new)


main_tabs.param.watch(on_tab_change, 'active')

# The watcher only fires when the tab changes, so apply the initial state
# explicitly; otherwise all three sidebars are shown until the first switch.
_show_sidebar_for_tab(main_tabs.active)
387
+
388
+ ##### Layout
389
+
390
+ template = pn.template.VanillaTemplate(
391
+
392
+ # title
393
+ title = "Student Performance in Exams",
394
+
395
+ # sidebar
396
+ sidebar = pn.Column(exploration_sidebar, modeling_sidebar, analysis_sidebar, sizing_mode='stretch_width'),
397
+
398
+ # main
399
+ main = main_tabs
400
+ )
401
+
402
+ #template.header.append(dark_mode_toggle)
403
+ ##### Show Dashboard
404
+
405
+
406
+ template.servable()
407
+
408
+
409
+
410
+
dashboard/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dashboard.plots import table_plotly
2
+ from dashboard.plots import pie_quali
3
+ from dashboard.plots import histogram_quali
4
+ from dashboard.plots import boxplot_quali_quanti
5
+ from dashboard.plots import scatter_quanti_quanti
6
+ from dashboard.plots import plotting_target_feature
7
+
8
+
9
+ from dashboard.helpers import plotly_to_plt_colors
10
+ from dashboard.helpers import color_s
11
+ from dashboard.helpers import categarray
dashboard/helpers.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.express as px
2
+
3
+
4
+ import re
5
+ import plotly.express as px
6
+
7
+
8
def rgb_to_hex(rgb):
    """
    Convert an RGB triple to a lowercase ``#rrggbb`` hexadecimal string.

    Args:
        rgb: iterable of exactly three integers (red, green, blue). Each
            component is rendered as at least two hexadecimal digits.

    Returns:
        str: The hexadecimal representation of the RGB color.
    """
    red, green, blue = rgb
    return f'#{red:02x}{green:02x}{blue:02x}'
25
+
26
+
27
def extract_rgb(rgb_string):
    """
    Extract the numbers from a string in the format "rgb(r, g, b)".

    Every run of decimal digits found in the string is converted to ``int``
    and the results are returned, in order, as a tuple.
    """
    return tuple(int(digits) for digits in re.findall(r'\d+', rgb_string))
37
+
38
+
39
def plotly_to_plt_colors(rgb_string):
    """Translate an ``"rgb(r, g, b)"`` color string into a ``#rrggbb`` hex string."""
    channels = extract_rgb(rgb_string)
    return rgb_to_hex(channels)
41
+
42
def color_s(column, apply=True):
    """
    Return the color sequence to use for the modalities of ``column``.

    With ``apply`` true, 'gender' and 'pass' each get a dedicated pair of
    colors picked from Plotly's qualitative "Safe" palette. In every other
    case the full "Safe" palette is returned.
    """
    palette = px.colors.qualitative.Safe

    if apply and column == 'gender':
        return [palette[1], palette[0]]
    if apply and column == 'pass':
        return [palette[3], palette[5]]

    return palette
54
+
55
+
56
def categarray(column):
    """
    Return the display order of the modalities of ``column``.

    Known columns map to a fixed list of modality labels; any other column
    yields ``None``.
    """
    orderings = (
        ('gender', ['male', 'female']),
        ('race/ethnicity', ['group A', 'group B', 'group C', 'group D', 'group E']),
        ('parental level of education', ['some high school', 'high school', 'some college',
                                         "associate's degree", "bachelor's degree", "master's degree"]),
        ('pass', ['0', '1']),
    )
    for name, modalities in orderings:
        if column == name:
            return modalities
    return None
dashboard/model.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.preprocessing import StandardScaler
4
+ from sklearn.svm import SVC
5
+
6
class History:
    """Plain container for the artefacts of one regression run."""

    # Key order used by to_dict(); each key is also the attribute name.
    _FIELDS = ('X_train', 'y_train', 'X_test', 'y_test', 'model', 'y_pred', 'residuals')

    def __init__(self, model, X_train, y_train, X_test, y_test, y_pred, residuals):
        """Store the model, the train/test splits, the predictions and the residuals as given."""
        self.model = model
        self.X_train, self.y_train = X_train, y_train
        self.X_test, self.y_test = X_test, y_test
        self.y_pred = y_pred
        self.residuals = residuals

    def to_dict(self):
        """Return the stored values as a dict keyed by attribute name."""
        return {field: getattr(self, field) for field in self._FIELDS}
26
+
27
+ from sklearn.model_selection import train_test_split
28
def model_history(df, target, model):
    """
    Fit a regression model on ``df`` and collect the results.

    Args:
        df: DataFrame holding the features and the ``target`` column.
        target: name of the column to predict.
        model: estimator class (not an instance); it is instantiated with
            its default arguments.

    Returns:
        History: model class name, standardized train/test features,
        train/test targets, test predictions and test residuals
        (``y_test - y_pred``).
    """
    Y = df[target]

    # Remove the target and the derived/identifier columns when present.
    columns_to_drop = ['pass', 'target_name', 'id', target]
    columns_to_drop = [col for col in columns_to_drop if col in df.columns]

    X = df.drop(columns_to_drop, axis=1)

    # One-hot encode the categorical features.
    X = pd.get_dummies(X)

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)

    # The scaler is fitted on the training split only, then reused on the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model_instance = model()
    model_instance.fit(X_train, y_train)

    y_pred = model_instance.predict(X_test)

    residuals = y_test - y_pred

    # Use the class name instead of slicing the repr, which only yields the
    # bare name when the repr ends in "()".
    model_name = type(model_instance).__name__
    history = History(model_name, X_train, y_train, X_test, y_test, y_pred, residuals)
    return history
54
+
55
+
56
def df_reg(df):
    """
    Build a regression-ready copy of the students DataFrame.

    The copy has one-hot encoded 'gender', 'race/ethnicity' and 'lunch'
    columns (prefixes 'gender_', 'race_', 'lunch_'), integer-coded
    'parental level of education' and 'test preparation course', and an
    'average_score' column holding the row mean of the three scores. The raw
    scores, one dummy each of gender and lunch, and the derived
    'pass'/'target_name' columns are removed.

    Args:
        df: source DataFrame; it is not modified.

    Returns:
        The transformed copy.
    """
    df_copy = df.copy()
    df_copy = pd.get_dummies(df_copy, prefix='gender_', columns=['gender'])
    df_copy = pd.get_dummies(df_copy, prefix='race_', columns=['race/ethnicity'])
    df_copy = pd.get_dummies(df_copy, prefix='lunch_', columns=['lunch'])

    edu_dict = {"some high school": 0, "high school": 1, "some college": 2,
                "associate's degree": 3, "bachelor's degree": 4, "master's degree": 5}
    df_copy['parental level of education'] = df_copy['parental level of education'].replace(edu_dict)

    prep_dict = {"none": 0, "completed": 1}
    df_copy['test preparation course'] = df_copy['test preparation course'].replace(prep_dict)

    df_copy["average_score"] = df_copy[["math score", "reading score", "writing score"]].mean(axis=1)

    # 'pass' and 'target_name' are derived columns that are not always
    # present, and a dummy column only exists when its category occurs in the
    # data, so missing labels are ignored rather than raising KeyError.
    df_copy = df_copy.drop(
        columns=['math score', 'reading score', 'writing score',
                 'gender__female', 'lunch__free/reduced', 'pass', 'target_name'],
        errors='ignore',
    )
    return df_copy
68
+
69
+
70
+ # Classification
71
+
72
class Classification:
    """Plain container for the artefacts of one classification run."""

    # (dict key, attribute name) pairs, in the order used by to_dict().
    _EXPORTS = (
        ('X_train', 'X_train_cl'),
        ('y_train', 'y_train_cl'),
        ('X_test', 'X_test_cl'),
        ('y_test', 'y_test_cl'),
        ('model', 'model_cl'),
        ('y_pred', 'y_pred_cl'),
        ('y_score', 'y_score_cl'),
        ('classes', 'classes'),
    )

    def __init__(self, model_cl, X_train_cl, y_train_cl, X_test_cl, y_test_cl, y_pred_cl, y_score_cl,classes):
        """Store the model, the splits, the predictions, the scores and the class labels as given."""
        self.model_cl = model_cl
        self.X_train_cl, self.y_train_cl = X_train_cl, y_train_cl
        self.X_test_cl, self.y_test_cl = X_test_cl, y_test_cl
        self.y_pred_cl, self.y_score_cl = y_pred_cl, y_score_cl
        self.classes = classes

    def to_dict(self):
        """Return the stored values as a dict with the ``_cl`` suffix dropped from the keys."""
        return {key: getattr(self, attr) for key, attr in self._EXPORTS}
94
+
95
def model_cl_history(df, model_cl):
    """
    Fit a classifier predicting the 'pass' column and collect the results.

    Args:
        df: DataFrame holding the features and a 'pass' column.
        model_cl: classifier class (not an instance). ``SVC`` is built with
            ``probability=True``; any other class with its default arguments.

    Returns:
        Classification: model class name, train/test features and targets,
        test predictions, the second column of ``predict_proba`` on the test
        features, and the unique values of the target.
    """
    Y = df['pass']

    # Remove the target, the score columns and the derived/identifier columns when present.
    columns_to_drop = ['pass', 'target_name', 'id','math score','reading score','writing score']
    columns_to_drop = [col for col in columns_to_drop if col in df.columns]

    X = df.drop(columns_to_drop, axis=1)

    # One-hot encode the categorical features.
    X = pd.get_dummies(X)

    X_train_cl, X_test_cl, y_train_cl, y_test_cl = train_test_split(X, Y, test_size=0.15, random_state=42)

    # predict_proba is called below; SVC only provides it when built with probability=True.
    if model_cl is SVC:
        model_instance = model_cl(probability=True)
    else:
        model_instance = model_cl()

    model_instance.fit(X_train_cl, y_train_cl)

    y_pred_cl = model_instance.predict(X_test_cl)
    y_score_cl = model_instance.predict_proba(X_test_cl)[:,1]

    classes = np.unique(Y)

    # Use the class name: slicing the repr turned "SVC(probability=True)"
    # into "SVC(probability=Tru".
    model_name = type(model_instance).__name__
    classification = Classification(model_name, X_train_cl, y_train_cl, X_test_cl, y_test_cl, y_pred_cl,y_score_cl, classes)
    return classification
dashboard/plots.py ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.express as px
2
+ import plotly.graph_objects as go
3
+ import plotly.figure_factory as ff
4
+
5
+ import warnings
6
+ warnings.filterwarnings('ignore')
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+ import hvplot.pandas
11
+ import holoviews as hv
12
+ import scipy.stats as stats
13
+ import statsmodels.api as sm
14
+ from statsmodels.formula.api import ols
15
+ from sklearn.preprocessing import LabelEncoder
16
+ from sklearn.metrics import roc_curve, auc
17
+
18
+
19
+ import matplotlib.pyplot as plt
20
+ import seaborn as sns
21
+
22
+ import panel as pn
23
+ pn.extension('plotly')
24
+
25
+ from dashboard.helpers import plotly_to_plt_colors, color_s, categarray
26
+
27
+
28
+ #pn.config.sizing_mode = "stretch_width"
29
+
30
+ ##### Table
31
+
32
def table_plotly(df):
    """
    Render ``df`` as a Plotly table figure.

    The header shows the column names and the cells show the column values;
    both are centered on a white fill with 'darkslategray' lines and text.
    The figure margins are set to zero.
    """
    header_style = dict(
        values=list(df.columns),
        fill_color='white',
        align='center',
        line_color='darkslategray',
        font=dict(color='darkslategray', size=12),
    )
    cell_style = dict(
        values=[df[name] for name in df.columns],
        fill_color='white',
        align='center',
        line_color='darkslategray',
        font=dict(color='darkslategray', size=11),
    )

    figure = go.Figure(data=go.Table(header=header_style, cells=cell_style))
    figure.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    return figure
53
+
54
+
55
+ ##### Univariée
56
+
57
+ ## Pie
58
def pie_quali(quali,df):
    """
    Plot a pie chart of a categorical variable.

    Args:
        quali: name of the categorical column, e.g. 'gender'.
        df: DataFrame containing that column.

    Returns:
        The Plotly figure, with one sector per modality of ``quali``.
    """
    grouped = df.groupby(quali)
    if 'id' in df.columns:
        # Count distinct ids per modality when an 'id' column is available.
        counts = grouped['id'].nunique()
    else:
        # No 'id' column: fall back to the number of rows per modality
        # instead of failing on the missing column.
        counts = grouped.size()
    counts = counts.rename('count').reset_index()

    # Create the pie using Plotly
    fig = px.pie(counts,
                 names=quali,
                 values='count',
                 title=(f"Distribution of {quali}"),
                 color_discrete_sequence=color_s(quali))

    # Kept from the original implementation.
    # NOTE(review): a pie has no x axis, so this probably has no visible effect - confirm.
    fig.update_xaxes(categoryorder='array', categoryarray=categarray(quali))

    return fig
81
+
82
+ ## Histogram
83
def histogram_quali(quali,df):
    """
    Plot a histogram of a categorical variable.

    Args:
        quali: name of the categorical column, e.g. 'gender'.
        df: DataFrame containing that column.

    Returns:
        The Plotly figure, colored by modality, with framed axes on a white
        background and the x categories ordered by ``categarray(quali)``.
    """
    fig = px.histogram(
        df,
        x=quali,
        title=(f"Distribution of {quali}"),
        color=quali,
        color_discrete_sequence=color_s(quali),
    )

    # Axis frame shared by both axes; the y axis additionally gets a grid color.
    frame = dict(showline=True, linewidth=1, linecolor='gray', mirror=True)
    x_axis = dict(title=dict(font=dict(size=20)), **frame)
    y_axis = dict(title=dict(font=dict(size=20)), gridcolor='whitesmoke', **frame)
    fig.update_layout(xaxis=x_axis, yaxis=y_axis, plot_bgcolor='white')

    # Rearrange the categories along the x axis.
    fig.update_xaxes(type='category', categoryorder='array', categoryarray=categarray(quali))

    return fig
119
+
120
+ ##### Bivariée
121
+
122
+ ## Box Plot
123
def boxplot_quali_quanti(quali, quanti, df):
    """
    Plot box plots of a numerical variable for each modality of a categorical one.

    Args:
        quali: name of the categorical column (x axis and color).
        quanti: name of the numerical column (y axis).
        df: DataFrame containing both columns.

    Returns:
        The Plotly figure, with framed axes on a white background and the x
        categories ordered by ``categarray(quali)``.
    """
    fig = px.box(
        df,
        x=quali,
        y=quanti,
        color=quali,
        color_discrete_sequence=color_s(quali),
        title=f"{quanti} vs {quali}",
    )

    # Axis frame shared by both axes; the y axis additionally gets a grid color.
    frame = dict(showline=True, linewidth=1, linecolor='gray', mirror=True)
    x_axis = dict(title=dict(font=dict(size=20)), **frame)
    y_axis = dict(title=dict(font=dict(size=20)), gridcolor='whitesmoke', **frame)
    fig.update_layout(xaxis=x_axis, yaxis=y_axis, plot_bgcolor='white')

    fig.update_xaxes(categoryorder='array', categoryarray=categarray(quali))

    return fig
159
+
160
+ ## Scatter Plot
161
def scatter_quanti_quanti(x, y, df, checkbox):
    """
    Scatter plot of two numerical columns, optionally with a fitted line.

    Args:
        x: name of the column on the x axis.
        y: name of the column on the y axis.
        df: DataFrame containing both columns.
        checkbox: when truthy, a pink slope built from the scatter is overlaid.

    Returns:
        The scatter element, or the scatter overlaid with the slope.
    """
    points = df.hvplot.scatter(x, y).opts(width=450)
    points.opts(line_color='black')

    if not checkbox:
        return points

    trend = hv.Slope.from_scatter(points).opts(line_color='pink')
    return points * trend
170
+
171
+ ## Target Plot
172
def plotting_target_feature(quali, df):
    """
    Plot how the pass/fail outcome relates to one categorical feature.

    The figure has two stacked axes: a count plot of ``quali`` split by the
    'target_name' hue with a percentage written above each bar, and a point
    plot of the 'pass' column for each value of ``quali``.

    Args:
        quali: name of the categorical column.
        df: DataFrame with that column and a 'pass' column holding 0/1.

    Returns:
        A ``pn.pane.Matplotlib`` pane wrapping the figure.
    """
    # NOTE: this writes a 'target_name' column into the DataFrame that was
    # passed in (no copy is made), so the caller's frame is modified.
    df['target_name'] = df['pass'].map({0: 'Fail', 1: 'Pass'})
    # Figure initiation
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(18,12))

    ### Number of occurrences per category - target pair
    order = categarray(quali) # Get the order of the categorical values

    # Set the color palette (converted from Plotly "rgb(...)" strings to hex)
    colors = list(map(lambda x: plotly_to_plt_colors(x), color_s(quali,apply=False)))
    sns.set_palette(sns.color_palette(colors))

    ax1 = sns.countplot(x=quali, hue="target_name", data=df, order=order, ax=axes[0])
    # X-axis Label
    ax1.set_xlabel(quali, fontsize=14)
    ax1.tick_params(axis='x', labelsize=14)
    # Y-axis Label
    ax1.set_ylabel('Number of occurrences', fontsize=14)
    # Adding Super Title (One for a whole figure)
    fig.suptitle('Graphiques '+quali + ' par rapport à la réussite' , fontsize=18)
    # Setting Legend location
    ax1.legend(loc=1)

    ### Adding percents over bars
    # Getting heights of our bars
    height = [p.get_height() for p in ax1.patches]
    # Counting number of bar groups
    ncol = int(len(height)/2)
    # Counting total height of groups
    # NOTE(review): assumes ax1.patches holds exactly two equally sized runs of
    # bars, one per hue level, in the same category order - confirm for the
    # seaborn version in use.
    total = [height[i] + height[i + ncol] for i in range(ncol)] * 2
    # Looping through bars
    for i, p in enumerate(ax1.patches):
        # Adding percentages
        ax1.text(p.get_x()+p.get_width()/2, height[i]*1.01 + 10,
                 '{:1.0%}'.format(height[i]/total[i]), ha="center", size=14)


    ### Point plot of 'pass' for every value of the feature
    ax2 = sns.pointplot(x=quali, y='pass', data=df, order=order, ax=axes[1])
    # X-axis Label
    ax2.set_xlabel(quali, fontsize=14)
    ax2.tick_params(axis='x', labelsize=14)
    # Y-axis Label
    ax2.set_ylabel('Pourcentage de réussite', fontsize=14)

    # Close the current pyplot figure; the fig object is still handed to the pane below.
    plt.close()

    return pn.pane.Matplotlib(fig, sizing_mode='stretch_both')
220
+
221
+ ## Heatmap
222
def corr_heatmap(df, quanti1, quanti2, color):
    """Return an annotated Plotly heatmap of the Pearson correlation of two columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    quanti1, quanti2 : str
        Names of the two quantitative columns to correlate.
    color : str or list
        Colorscale forwarded to Plotly.

    Returns
    -------
    plotly figure
        The 2x2 correlation matrix with each cell annotated with its value
        rounded to two decimals.
    """
    # Calculate the correlation matrix
    corrmat = df[[quanti1, quanti2]].corr(method='pearson')

    # Create a Plotly heatmap
    fig = ff.create_annotated_heatmap(
        z=corrmat.values,
        x=list(corrmat.columns),
        y=list(corrmat.index),
        annotation_text=corrmat.round(2).values,
        colorscale=color,
        # Pearson's r lies in [-1, 1]; a lower bound of 0 would paint every
        # negative correlation with the same colour as "no correlation".
        zmin=-1,
        zmax=1,
        showscale=True
    )

    # Update layout
    fig.update_layout(
        title='Pearson Correlation of Features',
        xaxis=dict(side='bottom', tickangle=0),
        yaxis=dict(autorange='reversed')
    )

    return fig
247
+
248
+
249
+
250
def bivar_quanti_plot(df, quanti1, quanti2):
    """Return a Panel pane showing a red KDE joint plot of two columns of `df`.

    `quanti1` is plotted on the x axis and `quanti2` on the y axis.
    """
    joint_grid = sns.jointplot(data=df, x=quanti1, y=quanti2, kind='kde', color='red')
    figure = joint_grid.fig
    # Close the current pyplot figure; the pane keeps its own reference.
    plt.close()
    return pn.pane.Matplotlib(figure, sizing_mode='stretch_both')
254
+
255
def cross_heatmap(df, quali1, quali2, color):
    """Return an annotated Plotly heatmap of the row-normalised crosstab of two columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    quali1 : str
        Column whose values form the rows; each row is normalised to sum to 1.
    quali2 : str
        Column whose values form the columns.
    color : str or list
        Colorscale forwarded to Plotly.

    Returns
    -------
    plotly figure
        Heatmap of the proportions, each cell annotated with its value
        rounded to two decimals.
    """
    # Share of each `quali2` value within every `quali1` value.
    crosstab = pd.crosstab(df[quali1], df[quali2], normalize='index')

    # Create a Plotly heatmap
    fig = ff.create_annotated_heatmap(
        z=crosstab.values,
        x=list(crosstab.columns),
        y=list(crosstab.index),
        annotation_text=crosstab.round(2).values,
        colorscale=color,
        zmin=0,
        zmax=1,
        showscale=True
    )

    # Update layout. The title describes the crosstab actually displayed
    # (it used to be the correlation heatmap's title).
    fig.update_layout(
        title=f'Cross-tabulation of {quali1} by {quali2} (row proportions)',
        xaxis=dict(side='bottom', tickangle=0),
        yaxis=dict(autorange='reversed')
    )

    return fig
279
+
280
def ols_resid_plot(df, quali, quanti):
    """Scatter the residuals of an OLS fit of `quanti` on label-encoded `quali`.

    The categorical column is replaced by integer codes on a copy of `df`,
    the formula ``Q('<quanti>') ~ Q('<quali>')`` is fitted, and the residuals
    are plotted against the encoded category together with a red horizontal
    line at zero.
    """
    encoded = df.copy()
    encoded[quali] = LabelEncoder().fit_transform(df[quali])

    formula = f"Q('{quanti}') ~ Q('{quali}')"
    fitted = ols(formula, data=encoded).fit()

    x_label = f'{quali}'
    residual_frame = pd.DataFrame({x_label: encoded[quali], 'Residuals OLS': fitted.resid})
    points = residual_frame.hvplot.scatter(x_label, 'Residuals OLS')
    points.opts(line_color='black')

    zero_line = hv.HLine(0).opts(color='red', line_width=1)
    return points * zero_line
294
+
295
+
296
+ ## Q-Q Plot
297
def qqplot(quali, quanti, modality, df):
    """Q-Q plot of `quanti` restricted to the rows where `quali` equals `modality`.

    The quantile pairs come from ``scipy.stats.probplot`` (without a fit) and
    are overlaid with a red line fitted through the scatter.
    """
    in_modality = df[quali] == modality
    sample = df[quanti][in_modality]

    theoretical, ordered = stats.probplot(sample, fit=False)
    qq_frame = pd.DataFrame({'x': theoretical, 'y': ordered})

    plot_title = f"Q-Q Plot for '{quanti}' with '{quali}' = '{modality}'"
    points = qq_frame.hvplot.scatter('x', 'y', title=plot_title)
    points.opts(line_color='black')

    fitted_line = hv.Slope.from_scatter(points).opts(line_color='red', line_width=1)
    return points * fitted_line
310
+
311
+
312
+ ## Residuals
313
+
314
def hist_residual(history):
    """Return a Plotly histogram of `history.residuals`.

    The title embeds ``str(history.model)``; both axes get a gray mirrored
    frame and the plot background is white.
    """
    histogram = px.histogram(
        x=history.residuals,
        title=f"Distribution of residuals for {str(history.model)}",
        color_discrete_sequence=px.colors.qualitative.Safe[2:],
    )

    # Frame settings shared by both axes.
    axis_frame = dict(showline=True, linewidth=1, linecolor='gray', mirror=True)
    x_axis = dict(title=dict(text='Residuals', font=dict(size=20)), **axis_frame)
    y_axis = dict(title=dict(font=dict(size=20)), gridcolor='whitesmoke', **axis_frame)

    histogram.update_layout(xaxis=x_axis, yaxis=y_axis, plot_bgcolor='white')
    return histogram
339
+
340
def residual_fitted(history, root=False):
    """Scatter residuals against predicted values with a fitted red line.

    With ``root=False`` the y axis shows `history.residuals` as-is; with
    ``root=True`` it shows the square root of their absolute value.
    """
    if root:
        x_col, y_col = 'Predicted values', 'Root Standardized Residuals'
        y_values = history.residuals.apply(lambda r: np.sqrt(np.abs(r)))
    else:
        x_col, y_col = 'Predicted Values', 'Residuals'
        y_values = history.residuals

    frame = pd.DataFrame({x_col: history.y_pred, y_col: y_values})
    points = frame.hvplot.scatter(x_col, y_col)
    points.opts(line_color='black')

    trend = hv.Slope.from_scatter(points).opts(line_color='red', line_width=1)
    return points * trend
353
+
354
def qqplot_residual(history):
    """Q-Q plot of `history.residuals` with a red line fitted through the points.

    The quantile pairs come from ``scipy.stats.probplot`` (without a fit).
    """
    x_col = 'Theorical Quantiles'
    y_col = 'Standardized residuals'

    theoretical, ordered = stats.probplot(history.residuals, fit=False)
    points = pd.DataFrame({x_col: theoretical, y_col: ordered}).hvplot.scatter(x_col, y_col)
    points.opts(line_color='black')

    fitted_line = hv.Slope.from_scatter(points).opts(line_color='red', line_width=1)
    return points * fitted_line
365
+
366
def residual_leverage(history):
    """Residuals-vs-leverage diagnostic plot coloured by Cook's distance.

    An OLS model with an added constant is fitted on `history.X_train` /
    `history.y_train`; its influence measures provide the leverage, the
    standardized residuals and Cook's distance of each training observation.

    Returns
    -------
    holoviews overlay
        Scatter of standardized residual against leverage, coloured by the
        min-max normalised Cook's distance, with a red line fitted through
        the points.
    """
    model = sm.regression.linear_model.OLS(
        history.y_train, sm.add_constant(history.X_train)
    ).fit()
    influence = model.get_influence()

    leverage = influence.hat_matrix_diag
    cooks_distance = influence.cooks_distance[0]
    # Internally studentized residuals are the standardized residuals the
    # axis label promises (the raw `model.resid` values are not scaled).
    standardized_resid = influence.resid_studentized_internal

    # Min-max normalise Cook's distance; when every distance is equal the
    # range is zero, so fall back to all zeros rather than dividing by zero.
    cooks_min = np.min(cooks_distance)
    cooks_range = np.max(cooks_distance) - cooks_min
    if cooks_range:
        norm_cooksd = (cooks_distance - cooks_min) / cooks_range
    else:
        norm_cooksd = np.zeros_like(cooks_distance, dtype=float)

    residual_df = pd.DataFrame({
        'Leverage': leverage,
        'Standardized residual': standardized_resid,
        'Normalized Cook\'s Distance': norm_cooksd,
    })
    scatter = residual_df.hvplot.scatter(
        'Leverage', 'Standardized residual', c='Normalized Cook\'s Distance'
    )

    scatter.opts(line_color='black')

    return scatter * hv.Slope.from_scatter(scatter).opts(line_color='red', line_width=1)
383
+
384
+
385
+ ### Classification Plot
386
+
387
+ # def plot_roc(classification):
388
+
389
+ # # Calculer le taux de vrais positifs (true positive rate) et le taux de faux positifs (false positive rate)
390
+ # fpr, tpr, _ = roc_curve(classification.y_test_cl, classification.y_score_cl)
391
+
392
+ # # Calculer l'aire sous la courbe ROC (AUC)
393
+ # roc_auc = auc(fpr, tpr)
394
+
395
+ # # Tracer la courbe ROC
396
+ # fig = plt.figure()
397
+ # plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
398
+ # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
399
+ # plt.xlim([0.0, 1.0])
400
+ # plt.ylim([0.0, 1.05])
401
+ # plt.xlabel('False Positive Rate')
402
+ # plt.ylabel('True Positive Rate')
403
+ # plt.title('Receiver Operating Characteristic')
404
+ # plt.legend(loc="lower right")
405
+ # fig = pn.pane.Matplotlib(fig)
406
+ # fig.sizing_mode = 'scale_both'
407
+ # plt.close()
408
+ # return fig
409
+
410
def plot_roc(classification):
    """Return an hvplot ROC curve for a fitted classifier.

    Reads the true labels from `classification.y_test_cl` and the scores from
    `classification.y_score_cl`; the AUC is shown in the plot title and a
    dark-blue diagonal from (0, 0) to (1, 1) is overlaid.
    """
    # False-positive and true-positive rates at each threshold
    false_pos, true_pos, _ = roc_curve(classification.y_test_cl, classification.y_score_cl)

    # Area under the ROC curve (AUC)
    area = auc(false_pos, true_pos)

    # Draw the ROC curve with hvplot
    rates = pd.DataFrame({'FPR': false_pos, 'TPR': true_pos})
    curve = rates.hvplot.line(
        x='FPR',
        y='TPR',
        line_color='darkorange',
        line_width=2,
        title=f"ROC Curve (AUC = {area:.2f})",
        xlim=(0, 1),
        ylim=(0, 1),
    )
    diagonal = hv.Curve([(0, 0), (1, 1)]).opts(line_color='darkblue')

    overlay = curve * diagonal
    overlay.opts(
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
        show_legend=True,
        legend_position='bottom_right',
    )
    return overlay
427
+
428
def confusion_matrix_heatmap(classification, color):
    """Return an annotated Plotly heatmap of the row-normalised confusion matrix.

    Rows are the true labels (`classification.y_test_cl`), columns the
    predicted labels (`classification.y_pred_cl`); each row sums to 1.
    `color` is the colorscale forwarded to Plotly.
    """
    # Row-normalised crosstab of true vs. predicted labels
    matrix = pd.crosstab(
        classification.y_test_cl,
        classification.y_pred_cl,
        normalize='index',
    )

    heatmap_kwargs = dict(
        z=matrix.values,
        x=list(matrix.columns),
        y=list(matrix.index),
        annotation_text=matrix.round(2).values,
        colorscale=color,
        zmin=0,
        zmax=1,
        showscale=True,
    )
    heatmap = ff.create_annotated_heatmap(**heatmap_kwargs)

    heatmap.update_layout(
        title='Confusion Matrix',
        xaxis=dict(side='bottom', tickangle=0),
        yaxis=dict(autorange='reversed'),
    )
    return heatmap
454
+
455
+
456
+ ## Embedding plot
457
def plot_digits_embedding(X2d, y, title=None, remove_ticks=True):
    """
    Plot 2D points at positions `X2d` using text labels from `y`.

    The data is automatically shifted and rescaled to [0, 1] per column.
    Ticks are removed by default since the axes usually have no meaning
    (except for PCA).

    Parameters
    ----------
    X2d : array-like of shape (n_samples, 2)
        2D coordinates of the points.
    y : array-like of length n_samples
        Label of each point; drawn as text and also passed to the `tab10`
        colormap to pick the text colour.
    title : str, optional
        Title of the plot.
    remove_ticks : bool, default True
        Whether to hide the axis ticks.

    The plot is drawn on a new current pyplot figure; nothing is returned.
    """
    points = np.asarray(X2d)
    # Positional access: a pandas Series with a non-default index would
    # otherwise be looked up by label in the loop below.
    labels = np.asarray(y)

    x_min, x_max = np.min(points, 0), np.max(points, 0)
    span = x_max - x_min
    # A constant column has zero range; divide by 1 there so its points land
    # at 0 instead of becoming NaN.
    span = np.where(span == 0, 1, span)
    X = (points - x_min) / span

    plt.figure(figsize=(20, 10))
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(labels[i]),
                 color=plt.cm.tab10(labels[i]),
                 fontdict={'weight': 'bold', 'size': 9})

    if remove_ticks:
        plt.xticks([])
        plt.yticks([])
    if title is not None:
        plt.title(title)
477
+
478
+
479
+
480
+
dashboard/tables.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import panel as pn
3
+ from scipy import stats
4
+ from sklearn.metrics import classification_report
5
+
6
+ pn.extension('plotly')
7
+
8
+ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
9
+
10
class Table:
    """Wrap a DataFrame so it can be rendered as a Panel Tabulator widget."""

    def __init__(self, df):
        # DataFrame to display.
        self.df = df

    def _column_widths(self):
        """Return a mapping of column name to an equal percentage width.

        A named index is displayed as a column, so it gets a share too.
        A frame with nothing to size yields an empty mapping instead of
        dividing by zero.
        """
        df = self.df
        names = list(df.columns)
        if df.index.name:
            names.append(df.index.name)
        if not names:
            return {}

        width = str(100 / len(names)) + '%'
        return {name: width for name in names}

    def to_panel(self):
        """Return a paginated Tabulator showing the frame with equal column widths.

        A column named 'index' is hidden.
        """
        tabulator = pn.widgets.Tabulator(self.df,
                                         page_size=10,
                                         text_align='left',
                                         header_align='center',
                                         hidden_columns=['index'],
                                         widths=self._column_widths())
        return tabulator
36
+
37
+ #pn.config.sizing_mode = 'stretch_width'
38
+
39
+
40
def describe_quali_quanti(quali, quanti, df):
    """
    Show count, mean and std of a quantitative column per category.
    ---------------------------------------------------------------
    quali -> string, grouping column. example 'gender'
    quanti -> string, summarised column. example "math score"
    df -> DataFrame

    Rows are sorted by descending mean; the result is a Tabulator widget.
    """
    per_category = df.groupby([quali])[quanti]
    summary = per_category.agg(['count', 'mean', 'std'])
    ranked = summary.sort_values(by='mean', ascending=False)

    return Table(ranked).to_panel()
55
+
56
+
57
+
58
def cross_tab(df, quali1, quali2):
    """Return a Tabulator widget with the count crosstab of two columns.

    `quali1` values form the rows and `quali2` values the columns.
    """
    rows, cols = df[quali1], df[quali2]
    counts = pd.crosstab(rows, cols)
    return Table(counts).to_panel()
63
+
64
def chi2_tab(df, quali1, quali2):
    """Return a one-row Tabulator with the chi-square independence test of two columns.

    The test runs on the count crosstab of `quali1` by `quali2`; the table
    shows the statistic, the p-value and the degrees of freedom.
    """
    contingency = pd.crosstab(df[quali1], df[quali2])

    # The expected frequencies returned by the test are not displayed.
    statistic, p_value, dof, _expected = stats.chi2_contingency(contingency)

    summary = pd.DataFrame({
        "Chi-Square": [statistic],
        "p-value": [p_value],
        "Degrees of Freedom": [dof],
    })
    return Table(summary).to_panel()
83
+
84
+
85
def filtered_dataframe(df, **checkboxes_values):
    '''
    Show only the columns of the dataframe whose checkbox is checked
    ----------------------------------------------------------------
    df -> DataFrame
    checkboxes_values -> Dict mapping a column name to a truthy/falsy value
    '''
    checked_columns = [name for name in checkboxes_values if checkboxes_values[name]]
    subset = df[checked_columns]
    return Table(subset).to_panel()
94
+
95
+
96
def evaluate_regression_model(history):
    """Return a Tabulator widget with R2, MSE, RMSE and MAE of a regression model.

    The metrics compare `history.y_test` with `history.y_pred`. The value
    column is named after ``str(history.model)`` and a leading 'metric'
    column repeats the metric names.
    """
    # Calculate metrics
    mse = mean_squared_error(history.y_test, history.y_pred)
    # Take the root of the MSE directly: the `squared=False` argument is
    # deprecated and removed in newer scikit-learn releases.
    # NOTE(review): identical for a single target; for multi-output targets
    # this is the root of the averaged MSE - confirm y is one-dimensional.
    rmse = mse ** 0.5
    mae = mean_absolute_error(history.y_test, history.y_pred)
    r2 = r2_score(history.y_test, history.y_pred)

    # Create a dictionary with the results
    results = {'R2 Score': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae}

    # One row per metric, then expose the metric name as a regular column
    column = str(history.model)
    eval_df = pd.DataFrame.from_dict(results, orient='index', columns=[column])
    eval_df.insert(0, 'metric', eval_df.index)
    return Table(eval_df).to_panel()
110
+
111
+
112
+ # Classification
113
+
114
def report_to_df(classification):
    """Return a Tabulator widget with the leading rows of the classification report.

    The report compares `classification.y_test_cl` with
    `classification.y_pred_cl`; only the first
    ``len(classification.classes)`` rows are kept.
    """
    metrics = classification_report(
        classification.y_test_cl,
        classification.y_pred_cl,
        output_dict=True,
    )
    # One row per report entry, with the index labelled 'Class'.
    report_frame = pd.DataFrame(metrics).T.rename_axis('Class')

    # presumably these leading rows are the per-class entries - verify
    # against the report layout.
    n_classes = len(classification.classes)
    return Table(report_frame.head(n_classes)).to_panel()
121
+
dashboard/update.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ backcall==0.2.0
2
+ bleach==6.0.0
3
+ bokeh==2.4.3
4
+ certifi==2022.12.7
5
+ charset-normalizer==3.1.0
6
+ click==8.1.3
7
+ colorama==0.4.6
8
+ colorcet==3.0.1
9
+ cycler==0.11.0
10
+ debugpy==1.6.7
11
+ decorator==5.1.1
12
+ entrypoints==0.4
13
+ Flask==2.2.3
14
+ fonttools==4.38.0
15
+ gunicorn==20.1.0
16
+ holoviews==1.15.4
17
+ hvplot==0.8.3
18
+ idna==3.4
19
+ importlib-metadata==6.2.0
20
+ ipykernel==6.16.2
21
+ ipython==7.34.0
22
+ itsdangerous==2.1.2
23
+ jedi==0.18.2
24
+ Jinja2==3.1.2
25
+ joblib==1.2.0
26
+ jupyter_client==7.4.9
27
+ jupyter_core==4.12.0
28
+ kiwisolver==1.4.4
29
+ Markdown==3.4.3
30
+ MarkupSafe==2.1.2
31
+ matplotlib==3.5.3
32
+ matplotlib-inline==0.1.6
33
+ nest-asyncio==1.5.6
34
+ numpy==1.21.6
35
+ packaging==23.0
36
+ pandas==1.3.5
37
+ panel==0.14.4
38
+ param==1.13.0
39
+ parso==0.8.3
40
+ patsy==0.5.3
41
+ pickleshare==0.7.5
42
+ Pillow==9.5.0
43
+ plotly==5.14.1
44
+ prompt-toolkit==3.0.38
45
+ psutil==5.9.4
46
+ pyct==0.5.0
47
+ Pygments==2.14.0
48
+ pyparsing==3.0.9
49
+ python-dateutil==2.8.2
50
+ pytz==2023.3
51
+ pyviz-comms==2.2.1
52
+ PyYAML==6.0
53
+ pyzmq==25.0.2
54
+ requests==2.28.2
55
+ scikit-learn==1.0.2
56
+ scipy==1.7.3
57
+ seaborn==0.12.2
58
+ six==1.16.0
59
+ statsmodels==0.13.5
60
+ tenacity==8.2.2
61
+ threadpoolctl==3.1.0
62
+ tornado==6.2
63
+ tqdm==4.65.0
64
+ traitlets==5.9.0
65
+ typing_extensions==4.5.0
66
+ urllib3==1.26.15
67
+ wcwidth==0.2.6
68
+ webencodings==0.5.1
69
+ Werkzeug==2.2.3
70
+ zipp==3.15.0