thov committed on
Commit
e603fcd
1 Parent(s): ddce92b

add shap values for numerical values

Browse files
Files changed (2) hide show
  1. autoML.py +23 -5
  2. requirements.txt +1 -0
autoML.py CHANGED
@@ -17,6 +17,7 @@ from sklearn.impute import SimpleImputer
17
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
18
  from sklearn.inspection import permutation_importance
19
  from sklearn.inspection import PartialDependenceDisplay
 
20
 
21
  def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
22
 
@@ -25,8 +26,12 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
25
  time.sleep(0.5)
26
 
27
  df = pd.read_csv(csv)
28
- df_features = df[df.columns.difference([label])]
29
- y = df[label]
 
 
 
 
30
 
31
  my_bar.progress(50, text=progress_text)
32
 
@@ -85,7 +90,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
85
  time.sleep(0.5)
86
  my_bar.empty()
87
 
88
- tab1, tab2, tab3 = st.tabs(["AutoML", "Best Model", "Partial Dependence"])
89
 
90
  with tab1:
91
 
@@ -165,7 +170,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
165
  st.markdown(href, unsafe_allow_html=True)
166
 
167
  download_model(automl)
168
-
169
  with tab3:
170
  with st.container():
171
  st.subheader('1D Partial Dependance for the three most important features')
@@ -201,7 +206,6 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
201
 
202
  st.pyplot(display.figure_)
203
 
204
-
205
  st.divider()
206
 
207
  with st.container():
@@ -232,6 +236,20 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
232
  )
233
 
234
  st.pyplot(display.figure_)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
 
237
  if os.path.isfile('datasets/temp_file.csv'):
 
17
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
18
  from sklearn.inspection import permutation_importance
19
  from sklearn.inspection import PartialDependenceDisplay
20
+ import shap
21
 
22
  def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
23
 
 
26
  time.sleep(0.5)
27
 
28
  df = pd.read_csv(csv)
29
+
30
+ msk = np.random.rand(len(df)) < 0.8
31
+ df_train, df_test = df[msk], df[~msk]
32
+
33
+ df_features = df_train[df_train.columns.difference([label])]
34
+ y = df_train[label]
35
 
36
  my_bar.progress(50, text=progress_text)
37
 
 
90
  time.sleep(0.5)
91
  my_bar.empty()
92
 
93
+ tab1, tab2, tab3, tab4 = st.tabs(["AutoML", "Best Model", "Partial Dependence", "Shap Values"])
94
 
95
  with tab1:
96
 
 
170
  st.markdown(href, unsafe_allow_html=True)
171
 
172
  download_model(automl)
173
+
174
  with tab3:
175
  with st.container():
176
  st.subheader('1D Partial Dependance for the three most important features')
 
206
 
207
  st.pyplot(display.figure_)
208
 
 
209
  st.divider()
210
 
211
  with st.container():
 
236
  )
237
 
238
  st.pyplot(display.figure_)
239
+
240
+
241
+ with tab4:
242
+
243
+ df_features_test = df_test[df_test.columns.difference([label])]
244
+
245
+ with st.spinner(f'Compute Shap Values'):
246
+ explainer = shap.Explainer(pipeline.predict, df_features_test)
247
+ shap_values = explainer(df_features_test)
248
+
249
+ st.set_option('deprecation.showPyplotGlobalUse', False)
250
+ st.pyplot(shap.plots.beeswarm(shap_values))
251
+ st.pyplot(shap.summary_plot(shap_values, plot_type='violin'))
252
+
253
 
254
 
255
  if os.path.isfile('datasets/temp_file.csv'):
requirements.txt CHANGED
@@ -5,3 +5,4 @@ scikit-learn
5
  flaml[automl]
6
  plotly
7
  matplotlib
 
 
5
  flaml[automl]
6
  plotly
7
  matplotlib
8
+ shap