thov committed on
Commit
069c3ba
1 Parent(s): 48b4eec

Add SHAP values for categorical values

Browse files
Files changed (1) hide show
  1. autoML.py +19 -9
autoML.py CHANGED
@@ -27,11 +27,11 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
27
 
28
  df = pd.read_csv(csv)
29
 
30
- msk = np.random.rand(len(df)) < 0.8
31
- df_train, df_test = df[msk], df[~msk]
32
 
33
- df_features = df_train[df_train.columns.difference([label])]
34
- y = df_train[label]
35
 
36
  my_bar.progress(50, text=progress_text)
37
 
@@ -173,7 +173,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
173
 
174
  download_model(automl)
175
 
176
-
177
  with tab3:
178
  with st.container():
179
  st.subheader('1D Partial Dependance for the three most important features')
@@ -240,15 +240,25 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
240
 
241
  st.pyplot(display.figure_)
242
 
243
-
244
  with tab4:
245
 
246
- df_features_test = df_test[df_test.columns.difference([label])]
 
 
 
 
 
 
 
 
 
247
 
248
  with st.container():
249
  with st.spinner(f'Compute Shap Values...'):
250
- explainer = shap.Explainer(pipeline.predict, df_features_test)
251
- shap_values = explainer(df_features_test)
 
252
  st.subheader('Beeswarm Plot')
253
  plt.figure()
254
  st.pyplot(shap.plots.beeswarm(shap_values, show=False).figure)
 
27
 
28
  df = pd.read_csv(csv)
29
 
30
+ #msk = np.random.rand(len(df)) < 0.8
31
+ #df_train, df_test = df[msk], df[~msk]
32
 
33
+ df_features = df[df.columns.difference([label])]
34
+ y = df[label]
35
 
36
  my_bar.progress(50, text=progress_text)
37
 
 
173
 
174
  download_model(automl)
175
 
176
+ """
177
  with tab3:
178
  with st.container():
179
  st.subheader('1D Partial Dependance for the three most important features')
 
240
 
241
  st.pyplot(display.figure_)
242
 
243
+ """
244
  with tab4:
245
 
246
+ #df_shap = df_features.sample(n=200, random_state=89)
247
+
248
+ def get_only_features_names(name):
249
+ return name[5:]
250
+
251
+ df_transform = pd.DataFrame(data=preprocessor.fit_transform(df_features),
252
+ columns=list(map(get_only_features_names,
253
+ preprocessor.get_feature_names_out())
254
+ )
255
+ )
256
 
257
  with st.container():
258
  with st.spinner(f'Compute Shap Values...'):
259
+ explainer = shap.Explainer(automl.model.predict, df_transform)
260
+ shap_values = explainer(df_transform)
261
+
262
  st.subheader('Beeswarm Plot')
263
  plt.figure()
264
  st.pyplot(shap.plots.beeswarm(shap_values, show=False).figure)