add shap values for numerical values
Browse files
- autoML.py +23 -5
- requirements.txt +1 -0
autoML.py
CHANGED
@@ -17,6 +17,7 @@ from sklearn.impute import SimpleImputer
|
|
17 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
18 |
from sklearn.inspection import permutation_importance
|
19 |
from sklearn.inspection import PartialDependenceDisplay
|
|
|
20 |
|
21 |
def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
|
22 |
|
@@ -25,8 +26,12 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
25 |
time.sleep(0.5)
|
26 |
|
27 |
df = pd.read_csv(csv)
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
30 |
|
31 |
my_bar.progress(50, text=progress_text)
|
32 |
|
@@ -85,7 +90,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
85 |
time.sleep(0.5)
|
86 |
my_bar.empty()
|
87 |
|
88 |
-
tab1, tab2, tab3 = st.tabs(["AutoML", "Best Model", "Partial Dependence"])
|
89 |
|
90 |
with tab1:
|
91 |
|
@@ -165,7 +170,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
165 |
st.markdown(href, unsafe_allow_html=True)
|
166 |
|
167 |
download_model(automl)
|
168 |
-
|
169 |
with tab3:
|
170 |
with st.container():
|
171 |
st.subheader('1D Partial Dependance for the three most important features')
|
@@ -201,7 +206,6 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
201 |
|
202 |
st.pyplot(display.figure_)
|
203 |
|
204 |
-
|
205 |
st.divider()
|
206 |
|
207 |
with st.container():
|
@@ -232,6 +236,20 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
232 |
)
|
233 |
|
234 |
st.pyplot(display.figure_)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
|
237 |
if os.path.isfile('datasets/temp_file.csv'):
|
|
|
17 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
18 |
from sklearn.inspection import permutation_importance
|
19 |
from sklearn.inspection import PartialDependenceDisplay
|
20 |
+
import shap
|
21 |
|
22 |
def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
|
23 |
|
|
|
26 |
time.sleep(0.5)
|
27 |
|
28 |
df = pd.read_csv(csv)
|
29 |
+
|
30 |
+
msk = np.random.rand(len(df)) < 0.8
|
31 |
+
df_train, df_test = df[msk], df[~msk]
|
32 |
+
|
33 |
+
df_features = df_train[df_train.columns.difference([label])]
|
34 |
+
y = df_train[label]
|
35 |
|
36 |
my_bar.progress(50, text=progress_text)
|
37 |
|
|
|
90 |
time.sleep(0.5)
|
91 |
my_bar.empty()
|
92 |
|
93 |
+
tab1, tab2, tab3, tab4 = st.tabs(["AutoML", "Best Model", "Partial Dependence", "Shap Values"])
|
94 |
|
95 |
with tab1:
|
96 |
|
|
|
170 |
st.markdown(href, unsafe_allow_html=True)
|
171 |
|
172 |
download_model(automl)
|
173 |
+
|
174 |
with tab3:
|
175 |
with st.container():
|
176 |
st.subheader('1D Partial Dependance for the three most important features')
|
|
|
206 |
|
207 |
st.pyplot(display.figure_)
|
208 |
|
|
|
209 |
st.divider()
|
210 |
|
211 |
with st.container():
|
|
|
236 |
)
|
237 |
|
238 |
st.pyplot(display.figure_)
|
239 |
+
|
240 |
+
|
241 |
+
with tab4:
|
242 |
+
|
243 |
+
df_features_test = df_test[df_test.columns.difference([label])]
|
244 |
+
|
245 |
+
with st.spinner(f'Compute Shap Values'):
|
246 |
+
explainer = shap.Explainer(pipeline.predict, df_features_test)
|
247 |
+
shap_values = explainer(df_features_test)
|
248 |
+
|
249 |
+
st.set_option('deprecation.showPyplotGlobalUse', False)
|
250 |
+
st.pyplot(shap.plots.beeswarm(shap_values))
|
251 |
+
st.pyplot(shap.summary_plot(shap_values, plot_type='violin'))
|
252 |
+
|
253 |
|
254 |
|
255 |
if os.path.isfile('datasets/temp_file.csv'):
|
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ scikit-learn
|
|
5 |
flaml[automl]
|
6 |
plotly
|
7 |
matplotlib
|
|
|
|
5 |
flaml[automl]
|
6 |
plotly
|
7 |
matplotlib
|
8 |
+
shap
|