Spaces:

thov
/

AutoML

Sleeping

App Files Files Community

thov committed on Dec 22, 2023

Commit

e603fcd

•

1 Parent(s): ddce92b

add shap values for numerical values

Browse files

Files changed (2) hide show

autoML.py +23 -5
requirements.txt +1 -0

autoML.py CHANGED Viewed

@@ -17,6 +17,7 @@ from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.inspection import permutation_importance
 from sklearn.inspection import PartialDependenceDisplay
 def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
@@ -25,8 +26,12 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
     time.sleep(0.5)
     df = pd.read_csv(csv)
-    df_features = df[df.columns.difference([label])]
-    y = df[label]
     my_bar.progress(50, text=progress_text)
@@ -85,7 +90,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
     time.sleep(0.5)
     my_bar.empty()
-    tab1, tab2, tab3 = st.tabs(["AutoML", "Best Model", "Partial Dependence"])
     with tab1:
@@ -165,7 +170,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
             st.markdown(href, unsafe_allow_html=True)
         download_model(automl)
     with tab3:
         with st.container():
             st.subheader('1D Partial Dependance for the three most important features')
@@ -201,7 +206,6 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
                     st.pyplot(display.figure_)
         st.divider()
         with st.container():
@@ -232,6 +236,20 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
                         )
                     st.pyplot(display.figure_)
     if os.path.isfile('datasets/temp_file.csv'):

 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.inspection import permutation_importance
 from sklearn.inspection import PartialDependenceDisplay
+import shap
 def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
     time.sleep(0.5)
     df = pd.read_csv(csv)
+    msk = np.random.rand(len(df)) < 0.8
+    df_train, df_test = df[msk], df[~msk]
+    df_features = df_train[df_train.columns.difference([label])]
+    y = df_train[label]
     my_bar.progress(50, text=progress_text)
     time.sleep(0.5)
     my_bar.empty()
+    tab1, tab2, tab3, tab4 = st.tabs(["AutoML", "Best Model", "Partial Dependence", "Shap Values"])
     with tab1:
             st.markdown(href, unsafe_allow_html=True)
         download_model(automl)
     with tab3:
         with st.container():
             st.subheader('1D Partial Dependance for the three most important features')
                     st.pyplot(display.figure_)
         st.divider()
         with st.container():
                         )
                     st.pyplot(display.figure_)
+    with tab4:
+        df_features_test = df_test[df_test.columns.difference([label])]
+        with st.spinner(f'Compute Shap Values'):
+            explainer = shap.Explainer(pipeline.predict, df_features_test)
+            shap_values = explainer(df_features_test)
+        st.set_option('deprecation.showPyplotGlobalUse', False)
+        st.pyplot(shap.plots.beeswarm(shap_values))
+        st.pyplot(shap.summary_plot(shap_values, plot_type='violin'))
     if os.path.isfile('datasets/temp_file.csv'):

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ scikit-learn
 flaml[automl]
 plotly
 matplotlib

 flaml[automl]
 plotly
 matplotlib
+shap