Spaces:

thov
/

AutoML

Sleeping

App Files Community

Théo Villette committed on Dec 8, 2023

Commit

cac478c

•

1 Parent(s): 02d33be

fix features importance

Browse files

Files changed (2) hide show

app.py +2 -0
autoML.py +14 -16

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ st.set_page_config(layout="wide")
 # add categorical features
 # handle missing values with automl (not possible)
 with st.sidebar:
     st.subheader('Demo Datasets')

 # add categorical features
 # handle missing values with automl (not possible)
+# fix importance features problem
 with st.sidebar:
     st.subheader('Demo Datasets')

autoML.py CHANGED Viewed

@@ -26,21 +26,25 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
     if task == 'Classification':
         automl_settings = {
             "time_budget": int(budget),
-            "metric": metric_to_minimize_class,
             "task": 'classification',
-            "log_file_name": 'classlog.log',
             "early_stop": True,
             "eval_method": "holdout"
         }
     if task == 'Regression':
         automl_settings = {
             "time_budget": int(budget),
-            "metric": metric_to_minimize_reg,
             "task": 'regression',
-            "log_file_name": 'reglog.log',
             "early_stop": True,
             "eval_method": "holdout"
         }
@@ -55,13 +59,6 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
-        if task == 'Classification':
-            log = 'classlog.log'
-            metric = metric_to_minimize_class
-        if task == 'Regression':
-            log = 'reglog.log'
-            metric = metric_to_minimize_reg
         time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename=log, time_budget=120)
@@ -109,17 +106,18 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
         col1, col2, col3 = st.columns((1,1,1))
         with col1:
-            st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
         with col2:
             st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
         with col3:
             st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
-        df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
-        fig_features = px.bar(df_features_importance, x='features importance', y='features name')
-        st.divider()
-        st.plotly_chart(fig_features, theme="streamlit")
         def download_model(model):

     if task == 'Classification':
+        metric = metric_to_minimize_class
+        log = 'classlog.log'
         automl_settings = {
             "time_budget": int(budget),
+            "metric": metric,
             "task": 'classification',
+            "log_file_name": log,
             "early_stop": True,
             "eval_method": "holdout"
         }
     if task == 'Regression':
+        metric = metric_to_minimize_reg
+        log = 'reglog.log'
         automl_settings = {
             "time_budget": int(budget),
+            "metric": metric,
             "task": 'regression',
+            "log_file_name": log,
             "early_stop": True,
             "eval_method": "holdout"
         }
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
         time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename=log, time_budget=120)
         col1, col2, col3 = st.columns((1,1,1))
         with col1:
+            st.metric(label=metric, value=round(1 - automl.best_loss, 2))
         with col2:
             st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
         with col3:
             st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
+        if automl.best_estimator == 'lgbm':
+            df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
+            fig_features = px.bar(df_features_importance, x='features importance', y='features name')
+            st.divider()
+            st.plotly_chart(fig_features, theme="streamlit")
         def download_model(model):