Spaces:

thov
/

AutoML

Sleeping

App Files Files Community

thov committed on Dec 6, 2023

Commit

f0d411f

•

1 Parent(s): cb240cd

Upload autoML.py

Browse files

Files changed (1) hide show

autoML.py +132 -0

autoML.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from flaml import AutoML
+from flaml.automl.data import get_output_from_log
+import pickle
+import plotly.express as px
+import base64
+import time
+from utils import csv_to_featuers_list, pre_process_df, pre_process_features
+def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
+    progress_text="Training in progress. Please wait."
+    my_bar = st.progress(0, text=progress_text)
+    time.sleep(0.5)
+    df = pd.read_csv(csv)
+    df.drop(columns='Unnamed: 0', inplace=True)
+    df = pre_process_df(df)
+    df_features = df[df.columns.difference([label])]
+    df_features=(df_features-df_features.mean())/df_features.std()
+    y = df[label]
+    my_bar.progress(50, text=progress_text)
+    if task == 'Classification':
+        automl_settings = {
+            "time_budget": int(budget),
+            "metric": metric_to_minimize_class,
+            "task": 'classification',
+            "log_file_name": 'classlog.log',
+            "early_stop": True,
+            "eval_method": "holdout"
+        }
+    if task == 'Regression':
+        automl_settings = {
+            "time_budget": int(budget),
+            "metric": metric_to_minimize_reg,
+            "task": 'regression',
+            "log_file_name": 'reglog.log',
+            "early_stop": True,
+            "eval_method": "holdout"
+        }
+    automl = AutoML()
+    automl.fit(df_features, y, **automl_settings)
+    my_bar.progress(100, text=progress_text)
+    time.sleep(0.5)
+    my_bar.empty()
+    tab1, tab2 = st.tabs(["AutoML", "Best Model"])
+    with tab1:
+        if task == 'Classification':
+            log = 'classlog.log'
+            metric = metric_to_minimize_class
+        if task == 'Regression':
+            log = 'reglog.log'
+            metric = metric_to_minimize_reg
+        time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename=log, time_budget=120)
+        def model(s):
+            mod = s.get('Current Learner')
+            return mod
+        def hp(s):
+            hparams = s.get('Current Hyper-parameters')
+            return hparams
+        df_res = pd.DataFrame({'time': time_history,
+                            metric: 1 - np.array(best_valid_loss_history),
+                            'model': list(map(model, config_history)),
+                            })
+        fig = px.line(df_res,
+                    title='evolution of best models found by AutoML',
+                    x='time',
+                    y=metric,
+                    hover_name='model',
+                    line_shape='hv',
+                    range_y=[0,1])
+        st.plotly_chart(fig, theme="streamlit")
+        models = pd.DataFrame({'learner': list(map(model, config_history))})
+        hps = list(map(hp, config_history))
+        df_hp = pd.DataFrame(hps)
+        df_models = pd.concat((models, df_hp), axis=1)
+        def highlight_last_row(s):
+            return ['background-color: yellow' if i == len(s) - 1 else '' for i in range(len(s))]
+        st.dataframe(df_models.style.apply(highlight_last_row, axis=0))
+        st.write('Estimator tested')
+        st.table(automl.estimator_list)
+    with tab2:
+        st.header('Best Model')
+        st.text(automl.model.estimator)
+        col1, col2, col3 = st.columns((1,1,1))
+        with col1:
+            st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
+        with col2:
+            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
+        with col3:
+            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
+        df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
+        fig_features = px.bar(df_features_importance, x='features importance', y='features name')
+        st.divider()
+        st.plotly_chart(fig_features, theme="streamlit")
+        def download_model(model):
+            output_model = pickle.dumps(model)
+            b64 = base64.b64encode(output_model).decode()
+            href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
+            st.markdown(href, unsafe_allow_html=True)
+        download_model(automl)