Spaces:

thov
/

AutoML

Sleeping

App Files Files Community

thov committed on Dec 4, 2023

Commit

8f578ae

1 Parent(s): cc69360

add download, progress bar & other things

Browse files

Files changed (1) hide show

app.py +133 -26

app.py CHANGED Viewed

@@ -3,14 +3,20 @@ import pandas as pd
 import numpy as np
 from flaml import AutoML
 from flaml.automl.data import get_output_from_log
 from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 import plotly.express as px
 from utils import csv_to_featuers_list, pre_process_df, pre_process_features
 st.set_page_config(layout="wide")
-#st.title("Auto ML")
 with st.sidebar:
     demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
@@ -23,14 +29,21 @@ with st.sidebar:
     budget = st.text_area(label='Budget Time', value="5")
     #start_but = st.button(label='AutoML')
 if demo_but_class:
     df = pd.read_csv('WineRate.csv')
     df = pre_process_df(df)
     label = 'quality'
-    X = df[df.columns.difference([label])]
     y = df[label]
-    X = pre_process_features(X)
     automl_settings = {
         "time_budget": int(budget),
@@ -42,29 +55,80 @@ if demo_but_class:
     }
     automl = AutoML()
-    automl.fit(X, y, **automl_settings)
-    time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename="classlog.log", time_budget=120)
-    def model(s):
-        mod = s.get('Current Learner')
-        return mod
-    df_res = pd.DataFrame({'time': time_history,
-                           'accuracy': 1 - np.array(best_valid_loss_history),
-                           'model': list(map(model, config_history)),
-                           })
-    fig = px.line(df_res,
-                  x='time',
-                  y='accuracy',
-                  hover_name='model',
-                  line_shape='hv',
-                  range_y=[0,1])
-    st.plotly_chart(fig, theme="streamlit")
 if demo_but_reg:
     df = pd.read_csv('house_california.csv')
     df.drop(columns='Unnamed: 0', inplace=True)
     df = pre_process_df(df)
@@ -73,6 +137,8 @@ if demo_but_reg:
     df_features=(df_features-df_features.mean())/df_features.std()
     y = df[label]
     automl_settings = {
         "time_budget": int(budget),
         "metric": 'r2',
@@ -85,6 +151,10 @@ if demo_but_reg:
     automl = AutoML()
     automl.fit(df_features, y, **automl_settings)
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
@@ -125,18 +195,33 @@ if demo_but_reg:
     with tab2:
         st.header('Best Model')
-        #add name of the best model with its HP
         df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
         fig_features = px.bar(df_features_importance, x='features importance', y='features name')
         st.plotly_chart(fig_features, theme="streamlit")
-        #add button to download the best model
-if demo_but_class == False and demo_but_class == False:
     df = pd.read_csv('house_california.csv')
     df.drop(columns='Unnamed: 0', inplace=True)
@@ -146,6 +231,8 @@ if demo_but_class == False and demo_but_class == False:
     df_features=(df_features-df_features.mean())/df_features.std()
     y = df[label]
     automl_settings = {
         "time_budget": int(budget),
         "metric": 'r2',
@@ -158,6 +245,10 @@ if demo_but_class == False and demo_but_class == False:
     automl = AutoML()
     automl.fit(df_features, y, **automl_settings)
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
@@ -198,11 +289,27 @@ if demo_but_class == False and demo_but_class == False:
     with tab2:
         st.header('Best Model')
-        #add name of the best model with its HP
         df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
         fig_features = px.bar(df_features_importance, x='features importance', y='features name')
         st.plotly_chart(fig_features, theme="streamlit")
-        #add button to download the best model

 import numpy as np
 from flaml import AutoML
 from flaml.automl.data import get_output_from_log
+import pickle
 from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 import plotly.express as px
+import base64
+import time
 from utils import csv_to_featuers_list, pre_process_df, pre_process_features
 st.set_page_config(layout="wide")
+#state
+#add progress bar
 with st.sidebar:
     demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
     budget = st.text_area(label='Budget Time', value="5")
     #start_but = st.button(label='AutoML')
+progress_text="Training in progress. Please wait."
+my_bar = st.progress(0, text=progress_text)
+time.sleep(0.5)
 if demo_but_class:
     df = pd.read_csv('WineRate.csv')
+    df.drop(columns=['Id', 'Unnamed: 0'], inplace=True)
     df = pre_process_df(df)
     label = 'quality'
+    df_features = df[df.columns.difference([label])]
+    df_features=(df_features-df_features.mean())/df_features.std()
     y = df[label]
+    my_bar.progress(50, text=progress_text)
     automl_settings = {
         "time_budget": int(budget),
     }
     automl = AutoML()
+    automl.fit(df_features, y, **automl_settings)
+    my_bar.progress(100, text=progress_text)
+    time.sleep(0.5)
+    my_bar.empty()
+    tab1, tab2 = st.tabs(["AutoML", "Best Model"])
+    with tab1:
+        time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename="classlog.log", time_budget=120)
+        def model(s):
+            mod = s.get('Current Learner')
+            return mod
+        def hp(s):
+            hparams = s.get('Current Hyper-parameters')
+            return hparams
+        df_res = pd.DataFrame({'time': time_history,
+                            'r2': 1 - np.array(best_valid_loss_history),
+                            'model': list(map(model, config_history)),
+                            })
+        fig = px.line(df_res,
+                    x='time',
+                    y='r2',
+                    hover_name='model',
+                    line_shape='hv',
+                    range_y=[0,1])
+        st.plotly_chart(fig, theme="streamlit")
+        models = pd.DataFrame({'learner': list(map(model, config_history))})
+        hps = list(map(hp, config_history))
+        df_hp = pd.DataFrame(hps)
+        df_models = pd.concat((models, df_hp), axis=1)
+        def highlight_last_row(s):
+            return ['background-color: yellow' if i == len(s) - 1 else '' for i in range(len(s))]
+        st.dataframe(df_models.style.apply(highlight_last_row, axis=0))
+    with tab2:
+        st.header('Best Model')
+        st.text(automl.model.estimator)
+        col1, col2, col3 = st.columns((1,1,1))
+        with col1:
+            st.metric(label="Accuracy", value=round(1 - automl.best_loss, 2))
+        with col2:
+            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
+        with col3:
+            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
+        df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
+        fig_features = px.bar(df_features_importance, x='features importance', y='features name')
+        st.plotly_chart(fig_features, theme="streamlit")
+        def download_model(model):
+            output_model = pickle.dumps(model)
+            b64 = base64.b64encode(output_model).decode()
+            href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
+            st.markdown(href, unsafe_allow_html=True)
+        download_model(automl)
 # Regression demo: runs when the regression demo button was pressed.
 if demo_but_reg:
     df = pd.read_csv('house_california.csv')
     # NOTE(review): 'Unnamed: 0' is presumably a saved index column — confirm against the CSV
     df.drop(columns='Unnamed: 0', inplace=True)
     df = pre_process_df(df)
     # z-score every feature column (subtract the column mean, divide by its std)
     df_features=(df_features-df_features.mean())/df_features.std()
     y = df[label]
+    my_bar.progress(50, text=progress_text)
     automl_settings = {
         "time_budget": int(budget),
         "metric": 'r2',
     automl = AutoML()
     automl.fit(df_features, y, **automl_settings)
+    my_bar.progress(100, text=progress_text)
+    time.sleep(0.5)
     # remove the progress bar once training has finished
+    my_bar.empty()
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
     with tab2:
         st.header('Best Model')
+        st.text(automl.model.estimator)
+        col1, col2, col3 = st.columns((1,1,1))
+        with col1:
             # 1 - best_loss is what gets displayed under the "r2_score" label
+            st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
+        with col2:
+            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
+        with col3:
+            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
         df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
         fig_features = px.bar(df_features_importance, x='features importance', y='features name')
         st.plotly_chart(fig_features, theme="streamlit")
         # Pickle the fitted AutoML object, base64-encode it and render it as an
         # HTML data-URI download link.
+        def download_model(model):
+            output_model = pickle.dumps(model)
+            b64 = base64.b64encode(output_model).decode()
+            href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
+            st.markdown(href, unsafe_allow_html=True)
+        download_model(automl)
+if (demo_but_class == False) and (demo_but_class == False):
     df = pd.read_csv('house_california.csv')
     df.drop(columns='Unnamed: 0', inplace=True)
     df_features=(df_features-df_features.mean())/df_features.std()
     y = df[label]
+    my_bar.progress(50, text=progress_text)
     automl_settings = {
         "time_budget": int(budget),
         "metric": 'r2',
     automl = AutoML()
     automl.fit(df_features, y, **automl_settings)
+    my_bar.progress(100, text=progress_text)
+    time.sleep(0.5)
+    my_bar.empty()
     tab1, tab2 = st.tabs(["AutoML", "Best Model"])
     with tab1:
     with tab2:
         st.header('Best Model')
+        st.text(automl.model.estimator)
+        col1, col2, col3 = st.columns((1,1,1))
+        with col1:
+            st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
+        with col2:
+            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
+        with col3:
+            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
         df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
         fig_features = px.bar(df_features_importance, x='features importance', y='features name')
         st.plotly_chart(fig_features, theme="streamlit")
+        def download_model(model):
+            output_model = pickle.dumps(model)
+            b64 = base64.b64encode(output_model).decode()
+            href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
+            st.markdown(href, unsafe_allow_html=True)
+        download_model(automl)