import base64
import pickle
import time

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from flaml import AutoML
from flaml.automl.data import get_output_from_log

from utils import csv_to_featuers_list, pre_process_df, pre_process_features

# UI task name -> (FLAML task name, FLAML training-log file).
_TASKS = {
    'Classification': ('classification', 'classlog.log'),
    'Regression': ('regression', 'reglog.log'),
}


def _standardize(features):
    """Return *features* z-score normalised column by column.

    A column with zero standard deviation (a constant column) would turn
    into NaN through a division by zero; its divisor is replaced by 1 so the
    column becomes all zeros instead.
    """
    std = features.std().replace(0, 1)
    return (features - features.mean()) / std


def _highlight_last_row(column):
    """Return per-cell CSS for *column*, colouring only its last cell yellow."""
    last = len(column) - 1
    return ['background-color: yellow' if i == last else ''
            for i in range(len(column))]


def _render_search_history(automl, log_file, metric, time_budget):
    """Draw the "AutoML" tab: best-score curve, tried configurations, learners.

    Args:
        automl: The fitted ``AutoML`` instance.
        log_file: Path of the FLAML log written during ``fit``.
        metric: Metric name, used as the y-axis / column label.
        time_budget: Search budget in seconds; log records past it are ignored.
    """
    # Read the log over the same budget that was used for training; a fixed
    # cut-off would silently drop trials of longer runs.
    (time_history, best_valid_loss_history, _valid_loss_history,
     config_history, _metric_history) = get_output_from_log(
        filename=log_file, time_budget=time_budget)

    learners = [cfg.get('Current Learner') for cfg in config_history]
    hyper_params = [cfg.get('Current Hyper-parameters') for cfg in config_history]

    # NOTE(review): `1 - loss` equals the metric only for metrics FLAML
    # maximises (it minimises 1 - score for those); for error metrics the
    # plotted value and the [0, 1] range are not meaningful -- confirm which
    # metrics callers pass.
    df_res = pd.DataFrame({
        'time': time_history,
        metric: 1 - np.array(best_valid_loss_history),
        'model': learners,
    })
    fig = px.line(df_res,
                  title='evolution of best models found by AutoML',
                  x='time',
                  y=metric,
                  hover_name='model',
                  line_shape='hv',
                  range_y=[0, 1])
    st.plotly_chart(fig, theme="streamlit")

    df_models = pd.concat(
        (pd.DataFrame({'learner': learners}), pd.DataFrame(hyper_params)),
        axis=1)
    st.dataframe(df_models.style.apply(_highlight_last_row, axis=0))

    st.write('Estimator tested')
    st.table(automl.estimator_list)


def _feature_importance_frame(estimator, fallback_names):
    """Build the feature-importance table for *estimator*.

    Returns ``None`` when the estimator exposes no ``feature_importances_``.
    Feature names are taken from ``feature_name_`` or ``feature_names_in_``
    when present and of matching length, otherwise from *fallback_names*, and
    as a last resort from generated ``feature_<i>`` labels.
    """
    importances = getattr(estimator, 'feature_importances_', None)
    if importances is None:
        return None

    names = None
    for attr in ('feature_name_', 'feature_names_in_'):
        candidate = getattr(estimator, attr, None)
        if candidate is not None and len(candidate) == len(importances):
            names = list(candidate)
            break
    if names is None:
        if len(fallback_names) == len(importances):
            names = list(fallback_names)
        else:
            names = [f'feature_{i}' for i in range(len(importances))]

    return pd.DataFrame({'features name': names,
                         'features importance': importances})


def _render_model_download(model):
    """Render an HTML link that downloads *model* as a pickle file."""
    b64 = base64.b64encode(pickle.dumps(model)).decode()
    # The pickled bytes are embedded in a data URI so the link works without
    # triggering a Streamlit rerun.
    href = (f'<a href="data:application/octet-stream;base64,{b64}" '
            f'download="automl_model.pkl">Download Trained Model File (.pkl)</a>')
    st.markdown(href, unsafe_allow_html=True)


def _render_best_model(automl, feature_names):
    """Draw the "Best Model" tab: estimator, key figures, importances, download.

    Args:
        automl: The fitted ``AutoML`` instance.
        feature_names: Training column names, used when the estimator does
            not report its own feature names.
    """
    st.header('Best Model')
    if automl.model is None:
        st.warning('No model could be trained within the given time budget.')
        return

    estimator = automl.model.estimator
    st.text(estimator)

    col1, col2, col3 = st.columns((1, 1, 1))
    with col1:
        # NOTE(review): the label is fixed to "r2_score" whatever metric was
        # optimised -- confirm this is intended.
        st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
    with col2:
        st.metric(label="Time to find",
                  value=str(round(automl.time_to_find_best_model, 2)) + ' sec')
    with col3:
        st.metric(label="Time to train",
                  value=str(round(automl.best_config_train_time, 2)) + ' sec')

    st.divider()
    df_features_importance = _feature_importance_frame(estimator, feature_names)
    if df_features_importance is None:
        st.info('Feature importances are not available for this estimator.')
    else:
        fig_features = px.bar(df_features_importance,
                              x='features importance',
                              y='features name')
        st.plotly_chart(fig_features, theme="streamlit")

    _render_model_download(automl)


def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
    """Train a FLAML AutoML model on a CSV file and render the results.

    The CSV is read, passed through ``pre_process_df``, and every column
    except *label* is z-score normalised before fitting. A Streamlit progress
    bar is shown during training; afterwards two tabs present the search
    history and the best model (including a download link for the pickled
    ``AutoML`` object).

    Args:
        csv: Path or file-like object accepted by ``pandas.read_csv``.
        task: ``'Classification'`` or ``'Regression'``.
        budget: Search time budget in seconds (converted with ``int``).
        label: Name of the target column.
        metric_to_minimize_class: Metric passed to FLAML for classification.
        metric_to_minimize_reg: Metric passed to FLAML for regression.

    Raises:
        ValueError: If *task* is not one of the two supported values.
    """
    if task not in _TASKS:
        raise ValueError(
            f"Unsupported task {task!r}; expected one of {sorted(_TASKS)}")
    flaml_task, log_file = _TASKS[task]
    if task == 'Classification':
        metric = metric_to_minimize_class
    else:
        metric = metric_to_minimize_reg
    time_budget = int(budget)

    progress_text = "Training in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)
    time.sleep(0.5)  # let the empty bar render before the blocking work starts

    df = pre_process_df(pd.read_csv(csv))
    df_features = _standardize(df[df.columns.difference([label])])
    y = df[label]
    my_bar.progress(50, text=progress_text)

    automl = AutoML()
    automl.fit(
        df_features,
        y,
        time_budget=time_budget,
        metric=metric,
        task=flaml_task,
        log_file_name=log_file,
        early_stop=True,
        eval_method="holdout",
    )

    my_bar.progress(100, text=progress_text)
    time.sleep(0.5)
    my_bar.empty()

    tab1, tab2 = st.tabs(["AutoML", "Best Model"])
    with tab1:
        _render_search_history(automl, log_file, metric, time_budget)
    with tab2:
        _render_best_model(automl, list(df_features.columns))