AutoML / autoML.py
thov's picture
Update autoML.py
5583690
raw
history blame
4.42 kB
import streamlit as st
import pandas as pd
import numpy as np
from flaml import AutoML
from flaml.automl.data import get_output_from_log
import pickle
import plotly.express as px
import base64
import time
from utils import csv_to_featuers_list, pre_process_df, pre_process_features
def _resolve_run(task, budget, metric_to_minimize_class, metric_to_minimize_reg):
    """Translate the UI task choice into (metric, log file name, fit settings).

    Raises:
        ValueError: if ``task`` is neither 'Classification' nor 'Regression'.
    """
    if task == 'Classification':
        metric, flaml_task, log_file = metric_to_minimize_class, 'classification', 'classlog.log'
    elif task == 'Regression':
        metric, flaml_task, log_file = metric_to_minimize_reg, 'regression', 'reglog.log'
    else:
        # Previously this fell through and crashed later with UnboundLocalError.
        raise ValueError(
            f"Unsupported task {task!r}; expected 'Classification' or 'Regression'."
        )
    settings = {
        "time_budget": int(budget),
        "metric": metric,
        "task": flaml_task,
        "log_file_name": log_file,
        "early_stop": True,
        "eval_method": "holdout",
    }
    return metric, log_file, settings


def _standardize(features):
    """Z-score every column; zero-variance columns become 0 instead of NaN."""
    # A constant column has std == 0; dividing by it would yield NaN for the
    # whole column, so such columns are divided by 1 instead.
    spread = features.std().replace(0, 1)
    return (features - features.mean()) / spread


def _highlight_last_row(column):
    """Return per-cell CSS for one column, colouring only its last cell."""
    styles = [''] * len(column)
    if styles:
        styles[-1] = 'background-color: yellow'
    return styles


def _feature_importance_frame(estimator, fallback_names):
    """Build the feature-importance table, or return None when unavailable.

    Feature names are taken from ``feature_name_`` if the estimator has it,
    else from ``feature_names_in_``, else from ``fallback_names``.
    """
    importances = getattr(estimator, 'feature_importances_', None)
    if importances is None:
        return None
    importances = list(importances)

    names = getattr(estimator, 'feature_name_', None)
    if names is None:
        names = getattr(estimator, 'feature_names_in_', None)
    if names is None or len(names) != len(importances):
        names = fallback_names
    names = list(names)
    if len(names) != len(importances):
        # No trustworthy labels for the bars; skip the chart rather than crash.
        return None
    return pd.DataFrame({'features name': names, 'features importance': importances})


def _render_search_history(automl, metric, log_file):
    """Fill the 'AutoML' tab: best-score curve, tested configs, estimator list."""
    # Read every record of the log. The former hard-coded 120 s cut-off
    # dropped part of the history for runs with a larger time budget.
    time_history, best_valid_loss_history, _, config_history, _ = get_output_from_log(
        filename=log_file, time_budget=float("inf")
    )
    learners = [cfg.get('Current Learner') for cfg in config_history]
    hyper_params = [cfg.get('Current Hyper-parameters') for cfg in config_history]

    # NOTE(review): `1 - loss` and the [0, 1] axis assume a score-type metric
    # (e.g. accuracy, r2). For error metrics such as rmse the logged loss is
    # presumably the metric itself - confirm which metrics the UI offers.
    df_res = pd.DataFrame({
        'time': time_history,
        metric: 1 - np.array(best_valid_loss_history),
        'model': learners,
    })
    fig = px.line(
        df_res,
        title='evolution of best models found by AutoML',
        x='time',
        y=metric,
        hover_name='model',
        line_shape='hv',
        range_y=[0, 1],
    )
    st.plotly_chart(fig, theme="streamlit")

    # One row per logged trial: learner name followed by its hyper-parameters.
    df_models = pd.concat(
        (pd.DataFrame({'learner': learners}), pd.DataFrame(hyper_params)), axis=1
    )
    st.dataframe(df_models.style.apply(_highlight_last_row, axis=0))
    st.write('Estimator tested')
    st.table(automl.estimator_list)


def _render_download_link(model):
    """Render an HTML link that downloads ``model`` as a pickle file."""
    # The pickle is only produced here, never loaded. Whoever downloads it
    # should unpickle it only if they trust this app as the source.
    payload = base64.b64encode(pickle.dumps(model)).decode()
    href = f'<a href="data:file/output_model;base64,{payload}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
    st.markdown(href, unsafe_allow_html=True)


def _render_best_model(automl, metric, feature_names):
    """Fill the 'Best Model' tab: estimator, headline numbers, importances."""
    st.header('Best Model')
    estimator = automl.model.estimator
    st.text(estimator)

    col1, col2, col3 = st.columns((1, 1, 1))
    with col1:
        # Label with the metric actually optimised (was always "r2_score").
        # NOTE(review): same `1 - loss` caveat as in the history chart.
        st.metric(label=str(metric), value=round(1 - automl.best_loss, 2))
    with col2:
        st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
    with col3:
        st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')

    importance = _feature_importance_frame(estimator, feature_names)
    st.divider()
    if importance is None:
        # Not every learner exposes feature importances.
        st.info('Feature importances are not available for this estimator.')
    else:
        fig_features = px.bar(importance, x='features importance', y='features name')
        st.plotly_chart(fig_features, theme="streamlit")


def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
    """Train a FLAML AutoML model on a CSV file and render the results in Streamlit.

    Args:
        csv: Path or file-like object accepted by ``pandas.read_csv``.
        task: 'Classification' or 'Regression'.
        budget: Training time budget in seconds (converted with ``int``).
        label: Name of the target column; every other column is a feature.
        metric_to_minimize_class: Metric passed to FLAML for classification.
        metric_to_minimize_reg: Metric passed to FLAML for regression.

    Raises:
        ValueError: if ``task`` is not one of the two supported values.

    The features are z-scored before fitting. Results are shown in two tabs
    ("AutoML" with the search history, "Best Model" with the winning
    estimator, its feature importances and a pickle download link).
    """
    # Validate the request before touching the UI so a bad task does not
    # leave a dangling progress bar.
    metric, log_file, automl_settings = _resolve_run(
        task, budget, metric_to_minimize_class, metric_to_minimize_reg
    )

    progress_text = "Training in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)
    time.sleep(0.5)  # brief pause so the progress bar is visible before work starts

    # pre_process_df is a project helper; presumably it leaves only numeric
    # columns, which the standardisation below requires - TODO confirm.
    df = pre_process_df(pd.read_csv(csv))
    df_features = _standardize(df[df.columns.difference([label])])
    y = df[label]
    my_bar.progress(50, text=progress_text)

    automl = AutoML()
    automl.fit(df_features, y, **automl_settings)
    my_bar.progress(100, text=progress_text)
    time.sleep(0.5)
    my_bar.empty()

    tab1, tab2 = st.tabs(["AutoML", "Best Model"])
    with tab1:
        _render_search_history(automl, metric, log_file)
    with tab2:
        _render_best_model(automl, metric, df_features.columns)
        _render_download_link(automl)