Spaces:

thov
/

AutoML

Sleeping

App Files Files Community

AutoML / autoML.py

thov

Update autoML.py

5583690 7 months ago

raw

history blame

No virus

4.42 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from flaml import AutoML
	from flaml.automl.data import get_output_from_log
	import pickle
	import plotly.express as px
	import base64
	import time

	from utils import csv_to_featuers_list, pre_process_df, pre_process_features

	def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
	    """Train a FLAML AutoML model on a CSV file and render the results in Streamlit.

	    The page shows a progress bar while training, then two tabs: the search
	    history ("AutoML") and a summary of the winning model ("Best Model"),
	    followed by a download link for the pickled ``AutoML`` object.

	    Args:
	        csv: Path or file-like object accepted by ``pandas.read_csv``.
	        task: Either ``'Classification'`` or ``'Regression'``.
	        budget: Training time budget in seconds (anything ``int()`` accepts).
	        label: Name of the target column in the CSV.
	        metric_to_minimize_class: FLAML metric name used when ``task`` is
	            ``'Classification'``.
	        metric_to_minimize_reg: FLAML metric name used when ``task`` is
	            ``'Regression'``.

	    Raises:
	        ValueError: If ``task`` is not one of the two supported values.
	    """
	    # Resolve everything that depends on the task in one place and fail early,
	    # before any UI is drawn or data is loaded, when the task is unknown.
	    task_settings = {
	        'Classification': ('classification', 'classlog.log', metric_to_minimize_class),
	        'Regression': ('regression', 'reglog.log', metric_to_minimize_reg),
	    }
	    if task not in task_settings:
	        raise ValueError(
	            f"Unsupported task {task!r}; expected 'Classification' or 'Regression'."
	        )
	    flaml_task, log, metric = task_settings[task]
	    time_budget = int(budget)

	    progress_text = "Training in progress. Please wait."
	    my_bar = st.progress(0, text=progress_text)
	    time.sleep(0.5)  # short pause so the progress bar is visible before work starts

	    df = pd.read_csv(csv)
	    df = pre_process_df(df)
	    df_features = df[df.columns.difference([label])]
	    # Z-score standardisation of every feature column.
	    # NOTE(review): a constant column has std == 0 and becomes all-NaN here.
	    df_features = (df_features - df_features.mean()) / df_features.std()
	    y = df[label]

	    my_bar.progress(50, text=progress_text)

	    automl_settings = {
	        "time_budget": time_budget,
	        "metric": metric,
	        "task": flaml_task,
	        "log_file_name": log,
	        "early_stop": True,
	        "eval_method": "holdout",
	    }

	    automl = AutoML()
	    automl.fit(df_features, y, **automl_settings)

	    my_bar.progress(100, text=progress_text)
	    time.sleep(0.5)
	    my_bar.empty()

	    tab1, tab2 = st.tabs(["AutoML", "Best Model"])

	    with tab1:
	        # Read the log back with the same budget that was used for training so
	        # the history is not cut off for runs longer than a fixed limit.
	        (time_history, best_valid_loss_history, valid_loss_history,
	         config_history, metric_history) = get_output_from_log(
	            filename=log, time_budget=time_budget)

	        learners = [cfg.get('Current Learner') for cfg in config_history]
	        hyper_params = [cfg.get('Current Hyper-parameters') for cfg in config_history]

	        # NOTE(review): plotting ``1 - loss`` on a [0, 1] axis assumes the chosen
	        # metric is one FLAML minimises as ``1 - score`` - confirm for each
	        # metric offered by the caller.
	        df_res = pd.DataFrame({
	            'time': time_history,
	            metric: 1 - np.array(best_valid_loss_history),
	            'model': learners,
	        })

	        fig = px.line(df_res,
	                      title='evolution of best models found by AutoML',
	                      x='time',
	                      y=metric,
	                      hover_name='model',
	                      line_shape='hv',
	                      range_y=[0, 1])

	        st.plotly_chart(fig, theme="streamlit")

	        models = pd.DataFrame({'learner': learners})
	        df_hp = pd.DataFrame(hyper_params)
	        df_models = pd.concat((models, df_hp), axis=1)

	        def highlight_last_row(s):
	            """Return per-cell CSS for one column, colouring only its last cell."""
	            last = len(s) - 1
	            return ['background-color: yellow' if i == last else ''
	                    for i in range(len(s))]

	        st.dataframe(df_models.style.apply(highlight_last_row, axis=0))

	        st.write('Estimator tested')
	        st.table(automl.estimator_list)

	    with tab2:
	        st.header('Best Model')

	        estimator = automl.model.estimator
	        st.text(estimator)

	        col1, col2, col3 = st.columns((1, 1, 1))

	        with col1:
	            # Label the score with the metric that was actually optimised
	            # instead of a fixed "r2_score".
	            st.metric(label=str(metric), value=round(1 - automl.best_loss, 2))
	        with col2:
	            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
	        with col3:
	            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')

	        st.divider()

	        # Not every learner exposes importances, and ``feature_name_`` only
	        # exists on some estimators, so look both up defensively.
	        importances = getattr(estimator, 'feature_importances_', None)
	        if importances is None:
	            st.info('The best model does not expose feature importances.')
	        else:
	            names = getattr(estimator, 'feature_name_', None)
	            if names is None:
	                names = getattr(estimator, 'feature_names_in_', None)
	            if names is None or len(names) != len(importances):
	                if len(df_features.columns) == len(importances):
	                    names = list(df_features.columns)
	                else:
	                    names = [f'feature_{i}' for i in range(len(importances))]

	            df_features_importance = pd.DataFrame({
	                'features name': list(names),
	                'features importance': importances,
	            })
	            fig_features = px.bar(df_features_importance, x='features importance', y='features name')
	            st.plotly_chart(fig_features, theme="streamlit")

	    download_model(automl)


	def download_model(model):
	    """Render a Streamlit link that downloads ``model`` as a pickle file.

	    The object is pickled, base64-encoded and embedded in a ``data:`` URI, so
	    the whole payload travels inside the page markup.

	    Args:
	        model: Any picklable object (here the fitted ``AutoML`` instance).
	    """
	    output_model = pickle.dumps(model)
	    b64 = base64.b64encode(output_model).decode()
	    href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
	    # The markup is a fixed template plus base64 text, hence unsafe_allow_html.
	    st.markdown(href, unsafe_allow_html=True)