thov committed on
Commit
f0d411f
1 Parent(s): cb240cd

Upload autoML.py

Browse files
Files changed (1) hide show
  1. autoML.py +132 -0
autoML.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from flaml import AutoML
5
+ from flaml.automl.data import get_output_from_log
6
+ import pickle
7
+ import plotly.express as px
8
+ import base64
9
+ import time
10
+
11
+ from utils import csv_to_featuers_list, pre_process_df, pre_process_features
12
+
13
# Metrics that FLAML minimises directly (loss == metric value).  For the other
# built-in metrics (accuracy, r2, roc_auc, f1, ap, ...) FLAML logs 1 - score.
_ERROR_METRICS = frozenset({"mse", "rmse", "mae", "mape", "log_loss"})


def _loss_to_metric(metric, loss):
    """Convert a FLAML loss (scalar or array) back to the value of ``metric``."""
    if metric in _ERROR_METRICS:
        return loss
    return 1 - loss


def _feature_importance_frame(estimator, fallback_names):
    """Build the feature-importance table for ``estimator``.

    Returns ``None`` when the estimator exposes no ``feature_importances_``
    (e.g. linear or nearest-neighbour learners).  Feature names are taken from
    the first source whose length matches the importances: LightGBM's
    ``feature_name_``, scikit-learn's ``feature_names_in_``, then
    ``fallback_names``; generic names are generated if none match.
    """
    importances = getattr(estimator, 'feature_importances_', None)
    if importances is None:
        return None
    importances = np.ravel(importances)

    candidates = (
        getattr(estimator, 'feature_name_', None),
        getattr(estimator, 'feature_names_in_', None),
        fallback_names,
    )
    for names in candidates:
        if names is not None and len(names) == len(importances):
            break
    else:
        names = [f'feature_{i}' for i in range(len(importances))]

    return pd.DataFrame({
        'features name': list(names),
        'features importance': importances,
    })


def _highlight_last_row(column):
    """Styler callback: yellow background on the last cell of ``column``."""
    last = len(column) - 1
    return ['background-color: yellow' if i == last else '' for i in range(len(column))]


def _render_download_link(model):
    """Render an HTML link that downloads ``model`` as a pickle file."""
    payload = base64.b64encode(pickle.dumps(model)).decode()
    href = f'<a href="data:file/output_model;base64,{payload}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
    st.markdown(href, unsafe_allow_html=True)


def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
    """Run a FLAML AutoML search on a CSV file and render the results in Streamlit.

    Args:
        csv: Path or file-like object accepted by ``pandas.read_csv``.
        task: Either ``'Classification'`` or ``'Regression'``.
        budget: Search time budget in seconds (anything ``int()`` accepts).
        label: Name of the target column in the CSV.
        metric_to_minimize_class: FLAML metric used when ``task`` is
            ``'Classification'``.
        metric_to_minimize_reg: FLAML metric used when ``task`` is
            ``'Regression'``.

    Raises:
        ValueError: If ``task`` is not one of the two supported values.
    """
    # One table for everything that depends on the task, so the settings and
    # the log/metric used for reporting cannot drift apart.
    task_config = {
        'Classification': ('classification', 'classlog.log', metric_to_minimize_class),
        'Regression': ('regression', 'reglog.log', metric_to_minimize_reg),
    }
    if task not in task_config:
        raise ValueError(
            f"Unsupported task {task!r}; expected 'Classification' or 'Regression'."
        )
    flaml_task, log, metric = task_config[task]
    time_budget = int(budget)

    progress_text = "Training in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)
    time.sleep(0.5)

    df = pd.read_csv(csv)
    # 'Unnamed: 0' only exists when the CSV was written with its index;
    # tolerate files that do not have it.
    df.drop(columns='Unnamed: 0', inplace=True, errors='ignore')
    df = pre_process_df(df)
    df_features = df[df.columns.difference([label])]
    # Standardise features.  A constant column has std == 0, which would turn
    # the whole column into NaN; dividing by 1 instead leaves it at 0.
    feature_std = df_features.std().replace(0, 1)
    df_features = (df_features - df_features.mean()) / feature_std
    y = df[label]

    my_bar.progress(50, text=progress_text)

    automl_settings = {
        "time_budget": time_budget,
        "metric": metric,
        "task": flaml_task,
        "log_file_name": log,
        "early_stop": True,
        "eval_method": "holdout",
    }

    automl = AutoML()
    automl.fit(df_features, y, **automl_settings)

    my_bar.progress(100, text=progress_text)
    time.sleep(0.5)
    my_bar.empty()

    tab1, tab2 = st.tabs(["AutoML", "Best Model"])

    with tab1:
        # Read back every logged trial.  The previous hard-coded 120 s cut-off
        # silently dropped trials whenever the budget exceeded two minutes.
        (time_history, best_valid_loss_history, valid_loss_history,
         config_history, metric_history) = get_output_from_log(
            filename=log, time_budget=float("inf"))

        learners = [cfg.get('Current Learner') for cfg in config_history]
        hyper_params = [cfg.get('Current Hyper-parameters') for cfg in config_history]

        df_res = pd.DataFrame({
            'time': time_history,
            metric: _loss_to_metric(metric, np.array(best_valid_loss_history)),
            'model': learners,
        })

        fig = px.line(df_res,
                      title='evolution of best models found by AutoML',
                      x='time',
                      y=metric,
                      hover_name='model',
                      line_shape='hv',
                      # Error metrics are unbounded, so only pin the axis for
                      # score-style metrics.
                      range_y=None if metric in _ERROR_METRICS else [0, 1])

        st.plotly_chart(fig, theme="streamlit")

        df_models = pd.concat(
            (pd.DataFrame({'learner': learners}), pd.DataFrame(hyper_params)),
            axis=1,
        )
        # The last logged trial is the best configuration found, hence the highlight.
        st.dataframe(df_models.style.apply(_highlight_last_row, axis=0))

        st.write('Estimator tested')
        st.table(automl.estimator_list)

    with tab2:
        st.header('Best Model')

        st.text(automl.model.estimator)

        col1, col2, col3 = st.columns((1, 1, 1))

        with col1:
            # Label with the metric actually optimised (was always "r2_score").
            st.metric(label=str(metric),
                      value=round(_loss_to_metric(metric, automl.best_loss), 2))
        with col2:
            st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
        with col3:
            st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')

        df_features_importance = _feature_importance_frame(
            automl.model.estimator, list(df_features.columns))

        st.divider()
        if df_features_importance is None:
            st.info('The best model does not expose feature importances.')
        else:
            fig_features = px.bar(df_features_importance, x='features importance', y='features name')
            st.plotly_chart(fig_features, theme="streamlit")

    # NOTE(review): the source's indentation was lost, so it is unclear whether
    # the download link originally lived inside the "Best Model" tab; it is
    # rendered below the tabs here.
    _render_download_link(automl)