add download, progress bar & other things
Browse files
app.py
CHANGED
@@ -3,14 +3,20 @@ import pandas as pd
|
|
3 |
import numpy as np
|
4 |
from flaml import AutoML
|
5 |
from flaml.automl.data import get_output_from_log
|
|
|
6 |
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
|
7 |
import plotly.express as px
|
|
|
|
|
8 |
|
9 |
from utils import csv_to_featuers_list, pre_process_df, pre_process_features
|
10 |
|
|
|
11 |
st.set_page_config(layout="wide")
|
12 |
|
13 |
-
|
|
|
|
|
14 |
|
15 |
with st.sidebar:
|
16 |
demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
|
@@ -23,14 +29,21 @@ with st.sidebar:
|
|
23 |
budget = st.text_area(label='Budget Time', value="5")
|
24 |
#start_but = st.button(label='AutoML')
|
25 |
|
|
|
|
|
|
|
26 |
|
27 |
if demo_but_class:
|
|
|
28 |
df = pd.read_csv('WineRate.csv')
|
|
|
29 |
df = pre_process_df(df)
|
30 |
label = 'quality'
|
31 |
-
|
|
|
32 |
y = df[label]
|
33 |
-
|
|
|
34 |
|
35 |
automl_settings = {
|
36 |
"time_budget": int(budget),
|
@@ -42,29 +55,80 @@ if demo_but_class:
|
|
42 |
}
|
43 |
|
44 |
automl = AutoML()
|
45 |
-
automl.fit(
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
|
67 |
if demo_but_reg:
|
|
|
68 |
df = pd.read_csv('house_california.csv')
|
69 |
df.drop(columns='Unnamed: 0', inplace=True)
|
70 |
df = pre_process_df(df)
|
@@ -73,6 +137,8 @@ if demo_but_reg:
|
|
73 |
df_features=(df_features-df_features.mean())/df_features.std()
|
74 |
y = df[label]
|
75 |
|
|
|
|
|
76 |
automl_settings = {
|
77 |
"time_budget": int(budget),
|
78 |
"metric": 'r2',
|
@@ -85,6 +151,10 @@ if demo_but_reg:
|
|
85 |
automl = AutoML()
|
86 |
automl.fit(df_features, y, **automl_settings)
|
87 |
|
|
|
|
|
|
|
|
|
88 |
tab1, tab2 = st.tabs(["AutoML", "Best Model"])
|
89 |
|
90 |
with tab1:
|
@@ -125,18 +195,33 @@ if demo_but_reg:
|
|
125 |
with tab2:
|
126 |
st.header('Best Model')
|
127 |
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
|
131 |
fig_features = px.bar(df_features_importance, x='features importance', y='features name')
|
132 |
|
133 |
st.plotly_chart(fig_features, theme="streamlit")
|
134 |
|
135 |
-
#add button to download the best model
|
136 |
-
|
137 |
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
-
|
|
|
|
|
|
|
140 |
|
141 |
df = pd.read_csv('house_california.csv')
|
142 |
df.drop(columns='Unnamed: 0', inplace=True)
|
@@ -146,6 +231,8 @@ if demo_but_class == False and demo_but_class == False:
|
|
146 |
df_features=(df_features-df_features.mean())/df_features.std()
|
147 |
y = df[label]
|
148 |
|
|
|
|
|
149 |
automl_settings = {
|
150 |
"time_budget": int(budget),
|
151 |
"metric": 'r2',
|
@@ -158,6 +245,10 @@ if demo_but_class == False and demo_but_class == False:
|
|
158 |
automl = AutoML()
|
159 |
automl.fit(df_features, y, **automl_settings)
|
160 |
|
|
|
|
|
|
|
|
|
161 |
tab1, tab2 = st.tabs(["AutoML", "Best Model"])
|
162 |
|
163 |
with tab1:
|
@@ -198,11 +289,27 @@ if demo_but_class == False and demo_but_class == False:
|
|
198 |
with tab2:
|
199 |
st.header('Best Model')
|
200 |
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
|
204 |
fig_features = px.bar(df_features_importance, x='features importance', y='features name')
|
205 |
|
206 |
st.plotly_chart(fig_features, theme="streamlit")
|
207 |
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import numpy as np
|
4 |
from flaml import AutoML
|
5 |
from flaml.automl.data import get_output_from_log
|
6 |
+
import pickle
|
7 |
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
|
8 |
import plotly.express as px
|
9 |
+
import base64
|
10 |
+
import time
|
11 |
|
12 |
from utils import csv_to_featuers_list, pre_process_df, pre_process_features
|
13 |
|
14 |
+
|
15 |
st.set_page_config(layout="wide")
|
16 |
|
17 |
+
|
18 |
+
#state
|
19 |
+
#add progress bar
|
20 |
|
21 |
with st.sidebar:
|
22 |
demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
|
|
|
29 |
budget = st.text_area(label='Budget Time', value="5")
|
30 |
#start_but = st.button(label='AutoML')
|
31 |
|
32 |
+
progress_text="Training in progress. Please wait."
|
33 |
+
my_bar = st.progress(0, text=progress_text)
|
34 |
+
time.sleep(0.5)
|
35 |
|
36 |
if demo_but_class:
|
37 |
+
|
38 |
df = pd.read_csv('WineRate.csv')
|
39 |
+
df.drop(columns=['Id', 'Unnamed: 0'], inplace=True)
|
40 |
df = pre_process_df(df)
|
41 |
label = 'quality'
|
42 |
+
df_features = df[df.columns.difference([label])]
|
43 |
+
df_features=(df_features-df_features.mean())/df_features.std()
|
44 |
y = df[label]
|
45 |
+
|
46 |
+
my_bar.progress(50, text=progress_text)
|
47 |
|
48 |
automl_settings = {
|
49 |
"time_budget": int(budget),
|
|
|
55 |
}
|
56 |
|
57 |
automl = AutoML()
|
58 |
+
automl.fit(df_features, y, **automl_settings)
|
59 |
|
60 |
+
my_bar.progress(100, text=progress_text)
|
61 |
+
time.sleep(0.5)
|
62 |
+
my_bar.empty()
|
63 |
+
|
64 |
+
tab1, tab2 = st.tabs(["AutoML", "Best Model"])
|
65 |
+
|
66 |
+
with tab1:
|
67 |
+
|
68 |
+
time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename="classlog.log", time_budget=120)
|
69 |
|
70 |
+
def model(s):
    """Return the learner name stored in one search-history config entry.

    s: mapping-like object supporting ``.get`` (used below on the elements
       of ``config_history``).
    Returns the value under 'Current Learner', or whatever ``s.get``
    yields when that key is missing (``None`` for a plain dict).
    """
    return s.get('Current Learner')
|
73 |
+
|
74 |
+
def hp(s):
    """Return the hyper-parameters stored in one search-history config entry.

    s: mapping-like object supporting ``.get`` (used below on the elements
       of ``config_history``).
    Returns the value under 'Current Hyper-parameters', or whatever
    ``s.get`` yields when that key is missing (``None`` for a plain dict).
    """
    return s.get('Current Hyper-parameters')
|
77 |
+
|
78 |
+
df_res = pd.DataFrame({'time': time_history,
|
79 |
+
'r2': 1 - np.array(best_valid_loss_history),
|
80 |
+
'model': list(map(model, config_history)),
|
81 |
+
})
|
82 |
+
fig = px.line(df_res,
|
83 |
+
x='time',
|
84 |
+
y='r2',
|
85 |
+
hover_name='model',
|
86 |
+
line_shape='hv',
|
87 |
+
range_y=[0,1])
|
88 |
+
|
89 |
+
st.plotly_chart(fig, theme="streamlit")
|
90 |
+
|
91 |
+
models = pd.DataFrame({'learner': list(map(model, config_history))})
|
92 |
+
hps = list(map(hp, config_history))
|
93 |
+
df_hp = pd.DataFrame(hps)
|
94 |
+
df_models = pd.concat((models, df_hp), axis=1)
|
95 |
+
|
96 |
+
def highlight_last_row(s):
    """Build per-cell CSS that highlights only the last element of ``s``.

    s: any sized sequence; it is passed to ``Styler.apply(..., axis=0)``
       below, so presumably one DataFrame column per call.
    Returns a list of ``len(s)`` strings: empty everywhere except the final
    position, which carries a yellow background. Empty input gives ``[]``.
    """
    styles = [''] * len(s)
    if styles:
        # Only the final row is marked.
        styles[-1] = 'background-color: yellow'
    return styles
|
98 |
+
|
99 |
+
st.dataframe(df_models.style.apply(highlight_last_row, axis=0))
|
100 |
+
|
101 |
+
with tab2:
|
102 |
+
st.header('Best Model')
|
103 |
+
|
104 |
+
st.text(automl.model.estimator)
|
105 |
+
|
106 |
+
col1, col2, col3 = st.columns((1,1,1))
|
107 |
+
|
108 |
+
with col1:
|
109 |
+
st.metric(label="Accuracy", value=round(1 - automl.best_loss, 2))
|
110 |
+
with col2:
|
111 |
+
st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
|
112 |
+
with col3:
|
113 |
+
st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
|
114 |
|
115 |
+
df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
|
116 |
+
fig_features = px.bar(df_features_importance, x='features importance', y='features name')
|
117 |
+
|
118 |
+
st.plotly_chart(fig_features, theme="streamlit")
|
119 |
+
|
120 |
+
|
121 |
+
def download_model(model):
    """Render a link that downloads ``model`` as a pickle file.

    The object is pickled, base64-encoded and embedded in a ``data:`` URL
    inside an HTML anchor, which is written to the page with
    ``st.markdown(..., unsafe_allow_html=True)``. The browser saves it as
    ``automl.pkl``.

    model: any picklable object (called below with the fitted AutoML
           instance).

    NOTE(review): the whole pickle is inlined into the page, so very large
    models produce very large HTML. Only unpickle files from a trusted
    source.
    """
    output_model = pickle.dumps(model)
    b64 = base64.b64encode(output_model).decode()
    # application/octet-stream is the registered media type for arbitrary
    # binary data; the previous "file/output_model" is not a real type.
    href = f'<a href="data:application/octet-stream;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
    st.markdown(href, unsafe_allow_html=True)
|
126 |
+
|
127 |
+
download_model(automl)
|
128 |
|
129 |
|
130 |
if demo_but_reg:
|
131 |
+
|
132 |
df = pd.read_csv('house_california.csv')
|
133 |
df.drop(columns='Unnamed: 0', inplace=True)
|
134 |
df = pre_process_df(df)
|
|
|
137 |
df_features=(df_features-df_features.mean())/df_features.std()
|
138 |
y = df[label]
|
139 |
|
140 |
+
my_bar.progress(50, text=progress_text)
|
141 |
+
|
142 |
automl_settings = {
|
143 |
"time_budget": int(budget),
|
144 |
"metric": 'r2',
|
|
|
151 |
automl = AutoML()
|
152 |
automl.fit(df_features, y, **automl_settings)
|
153 |
|
154 |
+
my_bar.progress(100, text=progress_text)
|
155 |
+
time.sleep(0.5)
|
156 |
+
my_bar.empty()
|
157 |
+
|
158 |
tab1, tab2 = st.tabs(["AutoML", "Best Model"])
|
159 |
|
160 |
with tab1:
|
|
|
195 |
with tab2:
|
196 |
st.header('Best Model')
|
197 |
|
198 |
+
st.text(automl.model.estimator)
|
199 |
+
|
200 |
+
col1, col2, col3 = st.columns((1,1,1))
|
201 |
+
|
202 |
+
with col1:
|
203 |
+
st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
|
204 |
+
with col2:
|
205 |
+
st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
|
206 |
+
with col3:
|
207 |
+
st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
|
208 |
|
209 |
df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
|
210 |
fig_features = px.bar(df_features_importance, x='features importance', y='features name')
|
211 |
|
212 |
st.plotly_chart(fig_features, theme="streamlit")
|
213 |
|
|
|
|
|
214 |
|
215 |
+
def download_model(model):
    """Render a link that downloads ``model`` as a pickle file.

    The object is pickled, base64-encoded and embedded in a ``data:`` URL
    inside an HTML anchor, which is written to the page with
    ``st.markdown(..., unsafe_allow_html=True)``. The browser saves it as
    ``automl.pkl``.

    model: any picklable object (called below with the fitted AutoML
           instance).

    NOTE(review): the whole pickle is inlined into the page, so very large
    models produce very large HTML. Only unpickle files from a trusted
    source.
    """
    output_model = pickle.dumps(model)
    b64 = base64.b64encode(output_model).decode()
    # application/octet-stream is the registered media type for arbitrary
    # binary data; the previous "file/output_model" is not a real type.
    href = f'<a href="data:application/octet-stream;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
    st.markdown(href, unsafe_allow_html=True)
|
220 |
|
221 |
+
download_model(automl)
|
222 |
+
|
223 |
+
|
224 |
+
if (demo_but_class == False) and (demo_but_class == False):
|
225 |
|
226 |
df = pd.read_csv('house_california.csv')
|
227 |
df.drop(columns='Unnamed: 0', inplace=True)
|
|
|
231 |
df_features=(df_features-df_features.mean())/df_features.std()
|
232 |
y = df[label]
|
233 |
|
234 |
+
my_bar.progress(50, text=progress_text)
|
235 |
+
|
236 |
automl_settings = {
|
237 |
"time_budget": int(budget),
|
238 |
"metric": 'r2',
|
|
|
245 |
automl = AutoML()
|
246 |
automl.fit(df_features, y, **automl_settings)
|
247 |
|
248 |
+
my_bar.progress(100, text=progress_text)
|
249 |
+
time.sleep(0.5)
|
250 |
+
my_bar.empty()
|
251 |
+
|
252 |
tab1, tab2 = st.tabs(["AutoML", "Best Model"])
|
253 |
|
254 |
with tab1:
|
|
|
289 |
with tab2:
|
290 |
st.header('Best Model')
|
291 |
|
292 |
+
st.text(automl.model.estimator)
|
293 |
+
|
294 |
+
col1, col2, col3 = st.columns((1,1,1))
|
295 |
+
|
296 |
+
with col1:
|
297 |
+
st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
|
298 |
+
with col2:
|
299 |
+
st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
|
300 |
+
with col3:
|
301 |
+
st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
|
302 |
|
303 |
df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
|
304 |
fig_features = px.bar(df_features_importance, x='features importance', y='features name')
|
305 |
|
306 |
st.plotly_chart(fig_features, theme="streamlit")
|
307 |
|
308 |
+
|
309 |
+
def download_model(model):
    """Render a link that downloads ``model`` as a pickle file.

    The object is pickled, base64-encoded and embedded in a ``data:`` URL
    inside an HTML anchor, which is written to the page with
    ``st.markdown(..., unsafe_allow_html=True)``. The browser saves it as
    ``automl.pkl``.

    model: any picklable object (called below with the fitted AutoML
           instance).

    NOTE(review): the whole pickle is inlined into the page, so very large
    models produce very large HTML. Only unpickle files from a trusted
    source.
    """
    output_model = pickle.dumps(model)
    b64 = base64.b64encode(output_model).decode()
    # application/octet-stream is the registered media type for arbitrary
    # binary data; the previous "file/output_model" is not a real type.
    href = f'<a href="data:application/octet-stream;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
    st.markdown(href, unsafe_allow_html=True)
|
314 |
+
|
315 |
+
download_model(automl)
|