thov committed on
Commit
8f578ae
1 Parent(s): cc69360

add download, progress bar & other things

Browse files
Files changed (1) hide show
  1. app.py +133 -26
app.py CHANGED
@@ -3,14 +3,20 @@ import pandas as pd
3
  import numpy as np
4
  from flaml import AutoML
5
  from flaml.automl.data import get_output_from_log
 
6
  from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
7
  import plotly.express as px
 
 
8
 
9
  from utils import csv_to_featuers_list, pre_process_df, pre_process_features
10
 
 
11
  st.set_page_config(layout="wide")
12
 
13
- #st.title("Auto ML")
 
 
14
 
15
  with st.sidebar:
16
  demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
@@ -23,14 +29,21 @@ with st.sidebar:
23
  budget = st.text_area(label='Budget Time', value="5")
24
  #start_but = st.button(label='AutoML')
25
 
 
 
 
26
 
27
  if demo_but_class:
 
28
  df = pd.read_csv('WineRate.csv')
 
29
  df = pre_process_df(df)
30
  label = 'quality'
31
- X = df[df.columns.difference([label])]
 
32
  y = df[label]
33
- X = pre_process_features(X)
 
34
 
35
  automl_settings = {
36
  "time_budget": int(budget),
@@ -42,29 +55,80 @@ if demo_but_class:
42
  }
43
 
44
  automl = AutoML()
45
- automl.fit(X, y, **automl_settings)
46
 
47
- time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename="classlog.log", time_budget=120)
48
-
49
- def model(s):
50
- mod = s.get('Current Learner')
51
- return mod
 
 
 
 
52
 
53
- df_res = pd.DataFrame({'time': time_history,
54
- 'accuracy': 1 - np.array(best_valid_loss_history),
55
- 'model': list(map(model, config_history)),
56
- })
57
- fig = px.line(df_res,
58
- x='time',
59
- y='accuracy',
60
- hover_name='model',
61
- line_shape='hv',
62
- range_y=[0,1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- st.plotly_chart(fig, theme="streamlit")
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  if demo_but_reg:
 
68
  df = pd.read_csv('house_california.csv')
69
  df.drop(columns='Unnamed: 0', inplace=True)
70
  df = pre_process_df(df)
@@ -73,6 +137,8 @@ if demo_but_reg:
73
  df_features=(df_features-df_features.mean())/df_features.std()
74
  y = df[label]
75
 
 
 
76
  automl_settings = {
77
  "time_budget": int(budget),
78
  "metric": 'r2',
@@ -85,6 +151,10 @@ if demo_but_reg:
85
  automl = AutoML()
86
  automl.fit(df_features, y, **automl_settings)
87
 
 
 
 
 
88
  tab1, tab2 = st.tabs(["AutoML", "Best Model"])
89
 
90
  with tab1:
@@ -125,18 +195,33 @@ if demo_but_reg:
125
  with tab2:
126
  st.header('Best Model')
127
 
128
- #add name of the best model with its HP
 
 
 
 
 
 
 
 
 
129
 
130
  df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
131
  fig_features = px.bar(df_features_importance, x='features importance', y='features name')
132
 
133
  st.plotly_chart(fig_features, theme="streamlit")
134
 
135
- #add button to download the best model
136
-
137
 
 
 
 
 
 
138
 
139
- if demo_but_class == False and demo_but_class == False:
 
 
 
140
 
141
  df = pd.read_csv('house_california.csv')
142
  df.drop(columns='Unnamed: 0', inplace=True)
@@ -146,6 +231,8 @@ if demo_but_class == False and demo_but_class == False:
146
  df_features=(df_features-df_features.mean())/df_features.std()
147
  y = df[label]
148
 
 
 
149
  automl_settings = {
150
  "time_budget": int(budget),
151
  "metric": 'r2',
@@ -158,6 +245,10 @@ if demo_but_class == False and demo_but_class == False:
158
  automl = AutoML()
159
  automl.fit(df_features, y, **automl_settings)
160
 
 
 
 
 
161
  tab1, tab2 = st.tabs(["AutoML", "Best Model"])
162
 
163
  with tab1:
@@ -198,11 +289,27 @@ if demo_but_class == False and demo_but_class == False:
198
  with tab2:
199
  st.header('Best Model')
200
 
201
- #add name of the best model with its HP
 
 
 
 
 
 
 
 
 
202
 
203
  df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
204
  fig_features = px.bar(df_features_importance, x='features importance', y='features name')
205
 
206
  st.plotly_chart(fig_features, theme="streamlit")
207
 
208
- #add button to download the best model
 
 
 
 
 
 
 
 
3
  import numpy as np
4
  from flaml import AutoML
5
  from flaml.automl.data import get_output_from_log
6
+ import pickle
7
  from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
8
  import plotly.express as px
9
+ import base64
10
+ import time
11
 
12
  from utils import csv_to_featuers_list, pre_process_df, pre_process_features
13
 
14
+
15
  st.set_page_config(layout="wide")
16
 
17
+
18
+ #state
19
+ #add progress bar
20
 
21
  with st.sidebar:
22
  demo_but_class = st.button(label="Demo Classification on Wine Rate Dataset")
 
29
  budget = st.text_area(label='Budget Time', value="5")
30
  #start_but = st.button(label='AutoML')
31
 
32
+ progress_text="Training in progress. Please wait."
33
+ my_bar = st.progress(0, text=progress_text)
34
+ time.sleep(0.5)
35
 
36
  if demo_but_class:
37
+
38
  df = pd.read_csv('WineRate.csv')
39
+ df.drop(columns=['Id', 'Unnamed: 0'], inplace=True)
40
  df = pre_process_df(df)
41
  label = 'quality'
42
+ df_features = df[df.columns.difference([label])]
43
+ df_features=(df_features-df_features.mean())/df_features.std()
44
  y = df[label]
45
+
46
+ my_bar.progress(50, text=progress_text)
47
 
48
  automl_settings = {
49
  "time_budget": int(budget),
 
55
  }
56
 
57
  automl = AutoML()
58
+ automl.fit(df_features, y, **automl_settings)
59
 
60
+ my_bar.progress(100, text=progress_text)
61
+ time.sleep(0.5)
62
+ my_bar.empty()
63
+
64
+ tab1, tab2 = st.tabs(["AutoML", "Best Model"])
65
+
66
+ with tab1:
67
+
68
+ time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = get_output_from_log(filename="classlog.log", time_budget=120)
69
 
70
+ def model(s):
71
+ mod = s.get('Current Learner')
72
+ return mod
73
+
74
+ def hp(s):
75
+ hparams = s.get('Current Hyper-parameters')
76
+ return hparams
77
+
78
+ df_res = pd.DataFrame({'time': time_history,
79
+ 'r2': 1 - np.array(best_valid_loss_history),
80
+ 'model': list(map(model, config_history)),
81
+ })
82
+ fig = px.line(df_res,
83
+ x='time',
84
+ y='r2',
85
+ hover_name='model',
86
+ line_shape='hv',
87
+ range_y=[0,1])
88
+
89
+ st.plotly_chart(fig, theme="streamlit")
90
+
91
+ models = pd.DataFrame({'learner': list(map(model, config_history))})
92
+ hps = list(map(hp, config_history))
93
+ df_hp = pd.DataFrame(hps)
94
+ df_models = pd.concat((models, df_hp), axis=1)
95
+
96
+ def highlight_last_row(s):
97
+ return ['background-color: yellow' if i == len(s) - 1 else '' for i in range(len(s))]
98
+
99
+ st.dataframe(df_models.style.apply(highlight_last_row, axis=0))
100
+
101
+ with tab2:
102
+ st.header('Best Model')
103
+
104
+ st.text(automl.model.estimator)
105
+
106
+ col1, col2, col3 = st.columns((1,1,1))
107
+
108
+ with col1:
109
+ st.metric(label="Accuracy", value=round(1 - automl.best_loss, 2))
110
+ with col2:
111
+ st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
112
+ with col3:
113
+ st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
114
 
115
+ df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
116
+ fig_features = px.bar(df_features_importance, x='features importance', y='features name')
117
+
118
+ st.plotly_chart(fig_features, theme="streamlit")
119
+
120
+
121
+ def download_model(model):
122
+ output_model = pickle.dumps(model)
123
+ b64 = base64.b64encode(output_model).decode()
124
+ href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
125
+ st.markdown(href, unsafe_allow_html=True)
126
+
127
+ download_model(automl)
128
 
129
 
130
  if demo_but_reg:
131
+
132
  df = pd.read_csv('house_california.csv')
133
  df.drop(columns='Unnamed: 0', inplace=True)
134
  df = pre_process_df(df)
 
137
  df_features=(df_features-df_features.mean())/df_features.std()
138
  y = df[label]
139
 
140
+ my_bar.progress(50, text=progress_text)
141
+
142
  automl_settings = {
143
  "time_budget": int(budget),
144
  "metric": 'r2',
 
151
  automl = AutoML()
152
  automl.fit(df_features, y, **automl_settings)
153
 
154
+ my_bar.progress(100, text=progress_text)
155
+ time.sleep(0.5)
156
+ my_bar.empty()
157
+
158
  tab1, tab2 = st.tabs(["AutoML", "Best Model"])
159
 
160
  with tab1:
 
195
  with tab2:
196
  st.header('Best Model')
197
 
198
+ st.text(automl.model.estimator)
199
+
200
+ col1, col2, col3 = st.columns((1,1,1))
201
+
202
+ with col1:
203
+ st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
204
+ with col2:
205
+ st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
206
+ with col3:
207
+ st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
208
 
209
  df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
210
  fig_features = px.bar(df_features_importance, x='features importance', y='features name')
211
 
212
  st.plotly_chart(fig_features, theme="streamlit")
213
 
 
 
214
 
215
+ def download_model(model):
216
+ output_model = pickle.dumps(model)
217
+ b64 = base64.b64encode(output_model).decode()
218
+ href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
219
+ st.markdown(href, unsafe_allow_html=True)
220
 
221
+ download_model(automl)
222
+
223
+
224
+ if (demo_but_class == False) and (demo_but_class == False):
225
 
226
  df = pd.read_csv('house_california.csv')
227
  df.drop(columns='Unnamed: 0', inplace=True)
 
231
  df_features=(df_features-df_features.mean())/df_features.std()
232
  y = df[label]
233
 
234
+ my_bar.progress(50, text=progress_text)
235
+
236
  automl_settings = {
237
  "time_budget": int(budget),
238
  "metric": 'r2',
 
245
  automl = AutoML()
246
  automl.fit(df_features, y, **automl_settings)
247
 
248
+ my_bar.progress(100, text=progress_text)
249
+ time.sleep(0.5)
250
+ my_bar.empty()
251
+
252
  tab1, tab2 = st.tabs(["AutoML", "Best Model"])
253
 
254
  with tab1:
 
289
  with tab2:
290
  st.header('Best Model')
291
 
292
+ st.text(automl.model.estimator)
293
+
294
+ col1, col2, col3 = st.columns((1,1,1))
295
+
296
+ with col1:
297
+ st.metric(label="r2_score", value=round(1 - automl.best_loss, 2))
298
+ with col2:
299
+ st.metric(label="Time to find", value=str(round(automl.time_to_find_best_model, 2))+' sec')
300
+ with col3:
301
+ st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
302
 
303
  df_features_importance = pd.DataFrame({'features name': automl.model.estimator.feature_name_, 'features importance': automl.model.estimator.feature_importances_})
304
  fig_features = px.bar(df_features_importance, x='features importance', y='features name')
305
 
306
  st.plotly_chart(fig_features, theme="streamlit")
307
 
308
+
309
+ def download_model(model):
310
+ output_model = pickle.dumps(model)
311
+ b64 = base64.b64encode(output_model).decode()
312
+ href = f'<a href="data:file/output_model;base64,{b64}" download="automl.pkl">Download Trained Model File (.pkl)</a>'
313
+ st.markdown(href, unsafe_allow_html=True)
314
+
315
+ download_model(automl)