Théo Villette
committed on
Commit
•
e390497
1
Parent(s):
82c6295
update
Browse files
app.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
from flaml.automl.data import get_output_from_log
|
3 |
-
import plotly.express as px
|
4 |
|
5 |
from utils import csv_to_featuers_list
|
6 |
from autoML import autoML
|
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
|
3 |
from utils import csv_to_featuers_list
|
4 |
from autoML import autoML
|
autoML.py
CHANGED
@@ -7,8 +7,11 @@ import pickle
|
|
7 |
import plotly.express as px
|
8 |
import base64
|
9 |
import time
|
10 |
-
|
11 |
-
from
|
|
|
|
|
|
|
12 |
|
13 |
def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
|
14 |
|
@@ -17,9 +20,7 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
17 |
time.sleep(0.5)
|
18 |
|
19 |
df = pd.read_csv(csv)
|
20 |
-
df = pre_process_df(df)
|
21 |
df_features = df[df.columns.difference([label])]
|
22 |
-
df_features=(df_features-df_features.mean())/df_features.std()
|
23 |
y = df[label]
|
24 |
|
25 |
my_bar.progress(50, text=progress_text)
|
@@ -49,8 +50,31 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
49 |
"eval_method": "holdout"
|
50 |
}
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
my_bar.progress(100, text=progress_text)
|
56 |
time.sleep(0.5)
|
@@ -112,13 +136,18 @@ def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimiz
|
|
112 |
with col3:
|
113 |
st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
st.divider()
|
120 |
-
st.plotly_chart(fig_features, theme="streamlit")
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
def download_model(model):
|
124 |
output_model = pickle.dumps(model)
|
|
|
7 |
import plotly.express as px
|
8 |
import base64
|
9 |
import time
|
10 |
+
from sklearn.compose import ColumnTransformer
|
11 |
+
from sklearn.pipeline import Pipeline
|
12 |
+
from sklearn.impute import SimpleImputer
|
13 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
14 |
+
from sklearn.inspection import permutation_importance
|
15 |
|
16 |
def autoML(csv, task, budget, label, metric_to_minimize_class, metric_to_minimize_reg):
|
17 |
|
|
|
20 |
time.sleep(0.5)
|
21 |
|
22 |
df = pd.read_csv(csv)
|
|
|
23 |
df_features = df[df.columns.difference([label])]
|
|
|
24 |
y = df[label]
|
25 |
|
26 |
my_bar.progress(50, text=progress_text)
|
|
|
50 |
"eval_method": "holdout"
|
51 |
}
|
52 |
|
53 |
+
num_cols = df_features.select_dtypes(include=['float64', 'int64']).columns
|
54 |
+
cat_cols = df_features.select_dtypes(include=['object']).columns
|
55 |
+
|
56 |
+
numeric_transformer = Pipeline(steps=[
|
57 |
+
('imputer', SimpleImputer(strategy='mean')),
|
58 |
+
('scaler', StandardScaler())
|
59 |
+
])
|
60 |
+
|
61 |
+
categorical_transformer = Pipeline(steps=[
|
62 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
63 |
+
('onehot', OneHotEncoder(handle_unknown='ignore'))
|
64 |
+
])
|
65 |
+
|
66 |
+
preprocessor = ColumnTransformer(
|
67 |
+
transformers=[
|
68 |
+
('num', numeric_transformer, num_cols),
|
69 |
+
('cat', categorical_transformer, cat_cols)
|
70 |
+
])
|
71 |
+
|
72 |
+
automl = AutoML(**automl_settings)
|
73 |
+
|
74 |
+
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
|
75 |
+
('classifier', automl)])
|
76 |
+
|
77 |
+
pipeline.fit(df_features, y)
|
78 |
|
79 |
my_bar.progress(100, text=progress_text)
|
80 |
time.sleep(0.5)
|
|
|
136 |
with col3:
|
137 |
st.metric(label="Time to train", value=str(round(automl.best_config_train_time, 2))+' sec')
|
138 |
|
139 |
+
perm_importance = permutation_importance(
|
140 |
+
pipeline, df_features, y, n_repeats=8
|
141 |
+
)
|
|
|
|
|
|
|
142 |
|
143 |
+
df_features_importance = pd.DataFrame({'features name': df_features.columns,
|
144 |
+
'features importance': perm_importance["importances_mean"],
|
145 |
+
'std error': perm_importance["importances_std"]})
|
146 |
+
|
147 |
+
fig_features = px.bar(df_features_importance, x='features importance', y='features name', error_x='std error')
|
148 |
+
|
149 |
+
st.divider()
|
150 |
+
st.plotly_chart(fig_features, theme="streamlit")
|
151 |
|
152 |
def download_model(model):
|
153 |
output_model = pickle.dumps(model)
|