Spaces:
Sleeping
Sleeping
File size: 7,854 Bytes
dee4d19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import streamlit as st
from streamlit_option_menu import option_menu
from pandas_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report
import os
import pandas as pd
from pycaret.classification import *
from classification import prep_and_train
import time
from get_profile import get_profile
from classification import tuning
# if os.path.exists('./dataset.csv'):
# df = pd.read_csv('dataset.csv', index_col=None)
# NOTE(review): the original indentation of this script was lost. The nesting
# below (upload widgets rendered inside the sidebar) is a reconstruction --
# confirm against the deployed layout.

# PyCaret experiment object, created on every script run.
s = ClassificationExperiment()

# Sidebar: top-level page navigation followed by the dataset uploader.
with st.sidebar:
    selected = option_menu(
        menu_title=None,
        options=["Home", 'Classification', 'Regression', 'Time Series'],
        icons=['house', 'file-binary', 'graph-up', 'bezier2'],
        menu_icon="cast",
        default_index=0,
    )
    st.title("Upload Your Dataset")
    uploaded = st.file_uploader("Upload Your Dataset")

# When a file is present, parse it as CSV, keep the frame in session state so
# the other pages can read it, and write a copy to ./dataset.csv.
if uploaded is not None:
    dataset = pd.read_csv(uploaded, index_col=None)
    st.session_state.df = dataset
    dataset.to_csv('dataset.csv', index=None)
# "Home" page: a static info tab and a tab that renders a profiling report
# for the uploaded dataset on demand.
if selected == 'Home':
    # Five literal 'None' strings are passed as icon names, as in the original.
    # NOTE(review): presumably this suppresses icons in the horizontal menu;
    # only three options are defined -- confirm the intended length.
    nones = ['None'] * 5
    section = option_menu(
        None,
        ["Info", "Data profile", 'AutoML'],
        default_index=0,
        icons=nones,
        orientation="horizontal",
    )

    if section == 'Info':
        st.title('Main info about service')
        st.write('Some Text about service')
    elif section == 'Data profile':
        st.title('This section will give you main information about uploaded dataset')
        st.write('Simply click "Generate new profile" if you want to generate new profile data and click "View old report to load previous profile"')

        # The checkbox selects which profiling config file name is handed to
        # get_profile.
        speedup = (
            'config_minimal.yaml'
            if st.checkbox('Huge Dataset')
            else 'config_default.yaml'
        )

        if st.button('Generate report'):
            # The dataset lives in st.session_state, where a missing key
            # surfaces as AttributeError, not NameError. Check membership
            # explicitly so the "upload first" hint is actually shown and
            # genuine profiling errors are not masked.
            if 'df' not in st.session_state:
                st.error('Please upload dataset first')
            else:
                st_profile_report(get_profile(st.session_state.df, speedup))
def _show_model_summary(container, heading, metrics, info):
    """Render *heading* and the metrics / model-info tables inside *container*.

    The two tables are placed side by side in a nested pair of columns.
    """
    with container:
        st.subheader(heading)
        metrics_col, info_col = st.columns([3.5, 1.8])
        with metrics_col:
            st.dataframe(metrics)
        with info_col:
            st.dataframe(info)


# "Classification" page. This block draws the tab menu and implements the
# "Prep & Train" tab: pick a target column and candidate models, train via
# prep_and_train, persist the winner, and show its summary tables.
if selected == 'Classification':
    section = option_menu(
        None,
        ["Prep & Train", 'Tune & Analyse', 'Predict'],
        default_index=0,
        icons=['1-square', '1-square', '1-square'],
        orientation="horizontal",
    )

    if section == 'Prep & Train':
        results_col, controls_col = st.columns([3, 1.6])
        with controls_col:
            st.title("Prepare you data and train best model")

            # Explicit check instead of a blanket `except AttributeError`, so
            # that unrelated AttributeErrors raised during training are not
            # misreported as a missing dataset.
            if 'df' not in st.session_state:
                st.error('Please load dataset first')
            else:
                st.session_state.targ = st.selectbox(
                    'Choose target', st.session_state.df.columns
                )

                # Model id -> label shown in the multiselect. The ids are
                # what gets passed on to prep_and_train.
                model_labels = {
                    'lr': 'LogReg',
                    'ridge': 'Ridge Classifier',
                    'lda': 'Linear Discriminant Analysis',
                    'et': 'Extra Trees Classifier',
                    'nb': 'Naive Bayes',
                    'qda': 'Quadratic Discriminant Analysis',
                    'rf': 'Random Forest Classifier',
                    'gbc': 'Gradient Boosting Classifier',
                    'lightgbm': 'Light Gradient Boosting Machine',
                    'catboost': 'CatBoost Classifier',
                    'ada': 'Ada Boost Classifier',
                    'dt': 'Decision Tree Classifier',
                    'knn': 'K Neighbors Classifier',
                    'dummy': 'Dummy Classifier',
                    'svm': 'SVM - Linear Kernel',
                }
                # Options come from the dict keys (insertion order) so the id
                # list and the labels cannot drift apart.
                model = st.multiselect(
                    'Choose model',
                    list(model_labels),
                    help='Blablabla',
                    format_func=model_labels.get,
                )

                if st.button('Try model'):
                    try:
                        best, model_info, metrics_info = prep_and_train(
                            st.session_state.targ, st.session_state.df, model
                        )
                    except ValueError:
                        st.error('Please choose target with binary labels')
                    else:
                        st.session_state.best = best
                        st.session_state.model_info = model_info
                        st.session_state.metrics_info = metrics_info
                        save_model(best, 'dt_pipeline')
                        # Remember the latest result so it can be redisplayed
                        # on later reruns and read by the tuning tab.
                        st.session_state.model_info_last = model_info
                        st.session_state.metrics_info_last = metrics_info
                        _show_model_summary(
                            results_col, 'Actual Model', metrics_info, model_info
                        )
                elif (
                    'metrics_info_last' in st.session_state
                    and 'model_info_last' in st.session_state
                ):
                    _show_model_summary(
                        results_col,
                        'Your last teached model',
                        st.session_state.metrics_info_last,
                        st.session_state.model_info_last,
                    )
                else:
                    st.write('teach the first model')
# "Classification" page, "Tune & Analyse" tab: show diagnostic plots for the
# currently trained model and run hyper-parameter tuning on demand.
# `selected` is tested first so `section` is only read on the page that
# defines the tab name being compared.
if selected == 'Classification' and section == 'Tune & Analyse':
    st.title('Choose parameters to tune your model')

    # Both values are stored by the "Prep & Train" tab. Without them there is
    # nothing to plot or tune, so say so instead of raising AttributeError.
    if 'best' not in st.session_state or 'metrics_info_last' not in st.session_state:
        st.error('Please train a model on the "Prep & Train" tab first')
    else:
        st.subheader('Current model')
        st.table(st.session_state.metrics_info_last.head(1))

        # One diagnostic plot per column.
        plot_names = ('auc', 'threshold', 'confusion_matrix')
        for plot_col, plot_name in zip(st.columns(3), plot_names):
            with plot_col:
                plot_model(
                    st.session_state.best,
                    plot=plot_name,
                    display_format='streamlit',
                )

        results_col, controls_col = st.columns([2, 4])
        with controls_col:
            option = st.selectbox(
                'Choose the tuning engine',
                ('scikit-learn', 'optuna', 'scikit-optimize'))
            st.session_state.optimize = st.selectbox(
                'Choose metric to optimize', ('Accuracy', 'AUC', 'F1'))
            # NOTE(review): the slider is labelled 'n_estimators' but its
            # value is passed to tune_model as n_iter.
            st.session_state.iters = st.slider('n_estimators', 5, 20, 5, 1)

            if st.button('Tune'):
                setup(data=st.session_state.df, target=st.session_state.targ)
                try:
                    # search_library forwards the engine chosen above, which
                    # was previously collected but never used.
                    st.session_state.tuned_dt = tune_model(
                        estimator=st.session_state.best,
                        n_iter=st.session_state.iters,
                        choose_better=True,
                        optimize=st.session_state.optimize,
                        search_library=option,
                    )
                except ImportError as err:
                    # Engines other than scikit-learn are optional installs.
                    st.error(f'Tuning engine "{option}" is not available: {err}')
                else:
                    st.session_state.info_df = pull()

        with results_col:
            # info_df is stored only after tuned_dt, so its presence implies
            # both are available.
            if 'info_df' in st.session_state:
                st.dataframe(st.session_state.info_df)
                st.write('Last best params')
                st.write(st.session_state.tuned_dt)
|