# (Non-code page residue captured together with the file: "Spaces: Sleeping")
import streamlit as st | |
from streamlit_option_menu import option_menu | |
from pandas_profiling import ProfileReport | |
from streamlit_pandas_profiling import st_profile_report | |
import os | |
import pandas as pd | |
from pycaret.classification import * | |
from classification import prep_and_train | |
import time | |
from get_profile import get_profile | |
from classification import tuning | |
# NOTE: a disabled snippet used to live here that reloaded './dataset.csv'
# into `df` when that file already existed.
s = ClassificationExperiment()

# Sidebar: page navigation menu followed by the dataset uploader.
with st.sidebar:
    selected = option_menu(
        menu_title=None,
        options=["Home", 'Classification', 'Regression', 'Time Series'],
        icons=['house', 'file-binary', 'graph-up', 'bezier2'],
        menu_icon="cast",
        default_index=0,
    )

    st.title("Upload Your Dataset")
    file = st.file_uploader("Upload Your Dataset")
    if file:
        # Parse the upload, keep the frame in session state for the other
        # pages, and write a copy to 'dataset.csv'.
        uploaded_df = pd.read_csv(file, index_col=None)
        st.session_state.df = uploaded_df
        uploaded_df.to_csv('dataset.csv', index=None)
# Home page: a horizontal tab bar with an info tab and a data-profile tab.
# (The 'AutoML' tab is offered in the menu but has no handler in this block.)
if selected == 'Home':
    home_tabs = ["Info", "Data profile", 'AutoML']
    # One placeholder icon name per tab; presumably 'None' is used so that no
    # icon is drawn — confirm against the option-menu component.
    nones = ['None'] * len(home_tabs)
    section = option_menu(
        None,
        home_tabs,
        default_index=0,
        icons=nones,
        orientation="horizontal",
    )

    if section == 'Info':
        st.title('Main info about service')
        st.write('Some Text about service')
    elif section == 'Data profile':
        st.title('This section will give you main information about uploaded dataset')
        # NOTE(review): this hint names buttons ("Generate new profile",
        # "View old report") that do not exist below; only 'Generate report' does.
        st.write('Simply click "Generate new profile" if you want to generate new profile data and click "View old report to load previous profile"')

        # The checkbox switches the profiling config file passed to get_profile.
        if st.checkbox('Huge Dataset'):
            speedup = 'config_minimal.yaml'
        else:
            speedup = 'config_default.yaml'

        if st.button('Generate report'):
            # An unset `st.session_state.df` raises AttributeError (not
            # NameError), so test for the key explicitly to make sure the
            # friendly message is actually shown.
            if 'df' not in st.session_state:
                st.error('Please upload dataset first')
            else:
                st_profile_report(get_profile(st.session_state.df, speedup))
# Classification page: horizontal tab bar plus the "Prep & Train" tab, which
# lets the user pick a target column and candidate models, trains through
# prep_and_train(), saves the winner and shows its metrics.
# NOTE(review): the nesting below is reconstructed from statement order
# (the file's indentation was not available) — confirm the layout.
if selected == 'Classification':
    section = option_menu(
        None,
        ["Prep & Train", 'Tune & Analyse', 'Predict'],
        default_index=0,
        icons=['1-square', '1-square', '1-square'],
        orientation="horizontal",
    )

    if section == 'Prep & Train':
        # col1 (wide) shows results; col2 (narrow) holds the controls.
        col1, col2 = st.columns([3, 1.6])
        with col2:
            st.title("Prepare you data and train best model")

            # Check for the dataset explicitly instead of wrapping the whole
            # tab in `except AttributeError`, which also relabelled unrelated
            # AttributeErrors (e.g. from training) as a missing dataset.
            if 'df' not in st.session_state:
                st.error('Please load dataset first')
            else:
                st.session_state.targ = st.selectbox('Choose target', st.session_state.df.columns)
                # time = st.slider('budget_time', 0.3, 1.5, 0.5, 0.1)

                # Model id -> label shown in the multiselect.
                model_labels = {
                    'lr': 'LogReg',
                    'ridge': 'Ridge Classifier',
                    'lda': 'Linear Discriminant Analysis',
                    'et': 'Extra Trees Classifier',
                    'nb': 'Naive Bayes',
                    'qda': 'Quadratic Discriminant Analysis',
                    'rf': 'Random Forest Classifier',
                    'gbc': 'Gradient Boosting Classifier',
                    'lightgbm': 'Light Gradient Boosting Machine',
                    'catboost': 'CatBoost Classifier',
                    'ada': 'Ada Boost Classifier',
                    'dt': 'Decision Tree Classifier',
                    'knn': 'K Neighbors Classifier',
                    'dummy': 'Dummy Classifier',
                    'svm': 'SVM - Linear Kernel',
                }
                # Options come straight from the dict keys so the id list and
                # the labels cannot drift apart.
                model = st.multiselect(
                    'Choose model',
                    list(model_labels),
                    help='Blablabla',
                    format_func=model_labels.get,
                )

                if st.button('Try model'):
                    try:
                        (
                            st.session_state.best,
                            st.session_state.model_info,
                            st.session_state.metrics_info,
                        ) = prep_and_train(st.session_state.targ, st.session_state.df, model)
                        save_model(st.session_state.best, 'dt_pipeline')
                    except ValueError:
                        st.error('Please choose target with binary labels')
                    else:
                        # Remember the latest results so they can be shown
                        # again on reruns where the button is not pressed.
                        st.session_state.model_info_last = st.session_state.model_info
                        st.session_state.metrics_info_last = st.session_state.metrics_info
                        with col1:
                            st.subheader('Actual Model')
                            metrics_col, info_col = st.columns([3.5, 1.8])
                            with metrics_col:
                                st.dataframe(st.session_state.metrics_info)
                            with info_col:
                                st.dataframe(st.session_state.model_info)
                elif 'metrics_info_last' in st.session_state and 'model_info_last' in st.session_state:
                    with col1:
                        st.subheader('Your last teached model')
                        metrics_col, info_col = st.columns([3.5, 1.8])
                        with metrics_col:
                            st.dataframe(st.session_state.metrics_info_last)
                        with info_col:
                            st.dataframe(st.session_state.model_info_last)
                else:
                    # Nothing has been trained in this session yet.
                    st.write('teach the first model')
# "Tune & Analyse" tab of the Classification page: shows diagnostics for the
# model stored in session state and lets the user run hyper-parameter tuning.
# `section` is also assigned by the Home page, so the page is tested first.
if selected == 'Classification' and section == 'Tune & Analyse':
    st.title('Choose parameters to tune your model')

    if 'best' not in st.session_state or 'metrics_info_last' not in st.session_state:
        # Both values are only stored after a model has been trained.
        st.error('Please train a model on the "Prep & Train" tab first')
    else:
        st.subheader('Current model')
        st.table(st.session_state.metrics_info_last.head(1))

        # Three diagnostic plots of the current model, side by side.
        col1, col2, col3 = st.columns(3)
        with col1:
            plot_model(st.session_state.best, plot='auc', display_format='streamlit')
        with col2:
            plot_model(st.session_state.best, plot='threshold', display_format='streamlit')
        with col3:
            plot_model(st.session_state.best, plot='confusion_matrix', display_format='streamlit')

        # Left: last tuning results. Right: tuning controls. The controls are
        # rendered first so a tuning run from this rerun is visible on the left.
        col1, col2 = st.columns([2, 4])
        with col2:
            option = st.selectbox(
                'Choose the tuning engine',
                ('scikit-learn', 'optuna', 'scikit-optimize'))
            st.session_state.optimize = st.selectbox('Choose metric to optimize', ('Accuracy', 'AUC', 'F1'))
            # NOTE(review): the slider is labelled 'n_estimators' but its value
            # is passed to tune_model as n_iter.
            st.session_state.iters = st.slider('n_estimators', 5, 20, 5, 1)
            if st.button('Tune'):
                # setup() is called for its effect only; its return value is
                # not used here.
                setup(data=st.session_state.df, target=st.session_state.targ)
                st.session_state.tuned_dt = tune_model(
                    estimator=st.session_state.best,
                    n_iter=st.session_state.iters,
                    choose_better=True,
                    optimize=st.session_state.optimize,
                    # Forward the engine chosen above so the selectbox has an effect.
                    search_library=option,
                )
                st.session_state.info_df = pull()
        with col1:
            # Only show results once a tuning run has stored them.
            if 'info_df' in st.session_state:
                st.dataframe(st.session_state.info_df)
                st.write('Last best params')
                st.write(st.session_state.tuned_dt)