# File size: 7,854 Bytes
# dee4d19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import streamlit as st
from streamlit_option_menu import option_menu
from pandas_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report
import os 
import pandas as pd
from pycaret.classification import *
from classification import prep_and_train
import time
from get_profile import get_profile
from classification import tuning

# if os.path.exists('./dataset.csv'): 
#     df = pd.read_csv('dataset.csv', index_col=None)

# Module-level PyCaret experiment object; not referenced again in the visible
# part of this script.
s = ClassificationExperiment()


# Sidebar: top-level page navigation followed by the dataset uploader.
with st.sidebar:
    selected = option_menu(
        menu_title=None,
        options=["Home", 'Classification', 'Regression', 'Time Series'],
        icons=['house', 'file-binary', 'graph-up', 'bezier2'],
        menu_icon="cast",
        default_index=0,
    )
    st.title("Upload Your Dataset")
    file = st.file_uploader("Upload Your Dataset")
    if file:
        # Parse the upload, keep it in session state so the other pages can
        # read it on later reruns, and mirror it to dataset.csv on disk.
        uploaded_frame = pd.read_csv(file, index_col=None)
        st.session_state.df = uploaded_frame
        uploaded_frame.to_csv('dataset.csv', index=None)


if selected == 'Home':
    # Home page: horizontal sub-menu with an info tab and a data-profiling tab.
    # ('AutoML' is offered in the menu but has no branch in this block.)
    home_sections = ["Info", "Data profile", 'AutoML']
    section = option_menu(
        None,
        home_sections,
        default_index=0,
        # One placeholder icon name per menu entry.
        icons=['None'] * len(home_sections),
        orientation="horizontal",
    )
    if section == 'Info':
        st.title('Main info about service')
        st.write('Some Text about service')
    elif section == 'Data profile':
        st.title('This section will give you main information about uploaded dataset')
        st.write('Simply click "Generate new profile" if you want to generate new profile data and click "View old report to load previous profile"')
        # The checkbox picks which profiling config file is handed to get_profile.
        if st.checkbox('Huge Dataset'):
            speedup = 'config_minimal.yaml'
        else:
            speedup = 'config_default.yaml'
        if st.button('Generate report'):
            # A missing session-state key raises AttributeError (not NameError)
            # on attribute access, so test for the key explicitly instead of
            # relying on an exception handler.
            if 'df' not in st.session_state:
                st.error('Please upload dataset first')
            else:
                st_profile_report(get_profile(st.session_state.df, speedup))

def _show_model_summary(container, heading, metrics_df, model_df):
    """Render a subheader plus side-by-side metrics and model-info tables.

    Everything is drawn inside *container* (a Streamlit column); the metrics
    table gets the wider of the two nested columns.
    """
    with container:
        st.subheader(heading)
        metrics_col, info_col = st.columns([3.5, 1.8])
        with metrics_col:
            st.dataframe(metrics_df)
        with info_col:
            st.dataframe(model_df)


if selected == 'Classification':
    # Classification page: horizontal sub-menu selecting the workflow step.
    section = option_menu(
        None,
        ["Prep & Train", 'Tune & Analyse', 'Predict'],
        default_index=0,
        icons=['1-square', '1-square', '1-square'],
        orientation="horizontal",
    )
    if section == 'Prep & Train':
        # Left (wide) column shows results, right (narrow) column the controls.
        col1, col2 = st.columns([3, 1.6])
        with col2:
            st.title("Prepare you data and train best model")
            # Check for the dataset explicitly rather than wrapping the whole
            # tab in `except AttributeError`, which would also relabel genuine
            # errors raised during training as "no dataset".
            if 'df' not in st.session_state:
                st.error('Please load dataset first')
            else:
                st.session_state.targ = st.selectbox('Choose target', st.session_state.df.columns)
                # time = st.slider('budget_time', 0.3, 1.5, 0.5, 0.1)
                # Model id (passed on to prep_and_train) -> label shown in the UI.
                dic = {
                    'lr': 'LogReg',
                    'ridge': 'Ridge Classifier',
                    'lda': 'Linear Discriminant Analysis',
                    'et': 'Extra Trees Classifier',
                    'nb': 'Naive Bayes',
                    'qda': 'Quadratic Discriminant Analysis',
                    'rf': 'Random Forest Classifier',
                    'gbc': 'Gradient Boosting Classifier',
                    'lightgbm': 'Light Gradient Boosting Machine',
                    'catboost': 'CatBoost Classifier',
                    'ada': 'Ada Boost Classifier',
                    'dt': 'Decision Tree Classifier',
                    'knn': 'K Neighbors Classifier',
                    'dummy': 'Dummy Classifier',
                    'svm': 'SVM - Linear Kernel',
                }
                # The options are the dict keys (insertion order), so the id
                # list and the label mapping cannot drift apart.
                model = st.multiselect('Choose model', list(dic), help='Blablabla', format_func=dic.get)
                if st.button('Try model'):
                    try:
                        (
                            st.session_state.best,
                            st.session_state.model_info,
                            st.session_state.metrics_info,
                        ) = prep_and_train(st.session_state.targ, st.session_state.df, model)
                        save_model(st.session_state.best, 'dt_pipeline')
                        # model_info.to_csv('model_info.csv', index=None)
                        # metrics_info.to_csv('metrics_info.csv',index=None)
                    except ValueError:
                        # NOTE(review): going by the message, ValueError is taken to
                        # mean a non-binary target - confirm against prep_and_train.
                        st.error('Please choose target with binary labels')
                    else:
                        # Remember this result so it can be shown again on reruns
                        # where the button is not pressed.
                        st.session_state.model_info_last = st.session_state.model_info
                        st.session_state.metrics_info_last = st.session_state.metrics_info
                        _show_model_summary(
                            col1,
                            'Actual Model',
                            st.session_state.metrics_info,
                            st.session_state.model_info,
                        )
                elif 'metrics_info_last' in st.session_state and 'model_info_last' in st.session_state:
                    _show_model_summary(
                        col1,
                        'Your last teached model',
                        st.session_state.metrics_info_last,
                        st.session_state.model_info_last,
                    )
                else:
                    # Nothing has been trained in this session yet.
                    st.write('teach the first model')

    if section == 'Tune & Analyse':
        st.title('Choose parameters to tune your model')
        # This tab needs the model, metrics, dataset and target stored in
        # session state by the "Prep & Train" tab. If any is missing, show a
        # message and halt this run instead of failing with AttributeError.
        if any(key not in st.session_state for key in ('best', 'metrics_info_last', 'df', 'targ')):
            st.error('Please train a model first')
            st.stop()
        # The only visible writer of metrics_info.csv is commented out, so the
        # file may not exist; fall back to the in-session copy rather than crash.
        try:
            metrics_info_last = pd.read_csv('metrics_info.csv', index_col=None)
        except FileNotFoundError:
            metrics_info_last = st.session_state.metrics_info_last
        st.subheader('Current model')
        st.table(st.session_state.metrics_info_last.head(1))
        # Three diagnostic plots of the current model, one per column.
        col1, col2, col3 = st.columns(3)
        for plot_column, plot_name in zip((col1, col2, col3), ('auc', 'threshold', 'confusion_matrix')):
            with plot_column:
                plot_model(st.session_state.best, plot=plot_name, display_format='streamlit')

        col1, col2 = st.columns([2, 4])
        with col2:
            option = st.selectbox(
                'Choose the tuning engine',
                ('scikit-learn', 'optuna', 'scikit-optimize'))
            st.session_state.optimize = st.selectbox('Choose metric to optimize', ('Accuracy', 'AUC', 'F1'))
            # Despite its label, this slider value is passed to tune_model as n_iter.
            st.session_state.iters = st.slider('n_estimators', 5, 20, 5, 1)
            if st.button('Tune'):
                clf1 = setup(data=st.session_state.df, target=st.session_state.targ)
                st.session_state.tuned_dt = tune_model(
                    estimator=st.session_state.best,
                    n_iter=st.session_state.iters,
                    choose_better=True,
                    optimize=st.session_state.optimize,
                    # Use the engine chosen above; previously the selection
                    # was ignored and the default library was always used.
                    search_library=option,
                )
                # pull() is called right after tuning and its result is the
                # table displayed in the left column below.
                st.session_state.info_df = pull()
            with col1:
                # Tuning results exist only after 'Tune' was pressed at least
                # once in this session; until then show nothing.
                if 'info_df' in st.session_state and 'tuned_dt' in st.session_state:
                    st.dataframe(st.session_state.info_df)
                    st.write('Last best params')
                    st.write(st.session_state.tuned_dt)