Spaces:

AICOE-Datamatics
/

AiNext

Build error

File size: 38,552 Bytes

247c8df

# -*- coding: utf-8 -*-

####  Importing Modules ####
import base64
import pandas as pd
import streamlit as st
from autoclean import data_clean
from model_pipeline_steps import get_problem_type1, model_build
from PIL import Image
from DA_P1 import get_details, imbalnce_ratio, word_cloud, plotly_target, plot_ngram
import pickle
from NLP_text_classification import model_train, predict_text, predict_csv
from kmeans import k_means
from jinja2.ext import i18n



# Module-level dict that main() fills during model building and reads back
# when rendering results and running predictions. Keys written in main():
# 'clean_data', 'auto_drop', 'problem', 'target_statement', 'target',
# 'model' and 'step_dict'. An empty dict means no model has been built yet.
info = {}

#********* Handling return variable in cache memory to solve reloading issue in streamlit ******#
@st.cache(allow_output_mutation=True)
def get_details_local(data):
    """Cached pass-through to ``get_details``.

    Returns whatever ``get_details(data)`` produces. The ``st.cache``
    decorator stores the result so the analysis is not recomputed on
    every Streamlit rerun.
    """
    return get_details(data)

@st.cache(allow_output_mutation=True)
def clean(dataset, drop_features):
    """Cached pass-through to ``data_clean``.

    Forwards ``dataset`` and ``drop_features`` to ``data_clean`` and
    returns its two results as a ``(cleaned_data, steps_dict)`` tuple.
    """
    cleaning_result = data_clean(dataset, drop_features)
    # Unpack explicitly so the wrapper always hands back a 2-tuple.
    cleaned_frame, cleaning_steps = cleaning_result
    return (cleaned_frame, cleaning_steps)

@st.cache(allow_output_mutation=True)
def get_problem_type_local(cleaned_data, target_data):
    """Cached pass-through to ``get_problem_type1``.

    Returns the problem type that ``get_problem_type1`` reports for the
    given cleaned data and target column.
    """
    return get_problem_type1(cleaned_data, target_data)

@st.cache(allow_output_mutation=True)
def model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict):
    """Cached pass-through to ``model_build``.

    All five arguments are forwarded unchanged, in the same order, and
    the value returned by ``model_build`` is handed back to the caller.
    """
    build_args = (cleaned_data, target_data, p_type, balance_data, steps_dict)
    return model_build(*build_args)

@st.cache(allow_output_mutation=True)
def model_train_local(dataset, input_feature, target_data, balance_data):
    """Cached pass-through to ``model_train``.

    Forwards the dataset, input feature, target column and balancing
    option to ``model_train`` and returns its result unchanged.
    """
    training_args = (dataset, input_feature, target_data, balance_data)
    return model_train(*training_args)

@st.cache(allow_output_mutation=True)
def word_cloud_local(dataset, input_col):
    """Cached pass-through to ``word_cloud``.

    Returns whatever ``word_cloud(dataset, input_col)`` produces; the
    caller in ``main`` checks the result for ``None`` before plotting.
    """
    return word_cloud(dataset, input_col)

@st.cache(allow_output_mutation=True)
def plotly_target_local(dataset, tg_col):
    """Cached pass-through to ``plotly_target``.

    Returns whatever ``plotly_target(dataset, tg_col)`` produces; the
    caller in ``main`` checks the result for ``None`` before plotting.
    """
    return plotly_target(dataset, tg_col)

@st.cache(allow_output_mutation=True)
def plot_ngram_local(dataset, tg_col):
    """Cached pass-through to ``plot_ngram``.

    Returns whatever ``plot_ngram(dataset, tg_col)`` produces; the
    caller in ``main`` checks the result for ``None`` before plotting.
    """
    return plot_ngram(dataset, tg_col)

#******************************************************************#


def main():
    try:
        # setting tab title and icon
        st.set_page_config(page_title="AiNext",
                           page_icon="image.png")

        # Hiding streamlit wateermark
        hide_streamlit_style = """
                    <style>
                    #MainMenu {visibility: hidden;}
                    footer {visibility: hidden;}
                    </style>
                    """
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        # To do Navigation Menu
        st.markdown(
            '<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">',
            unsafe_allow_html=True)
        st.markdown("""
        <nav class="navbar fixed-top navbar-expand-lg navbar-dark" style="background-color: #AED6F1;">
          <a class="navbar-brand" href=""><b><font color = "#8b0000">Ai</font><i style="color:#1997E5 ;">Next</i></b></a>
          <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
            <span class="navbar-toggler-icon"></span>
          </button>
          <div class="collapse navbar-collapse" id="navbarNav">
            <ul class="navbar-nav">
              <li class="nav-item active">
                <a class="nav-link disabled" href="#" style="color:black ;">Home <span class="sr-only">(current)</span></a>
              </li>
              <li class="nav-item">
                <a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:black ;" target="_blank">Contact Us</a>
              </li>
            </ul>
          </div>
          <div>
          <a style="color:red;" href="https://www.datamatics.com/" target="_blank"><b>DATAMATICS</b></a>
          </div>
        </nav>
        """, unsafe_allow_html=True)

        # Image in sidebar and link to mail
        image_loan = Image.open("AI.jpg")
        st.sidebar.image(image_loan, use_column_width=True)
        st.sidebar.markdown(
            """<a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:white ;" target="_blank">Mail us at - <u>technology.coe@digital.datamatics.com</u></a>""",
            unsafe_allow_html=True)

        # Upload CSV File
        st.header("Upload Input csv file")
        file_upload = st.file_uploader("  ", type=["csv"])


        if file_upload is not None:

            # Selecting Experiment type (Supervised or UnSupervised)
            st.subheader("Select the Experiment type")
            exp_type = st.selectbox(label=' ', options=['Select', 'Supervised', 'UnSupervised'])
            print(exp_type)

            # **************************** Supervised Section ********************************** #
            if exp_type == "Supervised":
                st.subheader("Supervised")

                # read Dataset
                dataset = pd.read_csv(file_upload)

                # read columns
                cols = dataset.columns.tolist()
                st.text(" ")

                # Selecting features to drop
                st.subheader("choose the features which you want to drop")
                drop_features = st.multiselect('', cols)
                # print(drop_features)


                # Selecting target feature
                st.text(" ")
                st.subheader("Pick Your Target feature")
                target_data = st.selectbox(label=' ', options=cols, index=len(cols) - 1)
                # print(target_data)

                # **** Following code is to identify problem type is NLP text classification or Predictive analysis using Input feature **** #
                total_len = len(cols)
                drop_len = len(drop_features)
                problem_statement = ""
                input_feature_temp = ""
                st.sidebar.text(" ")
                sidebar_col1, sidebar_col2, sidebar_col3 = st.sidebar.beta_columns(3)
                if st.checkbox("Check Problem Type"):
                    if (target_data not in drop_features) and ((total_len - drop_len) == 2):
                        temp_data = dataset.drop(drop_features, axis=1)
                        temp_data = temp_data.drop(target_data, axis=1)
                        temp_col = temp_data.columns.tolist()
                        print(temp_data.dtypes[temp_col[0]])
                        if temp_data.dtypes[temp_col[0]] == "object":
                            print("NLP text Classification")
                            html_string = "<button style='border-radius: 12px;algin:center;background-color:#04AA6D;border: none;color: white;padding: 20px;text-align: center;text-decoration: none;display: inline-block;font-size: 16px;margin: 4px 2px;'>NLP Text Classification</button>"
                            sidebar_col2.markdown(html_string, unsafe_allow_html=True)
                            problem_statement = "NLP text Classification"
                            input_feature_temp = temp_col[0]
                        else:
                            html_string = "<button style='border-radius: 12px;algin:center;background-color:#04AA6D;border: none;color: white;padding: 20px;text-align: center;text-decoration: none;display: inline-block;font-size: 16px;margin: 4px 2px;'>Predictive Analytics</button>"
                            sidebar_col2.markdown(html_string, unsafe_allow_html=True)
                            print("Predictive Analytics")
                            problem_statement = "Predictive Analytics"

                    elif (target_data not in drop_features) and ((total_len - drop_len) > 2):
                        html_string = "<button style='border-radius: 12px;algin:center;background-color:#04AA6D;border: none;color: white;padding: 20px;text-align: center;text-decoration: none;display: inline-block;font-size: 16px;margin: 4px 2px;'>Predictive Analytics</button>"
                        sidebar_col2.markdown(html_string, unsafe_allow_html=True)
                        print("Predictive Analytics")
                        problem_statement = "Predictive Analytics"
                    elif (target_data in drop_features):
                        st.error("Selected Target column is also selected to drop.So Can't proceed")
                #******************************************************************************************#


                # *********************** Predictive Analytics Section *************************************#
                if problem_statement == "Predictive Analytics" and problem_statement != "":
                    # ************ Data Analysis Code goes here ********** #
                    final_output = get_details_local(dataset)
                    # print(dataset)
                    st.text(" ")
                    first = dataset.head(10)
                    # last = dataset.tail(10)
                    if st.button("Click here to Analyze Data"):

                        container = st.beta_expander("Data Analysis and visualization Details")
                        # c1,c2=container.beta_columns(2)
                        container.subheader("First 10 Rows")
                        container.write(first)
                        # c2.subheader("Last 10 Rows")
                        # c2.write(last)

                        container.text(" ")

                        overview_con = container.beta_container()
                        overview_con.subheader("Overview of Dataset")
                        overview_con.text(" ")
                        ov_c1, ov_c2, ov_c3 = overview_con.beta_columns(3)
                        ov_c1.write("Statistics")
                        for key, value in final_output['overview']['data_statistics'].items():
                            temp = str(key) + ":  " + str(value)
                            ov_c1.text(temp)
                        ov_c2.write("Variable Info")
                        for key, value in final_output['overview']['variable_type'].items():
                            temp = str(key) + ":  " + str(value)
                            ov_c2.text(temp)
                        ov_c3.write("Reproduction")
                        for key, value in final_output['reproduction'].items():
                            temp = str(key) + ":  " + str(value)
                            ov_c3.text(temp)

                        container.text(" ")
                        numeric_con = container.beta_container()
                        numeric_con.subheader("Numeric Variable Information")
                        numeric_con.text(" ")

                        for key, value in final_output['numerical_variable_info']['variable_info'].items():
                            numeric_con.text(" ")
                            temp_key = "Numeric Column:" + str(key)
                            numeric_con.write(temp_key)
                            num_c1, num_c2, num_c3, num_c4 = numeric_con.beta_columns(4)
                            i = 1
                            for key1, value1 in value.items():
                                temp = str(key1) + ":  " + str(value1)
                                if (i <= 7):
                                    num_c1.text(temp)
                                elif (i > 7 and i <= 14):
                                    num_c2.text(temp)
                                elif (i > 14 and i <= 21):
                                    num_c3.text(temp)
                                elif i > 21 and i <= 24:
                                    num_c4.text(temp)
                                elif i > 24:
                                    numeric_con.plotly_chart(value1, config={'displaylogo': False})
                                i = i + 1

                        container.text(" ")
                        categorical_con = container.beta_container()
                        categorical_con.subheader("Categorical Variable Information")
                        categorical_con.text(" ")

                        for key, value in final_output['categorical_variable_info']['variable_info'].items():
                            categorical_con.text(" ")
                            temp_key = "Categorical Column:" + str(key)
                            categorical_con.write(temp_key)
                            num_c1, num_c2, num_c3, num_c4 = categorical_con.beta_columns(4)
                            i = 1
                            for key1, value1 in value.items():
                                temp = str(key1) + ":  " + str(value1)
                                if (i <= 5):
                                    num_c1.text(temp)
                                elif (i > 5 and i <= 10):
                                    num_c2.text(temp)
                                elif (i > 10 and i <= 15):
                                    num_c3.text(temp)
                                elif i > 15 and i <= 16:
                                    num_c4.text(temp)
                                elif i > 16:
                                    categorical_con.plotly_chart(value1, config={'displaylogo': False})
                                i = i + 1

                        container.text(" ")
                        container.text("Scatter chart Matrix")
                        container.plotly_chart(final_output['scatter_chart_matrix'],config = {'displaylogo': False})
                        container.text(" ")

                        container.text(" ")
                        corr_con = container.beta_container()
                        corr_con.subheader("Correlation Matrix Information")
                        corr_con.text(" ")
                        # corr_c1, corr_c2, corr_c3 = corr_con.beta_columns(3)
                        # j = 0
                        for key1, value1 in final_output['correlation_matrix_info'].items():
                            corr_con.text(" ")
                            corr_con.write(key1)
                            # col.pyplot(value)
                            corr_con.plotly_chart(value1, config={'displaylogo': False})
                            # col.plotly_chart(value1,use_container_width=True)
                            # j=j+1

                        container.text(" ")
                        missing_con = container.beta_container()
                        missing_con.subheader("Missing Values Information")
                        missing_con.text(" ")
                        mis_c1, mis_c2 = missing_con.beta_columns(2)
                        mis_c3, mis_c4 = missing_con.beta_columns(2)
                        k = 0
                        for key, value in final_output['missing_values_info'].items():
                            corr_con.text(" ")
                            col = mis_c1
                            if k == 0:
                                col = mis_c1
                            elif k == 1:
                                col = mis_c2
                            elif k == 2:
                                col = mis_c3
                            elif k == 3:
                                col = mis_c4
                            col.write(key)
                            col.pyplot(value)
                            k = k + 1
                    # ********************************************************#


                    # ******  Option for handling Imbalanced Dataset ******#
                    st.text(" ")
                    ir_res = imbalnce_ratio(dataset, target_data)
                    ir_res = "Imbalance Ratio (" + ir_res + ")"
                    st.subheader("Select below option to Handle Imbalanced Dataset (optional)")
                    st.text(ir_res)
                    balance_data = st.selectbox(label=' ', options=["Auto", "False"])
                    #*******************************************************#


                    #********* Data Cleaning and Model Building code goes here *********#
                    st.text(" ")
                    if (st.checkbox('Start build model') is True) and (target_data not in drop_features):
                        st.text(" ")
                        cleaned_data, steps_dict = clean(dataset, drop_features)
                        sample_data = cleaned_data.head()
                        info['clean_data'] = sample_data
                        info['auto_drop'] = steps_dict['auto_drop']

                        p_type = get_problem_type_local(cleaned_data, target_data)
                        statement_ptype = "Problem type :" + p_type
                        info['problem'] = statement_ptype

                        statement_target = "Target column: " + target_data
                        info['target_statement'] = statement_target
                        info['target'] = target_data

                        model = model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict)

                        info['model'] = model
                        info['step_dict'] = steps_dict


                    elif target_data in drop_features:
                        st.error("Selected Target column is also selected to drop.So Can't proceed")
                    #**********************************************************************************#


                    # print(info)
                    # ******************* Model Result ***********************#
                    if info:
                        for columns in info['auto_drop']:
                            txt = "automatically dropped column: " + columns
                            st.write(txt)
                        st.text(" ")
                        st.subheader("After Cleaning data")
                        st.write(info['clean_data'])
                        st.write(info['problem'])
                        st.write(info['target_statement'])
                        # print(info['model'])
                        for key, val in info['model'].items():
                            st.text(" ")
                            # if key == "Regression graph" :
                            #     st.write(key)
                            #     st.pyplot(val)
                            if key == "Best pipeline" or key == "step_dict":
                                pass
                            elif key == "ROC Curve" or key == "model_comparison" or key == "Regression graph":
                                st.write(key)
                                st.plotly_chart(val, config={'displaylogo': False})
                            elif key == "Classification Report":
                                st.write(key)
                                st.text(val)
                            elif key == "Handling Imbalanced Dataset":
                                st.write(key)
                                for key1, val1 in val.items():
                                    st.write(key1)
                                    st.text(val1)
                            else:
                                st.write(key)
                                st.write(val)
                                st.text(" ")
                                st.text(" ")
                        # ***************************************************************#

                        # ************************** Prediction **************************#
                        st.subheader("Upload csv file for Predictions : ")
                        file_upload1 = st.file_uploader("   ", type=["csv"])

                        print(file_upload1)

                        if file_upload1 is not None:
                            try:

                                test_data = pd.read_csv(file_upload1)

                                data = test_data.copy()

                                data.drop(info['step_dict']['dropped_features'], axis=1, inplace=True)

                                for col in data.columns:
                                    data[col].fillna(info['step_dict']['missing_values'][col], inplace=True)

                                # print(info['target'])

                                for data1 in info['step_dict']['categorical_to_numeric']:
                                    for key, value in data1.items():
                                        col_name = key.split('_encoded')[0]
                                        if col_name != info['target']:
                                            # print(col_name)
                                            # print(value)
                                            data[col_name].replace(value, inplace=True)

                                if info['target'] in data.columns: data.drop([info['target']], axis=1, inplace=True)

                                final_model = info['model']['Best pipeline']
                                # print(final_model)

                                predictions = final_model.predict(data)
                                # print(predictions)
                                print(len(test_data))
                                print(len(predictions))
                                predict_column_name = info['target'] + "_prediction"
                                test_data[predict_column_name] = predictions
                                for data1 in info['step_dict']['categorical_to_numeric']:
                                    for key, value in data1.items():
                                        col_name = key.split('_encoded')[0]
                                        if col_name == info['target']:
                                            # print(col_name)
                                            # print(value)
                                            d = {}
                                            for i, v in value.items():
                                                d[v] = i
                                            test_data[predict_column_name].replace(d, inplace=True)

                                # csv = test_data.to_csv(index=False)
                                # b64 = base64.b64encode(csv.encode()).decode()  # some strings <-> bytes conversions necessary here
                                # href = f'<a href="data:file/csv;base64,{b64}">Download The Prediction Results CSV File</a> (right-click and save as &lt;some_name&gt;.csv)'

                                csv = test_data.to_csv(index=False)

                                b64 = base64.b64encode(csv.encode()).decode()
                                href = f'<a href="data:file/csv;base64,{b64}" download="download.csv">Download Predicted file</a>'
                                st.markdown(href, unsafe_allow_html=True)

                                output_model = pickle.dumps(final_model)
                                b64 = base64.b64encode(output_model).decode()
                                href = f'<a href="data:file/output_model;base64,{b64}" download="Best_model.pkl">Download Best Model .pkl File</a> '
                                st.markdown(href, unsafe_allow_html=True)
                            except Exception as e:
                                st.text(e)
                                st.error("Uploaded wrong data for prediction")
                        # ***************************************************************************#
                # *********************** End of Predictive Analytics Section *************************************#

                # *********************** NLP text Classification Section *************************************#
                elif problem_statement == "NLP text Classification" and problem_statement != "":
                    try:
                        # ********* Data Analysis and visualization code ************** #
                        st.text(" ")
                        vis_con = st.beta_expander("Data Visualization")
                        st.text(" ")
                        vis_con.subheader("Select Input Feature")
                        select_col = ["Select"]
                        t_cols = select_col + cols
                        input_col = vis_con.selectbox(label=' ', options=t_cols)
                        st.set_option('deprecation.showPyplotGlobalUse', False)
                        res = word_cloud_local(dataset, input_col)
                        if res is not None: vis_con.plotly_chart(res)
                        true_bigrams = plot_ngram_local(dataset, input_col)
                        if true_bigrams is not None: vis_con.plotly_chart(true_bigrams, config={'displaylogo': False})
                        st.text(" ")
                        vis_con.subheader("Select target Feature")
                        tg_col = vis_con.selectbox(label='  ', options=t_cols)
                        plot_res = plotly_target_local(dataset, tg_col)
                        if plot_res is not None: vis_con.plotly_chart(plot_res, config={'displaylogo': False})
                        #*****************************************************************************************#


                        # ******  Option for handling Imbalanced Dataset ****** #
                        input_feature = input_feature_temp
                        st.text(" ")
                        ir_res = imbalnce_ratio(dataset, target_data)
                        ir_res = "Imbalance Ratio (" + ir_res + ")"
                        st.subheader("Select below option to Handle Imbalanced Dataset (optional)")
                        st.text(ir_res)
                        balance_data = st.selectbox(label=' ', options=["Auto", "False"])
                        #***********************************************************#

                        # ********* Data Cleaning and Model Building code goes here *********#
                        st.text(" ")
                        if st.checkbox("Start Build model") and input_feature != target_data:
                            model_info = model_train_local(dataset, input_feature, target_data, balance_data)

                            #************ Model Result ***************#
                            for key, val in model_info.items():
                                st.text(" ")
                                if key == "Classification Report":
                                    st.write(key)
                                    st.text(val)
                                elif key == "model_comparison" or key == "ROC Curve":
                                    st.write(key)
                                    st.plotly_chart(val, config={'displaylogo': False})
                                elif key == "Handling Imbalanced Dataset":
                                    st.write(key)
                                    for key1, val1 in val.items():
                                        st.write(key1)
                                        st.text(val1)
                                elif key == "Best pipeline" or key == "tfidf_vector":
                                    pass
                                else:
                                    st.write(key)
                                    st.write(val)
                            #***********************************************************#

                            # ****************** Prediction ******************* #
                            c1, c2 = st.beta_columns(2)
                            exp1 = c1.beta_expander("Prediction on text data")
                            exp2 = c2.beta_expander("Prediction on csv data")
                            form_predict = exp1.form("predict")
                            text_val = form_predict.text_area("Enter text for prediction")
                            if form_predict.form_submit_button("Predict") and text_val != "":
                                prediction = predict_text(text_val, model_info["Best pipeline"],
                                                          model_info["tfidf_vector"])
                                prediction = "Result :" + str(prediction[0])
                                form_predict.write(prediction)
                            f_up = exp2.file_uploader("predict_csv", type=["csv"])
                            if f_up and exp2.button("Predict"):
                                df = pd.read_csv(f_up, encoding='ISO-8859-1')
                                df_copy = df.copy()
                                predictions = predict_csv(df_copy, model_info["Best pipeline"],
                                                          model_info["tfidf_vector"], input_feature)
                                predict_column_name = target_data + "_prediction"
                                df[predict_column_name] = predictions

                                csv = df.to_csv(index=False)

                                b64 = base64.b64encode(csv.encode()).decode()
                                href = f'<a href="data:file/csv;base64,{b64}" download="download.csv">Download Predicted file</a>'
                                exp2.markdown(href, unsafe_allow_html=True)

                                output_model = pickle.dumps(model_info["Best pipeline"])
                                b64 = base64.b64encode(output_model).decode()
                                href = f'<a href="data:file/output_model;base64,{b64}" download="Best_model.pkl">Download Best Model .pkl File</a> '
                                exp2.markdown(href, unsafe_allow_html=True)
                                print("completed")


                            elif target_data == input_feature:
                                st.error("Input feature and target data cannot be same")
                    except Exception as e:
                        st.error(e)
                        st.error("Something went wrong")
                        # ****************************************************** #
                # *********************** End of NLP text Classification Section *************************************#
            # *************************  End of Supervised Section **************************************************#


            # **************************** UnSupervised Section (In Progress) ********************************** #
            elif exp_type == "UnSupervised":
                st.subheader("UnSupervised")

                # ************ Data Analysis Code goes here ********** #
                dataset = pd.read_csv(file_upload)
                final_output = get_details_local(dataset)
                cols = dataset.columns.tolist()
                # print(dataset)
                st.text(" ")
                first = dataset.head(10)
                # last = dataset.tail(10)
                if st.button("Click here to Analyze Data"):

                    container = st.beta_expander("Data Analysis and visualization Details")
                    # c1,c2=container.beta_columns(2)
                    container.subheader("First 10 Rows")
                    container.write(first)
                    # c2.subheader("Last 10 Rows")
                    # c2.write(last)

                    container.text(" ")

                    overview_con = container.beta_container()
                    overview_con.subheader("Overview of Dataset")
                    overview_con.text(" ")
                    ov_c1, ov_c2, ov_c3 = overview_con.beta_columns(3)
                    ov_c1.write("Statistics")
                    for key, value in final_output['overview']['data_statistics'].items():
                        temp = str(key) + ":  " + str(value)
                        ov_c1.text(temp)
                    ov_c2.write("Variable Info")
                    for key, value in final_output['overview']['variable_type'].items():
                        temp = str(key) + ":  " + str(value)
                        ov_c2.text(temp)
                    ov_c3.write("Reproduction")
                    for key, value in final_output['reproduction'].items():
                        temp = str(key) + ":  " + str(value)
                        ov_c3.text(temp)

                    container.text(" ")
                    numeric_con = container.beta_container()
                    numeric_con.subheader("Numeric Variable Information")
                    numeric_con.text(" ")

                    for key, value in final_output['numerical_variable_info']['variable_info'].items():
                        numeric_con.text(" ")
                        temp_key = "Numeric Column:" + str(key)
                        numeric_con.write(temp_key)
                        num_c1, num_c2, num_c3, num_c4 = numeric_con.beta_columns(4)
                        i = 1
                        for key1, value1 in value.items():
                            temp = str(key1) + ":  " + str(value1)
                            if (i <= 7):
                                num_c1.text(temp)
                            elif (i > 7 and i <= 14):
                                num_c2.text(temp)
                            elif (i > 14 and i <= 21):
                                num_c3.text(temp)
                            elif i > 21 and i <= 24:
                                num_c4.text(temp)
                            elif i > 24:
                                numeric_con.plotly_chart(value1, config={'displaylogo': False})
                            i = i + 1

                    container.text(" ")
                    categorical_con = container.beta_container()
                    categorical_con.subheader("Categorical Variable Information")
                    categorical_con.text(" ")

                    for key, value in final_output['categorical_variable_info']['variable_info'].items():
                        categorical_con.text(" ")
                        temp_key = "Categorical Column:" + str(key)
                        categorical_con.write(temp_key)
                        num_c1, num_c2, num_c3, num_c4 = categorical_con.beta_columns(4)
                        i = 1
                        for key1, value1 in value.items():
                            temp = str(key1) + ":  " + str(value1)
                            if (i <= 5):
                                num_c1.text(temp)
                            elif (i > 5 and i <= 10):
                                num_c2.text(temp)
                            elif (i > 10 and i <= 15):
                                num_c3.text(temp)
                            elif i > 15 and i <= 16:
                                num_c4.text(temp)
                            elif i > 16:
                                categorical_con.plotly_chart(value1, config={'displaylogo': False})
                            i = i + 1

                    container.text(" ")
                    container.text("Scatter chart Matrix")
                    container.plotly_chart(final_output['scatter_chart_matrix'],config = {'displaylogo': False})
                    container.text(" ")

                    container.text(" ")
                    corr_con = container.beta_container()
                    corr_con.subheader("Correlation Matrix Information")
                    corr_con.text(" ")
                    # corr_c1, corr_c2, corr_c3 = corr_con.beta_columns(3)
                    # j = 0
                    for key1, value1 in final_output['correlation_matrix_info'].items():
                        corr_con.text(" ")
                        corr_con.write(key1)
                        # col.pyplot(value)
                        corr_con.plotly_chart(value1, config={'displaylogo': False})
                        # col.plotly_chart(value1,use_container_width=True)
                        # j=j+1

                    container.text(" ")
                    missing_con = container.beta_container()
                    missing_con.subheader("Missing Values Information")
                    missing_con.text(" ")
                    mis_c1, mis_c2 = missing_con.beta_columns(2)
                    mis_c3, mis_c4 = missing_con.beta_columns(2)
                    k = 0
                    for key, value in final_output['missing_values_info'].items():
                        corr_con.text(" ")
                        col = mis_c1
                        if k == 0:
                            col = mis_c1
                        elif k == 1:
                            col = mis_c2
                        elif k == 2:
                            col = mis_c3
                        elif k == 3:
                            col = mis_c4
                        col.write(key)
                        col.pyplot(value)
                        k = k + 1

                # ********************************************************#

                # *********** Selecting Model for clustering ***********#
                st.subheader("Select the Model")
                model = st.selectbox(label=' ', options=['Select', 'KMeans'])
                #********************************************************#

                # ******* Data cleaning and checking with elbow technique using Kmeans clustering *******#
                if model == "KMeans":
                    st.text(" ")
                    st.subheader("choose the features which you want to drop")
                    drop_features = st.multiselect('', cols)

                    st.text(" ")
                    cleaned_data, steps_dict = clean(dataset, drop_features)
                    sample_data = cleaned_data.head()
                    info['clean_data'] = sample_data
                    info['auto_drop'] = steps_dict['auto_drop']
                    val1 = k_means(dataset, cols, drop_features, sample_data)
                    st.write("Elbow-Curve")
                    st.plotly_chart(val1, config={'displaylogo': False})
                #                     st.write("Silhouette-Score")
                #                     st.plotly_chart(val2,  config={'displaylogo': False})

                # ******************************************************************************* #

                else:
                    pass

            # **************************** End of UnSupervised Section ********************************** #

    except Exception as e:
        st.header(e)


# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()