scorpion237 committed on
Commit
6a19a00
1 Parent(s): 74fa9ef

Create app.py

Files changed (1)
  1. app.py +441 -0
app.py ADDED
import streamlit as st, base64
import pandas as pd, seaborn as sns
import os, matplotlib.pyplot as plt
import pickle, numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


# Page background image
def add_bg_from_local(image_file):
    with open(image_file, "rb") as f:
        encoded_string = base64.b64encode(f.read())
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(data:image/png;base64,{encoded_string.decode()});
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
add_bg_from_local('./images/route.png')


fig = plt.figure(figsize=(10, 10))
_, middle, _ = st.columns((2, 3, 2))
with middle:
    st.title(":orange[_Scoring App_]")
# Path to the data folder
path = "./data"

# Load the dataset (cached by Streamlit)
@st.cache_data
def load_data(file_path):
    return pd.read_csv(os.path.join(path, file_path))

# Convert a dataframe to CSV bytes for download
def convert_df_to_csv(frame):
    return frame.to_csv(index=False).encode("utf-8")

# Main application
st.sidebar.image(r"./images/picture1.png")
def main():
    st.markdown("<h2 style='text-align:center; color:green;'> Classification for "
                "credit approval </h2>", unsafe_allow_html=True)

    # Upload a CSV file
    uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])

    # Build the menu
    menu = ["Home", "Data Exploration", "Data Visualisation", "Make prediction"]
    choice = st.sidebar.selectbox("Select menu", menu)

    # Load the dataset
    data = load_data("loan.csv")

    # Drop the Loan_ID column
    data.drop("Loan_ID", axis=1, inplace=True)
    if choice == "Home":
        st.write("For this project we built a classification model that decides, "
                 "from a handful of applicant variables, whether or not a bank loan "
                 "can be granted to a given person.")

        st.subheader(":orange[__Dataset overview__] :memo:")

        st.markdown("The dataset has 614 rows and 13 columns. **Loan_Status** is the "
                    "target variable (categorical with two classes: **Y** when the loan "
                    "was granted, **N** otherwise). To get the best possible results we "
                    "first run an **exploratory analysis** of the data, then prepare the "
                    "data, and finally build and tune the models. "
                    "`If you upload a CSV file, you can compare the predictions of each "
                    "model and download the corresponding CSV file.`")
        #st.image("./images/processor.jpg")

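    # Expected columns in data/loan.csv, reconstructed from the widgets and dummy
    # variables used later in this file (13 columns in total): Loan_ID, Gender, Married,
    # Dependents, Education, Self_Employed, ApplicantIncome, CoapplicantIncome,
    # LoanAmount, Loan_Amount_Term, Credit_History, Property_Area and the target
    # Loan_Status (Y/N).
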
    if choice == "Data Exploration":
        st.subheader(":orange[_Data Exploration_] :bar_chart:")
        # Show the first rows
        st.write(data.head())

        # Missing values
        if st.sidebar.checkbox("Missing values"):
            st.subheader(":orange[Missing values]")
            na_count = data.isnull().sum().to_frame(name='count')
            na_per = (data.isnull().sum().to_frame(name='percentage %') / data.shape[0] * 100).round(2)
            st.write(pd.concat([na_count, na_per], axis=1).sort_values(by='count', ascending=False).T)

        # Unique values per column
        if st.sidebar.checkbox("Unique values per column"):
            st.subheader(":orange[Unique values per column]")
            only = data.nunique().sort_values(ascending=False).to_frame(name='count')
            perc = (data.nunique().sort_values(ascending=False).to_frame(name='percentage %') / data.shape[0] * 100).round(2)
            dtype = data.dtypes.to_frame(name='dtypes')
            st.write(pd.concat([only, perc, dtype], axis=1).T)

        # Summary statistics
        if st.sidebar.checkbox("Summary statistics"):
            st.subheader(":orange[Summary statistics]")
            st.write(data.describe())

        # Correlation matrix
        if st.sidebar.checkbox("Correlation matrix"):
            fig = plt.figure(figsize=(7, 5))
            st.subheader(":orange[Correlation matrix]")
            sns.heatmap(data.corr(numeric_only=True), annot=True, vmin=-1, vmax=1, cmap='ocean')
            st.pyplot(fig)
            plt.show()

    if choice == "Data Visualisation":
        st.subheader(":orange[_Data Visualisation_] :chart_with_upwards_trend:")
        if st.sidebar.checkbox("Univariate analysis"):
            # Categorical variables
            categorical_columns = data.select_dtypes(include='object').columns.tolist()
            st.write("Categorical variables")
            st.write(categorical_columns)
            fig = plt.figure(figsize=(14, 8))
            sns.set_theme(context='notebook', style='darkgrid', palette='deep', font='sans-serif', font_scale=1, color_codes=True, rc=None)
            for idx, col in enumerate(categorical_columns[:-1]):
                plt.subplot(2, 3, idx + 1)
                sns.countplot(data=data, x=col, hue="Loan_Status")
            sns.countplot(data=data, x='Loan_Status')
            st.pyplot(fig)
            plt.show()

            # Numerical variables
            numerical_columns = data.select_dtypes(include='number').columns.tolist()
            st.write("Numerical variables")
            st.write(numerical_columns)
            fig = plt.figure(figsize=(15, 7))
            for idx, col in enumerate(numerical_columns):
                plt.subplot(2, 3, idx + 1)
                plt.hist(data[col], density=True)
                sns.kdeplot(data=data, x=col)
                plt.title(col)
            #plt.subplots_adjust(hspace=0.5)
            plt.tight_layout(h_pad=2, w_pad=3.)
            st.pyplot(fig)
            plt.show()

        if st.sidebar.checkbox("Bivariate analysis"):
            st.subheader(":orange[Bivariate analysis]")
            numerical_columns = data.select_dtypes(include='number').columns.tolist()
            fig = plt.figure(figsize=(14, 8))
            for idx, num_col in enumerate(numerical_columns[:-2]):
                plt.subplot(2, 2, idx + 1)
                sns.boxplot(y=num_col, data=data, x='Loan_Status')
            plt.tight_layout(h_pad=2, w_pad=3.)
            st.pyplot(fig)
            plt.show()

    if choice == "Make prediction":
        st.subheader(":orange[Make prediction] :fleur_de_lis:")
        if uploaded_file is not None:
            data = pd.read_csv(uploaded_file)

            # Data preprocessing
            from sklearn.impute import SimpleImputer
            try:
                data.drop(["Loan_ID"], axis=1, inplace=True)
            except KeyError:
                pass
            # One-hot encoding
            data_encoded = pd.get_dummies(data, drop_first=True)
            st.subheader(":orange[Encoded data]")
            st.write(data_encoded)

            # Split features and target
            X, y = data_encoded.drop(["Loan_Status_Y"], axis=1), data_encoded["Loan_Status_Y"]

            # Impute missing values
            sp = SimpleImputer(strategy="most_frequent")
            X = sp.fit_transform(X)

            # Scale the features
            std = StandardScaler()
            X = std.fit_transform(X)

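            # Note: the imputer and scaler above are fit on the uploaded file itself,
            # so the resulting scaling may differ from what the pickled models saw at
            # training time. If the fitted preprocessing objects were saved during
            # training, they could be reloaded instead (hypothetical file names):
            #   sp = pickle.load(open("imputer.pkl", "rb"))
            #   std = pickle.load(open("scaler.pkl", "rb"))
            #   X = std.transform(sp.transform(data_encoded.drop(["Loan_Status_Y"], axis=1)))
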
            # Predictions
            # Random Forest
            if st.sidebar.checkbox("Random Forest"):
                st.subheader(":orange[Random Forest] :sunglasses:")
                rf = pickle.load(open("scoring_rf.pkl", "rb"))
                pred = rf.predict(X)
                pred_proba = rf.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "random_forest.csv",
                                   "text/csv",
                                   key='download_csv_rf')

                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                rf_score = accuracy_score(pred, y)
                st.write(":green[Accuracy score]")
                st.write(f"{round(rf_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

            # Linear Discriminant Analysis
            if st.sidebar.checkbox("Discriminant Analysis"):
                st.subheader(":orange[Discriminant Analysis] :sunglasses:")
                lda = pickle.load(open("scoring_lda.pkl", "rb"))
                pred = lda.predict(X)
                pred_proba = lda.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "discriminant.csv",
                                   "text/csv",
                                   key='download_csv_lda')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                lda_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(lda_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

                # Confusion matrix
                fig = plt.figure(figsize=(2, 1))
                cm = confusion_matrix(y, pred)
                st.subheader(":green[Confusion matrix]")
                sns.heatmap(cm, annot=True, cmap='Dark2')
                st.pyplot(fig)
                plt.plot()

            # XGBoost
            if st.sidebar.checkbox("XGBoost"):
                st.subheader(":orange[XGBoost] :sunglasses:")
                xg = pickle.load(open("scoring_xg.pkl", "rb"))
                pred = xg.predict(X)
                pred_proba = xg.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "xgboost.csv",
                                   "text/csv",
                                   key='download_csv_xgb')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                xg_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(xg_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

            # Neural network
            if st.sidebar.checkbox("Neural Network"):
                st.subheader(":orange[Neural Network] :sunglasses:")
                ann = pickle.load(open("scoring_ann.pkl", "rb"))
                pred = ann.predict(X)
                pred_proba = ann.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "neural_network.csv",
                                   "text/csv",
                                   key='download_csv_ann')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                ann_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(ann_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

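            # The four model branches above differ only in the pickle file and the
            # section title; the same load -> predict -> report -> download sequence is
            # repeated for each. A factored-out helper for the analogous single-input
            # branches is sketched just before the __main__ guard at the end of this file.
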
        else:
            def user_input_features():
                gender = st.sidebar.selectbox('Gender', ('Male', 'Female'))
                married = st.sidebar.selectbox('Married', ('Yes', 'No'))
                dependents = st.sidebar.selectbox('Dependents', (0, 1, 2, "3+"))
                education = st.sidebar.selectbox('Education', ('Graduate', 'Not Graduate'))
                self_employed = st.sidebar.selectbox('Self_Employed', ('Yes', 'No'))
                applicant_income = st.sidebar.slider('ApplicantIncome', 150, 81000)
                coapplicant_income = st.sidebar.slider('CoapplicantIncome', 0, 42000)
                loan_amount = st.sidebar.slider('LoanAmount', 0, 800)
                loan_amount_term = st.sidebar.slider('Loan_Amount_Term', 10, 500)
                credit_history = st.sidebar.selectbox('Credit_History', (0, 1))
                property_area = st.sidebar.selectbox('Property_Area', ("Urban", "Rural", "Semiurban"))

                if gender == "Male":
                    gender = 1
                else:
                    gender = 0

                if married == 'Yes':
                    married = 1
                else:
                    married = 0

                dependents_1, dependents_2, dependents_3 = 0, 0, 0
                if dependents == 1:
                    dependents_1 = 1
                elif dependents == 2:
                    dependents_2 = 1
                elif dependents == "3+":
                    dependents_3 = 1

                if education == "Not Graduate":
                    education = 1
                else:
                    education = 0

                if self_employed == "Yes":
                    self_employed = 1
                else:
                    self_employed = 0

                property_urban, property_semiurban = 0, 0
                if property_area == "Semiurban":
                    property_semiurban = 1
                elif property_area == "Urban":
                    property_urban = 1

                # Numeric inputs are standardised with hard-coded means and standard
                # deviations that appear to come from the training data.
                data = {'ApplicantIncome': (applicant_income - 5403) / 6109,
                        'CoapplicantIncome': (coapplicant_income - 1621) / 2926,
                        'LoanAmount': (loan_amount - 146) / 85,
                        'Loan_Amount_Term': (loan_amount_term - 342) / 65,
                        'Credit_History': (credit_history - 0.84) / 0.35,
                        'Gender_Male': gender,
                        'Married_Yes': married,
                        'Dependents_1': dependents_1,
                        'Dependents_2': dependents_2,
                        'Dependents_3+': dependents_3,
                        'Education_Not_Graduate': education,
                        'Self_Employed_Yes': self_employed,
                        'Property_Area_Semiurban': property_semiurban,
                        'Property_Area_Urban': property_urban
                        }
                features = pd.DataFrame(data, index=[0])
                return features
            data_input = user_input_features()

            # Random Forest
            if st.sidebar.checkbox("Random Forest"):
                st.subheader(":orange[Random Forest]")
                rf = pickle.load(open("scoring_rf.pkl", "rb"))
                pred = rf.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = rf.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # Discriminant Analysis
            if st.sidebar.checkbox("Discriminant Analysis"):
                st.subheader(":orange[Discriminant Analysis]")
                lda = pickle.load(open("scoring_lda.pkl", "rb"))
                pred = lda.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = lda.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # XGBoost
            if st.sidebar.checkbox("XGBoost"):
                st.subheader(":orange[XGBoost]")
                xg = pickle.load(open("scoring_xg.pkl", "rb"))
                pred = xg.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = xg.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # Neural network
            if st.sidebar.checkbox("Neural Network"):
                st.subheader(":orange[Neural Network]")
                ann = pickle.load(open("scoring_ann.pkl", "rb"))
                pred = ann.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = ann.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)


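# A minimal sketch (not part of the original commit) of how the four single-input
# model branches above could be factored into one helper. It only reuses names that
# already exist in this file (the pickled model paths, data_input and the display calls).
def predict_single(model_path, model_name, data_input):
    """Load a pickled model, predict for one applicant and display the outcome."""
    st.subheader(f":orange[{model_name}]")
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    pred = model.predict(data_input)
    pred_proba = model.predict_proba(data_input)
    if pred == 1:
        st.write(":orange[__The loan can be granted__] :white_check_mark:")
    else:
        st.write(":red[__Sorry, ...__] :disappointed:")
    loan_status = np.array(['N', 'Y'])
    df = pd.concat([data_input, pd.DataFrame(loan_status[pred], columns=['prediction'])], axis=1)
    st.write(df)
    st.subheader(":green[probability] :question:")
    st.write(pred_proba)
# Example use, equivalent to the "Random Forest" branch above:
#   if st.sidebar.checkbox("Random Forest"):
#       predict_single("scoring_rf.pkl", "Random Forest", data_input)
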
# Launch the application
if __name__ == "__main__":
    main()