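# NOTE: the original capture began with the page-header residue
# "Spaces: Sleeping Sleeping"; it is not part of the program.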
import base64
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
# Background image
def add_bg_from_local(image_file):
    """Use a local image as the full-page background of the Streamlit app.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    CSS ``data:`` URL applied to the ``.stApp`` container.

    Args:
        image_file: Path of the image to embed. The image subtype written in
            the data URL is derived from the file extension and falls back to
            ``png`` when the path has no extension.
    """
    # A distinct name for the handle keeps the path argument usable below.
    with open(image_file, "rb") as handle:
        encoded_string = base64.b64encode(handle.read()).decode()

    extension = os.path.splitext(image_file)[1].lstrip(".").lower()
    # A few extensions differ from their registered image subtype.
    subtype = {"jpg": "jpeg", "svg": "svg+xml"}.get(extension, extension) or "png"

    # Lines are kept flush-left inside the string so Markdown does not treat
    # the block as indented code.
    css = (
        "\n<style>\n"
        ".stApp {\n"
        f"    background-image: url(data:image/{subtype};base64,{encoded_string});\n"
        "    background-size: cover\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(css, unsafe_allow_html=True)
# Page chrome: background image first, then the title in the middle column.
add_bg_from_local('route.png')
# NOTE(review): nothing visible in this file draws on this module-level
# figure (main() rebinds ``fig`` before each st.pyplot call), and a new one
# is created on every Streamlit rerun. Kept for compatibility; consider
# removing it.
fig = plt.figure(figsize=(10, 10))
_, middle, _ = st.columns((2, 3, 2))
with middle:
    st.title(":orange[_Scoring App_]")
# path to the data folder
#path = ".\data"
# Load the dataset
def load_data(file_path):
    """Read a CSV file into a DataFrame.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of the
            CSV file to load.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file_path)
# Convert a DataFrame to CSV
def convert_df_to_csv(frame):
    """Serialise a DataFrame as UTF-8 encoded CSV bytes, without the index.

    Args:
        frame: The ``pandas.DataFrame`` to export.

    Returns:
        The CSV content as ``bytes`` (header row included, index omitted).
    """
    return frame.to_csv(index=False).encode("utf-8")
# Sidebar picture, rendered at module level before main() runs.
st.sidebar.image("picture1.png")
# Models offered on the prediction page, as
# (sidebar label, pickled model file, download file name, show confusion matrix).
_MODELS = (
    ("Random Forest", "scoring_rf.pkl", "random_forest.csv", False),
    ("Discriminant Analysis", "scoring_lda.pkl", "discriminant.csv", True),
    ("XGBoost", "scoring_xg.pkl", "xgboost.csv", False),
    ("Neural Network", "scoring_ann.pkl", "neural_network.csv", False),
)


def _load_model(path):
    """Unpickle a fitted model from *path*, closing the file afterwards."""
    # SECURITY: pickle.load can execute arbitrary code. Only load the model
    # files that ship with the application, never user-supplied ones.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _status_labels(pred):
    """Map predicted class indices (0/1) to a one-column 'prediction' frame."""
    loan_status = np.array(['N', 'Y'])
    return pd.DataFrame(loan_status[pred], columns=['prediction'])


def _show_home():
    """Render the landing page: project summary and dataset presentation."""
    st.write(
        "Nous avons develeopper pour ce projet un model de classification "
        "qui permet, sur la base de certaines variables, de determiner si oui ou non "
        "il est envisageable d'octroyer un pret bancaire a une tierce personne."
    )
    st.subheader(":orange[__Presentation du jeu de donnee__] :memo:")
    # The emphasis markers around "Analyse exploratoire" were unbalanced
    # (three asterisks opening, two closing); they are balanced here.
    st.markdown(
        "Le jeu de donnees comporte 614 lignes et 13 colonnes. **Loan_Status** "
        "est la variables a predire (categorielle a deuc classe: **Y** pour le pret a ete "
        "octroyer et **N**) pour le contraire. afin d'avoir les reultats les plus "
        "optimaux possibles, nous allons dans un premier temps faire une "
        "**Analyse exploratoire** de nos donnees. Par suite nous passerons "
        "a la phase de preparation des donnees pour afin finir avec "
        "la phase de creation et optimisation des models. "
        "`Si vous televerser un fichier au format csv, vous avez la "
        "possibilite de comparer les prediction pour chaque "
        "model et de telechager le fichier csv correspondant.`"
    )
    #st.image("./images/processor.jpg")


def _show_exploration(data):
    """Render the exploration page: preview plus optional summary tables."""
    st.subheader(":orange[_Data Exploration_] :bar_chart:")
    st.write(data.head())
    n_rows = data.shape[0]

    # Missing values
    if st.sidebar.checkbox("Valeur Manquante"):
        st.subheader(":orange[Valeur Manquante]")
        missing = data.isnull().sum()
        na_count = missing.to_frame(name='count')
        na_per = (missing.to_frame(name='percentage %') / n_rows * 100).round(2)
        st.write(pd.concat([na_count, na_per], axis=1).sort_values(by='count', ascending=False).T)

    # Unique values per column
    if st.sidebar.checkbox("Valeur Unique par colonnes"):
        st.subheader(":orange[Valeur Unique par colonnes]")
        unique = data.nunique().sort_values(ascending=False)
        only = unique.to_frame(name='count')
        perc = (unique.to_frame(name='percentage %') / n_rows * 100).round(2)
        dtype = data.dtypes.to_frame(name='dtypes')
        st.write(pd.concat([only, perc, dtype], axis=1).T)

    # Summary statistics
    if st.sidebar.checkbox("Statistiques somaire"):
        st.subheader(":orange[Statistiques sommaire]")
        st.write(data.describe())

    # Correlation matrix
    if st.sidebar.checkbox("Matrice de correlation"):
        st.subheader(":orange[Matrice de correlation]")
        fig, ax = plt.subplots(figsize=(7, 5))
        # Correlation is only defined for numeric columns; recent pandas
        # raises when DataFrame.corr() meets object columns.
        corr = data.select_dtypes(include='number').corr()
        sns.heatmap(corr, annot=True, vmin=-1, vmax=1, cmap='ocean', ax=ax)
        st.pyplot(fig)
        plt.close(fig)  # figures otherwise accumulate across Streamlit reruns


def _show_visualisation(data):
    """Render the visualisation page: univariate and bivariate plots."""
    st.subheader(":orange[_Data Visualisation_] :chart:")

    if st.sidebar.checkbox("Analyse Univariee"):
        # Qualitative variables
        categorical_columns = data.select_dtypes(include='object').columns.tolist()
        st.write("Liste des variables qaulitatives")
        st.write(categorical_columns)
        fig = plt.figure(figsize=(14, 8))
        sns.set_theme(context='notebook', style='darkgrid', palette='deep',
                      font='sans-serif', font_scale=1, color_codes=True, rc=None)
        # The last categorical column is skipped as an x variable; the loop
        # uses "Loan_Status" as hue (presumably it is that last column —
        # verify against the dataset).
        for idx, col in enumerate(categorical_columns[:-1]):
            plt.subplot(2, 3, idx + 1)
            sns.countplot(data=data, x=col, hue="Loan_Status")
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this call belonged inside the loop. As written it draws the
        # target distribution on the current (last) axes — confirm the intent.
        sns.countplot(data=data, x='Loan_Status')
        st.pyplot(fig)
        plt.close(fig)

        # Quantitative variables
        numerical_columns = data.select_dtypes(include='number').columns.tolist()
        st.write("Liste des variables quantitatives")
        st.write(numerical_columns)
        fig = plt.figure(figsize=(15, 7))
        for idx, col in enumerate(numerical_columns):
            plt.subplot(2, 3, idx + 1)
            # Missing values carry no information for a histogram.
            plt.hist(data[col].dropna(), density=True)
            sns.kdeplot(data=data, x=col)
            plt.title(col)
        # rect is expressed in normalized figure coordinates; the previous
        # (1, 1, 2, 2) placed the subplots outside the figure, so the default
        # rectangle is used instead.
        plt.tight_layout(h_pad=2, w_pad=3.)
        st.pyplot(fig)
        plt.close(fig)

    if st.sidebar.checkbox("Analyse bivariee"):
        st.subheader(":orange[Analyse bivariee]")
        numerical_columns = data.select_dtypes(include='number').columns.tolist()
        fig = plt.figure(figsize=(14, 8))
        for idx, num_col in enumerate(numerical_columns[:-2]):
            plt.subplot(2, 2, idx + 1)
            sns.boxplot(y=num_col, data=data, x='Loan_Status')
        plt.tight_layout(h_pad=2, w_pad=3.)
        st.pyplot(fig)
        plt.close(fig)


def _show_batch_prediction(label, model_file, csv_name, with_confusion, data, X, y):
    """Run one model on the uploaded data and render predictions and metrics.

    ``y`` is None when the uploaded file has no usable target column; in that
    case only predictions and probabilities are shown.
    """
    st.subheader(f":orange[{label}] :sunglasses:")
    model = _load_model(model_file)
    pred = model.predict(X)
    pred_proba = model.predict_proba(X)

    st.subheader(':green[Prediction]')
    df = pd.concat([data, _status_labels(pred)], axis=1)
    st.write(df)

    # Each widget needs its own key: two download buttons sharing a key make
    # Streamlit raise as soon as two models are ticked together.
    st.download_button("Press to Download",
                       convert_df_to_csv(df),
                       csv_name,
                       "text/csv",
                       key=f"download_{csv_name}")

    if y is not None:
        st.text("Model report : \n " + classification_report(y, pred))
        score = accuracy_score(y, pred)
        st.subheader(":green[score d'exactitude]")
        st.write(f"{round(score * 100, 2)}% d'exactitude")

    st.subheader(':green[Prediction Probability]')
    st.write(pred_proba)

    if with_confusion and y is not None:
        fig, ax = plt.subplots(figsize=(2, 1))
        cm = confusion_matrix(y, pred)
        st.subheader(":green[Matrice de confusion]")
        sns.heatmap(cm, annot=True, cmap='Dark2', ax=ax)
        st.pyplot(fig)
        plt.close(fig)


def _predict_from_file(uploaded_file):
    """Preprocess an uploaded CSV and run every model ticked in the sidebar."""
    from sklearn.impute import SimpleImputer

    data = pd.read_csv(uploaded_file)
    # The identifier column is optional in uploaded files.
    data = data.drop(columns=["Loan_ID"], errors="ignore")

    # Encoding. Integer dummies are requested explicitly because recent
    # pandas returns boolean dummies by default.
    data_encoded = pd.get_dummies(data, drop_first=True, dtype=int)
    st.subheader(":orange[Donnees encodees]")
    st.write(data_encoded)

    # Split features and target; the target may be absent from the upload.
    if "Loan_Status_Y" in data_encoded.columns:
        X = data_encoded.drop(["Loan_Status_Y"], axis=1)
        y = data_encoded["Loan_Status_Y"]
    else:
        X, y = data_encoded, None
        st.warning("Colonne Loan_Status absente : les metriques d'evaluation "
                   "ne sont pas affichees.")

    # Missing values
    X = SimpleImputer(strategy="most_frequent").fit_transform(X)
    # Scaling
    # NOTE(review): the scaler is fitted on the uploaded data rather than
    # reused from training — confirm this matches how the models were fitted.
    X = StandardScaler().fit_transform(X)

    # All four checkboxes are always created, in a fixed order.
    for label, model_file, csv_name, with_confusion in _MODELS:
        if st.sidebar.checkbox(label):
            _show_batch_prediction(label, model_file, csv_name, with_confusion, data, X, y)


def _user_input_features():
    """Collect one applicant from sidebar widgets as a single-row DataFrame.

    Numeric inputs are standardised with hard-coded constants (presumably the
    training-set mean and standard deviation — verify against the training
    code); categorical inputs are encoded as 0/1 indicator columns.
    """
    gender = st.sidebar.selectbox('Gender', ('Male', 'Female'))
    married = st.sidebar.selectbox('Married', ('Yes', 'No'))
    depedents = st.sidebar.selectbox('Dependent', (0, 1, 2, "3+"))
    education = st.sidebar.selectbox('Education', ('Graduate', 'Not Graduate'))
    self_employed = st.sidebar.selectbox('Self_employed', ('Yes', 'No'))
    applicanincome = st.sidebar.slider('ApplicanIncome', 150, 81000)
    coapplicanincome = st.sidebar.slider('CoapplicanIncome', 0, 42000)
    loan_amount = st.sidebar.slider('LoanAmount', 0, 800)
    loan_amount_term = st.sidebar.slider('Loan_Amount_Term', 10, 500)
    credit_history = st.sidebar.selectbox('Credi_History', (0, 1))
    property_area = st.sidebar.selectbox('Property_Area', ("Urban", "Rural", "Semiurban"))

    # Column order is kept exactly as before: the models receive this frame
    # as-is, so positions may matter.
    features = {
        'ApplicationIncome': (applicanincome - 5403) / 6109,
        'CoapplicationIncome': (coapplicanincome - 1621) / 2926,
        'LoanAmount': (loan_amount - 146) / 85,
        'Loan_Amount_Term': (loan_amount_term - 342) / 65,
        'Credi_History': (credit_history - 0.84) / 0.35,
        'Gender_Male': int(gender == "Male"),
        'Married_Yes': int(married == 'Yes'),
        'Depedents_1': int(depedents == 1),
        'Depedents_2': int(depedents == 2),
        # The last option is the string "3+"; comparing it with an integer
        # (``> 2``) raised a TypeError.
        'Depedents_3+': int(depedents == "3+"),
        'Education_Not_Graduate': int(education == "Not Graduate"),
        'Self_Employed_Yes': int(self_employed == "Yes"),
        'Property_Area_Semiurban': int(property_area == "Semiurban"),
        # Previously written as a comparison (``== 1``), so it stayed at 0.
        'Property_Area_Urban': int(property_area == "Urban"),
    }
    return pd.DataFrame(features, index=[0])


def _show_single_prediction(label, model_file, data_input):
    """Run one model on the form input and render the decision."""
    st.subheader(f":orange[{label}]")
    model = _load_model(model_file)
    pred = model.predict(data_input)
    if pred[0] == 1:
        st.write(":orange[__Le pret peut etre octroyer__] :white_check_mark:")
    else:
        st.write(":red[__Desole,...__] :disappointed:")
    pred_proba = model.predict_proba(data_input)
    st.write(pd.concat([data_input, _status_labels(pred)], axis=1))
    st.subheader(":green[probability] :question:")
    st.write(pred_proba)


def _predict_from_form():
    """Predict for a single applicant described through the sidebar form."""
    data_input = _user_input_features()
    for label, model_file, _csv_name, _with_confusion in _MODELS:
        if st.sidebar.checkbox(label):
            _show_single_prediction(label, model_file, data_input)


# Main function
def main():
    """Build the sidebar (upload + menu) and render the selected page."""
    st.markdown(
        "<h2 style = 'text-align:center; "
        "color:green;'> Classification pour l'octroi de credit </h2>",
        unsafe_allow_html=True,
    )
    # File upload
    uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])
    # Menu
    menu = ["Home", "Data Exploration", "Data Visualisation", "Make prediction"]
    choice = st.sidebar.selectbox("Select menu", menu)
    # Load the bundled dataset and drop its identifier column
    data = load_data("loan.csv")
    data.drop("Loan_ID", axis=1, inplace=True)

    if choice == "Home":
        _show_home()
    elif choice == "Data Exploration":
        _show_exploration(data)
    elif choice == "Data Visualisation":
        _show_visualisation(data)
    elif choice == "Make prediction":
        st.subheader(":orange[Make prediction] :fleur_de_lis:")
        if uploaded_file is not None:
            _predict_from_file(uploaded_file)
        else:
            _predict_from_form()
# Launch the application
if __name__ == "__main__":
    main()