Spaces:

scorpion237
/

scoring_classification

Sleeping

Update app.py

57c18a7 12 months ago

21.1 kB

	import streamlit as st, base64
	import pandas as pd, seaborn as sns
	import os, matplotlib.pyplot as plt
	import pickle, numpy as np, xgboost as xgb
	from keras.models import load_model
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


	# Background image
	def add_bg_from_local(image_file):
	    """Use a local image file as the background of the Streamlit page.

	    The file at ``image_file`` is read in binary mode, base64-encoded and
	    injected through ``st.markdown`` as an inline ``data:`` URL inside a
	    ``<style>`` rule that targets ``.stApp``. The MIME subtype in the URL
	    is always ``png``.
	    """
	    with open(image_file, "rb") as handle:
	        raw_bytes = handle.read()
	    encoded_string = base64.b64encode(raw_bytes)
	    st.markdown(
	        f"""
	<style>
	.stApp {{
	background-image: url(data:image/{"png"};base64,{encoded_string.decode()});
	background-size: cover
	}}
	</style>
	""",
	        unsafe_allow_html=True
	    )


	# Install the background as soon as the script runs.
	add_bg_from_local('route.png')


	# Page title, centred by rendering it in the middle of three columns.
	# No figure is created at module level: Streamlit re-executes this script
	# on every interaction, and a figure opened here would never be drawn on,
	# rendered or closed. Each plotting section creates its own figure.
	_, middle, _ = st.columns((2, 3, 2))
	with middle:
	    st.title(":orange[_Scoring App_]")
	# Path of the data folder
	#path = ".\data"

	# Dataset loader
	@st.cache_data
	def load_data(file_path):
	    """Read the CSV file at ``file_path`` and return it as a DataFrame.

	    The function is wrapped in ``st.cache_data``.
	    """
	    frame = pd.read_csv(file_path)
	    return frame

	# DataFrame -> CSV bytes
	def convert_df_to_csv(frame):
	    """Serialise ``frame`` to CSV (index omitted) and return UTF-8 bytes."""
	    csv_text = frame.to_csv(index=False)
	    return csv_text.encode("utf-8")

	# Sidebar logo, rendered when the script is executed.
	st.sidebar.image("picture1.png")


	def _load_pickled_model(path):
	    """Unpickle and return the model stored at ``path``.

	    The file is opened in a ``with`` block so the handle is always closed.
	    """
	    # NOTE(security): pickle can execute arbitrary code while loading; only
	    # model files shipped with the app should ever be passed here.
	    with open(path, "rb") as handle:
	        return pickle.load(handle)


	def _render_home():
	    """Render the "Home" page: project summary and dataset presentation."""
	    st.write("Nous avons develeopper pour ce projet un model de classification\
	qui permet, sur la base de certaines variables, de determiner si oui ou non\
	il est envisageable d'octroyer un pret bancaire a une tierce personne.")

	    st.subheader(":orange[__Presentation du jeu de donnee__] :memo:")

	    st.markdown("Le jeu de donnees comporte 614 lignes et 13 colonnes. Loan_Status\
	est la variables a predire (categorielle a deuc classe: Y pour le pret a ete \
	octroyer et N) pour le contraire. afin d'avoir les reultats les plus\
	optimaux possibles, nous allons dans un premier temps faire une \
	*Analyse exploratoire de nos donnees. Par suite nous passerons\
	a la phase de preparation des donnees pour afin finir avec \
	la phase de creation et optimisation des models.\
	`Si vous televerser un fichier au format csv, vous avez la\
	possibilite de comparer les prediction pour chaque\
	model et de telechager le fichier csv correspondant.`")
	    #st.image("./images/processor.jpg")


	def _render_data_exploration(data):
	    """Render the "Data Exploration" page for the DataFrame ``data``.

	    Always shows ``data.head()``; each sidebar checkbox adds one optional
	    table or plot (missing values, unique values, summary statistics,
	    correlation matrix).
	    """
	    st.subheader(":orange[_Data Exploration_] :bar_chart:")
	    # Preview of the data
	    st.write(data.head())

	    # Missing values
	    if st.sidebar.checkbox("Valeur Manquante"):
	        st.subheader(":orange[Valeur Manquante]")
	        missing = data.isnull().sum()
	        na_count = missing.to_frame(name='count')
	        na_per = (missing.to_frame(name='percentage %')/data.shape[0]*100).round(2)
	        st.write(pd.concat([na_count, na_per], axis=1).sort_values(by='count', ascending=False).T)

	    # Unique values per column
	    if st.sidebar.checkbox("Valeur Unique par colonnes"):
	        st.subheader(":orange[Valeur Unique par colonnes]")
	        uniques = data.nunique().sort_values(ascending=False)
	        only = uniques.to_frame(name='count')
	        perc = (uniques.to_frame(name='percentage %')/data.shape[0]*100).round(2)
	        dtype = data.dtypes.to_frame(name='dtypes')
	        st.write(pd.concat([only, perc, dtype], axis=1).T)

	    # Summary statistics
	    if st.sidebar.checkbox("Statistiques somaire"):
	        st.subheader(":orange[Statistiques sommaire]")
	        st.write(data.describe())

	    # Correlation matrix
	    if st.sidebar.checkbox("Matrice de correlation"):
	        fig = plt.figure(figsize=(7,5))
	        st.subheader(":orange[Matrice de correlation]")
	        # Restrict to numeric columns explicitly: DataFrame.corr() raises on
	        # string columns with pandas >= 2.0.
	        numeric = data.select_dtypes(include='number')
	        sns.heatmap(numeric.corr(), annot=True, vmin=-1, vmax=1, cmap='ocean')
	        st.pyplot(fig)
	        # Release the figure; the script is re-run on every interaction.
	        plt.close(fig)


	def _render_data_visualisation(data):
	    """Render the "Data Visualisation" page for the DataFrame ``data``.

	    The univariate and bivariate analyses are each enabled by a sidebar
	    checkbox. ``data`` must contain a ``Loan_Status`` column.
	    """
	    st.subheader(":orange[_Data Visualisation_] :chart:")
	    if st.sidebar.checkbox("Analyse Univariee"):
	        # Qualitative variables
	        categorical_columns = data.select_dtypes(include='object').columns.tolist()
	        st.write("Liste des variables qaulitatives")
	        st.write(categorical_columns)
	        fig = plt.figure(figsize=(14, 8))
	        sns.set_theme(context='notebook', style='darkgrid', palette='deep', font='sans-serif', font_scale=1, color_codes=True, rc=None)
	        # The last categorical column is skipped as a panel; it is presumably
	        # Loan_Status, which is used as the hue instead.
	        for idx, col in enumerate(categorical_columns[:-1]):
	            plt.subplot(2, 3, idx+1)
	            sns.countplot(data=data, x=col, hue="Loan_Status")
	        # NOTE(review): this draws onto whichever subplot is current (the
	        # last one created above) - confirm whether a separate panel was
	        # intended for the target distribution.
	        sns.countplot(data=data, x='Loan_Status')
	        st.pyplot(fig)
	        plt.close(fig)

	        # Quantitative variables
	        numerical_columns = data.select_dtypes(include='number').columns.tolist()
	        st.write("Liste des variables quantitatives")
	        st.write(numerical_columns)
	        fig = plt.figure(figsize=(15,7))
	        for idx, col in enumerate(numerical_columns):
	            plt.subplot(2,3, idx+1)
	            plt.hist(data[col], density=True)
	            sns.kdeplot(data=data, x=col)
	            plt.title(col)
	        #plt.subplots_adjust(hspace=0.5)
	        # NOTE(review): rect is outside the usual (0, 0, 1, 1) range; kept
	        # unchanged - confirm it is intentional.
	        plt.tight_layout(h_pad=2, w_pad=3., rect=(1,1,2,2))
	        st.pyplot(fig)
	        plt.close(fig)

	    if st.sidebar.checkbox("Analyse bivariee"):
	        st.subheader(":orange[Analyse bivariee]")
	        numerical_columns = data.select_dtypes(include='number').columns.tolist()
	        fig = plt.figure(figsize = (14, 8))
	        # All numeric columns except the last two, one box plot each.
	        for idx, num_col in enumerate(numerical_columns[:-2]):
	            plt.subplot(2, 2, idx+1)
	            sns.boxplot(y=num_col, data=data, x='Loan_Status')
	        plt.tight_layout(h_pad=2, w_pad=3., rect=(1,1,2,2))
	        st.pyplot(fig)
	        plt.close(fig)


	def _show_prediction_table(data, pred, file_name, key):
	    """Show ``data`` with an added 'prediction' column and a CSV download.

	    ``pred`` holds 0/1 class indices that are mapped to 'N'/'Y'.
	    ``file_name`` and ``key`` are passed to ``st.download_button``.
	    """
	    st.subheader(':green[Prediction]')
	    loan_status = np.array(['N','Y'])
	    prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
	    df = pd.concat([data, prediction], axis=1)
	    st.write(df)
	    st.download_button("Press to Download",
	                       convert_df_to_csv(df),
	                       file_name,
	                       "text/csv",
	                       key=key)


	def _show_scores(y, pred, heading=st.subheader):
	    """Show the classification report and accuracy of ``pred`` against ``y``.

	    ``heading`` is the Streamlit call used to render the accuracy title.
	    """
	    st.text("Model report : \n " + classification_report(y, pred))
	    score = accuracy_score(pred, y)
	    heading(":green[score d'exactitude]")
	    st.write(f"{round(score*100,2)}% d'exactitude")


	def _show_ann_probabilities(pred_proba):
	    """Show the network output as a two-column table of class probabilities.

	    Column '1' is ``pred_proba`` itself and column '0' is ``1 - pred_proba``.
	    """
	    un = pd.DataFrame(pred_proba, columns=['1'])
	    zero = pd.DataFrame(np.subtract(1, pred_proba), columns=['0'])
	    st.write(pd.concat([zero, un], axis=1).round(2))


	def _render_batch_prediction(data):
	    """Score every row of an uploaded DataFrame with the selected models.

	    ``data`` is one-hot encoded, imputed and standardised, then each model
	    ticked in the sidebar is loaded and applied. When the encoded frame
	    contains a ``Loan_Status_Y`` column it is used as ground truth to
	    report metrics.
	    """
	    from sklearn.impute import SimpleImputer

	    # A Loan_ID column is optional in uploaded files.
	    data = data.drop(columns=["Loan_ID"], errors="ignore")

	    # Encoding
	    data_encoded = pd.get_dummies(data, drop_first=True)
	    st.subheader(":orange[Donnees encodees]")
	    st.write(data_encoded)

	    # Split features / target. Detect the target by name rather than by the
	    # number of encoded columns, so a file with a different column count
	    # cannot leak the label into the features or trigger a KeyError.
	    has_labels = "Loan_Status_Y" in data_encoded.columns
	    if has_labels:
	        X, y = data_encoded.drop(["Loan_Status_Y"], axis=1), data_encoded["Loan_Status_Y"]
	    else:
	        X, y = data_encoded, None

	    # Missing values
	    X = SimpleImputer(strategy="most_frequent").fit_transform(X)

	    # Scaling
	    # NOTE(review): the scaler is fitted on the uploaded data, not on the
	    # data the models were trained with - confirm this is intended.
	    X = StandardScaler().fit_transform(X)

	    # Random Forest
	    if st.sidebar.checkbox("Random Forest"):
	        st.subheader(":orange[Random Forest] :sunglasses:")
	        rf = _load_pickled_model("scoring_rf.pkl")
	        pred = rf.predict(X)
	        pred_proba = rf.predict_proba(X)
	        _show_prediction_table(data, pred, "random_forest.csv", 'rf_download_csv')
	        if has_labels:
	            _show_scores(y, pred, heading=st.write)
	        st.subheader(':green[Prediction Probability]')
	        st.write(pred_proba)

	    # Linear Discriminant Analysis
	    if st.sidebar.checkbox("Discriminant Analysis"):
	        st.subheader(":orange[Discriminant Analysis] :sunglasses:")
	        lda = _load_pickled_model("scoring_lda.pkl")
	        pred = lda.predict(X)
	        pred_proba = lda.predict_proba(X)
	        _show_prediction_table(data, pred, "discriminant.csv", 'lda_download_csv')
	        if has_labels:
	            _show_scores(y, pred)
	        st.subheader(':green[Prediction Probability]')
	        st.write(pred_proba)

	        if has_labels:
	            # Confusion matrix
	            fig = plt.figure(figsize=(2,1))
	            cm = confusion_matrix(y, pred)
	            st.subheader(":green[Matrice de confusion]")
	            sns.heatmap(cm, annot=True, cmap='Dark2')
	            st.pyplot(fig)
	            plt.close(fig)

	    # XGBoost
	    if st.sidebar.checkbox("XGBoost"):
	        st.subheader(":orange[XGBoost] :sunglasses:")
	        xg = xgb.XGBClassifier()
	        xg.load_model("xg.json")
	        pred = xg.predict(X)
	        pred_proba = xg.predict_proba(X)
	        _show_prediction_table(data, pred, "xgboost.csv", 'xg_download_csv')
	        if has_labels:
	            _show_scores(y, pred)
	        st.subheader(':green[Prediction Probability]')
	        st.write(pred_proba)

	    # Neural network
	    if st.sidebar.checkbox("Neural Network"):
	        st.subheader(":orange[Neural Network] :sunglasses:")
	        ann = load_model("ann.h5")
	        pred_proba = ann.predict(X)
	        # Threshold the network output at 0.5 to get class indices.
	        pred = np.where(pred_proba < 0.5, 0, 1)
	        _show_prediction_table(data, pred, "neural_network.csv", 'ann_download_csv')
	        if has_labels:
	            _show_scores(y, pred)
	        st.subheader(':green[Prediction Probability]')
	        _show_ann_probabilities(pred_proba)


	def _user_input_features():
	    """Collect one applicant's features from sidebar widgets.

	    Returns a single-row DataFrame. Numeric inputs are centred and scaled
	    with hard-coded constants (presumably training-set statistics - verify
	    against the training code); categorical inputs become 0/1 indicators.
	    """
	    gender = st.sidebar.selectbox('Gender',('Male','Female'))
	    married = st.sidebar.selectbox('Married',('Yes','No'))
	    depedents = st.sidebar.selectbox('Dependent',(0, 1, 2, "3+"))
	    education = st.sidebar.selectbox('Education',('Graduate','Not Graduate'))
	    self_employed = st.sidebar.selectbox('Self_employed',('Yes','No'))
	    applicanincome = st.sidebar.slider('ApplicanIncome', 150, 81000)
	    coapplicanincome = st.sidebar.slider('CoapplicanIncome', 0, 42000)
	    loan_amount = st.sidebar.slider('LoanAmount', 0, 800)
	    loan_amount_term = st.sidebar.slider('Loan_Amount_Term', 10, 500)
	    credit_history = st.sidebar.selectbox('Credi_History', (0, 1))
	    property_area = st.sidebar.selectbox('Property_Area', ("Urban", "Rural", "Semiurban"))

	    gender = 1 if gender == "Male" else 0
	    married = 1 if married == 'Yes' else 0
	    education = 1 if education == "Not Graduate" else 0
	    self_employed = 1 if self_employed == "Yes" else 0

	    # One indicator per non-zero dependents category. The last option is
	    # the string "3+", so it is matched by equality, not with ">".
	    depedents_1 = 1 if depedents == 1 else 0
	    depedents_2 = 1 if depedents == 2 else 0
	    depedents_3 = 1 if depedents == "3+" else 0

	    # "Rural" is the reference category: both indicators stay at 0.
	    property_semiurban = 1 if property_area == "Semiurban" else 0
	    property_urban = 1 if property_area == "Urban" else 0

	    # NOTE(review): several keys are misspelled relative to the widget
	    # labels ('ApplicationIncome', 'Credi_History', 'Depedents_*'); they are
	    # kept as-is because the column order/names may matter to the models.
	    data = {
	        'ApplicationIncome': (applicanincome - 5403)/6109,
	        'CoapplicationIncome': (coapplicanincome - 1621) / 2926,
	        'LoanAmount': (loan_amount -146)/85,
	        'Loan_Amount_Term': (loan_amount_term - 342)/65,
	        'Credi_History': (credit_history -0.84)/0.35,
	        'Gender_Male': gender,
	        'Married_Yes': married,
	        'Depedents_1': depedents_1,
	        'Depedents_2': depedents_2,
	        'Depedents_3+': depedents_3,
	        'Education_Not_Graduate': education,
	        'Self_Employed_Yes': self_employed,
	        'Property_Area_Semiurban': property_semiurban,
	        'Property_Area_Urban': property_urban,
	    }
	    return pd.DataFrame(data, index=[0])


	def _show_single_verdict(data_input, pred):
	    """Show the accept/reject message and the input row with its prediction.

	    ``pred`` is a one-element array holding the class index 0 or 1.
	    """
	    if pred == 1:
	        st.write(":orange[__Le pret peut etre octroyer__] :white_check_mark:")
	    else:
	        st.write(":red[__Desole,...__] :disappointed:")
	    loan_status = np.array(['N','Y'])
	    prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
	    df = pd.concat([data_input, prediction], axis=1)
	    st.write(df)


	def _render_single_prediction():
	    """Score one applicant entered through the sidebar form.

	    Each model ticked in the sidebar is loaded and applied to the single
	    row returned by ``_user_input_features``.
	    """
	    data_input = _user_input_features()

	    # Random Forest
	    if st.sidebar.checkbox("Random Forest"):
	        st.subheader(":orange[Random Forest]")
	        rf = _load_pickled_model("scoring_rf.pkl")
	        pred = rf.predict(data_input)
	        pred_proba = rf.predict_proba(data_input)
	        _show_single_verdict(data_input, pred)
	        st.subheader(":green[probability] :question:")
	        st.write(pred_proba)

	    # Discriminant Analysis
	    if st.sidebar.checkbox("Discriminant Analysis"):
	        st.subheader(":orange[Discriminant Analysis]")
	        lda = _load_pickled_model("scoring_lda.pkl")
	        pred = lda.predict(data_input)
	        pred_proba = lda.predict_proba(data_input)
	        _show_single_verdict(data_input, pred)
	        st.subheader(":green[probability] :question:")
	        st.write(pred_proba)

	    # XGBoost
	    if st.sidebar.checkbox("XGBoost"):
	        st.subheader(":orange[XGBoost]")
	        xg = xgb.XGBClassifier()
	        xg.load_model("xg.json")
	        pred = xg.predict(data_input)
	        pred_proba = xg.predict_proba(data_input)
	        _show_single_verdict(data_input, pred)
	        st.subheader(":green[probability] :question:")
	        st.write(pred_proba)

	    # Neural network
	    if st.sidebar.checkbox("Neural Network"):
	        st.subheader(":orange[Neural Network]")
	        ann = load_model('ann.h5')
	        pred_proba = ann.predict(data_input)
	        # Threshold the network output at 0.5 to get the class index.
	        pred = np.where(pred_proba < 0.5, 0, 1)
	        _show_single_verdict(data_input, pred)
	        st.subheader(":green[probability] :question:")
	        _show_ann_probabilities(pred_proba)


	# Main entry point
	def main():
	    """Build the page: header, sidebar controls and the selected menu page.

	    The sidebar offers a CSV uploader and a four-entry menu. On the
	    "Make prediction" page an uploaded file is scored row by row;
	    without an upload a single applicant is entered through sidebar
	    widgets instead.
	    """
	    st.markdown("<h2 style = 'text-align:center; \
	color:green;'> Classification pour l'octroi de credit </h2>", unsafe_allow_html = True)

	    # File upload
	    uploaded_file = st.sidebar.file_uploader("Importer fichier csv", type=["csv"])

	    # Menu
	    menu = ["Home", "Data Exploration", "Data Visualisation", "Make prediction"]
	    choice = st.sidebar.selectbox("Select menu", menu)

	    # Bundled dataset, without its identifier column
	    data = load_data("loan.csv").drop("Loan_ID", axis=1)

	    if choice == "Home":
	        _render_home()
	    elif choice == "Data Exploration":
	        _render_data_exploration(data)
	    elif choice == "Data Visualisation":
	        _render_data_visualisation(data)
	    elif choice == "Make prediction":
	        st.subheader(":orange[Make prediction] :fleur_de_lis:")
	        if uploaded_file is not None:
	            _render_batch_prediction(pd.read_csv(uploaded_file))
	        else:
	            _render_single_prediction()


	# Run the application
	if __name__ == "__main__":
	    main()