Spaces:

Johan713
/

Advanced-Loan-Predictor

Runtime error

App Files Files

xet

Community

Advanced-Loan-Predictor / app.py

Johan713

Update app.py

60b864d verified about 1 year ago

raw

history blame contribute delete

8.13 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.preprocessing import StandardScaler

	@st.cache_data
	def load_and_preprocess_data():
	    """Load ``train.csv`` and return a cleaned DataFrame ready for modelling.

	    The file is read from the current working directory. Missing values are
	    imputed (mode for the categorical-like columns, median for
	    ``LoanAmount``), ``Dependents`` is converted to integers (``'3+'``
	    becomes ``3``), and the three monetary columns are replaced by their
	    ``log1p`` transform.

	    Returns:
	        pandas.DataFrame: the preprocessed loan data.
	    """
	    data = pd.read_csv('train.csv')

	    # Assign the filled column back instead of calling
	    # ``data[col].fillna(..., inplace=True)``: an in-place call on a column
	    # selection is chained assignment, which pandas warns about and which
	    # stops updating ``data`` once Copy-on-Write is enabled.
	    mode_imputed = (
	        'Gender',
	        'Married',
	        'Dependents',
	        'Self_Employed',
	        'Loan_Amount_Term',
	        'Credit_History',
	    )
	    for column in mode_imputed:
	        data[column] = data[column].fillna(data[column].mode()[0])
	    data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())

	    # 'Dependents' holds labels such as '0', '1', '2', '3+'; fold '3+' into 3.
	    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)

	    # Replace the monetary columns with log(1 + x).
	    for column in ('LoanAmount', 'ApplicantIncome', 'CoapplicantIncome'):
	        data[column] = np.log1p(data[column])

	    return data

	@st.cache_resource
	def get_model(data):
	    """Fit the scaler and random-forest classifier on the preprocessed data.

	    Args:
	        data: DataFrame containing ``Loan_ID``, ``Loan_Status`` and the
	            feature columns.

	    Returns:
	        tuple: ``(model, scaler, feature_names)`` where ``model`` is the
	        fitted ``RandomForestClassifier``, ``scaler`` the fitted
	        ``StandardScaler`` and ``feature_names`` the list of one-hot encoded
	        column names, in the order the scaler and model were fitted on.
	    """
	    # One-hot encode the categorical columns; the first level of each is dropped.
	    features = pd.get_dummies(
	        data.drop(['Loan_ID', 'Loan_Status'], axis=1), drop_first=True
	    )
	    target = data['Loan_Status']
	    feature_names = features.columns.tolist()

	    # Keep the same 80/20 split and seed so the fitted model is unchanged.
	    # The held-out part is never used here, so it is neither named nor scaled.
	    X_train, _, y_train, _ = train_test_split(
	        features, target, test_size=0.2, random_state=42
	    )

	    scaler = StandardScaler()
	    model = RandomForestClassifier(n_estimators=100, random_state=42)
	    model.fit(scaler.fit_transform(X_train), y_train)

	    return model, scaler, feature_names
	def predict_loan_approval(model, scaler, feature_names, input_data, *, threshold=0.3):
	    """Predict approval for one applicant.

	    Args:
	        model: fitted classifier exposing ``predict_proba`` and ``classes_``.
	        scaler: fitted transformer exposing ``transform``.
	        feature_names: encoded column names, in training order.
	        input_data: mapping of raw column name to the applicant's value.
	            Categorical values are given as their category label.
	        threshold: minimum probability of class ``'Y'`` that counts as an
	            approval.

	    Returns:
	        tuple: ``(prediction, probability)`` where ``prediction`` is ``'Y'``
	        or ``'N'`` and ``probability`` is the model's probability for ``'Y'``.
	    """
	    # Build the encoded row by hand. Calling get_dummies(drop_first=True) on a
	    # one-row frame drops the only category of every categorical column, which
	    # would encode every categorical answer as 0.
	    row = dict.fromkeys(feature_names, 0)
	    for key, value in input_data.items():
	        if key in row:
	            # Numeric feature. A label such as '3+' is reduced to its number,
	            # matching the '3+' -> 3 conversion applied to the training data.
	            if isinstance(value, str):
	                value = float(value.rstrip('+'))
	            row[key] = value
	        else:
	            # Categorical feature: dummy columns are named '<column>_<category>'.
	            # A category without a column is the dropped baseline (all zeros).
	            dummy = f"{key}_{value}"
	            if dummy in row:
	                row[dummy] = 1

	    input_df = pd.DataFrame([row], columns=feature_names)
	    input_scaled = scaler.transform(input_df)

	    # Look the 'Y' column up by label instead of assuming it is index 1.
	    class_probabilities = model.predict_proba(input_scaled)[0]
	    classes = list(model.classes_)
	    if 'Y' in classes:
	        probability = float(class_probabilities[classes.index('Y')])
	    else:
	        probability = 0.0

	    # Report the real probability. Flooring it at the threshold made every
	    # applicant look approved.
	    prediction = 'Y' if probability >= threshold else 'N'

	    return prediction, probability

	# Streamlit app
	def main():
	    """Configure the page, load the data and model, and render the chosen page."""
	    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")

	    # Sidebar
	    st.sidebar.title("Navigation")
	    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])

	    # Load data and model
	    data = load_and_preprocess_data()
	    model, scaler, feature_names = get_model(data)

	    if page == "Predict":
	        _render_prediction_page(model, scaler, feature_names)
	    elif page == "Explore Data":
	        _render_exploration_page(data)


	def _render_prediction_page(model, scaler, feature_names):
	    """Show the applicant form and, once submitted, the prediction and gauge."""
	    st.title("Loan Approval Predictor")
	    st.write("Fill in the details below to predict your loan approval chances.")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        gender = st.selectbox("Gender", ["Male", "Female"])
	        married = st.selectbox("Married", ["Yes", "No"])
	        dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
	        education = st.selectbox("Education", ["Graduate", "Not Graduate"])

	    with col2:
	        self_employed = st.selectbox("Self Employed", ["Yes", "No"])
	        applicant_income = st.number_input("Applicant Income", min_value=0)
	        coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
	        loan_amount = st.number_input("Loan Amount", min_value=0)

	    with col3:
	        loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
	        credit_history = st.selectbox("Credit History", [0, 1])
	        property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])

	    if not st.button("Predict"):
	        return

	    input_data = {
	        'Gender': gender,
	        'Married': married,
	        # The model is trained on integer dependents with '3+' folded into 3,
	        # so convert the selectbox label instead of passing the raw string.
	        'Dependents': int(dependents.replace('3+', '3')),
	        'Education': education,
	        'Self_Employed': self_employed,
	        # Monetary inputs get the same log1p transform as the training data.
	        'ApplicantIncome': np.log1p(applicant_income),
	        'CoapplicantIncome': np.log1p(coapplicant_income),
	        'LoanAmount': np.log1p(loan_amount),
	        'Loan_Amount_Term': loan_amount_term,
	        'Credit_History': credit_history,
	        'Property_Area': property_area,
	    }

	    prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)

	    st.subheader("Prediction Result")
	    if prediction == 'Y':
	        st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")
	    else:
	        st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")

	    # Visualization of prediction probability
	    fig = go.Figure(go.Indicator(
	        mode="gauge+number",
	        value=probability * 100,
	        domain={'x': [0, 1], 'y': [0, 1]},
	        title={'text': "Approval Probability"},
	        gauge={
	            'axis': {'range': [0, 100]},
	            'bar': {'color': "darkblue"},
	            'steps': [
	                {'range': [0, 30], 'color': "lightgray"},
	                {'range': [30, 70], 'color': "gray"},
	                {'range': [70, 100], 'color': "darkgray"},
	            ],
	            # Red marker drawn at 30 on the 0-100 scale.
	            'threshold': {
	                'line': {'color': "red", 'width': 4},
	                'thickness': 0.75,
	                'value': 30,
	            },
	        },
	    ))
	    st.plotly_chart(fig)


	def _render_exploration_page(data):
	    """Show summary tables and charts for the preprocessed loan data."""
	    st.title("Explore Loan Application Data")

	    # Data overview
	    st.subheader("Data Overview")
	    st.write(data.head())
	    st.write(f"Total number of records: {len(data)}")

	    # Loan Status Distribution
	    st.subheader("Loan Status Distribution")
	    fig = px.pie(
	        data,
	        names='Loan_Status',
	        title='Loan Status Distribution',
	        hole=0.3,
	        color_discrete_sequence=px.colors.sequential.RdBu,
	    )
	    st.plotly_chart(fig)

	    # Correlation Heatmap (numeric columns only)
	    st.subheader("Correlation Heatmap")
	    numeric_cols = data.select_dtypes(include=[np.number]).columns
	    corr_matrix = data[numeric_cols].corr()
	    fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", color_continuous_scale='RdBu')
	    st.plotly_chart(fig)

	    # Loan Amount Distribution
	    st.subheader("Loan Amount Distribution")
	    fig = px.histogram(
	        data,
	        x="LoanAmount",
	        nbins=50,
	        title="Loan Amount Distribution",
	        color="Loan_Status",
	        color_discrete_sequence=px.colors.sequential.RdBu,
	    )
	    st.plotly_chart(fig)

	    # Applicant Income vs Loan Amount
	    st.subheader("Applicant Income vs Loan Amount")
	    fig = px.scatter(
	        data,
	        x="ApplicantIncome",
	        y="LoanAmount",
	        color="Loan_Status",
	        title="Applicant Income vs Loan Amount",
	        color_discrete_sequence=px.colors.sequential.RdBu,
	    )
	    st.plotly_chart(fig)

	    # Loan Status by Education and Credit History
	    st.subheader("Loan Status by Education and Credit History")
	    fig = px.sunburst(
	        data,
	        path=['Education', 'Credit_History', 'Loan_Status'],
	        title="Loan Status by Education and Credit History",
	        color='Loan_Status',
	        color_discrete_sequence=px.colors.sequential.RdBu,
	    )
	    st.plotly_chart(fig)

	# Script entry point: start the app only when this file is run directly, not when it is imported.
	if __name__ == "__main__":
	main()