# app.py -- uploaded by Johan713; last commit "Update app.py" (60b864d, verified).
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
@st.cache_data
def load_and_preprocess_data():
    """Load ``train.csv`` and return a cleaned copy ready for modelling.

    Steps performed:

    * Missing values in ``Gender``, ``Married``, ``Dependents``,
      ``Self_Employed``, ``Loan_Amount_Term`` and ``Credit_History`` are
      replaced by the column mode; missing ``LoanAmount`` by the median.
    * ``Dependents`` has its ``'3+'`` level mapped to ``'3'`` and is cast
      to ``int``.
    * ``LoanAmount``, ``ApplicantIncome`` and ``CoapplicantIncome`` are
      transformed with ``log1p``.

    Returns:
        pandas.DataFrame: the preprocessed data.
    """
    data = pd.read_csv('train.csv')

    # Assign the filled column back instead of calling
    # ``data[col].fillna(..., inplace=True)``: the in-place form operates on
    # a temporary column selection, which warns on recent pandas and leaves
    # ``data`` unchanged once Copy-on-Write is active.
    mode_imputed = (
        'Gender',
        'Married',
        'Dependents',
        'Self_Employed',
        'Loan_Amount_Term',
        'Credit_History',
    )
    for column in mode_imputed:
        data[column] = data[column].fillna(data[column].mode()[0])
    data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())

    # '3+' is the only non-numeric level handled here; collapse it to 3.
    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)

    # log1p keeps zero values (e.g. a zero coapplicant income) finite.
    for column in ('LoanAmount', 'ApplicantIncome', 'CoapplicantIncome'):
        data[column] = np.log1p(data[column])

    return data
@st.cache_resource
def get_model(data):
    """Train the loan-approval classifier on the preprocessed data.

    Args:
        data: DataFrame containing ``Loan_ID``, ``Loan_Status`` and the
            feature columns.

    Returns:
        tuple: ``(model, scaler, feature_names)`` where ``model`` is the
        fitted ``RandomForestClassifier``, ``scaler`` the ``StandardScaler``
        fitted on the training split, and ``feature_names`` the list of
        one-hot-encoded column names, in the order used for fitting.
    """
    # Loan_ID is an identifier and Loan_Status is the target; neither is a
    # feature.
    X = pd.get_dummies(data.drop(['Loan_ID', 'Loan_Status'], axis=1), drop_first=True)
    y = data['Loan_Status']

    # Column order must be replayed at prediction time, so record it.
    feature_names = X.columns.tolist()

    # Keep the same 80/20 split and seed so the fitted model is unchanged;
    # the held-out 20% is not used in this function, so it is discarded
    # rather than scaled for nothing.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)

    return model, scaler, feature_names
def predict_loan_approval(model, scaler, feature_names, input_data, threshold=0.3):
    """Predict approval for a single application.

    Args:
        model: fitted classifier exposing ``predict_proba`` (and, if
            available, ``classes_``).
        scaler: fitted transformer exposing ``transform``.
        feature_names: one-hot-encoded column names, in training order.
        input_data: mapping of raw field name to value for one applicant.
            ``Dependents`` may be given as ``'0'``..``'3+'`` or as an int.
        threshold: minimum approval probability for a ``'Y'`` verdict.
            Defaults to 0.3, the cut-off previously hard-coded here.

    Returns:
        tuple: ``(prediction, probability)`` where ``prediction`` is ``'Y'``
        or ``'N'`` and ``probability`` is the model's probability of the
        ``'Y'`` class as a float.
    """
    input_df = pd.DataFrame([input_data])

    # Dependents is numeric in the training features; a raw '3+' string would
    # otherwise be one-hot encoded and then discarded by the reindex below.
    if 'Dependents' in input_df.columns:
        input_df['Dependents'] = input_df['Dependents'].replace('3+', '3').astype(int)

    # Do NOT use drop_first here: with a single row every categorical column
    # has exactly one level, so drop_first would remove all of them and every
    # categorical feature would be encoded as 0. Encode all levels and let
    # the reindex keep only the columns seen in training (the training
    # baseline levels are simply dropped, missing levels become 0).
    input_df = pd.get_dummies(input_df)
    input_df = input_df.reindex(columns=feature_names, fill_value=0)

    input_scaled = scaler.transform(input_df)

    # Locate the 'Y' column explicitly rather than assuming it is index 1.
    classes = list(getattr(model, 'classes_', ()))
    approved_idx = classes.index('Y') if 'Y' in classes else 1
    probability = float(model.predict_proba(input_scaled)[0][approved_idx])

    # Report the model's actual probability; flooring it at the threshold
    # made the rejection branch unreachable.
    prediction = 'Y' if probability >= threshold else 'N'
    return prediction, probability
# Streamlit app
def main():
    """Entry point of the Streamlit app.

    Configures the page, draws the sidebar navigation, loads the data and
    the trained model, then renders whichever page the user selected.
    """
    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])

    # Data and model are loaded regardless of the selected page.
    data = load_and_preprocess_data()
    model, scaler, feature_names = get_model(data)

    if page == "Predict":
        _render_predict_page(model, scaler, feature_names)
    elif page == "Explore Data":
        _render_explore_page(data)


def _render_predict_page(model, scaler, feature_names):
    """Draw the input form and, once submitted, the prediction result."""
    st.title("Loan Approval Predictor")
    st.write("Fill in the details below to predict your loan approval chances.")

    left, middle, right = st.columns(3)
    with left:
        gender = st.selectbox("Gender", ["Male", "Female"])
        married = st.selectbox("Married", ["Yes", "No"])
        dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
        education = st.selectbox("Education", ["Graduate", "Not Graduate"])
    with middle:
        self_employed = st.selectbox("Self Employed", ["Yes", "No"])
        applicant_income = st.number_input("Applicant Income", min_value=0)
        coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
        loan_amount = st.number_input("Loan Amount", min_value=0)
    with right:
        loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
        credit_history = st.selectbox("Credit History", [0, 1])
        property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])

    # Nothing more to draw until the user asks for a prediction.
    if not st.button("Predict"):
        return

    # Monetary fields get the same log1p transform applied to the training data.
    input_data = dict([
        ('Gender', gender),
        ('Married', married),
        ('Dependents', dependents),
        ('Education', education),
        ('Self_Employed', self_employed),
        ('ApplicantIncome', np.log1p(applicant_income)),
        ('CoapplicantIncome', np.log1p(coapplicant_income)),
        ('LoanAmount', np.log1p(loan_amount)),
        ('Loan_Amount_Term', loan_amount_term),
        ('Credit_History', credit_history),
        ('Property_Area', property_area),
    ])
    prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)

    st.subheader("Prediction Result")
    if prediction != 'Y':
        st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")
    else:
        st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")

    # Gauge showing the approval probability as a percentage.
    gauge_bands = [
        {'range': [0, 30], 'color': "lightgray"},
        {'range': [30, 70], 'color': "gray"},
        {'range': [70, 100], 'color': "darkgray"},
    ]
    gauge_threshold = {
        'line': {'color': "red", 'width': 4},
        'thickness': 0.75,
        'value': 30,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=probability * 100,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Approval Probability"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': gauge_bands,
            'threshold': gauge_threshold,
        },
    )
    st.plotly_chart(go.Figure(indicator))


def _render_explore_page(data):
    """Draw the data overview and the exploratory charts, top to bottom."""
    palette = px.colors.sequential.RdBu

    st.title("Explore Loan Application Data")

    # Data overview
    st.subheader("Data Overview")
    st.write(data.head())
    st.write(f"Total number of records: {len(data)}")

    # Loan status distribution
    st.subheader("Loan Status Distribution")
    status_pie = px.pie(
        data,
        names='Loan_Status',
        title='Loan Status Distribution',
        hole=0.3,
        color_discrete_sequence=palette,
    )
    st.plotly_chart(status_pie)

    # Correlation heatmap over the numeric columns only
    st.subheader("Correlation Heatmap")
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    heatmap = px.imshow(
        data[numeric_cols].corr(),
        text_auto=True,
        aspect="auto",
        color_continuous_scale='RdBu',
    )
    st.plotly_chart(heatmap)

    # Loan amount distribution
    st.subheader("Loan Amount Distribution")
    amount_hist = px.histogram(
        data,
        x="LoanAmount",
        nbins=50,
        title="Loan Amount Distribution",
        color="Loan_Status",
        color_discrete_sequence=palette,
    )
    st.plotly_chart(amount_hist)

    # Applicant income vs loan amount
    st.subheader("Applicant Income vs Loan Amount")
    income_scatter = px.scatter(
        data,
        x="ApplicantIncome",
        y="LoanAmount",
        color="Loan_Status",
        title="Applicant Income vs Loan Amount",
        color_discrete_sequence=palette,
    )
    st.plotly_chart(income_scatter)

    # Loan status by education and credit history
    st.subheader("Loan Status by Education and Credit History")
    status_sunburst = px.sunburst(
        data,
        path=['Education', 'Credit_History', 'Loan_Status'],
        title="Loan Status by Education and Credit History",
        color='Loan_Status',
        color_discrete_sequence=palette,
    )
    st.plotly_chart(status_sunburst)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()