# gc5 / app.py
# 7sugiwa
# Add application file
# 7af84f6
import streamlit as st
import numpy as np
import pandas as pd
import pickle
def _load_pickle(path):
    """Return the object unpickled from the binary file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pre-fitted artifacts, read once from the working directory at import time.
# NOTE: unpickling executes code embedded in the file, so these must only
# ever be artifacts produced by this project.
model = _load_pickle('logistic_regression_model.pkl')  # trained model
scaler = _load_pickle('scaler.pkl')                    # feature scaler
pca = _load_pickle('pca_transformer.pkl')              # PCA transformer
# Feature groups, spelled out once and reused by both lists below.
_categorical_cols = ['sex', 'education_level', 'marital_status']
_repayment_status_cols = ['pay_0', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6']
_bill_amount_cols = [
    'bill_amt_1', 'bill_amt_2', 'bill_amt_3',
    'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
]
_payment_amount_cols = [
    'pay_amt_1', 'pay_amt_2', 'pay_amt_3',
    'pay_amt_4', 'pay_amt_5', 'pay_amt_6',
]

# Full feature list, in the order used during training.
columns = [
    'limit_balance',
    *_categorical_cols,
    'age',
    *_repayment_status_cols,
    *_bill_amount_cols,
    *_payment_amount_cols,
]

# Continuous features: the subset of `columns` that was scaled.
transform_cols = [
    'limit_balance',
    'age',
    *_bill_amount_cols,
    *_payment_amount_cols,
]

# Skewness cutoff for deciding on a log transformation; keep it equal to the
# value that was used during training.
skewness_threshold = 1
def predict_default(features, log_cols=None):
    """Predict default payment next month for a single client.

    The raw values are placed in a one-row DataFrame, optionally
    log-transformed, scaled with the module-level ``scaler``, projected with
    the module-level ``pca`` and finally classified by the module-level
    ``model``.

    Args:
        features: Sequence holding one value per entry of the module-level
            ``columns`` list, in that same order.
        log_cols: Optional iterable of column names to pass through
            ``np.log1p`` before scaling. Skewness cannot be estimated from
            the single row built here (pandas yields NaN for it), so a
            per-request skewness test can never select a column; the choice
            has to be supplied explicitly. The default (``None``) applies no
            log transform, which is what the earlier skewness test always
            ended up doing.
            NOTE(review): this should presumably be the set of columns that
            were log-transformed during training -- confirm against the
            training pipeline.

    Returns:
        The first element of ``model.predict(...)`` for the given row.

    Raises:
        ValueError: If ``features`` does not contain exactly one value per
            column, or if a value is NaN/infinite after the optional log
            transform.
    """
    if len(features) != len(columns):
        raise ValueError(
            f"expected {len(columns)} feature values, got {len(features)}"
        )

    # Build the row as floats up front so the scaled values written back
    # below never have to be squeezed into integer-typed columns.
    row = np.array([features], dtype=float)
    df = pd.DataFrame(row, columns=columns)

    for col in (log_cols or ()):
        df[col] = np.log1p(df[col])

    # A one-row frame has no other observations to impute from (the column
    # mean of a lone NaN is NaN), so reject bad values explicitly instead of
    # letting them flow into the scaler, PCA and model.
    if not np.isfinite(df.to_numpy()).all():
        raise ValueError("features contain NaN or infinite values")

    # Scale only the continuous columns; the remaining columns are passed to
    # the PCA step unchanged.
    df[transform_cols] = scaler.transform(df[transform_cols])

    pca_data = pca.transform(df)
    return model.predict(pca_data)[0]
# ---- Page header ----
st.title("Credit Default Prediction")
st.write("Enter the details to predict default payment next month")

# Display text for the coded categorical inputs.
_education_labels = {1: 'graduate school', 2: 'university', 3: 'high school', 4: 'others', 5: 'unknown', 6: 'unknown'}
_marital_labels = {1: 'married', 2: 'single', 3: 'others'}

# ---- Client details (widgets render in the order they are created) ----
limit_balance = st.number_input('Limit Balance', min_value=0)
sex = st.selectbox(
    'Sex',
    options=[1, 2],
    format_func=lambda code: 'Male' if code == 1 else 'Female',
)
education_level = st.selectbox(
    'Education Level',
    options=[1, 2, 3, 4, 5, 6],
    format_func=lambda code: _education_labels.get(code, 'unknown'),
)
marital_status = st.selectbox(
    'Marital Status',
    options=[1, 2, 3],
    format_func=lambda code: _marital_labels.get(code, 'unknown'),
)
age = st.number_input('Age', min_value=0)

# ---- Six repayment-status selectors ----
# Each offers the codes -2..8; index=4 preselects the value 2.
pay_status = []
for i in range(6):
    pay_status.append(
        st.selectbox(f'Payment Status in Month {i+1}', options=list(range(-2,9)), index=4)
    )

# ---- Six bill amounts, then six previous payments ----
bill_amts = []
for i in range(6):
    bill_amts.append(st.number_input(f'Bill Amount {i+1}', min_value=0))

pay_amts = []
for i in range(6):
    pay_amts.append(st.number_input(f'Previous Payment {i+1}', min_value=0))

# ---- Run the prediction when the button is pressed ----
if st.button("Predict"):
    # Assemble the inputs in the order predict_default expects: the five
    # client fields, then repayment statuses, bill amounts and payments.
    features = [limit_balance, sex, education_level, marital_status, age]
    features.extend(pay_status)
    features.extend(bill_amts)
    features.extend(pay_amts)

    prediction = predict_default(features)

    # A prediction of 1 is reported as a likely default; anything else as
    # unlikely.
    if prediction == 1:
        verdict = "The client is likely to default next month."
    else:
        verdict = "The client is unlikely to default next month."
    st.write(verdict)