"""Streamlit form that predicts next-month credit-card default.

The script loads a pickled classifier, scaler and PCA transformer using
relative paths, collects one client's attributes through a form, pushes them
through the scaler and the PCA transformer, and displays the classifier's
verdict.
"""

import pickle

import numpy as np
import pandas as pd
import streamlit as st


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code, so only trusted artifacts
    # may be placed at these paths.
    with open(path, 'rb') as file:
        return pickle.load(file)


# Load trained model
model = _load_pickle('logistic_regression_model.pkl')

# Load scaler
scaler = _load_pickle('scaler.pkl')

# Load PCA
pca = _load_pickle('pca_transformer.pkl')

# Define the column names as they were used in training
columns = [
    'limit_balance', 'sex', 'education_level', 'marital_status', 'age',
    'pay_0', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6',
    'bill_amt_1', 'bill_amt_2', 'bill_amt_3',
    'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
    'pay_amt_1', 'pay_amt_2', 'pay_amt_3',
    'pay_amt_4', 'pay_amt_5', 'pay_amt_6',
]

# Define the columns that were scaled (continuous variables)
transform_cols = [
    'limit_balance', 'age',
    'bill_amt_1', 'bill_amt_2', 'bill_amt_3',
    'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
    'pay_amt_1', 'pay_amt_2', 'pay_amt_3',
    'pay_amt_4', 'pay_amt_5', 'pay_amt_6',
]

# Threshold for deciding on log transformation
skewness_threshold = 1  # Adjust this based on what you used during training

# Columns to log1p-transform before scaling. Skewness describes a whole
# sample, so it cannot be derived from the single row submitted through the
# form: the skewness of one value is undefined (pandas reports NaN) and a
# per-request "abs(skew) > skewness_threshold" test can never be true.
# TODO(review): list here the columns that were log-transformed when the
# scaler was fitted. The list is empty because that information is not
# available in this file; an empty list applies no log transform, which is
# what the per-request skew test amounted to.
log_transform_cols = []


def predict_default(features):
    """Predict whether a client defaults on next month's payment.

    Args:
        features: Sequence of numeric values, one per entry of ``columns``
            and in the same order.

    Returns:
        The first element of ``model.predict`` for the single input row.
        The form below treats ``1`` as "likely to default".

    Raises:
        ValueError: If ``features`` does not have one value per column, or
            if any value is NaN/infinite once the log transform is applied.
    """
    values = list(features)
    if len(values) != len(columns):
        raise ValueError(
            f"expected {len(columns)} features, got {len(values)}"
        )

    # Single-row frame carrying the training column names.
    df = pd.DataFrame(np.array([values], dtype=float), columns=columns)

    for col in log_transform_cols:
        df[col] = np.log1p(df[col])  # Log transformation

    # A one-row frame has no meaningful column mean to impute with, so
    # non-finite values are reported instead of being passed downstream.
    finite_mask = np.isfinite(df.to_numpy())[0]
    if not finite_mask.all():
        bad_cols = [
            name for name, ok in zip(columns, finite_mask) if not ok
        ]
        raise ValueError(f"non-finite feature values in columns: {bad_cols}")

    # Scale the continuous columns in place.
    df[transform_cols] = scaler.transform(df[transform_cols])

    # Apply PCA transformation (if you're using PCA in your pipeline)
    pca_data = pca.transform(df)

    # Predict using the model
    prediction = model.predict(pca_data)
    return prediction[0]


# Creating a simple form
st.title("Credit Default Prediction")
st.write("Enter the details to predict default payment next month")

# Input fields
limit_balance = st.number_input('Limit Balance', min_value=0)
sex = st.selectbox(
    'Sex',
    options=[1, 2],
    format_func=lambda x: 'Male' if x == 1 else 'Female',
)
education_level = st.selectbox(
    'Education Level',
    options=[1, 2, 3, 4, 5, 6],
    format_func=lambda x: {
        1: 'graduate school',
        2: 'university',
        3: 'high school',
        4: 'others',
        5: 'unknown',
        6: 'unknown',
    }.get(x, 'unknown'),
)
marital_status = st.selectbox(
    'Marital Status',
    options=[1, 2, 3],
    format_func=lambda x: {
        1: 'married',
        2: 'single',
        3: 'others',
    }.get(x, 'unknown'),
)
age = st.number_input('Age', min_value=0)

# Repayment status
# index=4 makes the value 2 the initial selection in the -2..8 option list.
# NOTE(review): confirm that 2 is the intended default.
pay_status = [
    st.selectbox(
        f'Payment Status in Month {i+1}',
        options=list(range(-2, 9)),
        index=4,
    )
    for i in range(6)
]
bill_amts = [
    st.number_input(f'Bill Amount {i+1}', min_value=0) for i in range(6)
]
pay_amts = [
    st.number_input(f'Previous Payment {i+1}', min_value=0) for i in range(6)
]

# Predict button
if st.button("Predict"):
    # Gather all feature inputs in the exact order and number as the model
    # expects (the order of ``columns``).
    features = (
        [limit_balance, sex, education_level, marital_status, age]
        + pay_status
        + bill_amts
        + pay_amts
    )

    # Make prediction and display the result
    prediction = predict_default(features)
    if prediction == 1:
        st.write("The client is likely to default next month.")
    else:
        st.write("The client is unlikely to default next month.")