File size: 3,712 Bytes
7af84f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import streamlit as st
import numpy as np
import pandas as pd
import pickle

# Load the fitted artefacts that were saved as pickle files.
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Estimator whose ``predict`` produces the final answer
model = _load_pickle('logistic_regression_model.pkl')

# Scaler applied to the continuous feature columns
scaler = _load_pickle('scaler.pkl')

# PCA transformer applied to the whole feature frame before prediction
pca = _load_pickle('pca_transformer.pkl')

# Building blocks for the feature-name lists below.
_bill_cols = [f'bill_amt_{month}' for month in range(1, 7)]
_pay_amt_cols = [f'pay_amt_{month}' for month in range(1, 7)]
# Repayment-status columns are numbered 0, 2, 3, 4, 5, 6 (there is no pay_1).
_pay_status_cols = ['pay_0'] + [f'pay_{month}' for month in range(2, 7)]

# Column names, in the order they were used in training
columns = (
    ['limit_balance', 'sex', 'education_level', 'marital_status', 'age']
    + _pay_status_cols
    + _bill_cols
    + _pay_amt_cols
)


# Continuous variables: the subset of columns that is passed to the scaler
transform_cols = ['limit_balance', 'age'] + _bill_cols + _pay_amt_cols

# Skewness above which a column is log-transformed.
# Adjust this to the value that was used during training.
skewness_threshold = 1

# Function to predict default payment next month
def predict_default(features, log_cols=None):
    """Predict default payment next month for a single client.

    The features are placed in a one-row DataFrame, optionally
    log-transformed, scaled with ``scaler`` (continuous columns only),
    projected with ``pca`` and passed to ``model.predict``.

    Parameters
    ----------
    features : sequence of numbers
        One value per entry of ``columns``, in that order.
    log_cols : iterable of str, optional
        Names from ``transform_cols`` to pass through ``np.log1p`` before
        scaling. Defaults to ``None`` (no log transformation).
        NOTE(review): if training log-transformed skewed columns, pass the
        same column list here; it cannot be recovered at inference time.

    Returns
    -------
    The first element of the array returned by ``model.predict``.

    Raises
    ------
    ValueError
        If the number of features does not match ``columns``, if
        ``log_cols`` names a column outside ``transform_cols``, or if any
        value is NaN/infinite after the optional log transformation.
    """
    values = list(features)
    if len(values) != len(columns):
        raise ValueError(
            f"Expected {len(columns)} feature values, got {len(values)}"
        )

    # One row, named exactly like the training columns
    df = pd.DataFrame([values], columns=columns, dtype=float)

    # Skewness is undefined for a single observation (pandas returns NaN for
    # fewer than three values), so a skew-based decision can never trigger
    # here. The columns to log-transform must be supplied explicitly.
    if log_cols is not None:
        log_cols = list(log_cols)
        unknown = [col for col in log_cols if col not in transform_cols]
        if unknown:
            raise ValueError(f"Cannot log-transform unknown columns: {unknown}")
        if log_cols:
            df[log_cols] = np.log1p(df[log_cols])

    # With a single row there is no column mean to impute from, so
    # non-finite values are rejected instead of being passed downstream.
    if not np.isfinite(df.to_numpy()).all():
        raise ValueError("Feature values must be finite numbers")

    # Scale the continuous columns and write them back in place
    df[transform_cols] = scaler.transform(df[transform_cols])

    # Apply the PCA transformation to the full feature frame
    pca_data = pca.transform(df)

    # Predict using the model; a single row yields a single prediction
    return model.predict(pca_data)[0]


# Page header
st.title("Credit Default Prediction")
st.write("Enter the details to predict default payment next month")

# Display labels for the coded categorical inputs
_EDUCATION_LABELS = {
    1: 'graduate school',
    2: 'university',
    3: 'high school',
    4: 'others',
    5: 'unknown',
    6: 'unknown',
}
_MARITAL_LABELS = {1: 'married', 2: 'single', 3: 'others'}


def _sex_label(code):
    """Return 'Male' for code 1 and 'Female' for any other code."""
    if code == 1:
        return 'Male'
    return 'Female'


def _education_label(code):
    """Return the education label for ``code``, or 'unknown' if unmapped."""
    return _EDUCATION_LABELS.get(code, 'unknown')


def _marital_label(code):
    """Return the marital-status label for ``code``, or 'unknown' if unmapped."""
    return _MARITAL_LABELS.get(code, 'unknown')


# Input fields, created in the same order as the model's feature columns
limit_balance = st.number_input('Limit Balance', min_value=0)
sex = st.selectbox('Sex', options=[1, 2], format_func=_sex_label)
education_level = st.selectbox(
    'Education Level',
    options=[1, 2, 3, 4, 5, 6],
    format_func=_education_label,
)
marital_status = st.selectbox(
    'Marital Status',
    options=[1, 2, 3],
    format_func=_marital_label,
)
age = st.number_input('Age', min_value=0)

# Repayment status: codes -2..8; index=4 preselects the value 2
pay_status = []
for month in range(1, 7):
    pay_status.append(
        st.selectbox(
            f'Payment Status in Month {month}',
            options=list(range(-2, 9)),
            index=4,
        )
    )

# Six bill amounts
bill_amts = []
for month in range(1, 7):
    bill_amts.append(st.number_input(f'Bill Amount {month}', min_value=0))

# Six previous payment amounts
pay_amts = []
for month in range(1, 7):
    pay_amts.append(st.number_input(f'Previous Payment {month}', min_value=0))

# Predict button
if st.button("Predict"):
    # Assemble the inputs in the exact order and number the model expects
    features = [
        limit_balance, sex, education_level, marital_status, age,
        *pay_status, *bill_amts, *pay_amts,
    ]
    # Run the prediction and report the outcome
    outcome = predict_default(features)
    if outcome == 1:
        verdict = "The client is likely to default next month."
    else:
        verdict = "The client is unlikely to default next month."
    st.write(verdict)