7sugiwa
committed on
Commit
•
7af84f6
1
Parent(s):
4881e42
Add application file
Browse files
- app.py +82 -0
- logistic_regression_model.pkl +3 -0
- pca_transformer.pkl +3 -0
- scaler.pkl +3 -0
app.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Load the pickled artifacts that sit next to this script.
def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Trained model
model = _load_pickle('logistic_regression_model.pkl')

# Scaler
scaler = _load_pickle('scaler.pkl')

# PCA
pca = _load_pickle('pca_transformer.pkl')
|
17 |
+
|
18 |
+
# Column names as they were used in training, assembled from shared groups so
# the scaled subset below cannot drift from the full list.
_profile_cols = ['limit_balance', 'sex', 'education_level', 'marital_status', 'age']
# The repayment-status names skip 'pay_1': they run pay_0, pay_2, ..., pay_6.
_pay_status_cols = ['pay_0'] + [f'pay_{month}' for month in range(2, 7)]
_bill_cols = [f'bill_amt_{month}' for month in range(1, 7)]
_pay_amt_cols = [f'pay_amt_{month}' for month in range(1, 7)]

columns = _profile_cols + _pay_status_cols + _bill_cols + _pay_amt_cols

# Columns that were scaled (continuous variables)
transform_cols = ['limit_balance', 'age'] + _bill_cols + _pay_amt_cols

# Threshold for deciding on log transformation
skewness_threshold = 1  # Adjust this based on what you used during training
|
32 |
+
|
33 |
+
# Function to predict default payment next month
|
34 |
+
def predict_default(features, log_cols=()):
    """Predict whether a client defaults on next month's payment.

    The single feature row is scaled with the module-level ``scaler``,
    projected with ``pca`` and classified with ``model``, in that order.

    Args:
        features: Sequence of feature values ordered like the module-level
            ``columns`` list.
        log_cols: Names of columns from ``transform_cols`` to pass through
            ``np.log1p`` before scaling. Defaults to none, which is what the
            earlier per-request skewness test effectively did (see below).
            NOTE(review): this should name whichever columns were
            log-transformed when the scaler was fitted -- confirm against
            the training code.

    Returns:
        The first element of ``model.predict`` for the row.

    Raises:
        ValueError: If ``log_cols`` names a column outside
            ``transform_cols``, or if any feature is NaN/infinite once the
            log transform has been applied.
    """
    unknown = [col for col in log_cols if col not in transform_cols]
    if unknown:
        raise ValueError(f"log_cols contains columns that are not scaled: {unknown}")

    # Build the row as floats so transformed values never land in an
    # integer-typed column.
    df = pd.DataFrame(np.asarray([features], dtype=float), columns=columns)

    # The previous version chose log columns with df[col].skew() on this
    # one-row frame. Skewness of a single observation is NaN, so that test was
    # never true and the transform silently never ran. Which columns to
    # transform is a training-time decision, so it is taken from the caller.
    for col in log_cols:
        df[col] = np.log1p(df[col])

    # A one-row frame has no column mean to impute from, so reject bad values
    # up front instead of handing NaN/inf to the scaler and PCA.
    if not np.isfinite(df.to_numpy()).all():
        raise ValueError("features must be finite (after any log transform)")

    # Scale the continuous columns in place, leaving the categorical ones as-is.
    df[transform_cols] = scaler.transform(df[transform_cols])

    # Project the full row and classify it.
    pca_data = pca.transform(df)
    return model.predict(pca_data)[0]
|
58 |
+
# Page header
st.title("Credit Default Prediction")
st.write("Enter the details to predict default payment next month")

# Display labels for the coded categorical inputs.
_EDUCATION_LABELS = {
    1: 'graduate school',
    2: 'university',
    3: 'high school',
    4: 'others',
    5: 'unknown',
    6: 'unknown',
}
_MARITAL_LABELS = {1: 'married', 2: 'single', 3: 'others'}


def _sex_label(code):
    """Return the display label for a sex code (1 is 'Male', anything else 'Female')."""
    if code == 1:
        return 'Male'
    return 'Female'


def _education_label(code):
    """Return the display label for an education code, 'unknown' if unmapped."""
    return _EDUCATION_LABELS.get(code, 'unknown')


def _marital_label(code):
    """Return the display label for a marital-status code, 'unknown' if unmapped."""
    return _MARITAL_LABELS.get(code, 'unknown')


# Client profile inputs
limit_balance = st.number_input('Limit Balance', min_value=0)
sex = st.selectbox('Sex', options=[1, 2], format_func=_sex_label)
education_level = st.selectbox(
    'Education Level',
    options=[1, 2, 3, 4, 5, 6],
    format_func=_education_label,
)
marital_status = st.selectbox(
    'Marital Status',
    options=[1, 2, 3],
    format_func=_marital_label,
)
age = st.number_input('Age', min_value=0)

# Six repayment-status selectors, then six bill amounts, then six previous
# payments. Widgets are created in the same order as before.
pay_status = []
for month in range(1, 7):
    pay_status.append(
        st.selectbox(
            f'Payment Status in Month {month}',
            options=list(range(-2, 9)),
            index=4,
        )
    )

bill_amts = []
for month in range(1, 7):
    bill_amts.append(st.number_input(f'Bill Amount {month}', min_value=0))

pay_amts = []
for month in range(1, 7):
    pay_amts.append(st.number_input(f'Previous Payment {month}', min_value=0))

# Run the prediction only when the button is pressed.
if st.button("Predict"):
    # Feature order must match the order the model expects.
    features = [
        limit_balance, sex, education_level, marital_status, age,
        *pay_status, *bill_amts, *pay_amts,
    ]
    outcome = predict_default(features)
    message = (
        "The client is likely to default next month."
        if outcome == 1
        else "The client is unlikely to default next month."
    )
    st.write(message)
|
logistic_regression_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f56acf983a9371f6079168a6f9dbc7db3d1fe8d443de44c23cb00a3df8f5be4
|
3 |
+
size 821
|
pca_transformer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5dc5da85ec1a2d693ce04093dc6a74ad7527d866b2c2395b9d345c0c51e169d
|
3 |
+
size 4425
|
scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d039e6efe6f677ea61becebd0ba1a29e36ce17fa145866ad46fdd8c0352086fc
|
3 |
+
size 1044
|