Spaces:

7sugiwa
/

gc5

Sleeping

gc5 / app.py

7sugiwa

Add application file

7af84f6 6 months ago

No virus

3.71 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import pickle

	# Load the fitted artifacts the app needs, once, at script start.
	# NOTE(security): unpickling can execute arbitrary code embedded in the file,
	# so these .pkl files must only ever come from a trusted source.
	def _load_pickle(path):
	    """Return the object stored in the pickle file at *path*.

	    Raises whatever ``open`` or ``pickle.load`` raise (for example
	    ``FileNotFoundError`` when the file is missing).
	    """
	    with open(path, 'rb') as file:
	        return pickle.load(file)


	# Load trained models
	model = _load_pickle('logistic_regression_model.pkl')

	# Load scaler
	scaler = _load_pickle('scaler.pkl')

	# Load PCA
	pca = _load_pickle('pca_transformer.pkl')

	# Feature names, in the order the DataFrame handed to the pipeline must use.
	# Note the repayment-status columns skip 1: pay_0, then pay_2 .. pay_6.
	columns = [
	    'limit_balance', 'sex', 'education_level', 'marital_status', 'age',
	    *(f'pay_{month}' for month in (0, 2, 3, 4, 5, 6)),
	    *(f'bill_amt_{month}' for month in range(1, 7)),
	    *(f'pay_amt_{month}' for month in range(1, 7)),
	]

	# Continuous columns: the subset that is passed through the scaler.
	transform_cols = [
	    'limit_balance', 'age',
	    *(f'bill_amt_{month}' for month in range(1, 7)),
	    *(f'pay_amt_{month}' for month in range(1, 7)),
	]

	# Absolute-skewness cut-off above which a column gets a log transform.
	# Adjust this based on what you used during training.
	skewness_threshold = 1

	# Function to predict default payment next month
	def predict_default(features, log_cols=()):
	    """Predict default payment next month for a single client.

	    Args:
	        features: One value per entry of the module-level ``columns`` list,
	            in that same order.
	        log_cols: Names of columns to pass through ``np.log1p`` before
	            scaling. Defaults to none, which matches what the previous
	            skewness-based check actually did (see the comment below).
	            NOTE(review): if training log-transformed its skewed columns,
	            pass those same names here - confirm against the training code.

	    Returns:
	        The first element of ``model.predict(...)`` for this client; the
	        caller compares it with 1.

	    Raises:
	        ValueError: If ``features`` does not hold exactly one value per
	            column, or a value cannot be converted to float.
	    """
	    if len(features) != len(columns):
	        raise ValueError(
	            f"expected {len(columns)} feature values, got {len(features)}"
	        )

	    # One-row frame. Built as float so the scaled (float) values can be
	    # written back into the same columns without a dtype clash.
	    df = pd.DataFrame(np.array([features], dtype=float), columns=columns)

	    # The earlier version chose log-transformed columns by comparing
	    # abs(df[col].skew()) with skewness_threshold. Skewness of a single row
	    # is NaN, so that comparison was never true and no column was ever
	    # transformed. The choice cannot be derived from one sample; it is now
	    # an explicit argument.
	    for col in log_cols:
	        df[col] = np.log1p(df[col])

	    # Replace any inf/-inf with NaN and fill NaNs with the column mean.
	    # With a single row the mean is the value itself, so a missing value
	    # stays missing and is left for the downstream steps to reject.
	    df = df.replace([np.inf, -np.inf], np.nan)
	    df = df.fillna(df.mean())

	    # Scale the continuous columns in place; the other columns pass through.
	    df[transform_cols] = scaler.transform(df[transform_cols])

	    # Project the full feature row with the fitted PCA, then classify.
	    pca_data = pca.transform(df)
	    return model.predict(pca_data)[0]
	# Page header
	st.title("Credit Default Prediction")
	st.write("Enter the details to predict default payment next month")

	# Display text for the coded categorical inputs
	education_labels = {
	    1: 'graduate school',
	    2: 'university',
	    3: 'high school',
	    4: 'others',
	    5: 'unknown',
	    6: 'unknown',
	}
	marital_labels = {1: 'married', 2: 'single', 3: 'others'}

	# Input widgets, created in the order they appear on the page
	limit_balance = st.number_input('Limit Balance', min_value=0)
	sex = st.selectbox(
	    'Sex',
	    options=[1, 2],
	    format_func=lambda code: 'Male' if code == 1 else 'Female',
	)
	education_level = st.selectbox(
	    'Education Level',
	    options=[1, 2, 3, 4, 5, 6],
	    format_func=lambda code: education_labels.get(code, 'unknown'),
	)
	marital_status = st.selectbox(
	    'Marital Status',
	    options=[1, 2, 3],
	    format_func=lambda code: marital_labels.get(code, 'unknown'),
	)
	age = st.number_input('Age', min_value=0)

	# Six monthly groups: repayment status, bill amount, previous payment
	status_codes = list(range(-2, 9))
	pay_status = [
	    st.selectbox(f'Payment Status in Month {month}', options=status_codes, index=4)
	    for month in range(1, 7)
	]
	bill_amts = [
	    st.number_input(f'Bill Amount {month}', min_value=0)
	    for month in range(1, 7)
	]
	pay_amts = [
	    st.number_input(f'Previous Payment {month}', min_value=0)
	    for month in range(1, 7)
	]

	# Run the model only when the button is pressed
	if st.button("Predict"):
	    # Assemble the inputs in the same order as the model's feature columns
	    features = [
	        limit_balance, sex, education_level, marital_status, age,
	        *pay_status, *bill_amts, *pay_amts,
	    ]
	    outcome = predict_default(features)
	    message = (
	        "The client is likely to default next month."
	        if outcome == 1
	        else "The client is unlikely to default next month."
	    )
	    st.write(message)