# harish199's picture
# Update app.py
# 5b65e5b
# Data Preparation
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
def load_data(data_path):
    """Read a CSV file and return it without duplicate or incomplete rows.

    Args:
        data_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        DataFrame with exact duplicate rows removed and every row that
        contains a missing value dropped. Original index labels are kept.
    """
    raw = pd.read_csv(data_path)
    deduplicated = raw.drop_duplicates()
    return deduplicated.dropna(axis=0)
def remove_outliers_iqr(df, col):
    """Keep only rows whose ``col`` value lies within the 1.5 * IQR fences.

    Args:
        df: DataFrame to filter.
        col: Name of the numeric column used for the outlier test.

    Returns:
        A filtered DataFrame containing the rows where ``col`` is between
        ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both ends inclusive).
    """
    values = df[col]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    fence = 1.5 * (third_quartile - first_quartile)
    # Both bounds are inclusive, matching a >= lower / <= upper filter pair.
    within_fences = (values >= first_quartile - fence) & (values <= third_quartile + fence)
    return df[within_fences]
# Columns of the credit-risk data that hold string categories.
categorical_features = [
    "person_home_ownership",
    "loan_intent",
    "loan_grade",
    "cb_person_default_on_file",
]
# One independent encoder per categorical column (home ownership, loan
# intent, loan grade, default-on-file), kept at module level so the
# functions in this file can share them.
le_pho, le_li, le_lg, le_cpd = (LabelEncoder() for _ in categorical_features)
def label_encode_categorical_columns(df):
    """Fit the module-level label encoders and encode the categorical columns.

    Each of the four categorical columns is fitted with its own encoder
    (``le_pho``, ``le_li``, ``le_lg``, ``le_cpd``), so the fitted encoders
    remain available afterwards at module level.

    Args:
        df: DataFrame containing ``person_home_ownership``, ``loan_intent``,
            ``loan_grade`` and ``cb_person_default_on_file``.

    Returns:
        A new DataFrame in which those four columns hold the encoders'
        output; the input frame is left unmodified.
    """
    column_encoders = {
        "person_home_ownership": le_pho,
        "loan_intent": le_li,
        "loan_grade": le_lg,
        "cb_person_default_on_file": le_cpd,
    }
    # Encode a copy: the frame passed in is usually a filtered slice of a
    # larger frame, and assigning columns into such a slice triggers pandas'
    # SettingWithCopyWarning and silently mutates the caller's data.
    encoded = df.copy()
    for column, encoder in column_encoders.items():
        encoded[column] = encoder.fit_transform(encoded[column])
    return encoded
def preprocess_data(data):
    """Strip IQR outliers from the numeric columns, then label-encode.

    Args:
        data: Cleaned credit-risk DataFrame.

    Returns:
        The DataFrame produced by ``label_encode_categorical_columns`` after
        outlier rows have been removed column by column.
    """
    outlier_prone_columns = [
        "person_age",
        "person_income",
        "person_emp_length",
        "cb_person_cred_hist_length",
        "loan_amnt",
    ]
    trimmed = data
    # Filters are applied sequentially, so each column's quartiles are
    # computed on the rows that survived the previous columns.
    for column in outlier_prone_columns:
        trimmed = remove_outliers_iqr(trimmed, column)
    return label_encode_categorical_columns(trimmed)
def data_balance(new_df, random_state=None):
    """Balance the two ``loan_status`` classes by random over-sampling.

    The smaller class is re-drawn with replacement until it has as many rows
    as the larger one. Class sizes are measured directly per label rather
    than taken from ``value_counts()``, whose ordering is by frequency and
    therefore does not reliably put class 0 first.

    Args:
        new_df: DataFrame with a ``loan_status`` column holding 0 / 1.
        random_state: Optional seed forwarded to ``DataFrame.sample`` for
            reproducible sampling. Defaults to ``None`` (non-deterministic).

    Returns:
        DataFrame with class-0 rows followed by class-1 rows, both classes
        having the same number of rows. If either class is absent there is
        nothing to sample from, and the input is returned unchanged.
    """
    # Divide by class
    df_class_0 = new_df[new_df['loan_status'] == 0]
    df_class_1 = new_df[new_df['loan_status'] == 1]
    count_class_0 = len(df_class_0)
    count_class_1 = len(df_class_1)

    # Over-sampling needs at least one row of each class.
    if count_class_0 == 0 or count_class_1 == 0:
        return new_df

    # random over sampling of whichever class is the minority
    if count_class_1 < count_class_0:
        df_class_1 = df_class_1.sample(
            count_class_0, replace=True, random_state=random_state
        )
    elif count_class_0 < count_class_1:
        df_class_0 = df_class_0.sample(
            count_class_1, replace=True, random_state=random_state
        )
    return pd.concat([df_class_0, df_class_1], axis=0)
def credit_risk_dataset_generator(data_path="credit_risk_dataset.csv"):
    """Build the model-ready credit risk dataset.

    Runs the full preparation pipeline: load the CSV, remove outliers and
    label-encode the categorical columns, then balance the target classes.

    Args:
        data_path: Location of the credit-risk CSV. Defaults to
            ``"credit_risk_dataset.csv"`` in the working directory.

    Returns:
        The class-balanced, preprocessed DataFrame.
    """
    data = load_data(data_path)
    preprocessed_data = preprocess_data(data)
    return data_balance(preprocessed_data)
# Model training: everything below runs at module level, before the UI is built.
import pandas as pd
# Load, clean, encode and class-balance the dataset.
df = credit_risk_dataset_generator()
# Features are every column except the target; the target is loan_status.
X = df.drop("loan_status", axis = 1)
y = df['loan_status']
from sklearn import ensemble
from sklearn.model_selection import train_test_split
# Train/Test split the dataset
# NOTE(review): the frame was over-sampled with replacement inside
# credit_risk_dataset_generator() before this split, so copies of the same
# row can end up in both partitions -- confirm before trusting test metrics.
x_train, x_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
from sklearn.preprocessing import StandardScaler
# The scaler is fitted on the training partition only; preprocess() reuses
# this same fitted st_x for inference inputs.
st_x= StandardScaler()
X_train_scaled= st_x.fit_transform(x_train)
# Scaled test features; not referenced again in the visible code.
X_test_scaled= st_x.transform(x_test)
from sklearn.ensemble import GradientBoostingClassifier
# Classifier with library-default settings; credit_run() calls gbm.predict().
gbm = GradientBoostingClassifier()
gbm.fit(X_train_scaled, y_train)
import numpy as np
import pandas as pd
def preprocess(model_input):
    """Encode and scale a raw inference frame for the trained model.

    The categorical columns of ``model_input`` are overwritten in place using
    the module-level label encoders, after which the whole frame is passed
    through the module-level scaler ``st_x``.

    Args:
        model_input: DataFrame of raw feature values.

    Returns:
        The output of ``st_x.transform`` for the encoded frame.
    """
    fitted_encoders = (
        ("person_home_ownership", le_pho),
        ("loan_intent", le_li),
        ("loan_grade", le_lg),
        ("cb_person_default_on_file", le_cpd),
    )
    for column, encoder in fitted_encoders:
        model_input[column] = encoder.transform(model_input[column])
    return st_x.transform(model_input)
def credit_run(person_age, person_income, person_home_ownership,
               person_emp_length, loan_intent, loan_grade, loan_amnt,
               loan_int_rate, cb_person_default_on_file, cb_person_cred_hist_length):
    """Predict the default-risk label for a single applicant.

    Args:
        person_age: Applicant age.
        person_income: Applicant income per month; it is converted to a
            yearly figure before being given to the model.
        person_home_ownership: Home ownership category string.
        person_emp_length: Employment length.
        loan_intent: Loan intent category string.
        loan_grade: Loan grade category string.
        loan_amnt: Requested loan amount.
        loan_int_rate: Loan interest rate.
        cb_person_default_on_file: Default-on-file category string.
        cb_person_cred_hist_length: Credit history length.

    Returns:
        ``"High risk of defaulting"`` when the model predicts class 1,
        otherwise ``"Low risk of defaulting"``.

    Raises:
        ValueError: If ``person_income`` is missing, zero or negative, since
            the loan-to-income ratio cannot be computed.
    """
    # Reject unusable income up front instead of failing later with an
    # opaque ZeroDivisionError / TypeError while building the feature row.
    if person_income is None or person_income <= 0:
        raise ValueError("person_income must be a positive monthly income")

    annual_income = person_income * 12
    features = pd.DataFrame({
        'person_age': [person_age],
        'person_income': [annual_income],
        'person_home_ownership': [person_home_ownership],
        'person_emp_length': [person_emp_length],
        'loan_intent': [loan_intent],
        'loan_grade': [loan_grade],
        'loan_amnt': [loan_amnt],
        'loan_int_rate': [loan_int_rate],
        # The ratio must use the same (annual) income that is fed to the
        # model as person_income; dividing by the monthly figure inflated
        # this feature twelve-fold.
        'loan_percent_income': [loan_amnt / annual_income],
        'cb_person_default_on_file': [cb_person_default_on_file],
        'cb_person_cred_hist_length': [cb_person_cred_hist_length],
    })
    model_input = preprocess(features)
    out = gbm.predict(model_input)
    return "High risk of defaulting" if out[0] == 1 else "Low risk of defaulting"
import gradio as gr
# Gradio UI: collects applicant and loan details, sends them to credit_run()
# and shows the returned risk label.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
with gr.Row():
with gr.Column(scale=1,min_width=600):
gr.Image("Non_Payment_Logo.png")
with gr.Column(scale=1,min_width=600):
# Sliders for applicant age and employment length.
person_age=gr.Slider(label="Customer Age(In Years)", minimum=18, maximum=90, step=1)
Person_Emp_Length=gr.Slider(label="Customer Employement Length(In Years)", minimum=0, maximum=60, step=1)
with gr.Column(scale=2,min_width=600):
with gr.Row():
with gr.Column(scale=1,min_width=500):
# The categorical choices below reach the fitted label encoders through
# credit_run() -> preprocess(); presumably they mirror the categories in
# the training CSV -- verify against the dataset.
Home_Ownership_Status=gr.Radio(['MORTGAGE', 'OTHER','OWN', 'RENT'],label="Home Ownership Status")
with gr.Column(scale=2,min_width=100):
Person_Defaulted_in_History=gr.Radio(['N', 'Y'],label="Missed Payment in History")
with gr.Row():
with gr.Column(scale=3,min_width=300):
Credit_Intent=gr.Dropdown(['DEBTCONSOLIDATION', 'EDUCATION', 'HOMEIMPROVEMENT', 'MEDICAL', 'PERSONAL', 'VENTURE'],label="Intent")
with gr.Column(scale=4,min_width=300):
Type_Of_Credit=gr.Dropdown(['A','B', 'C', 'D', 'E','F', 'G'],label="Type Of Credit")
with gr.Row():
with gr.Column(scale=3,min_width=300):
# Monthly income: credit_run() multiplies this value by 12 for the
# person_income feature.
Person_Income=gr.Number(label="Customer Income(per month)")
with gr.Column(scale=4,min_width=300):
# This value is passed to credit_run() as loan_amnt.
Loan_Amount=gr.Number(label="Premium Amount")
with gr.Row():
with gr.Column(scale=3,min_width=300):
Loan_Interest_Rate=gr.Number(label="Interest Rate")
with gr.Column(scale=4,min_width=300):
Person_Credit_History_Length=gr.Number(label="Customers's Credit History Length")
with gr.Row():
with gr.Column():
# Output widget: its two choices are exactly the strings credit_run() returns.
default= gr.Radio(['Low risk of defaulting', 'High risk of defaulting'],label="Chances Of Defaulting")
btn = gr.Button("PREDICT")
# The inputs list is in the same order as credit_run()'s positional parameters.
btn.click(fn=credit_run, inputs=[person_age,Person_Income,Home_Ownership_Status,Person_Emp_Length,Credit_Intent,Type_Of_Credit,Loan_Amount,Loan_Interest_Rate,Person_Defaulted_in_History,Person_Credit_History_Length], outputs=[default])
# Example rows; each row lists values in the same order as the inputs list
# passed to gr.Examples below (age, monthly income, home ownership,
# employment length, intent, grade, amount, interest rate, default flag,
# credit history length).
examples_data = [
[23, 15000, 'RENT', 2, 'EDUCATION', 'A', 300000, 8.9, 'Y', 6],
[32, 12000, 'RENT', 1, 'MEDICAL', 'B', 50000, 10.65, 'Y', 3],
[42, 30000, "OTHER", 12, 'HOMEIMPROVEMENT', 'C', 800000, 7.9, 'Y', 8],
[38, 20000, "MORTGAGE", 10, 'PERSONAL', 'F', 10000, 15.25, 'N', 5],
[29, 30000, "OWN", 8, 'VENTURE', 'D', 13500, 12.25, 'N', 6]
]
gr.Examples(examples=examples_data, inputs=[person_age, Person_Income, Home_Ownership_Status, Person_Emp_Length, Credit_Intent, Type_Of_Credit, Loan_Amount, Loan_Interest_Rate, Person_Defaulted_in_History, Person_Credit_History_Length], outputs=[default])
# Start the Gradio app.
demo.launch(debug=True)