|
|
|
import pandas as pd |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.feature_selection import SelectKBest, f_classif |
|
|
|
def load_data(data_path):
    """Read the CSV at ``data_path`` and return it without duplicate or incomplete rows.

    Exact duplicate rows are removed first; afterwards every row that still
    contains at least one missing value is dropped.  The original row labels
    of the surviving rows are kept.
    """
    raw_frame = pd.read_csv(data_path)
    deduplicated = raw_frame.drop_duplicates()
    return deduplicated.dropna(axis=0)
|
|
|
|
|
def remove_outliers_iqr(df, col):
    """Return the rows of ``df`` whose ``col`` value lies inside the IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both inclusive),
    where Q1 and Q3 are the 25th and 75th percentiles of ``col``.  The input
    frame itself is not modified; a filtered frame is returned.
    """
    values = df[col]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)

    # One inclusive range test replaces the two successive filters.
    inside_fences = values.between(first_quartile - whisker, third_quartile + whisker)
    return df[inside_fences]
|
|
|
# Names of the string-valued columns that are label-encoded before modelling.
categorical_features = [
    "person_home_ownership",
    "loan_intent",
    "loan_grade",
    "cb_person_default_on_file",
]

# One independent encoder per categorical column (home ownership, loan intent,
# loan grade, default-on-file).  They live at module level because they are
# fitted in label_encode_categorical_columns() and reused in preprocess().
le_pho, le_li, le_lg, le_cpd = (LabelEncoder() for _ in range(4))
|
def label_encode_categorical_columns(df):
    """Integer-encode the four categorical columns of ``df`` in place.

    Every column is fitted with its own module-level ``LabelEncoder`` so the
    learnt mapping stays available after this call.  The frame passed in is
    modified and is also returned for convenience.
    """
    encoder_for_column = {
        "person_home_ownership": le_pho,
        "loan_intent": le_li,
        "loan_grade": le_lg,
        "cb_person_default_on_file": le_cpd,
    }
    for column, encoder in encoder_for_column.items():
        df[column] = encoder.fit_transform(df[column])
    return df
|
|
|
|
|
def preprocess_data(data):
    """Drop IQR outliers from the numeric columns, then label-encode the categoricals.

    Outliers are removed one column after another, so the quartiles of each
    column are computed on the rows that survived the previous columns.
    """
    numeric_columns = (
        "person_age",
        "person_income",
        "person_emp_length",
        "cb_person_cred_hist_length",
        "loan_amnt",
    )
    trimmed = data
    for column in numeric_columns:
        trimmed = remove_outliers_iqr(trimmed, column)
    return label_encode_categorical_columns(trimmed)
|
|
|
|
|
def data_balance(new_df):
    """Balance the two ``loan_status`` classes by random oversampling.

    The smaller class is resampled with replacement until it has as many rows
    as the larger one; the larger class is kept as is.

    Args:
        new_df: DataFrame with a ``loan_status`` column holding 0 / 1 labels.

    Returns:
        A new DataFrame with the class-0 rows followed by the class-1 rows.
        Rows whose ``loan_status`` is neither 0 nor 1 are not included.  If
        one of the classes is absent there is nothing to resample from and
        the present rows are returned unchanged.
    """
    df_class_0 = new_df[new_df['loan_status'] == 0]
    df_class_1 = new_df[new_df['loan_status'] == 1]

    # Count each class explicitly: value_counts() orders by frequency, not by
    # label, so unpacking it positionally mislabels the counts whenever
    # class 1 is the majority (and fails outright when a class is missing).
    count_class_0 = len(df_class_0)
    count_class_1 = len(df_class_1)

    if count_class_0 and count_class_1:
        if count_class_1 <= count_class_0:
            df_class_1 = df_class_1.sample(count_class_0, replace=True)
        else:
            df_class_0 = df_class_0.sample(count_class_1, replace=True)

    return pd.concat([df_class_0, df_class_1], axis=0)
|
|
|
|
|
def credit_risk_dataset_generator(data_path="credit_risk_dataset.csv"):
    """Build the model-ready credit risk dataset.

    The CSV is loaded and cleaned with ``load_data``, passed through
    ``preprocess_data`` (outlier removal and label encoding) and finally
    class-balanced with ``data_balance``.

    Args:
        data_path: Location of the raw CSV file.  Defaults to
            ``"credit_risk_dataset.csv"`` in the current working directory,
            which is the path that used to be hard-coded.

    Returns:
        The cleaned, encoded and balanced DataFrame.
    """
    data = load_data(data_path)
    preprocessed_data = preprocess_data(data)
    return data_balance(preprocessed_data)
|
|
|
import pandas as pd |
|
|
|
# Cleaned, encoded and class-balanced data used to train the model.
df = credit_risk_dataset_generator()

# Split the frame into the target column and the predictor columns.
y = df["loan_status"]
X = df.drop(columns="loan_status")
|
|
|
from sklearn import ensemble |
|
from sklearn.model_selection import train_test_split |
|
|
|
# Hold out 20% of the rows; the fixed seed keeps the partition reproducible.
# NOTE(review): the minority class was oversampled with replacement before
# this split, so copies of the same row can land in both partitions - keep
# that in mind if the held-out part is ever used for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)
|
from sklearn.preprocessing import StandardScaler |
|
# Standardise the features: the scaling statistics are learnt on the training
# partition only and then applied to both partitions.
st_x = StandardScaler()
st_x.fit(x_train)
X_train_scaled = st_x.transform(x_train)
X_test_scaled = st_x.transform(x_test)
|
|
|
from sklearn.ensemble import GradientBoostingClassifier |
|
# Gradient-boosted classifier with library defaults, trained on the scaled
# training partition (fit() hands back the fitted estimator itself).
gbm = GradientBoostingClassifier().fit(X_train_scaled, y_train)
|
|
|
import numpy as np |
|
import pandas as pd |
|
|
|
def preprocess(model_input):
    """Turn a raw applicant frame into the scaled feature matrix for the model.

    The four categorical columns are replaced in place by the integer codes
    of the already-fitted module-level encoders, after which the whole frame
    is run through the fitted ``st_x`` scaler.

    Returns:
        The output of ``st_x.transform`` for the encoded frame.
    """
    fitted_encoders = (
        ("person_home_ownership", le_pho),
        ("loan_intent", le_li),
        ("loan_grade", le_lg),
        ("cb_person_default_on_file", le_cpd),
    )
    for column, encoder in fitted_encoders:
        model_input[column] = encoder.transform(model_input[column])

    return st_x.transform(model_input)
|
|
|
def credit_run(person_age, person_income, person_home_ownership,
               person_emp_length, loan_intent, loan_grade, loan_amnt,
               loan_int_rate, cb_person_default_on_file, cb_person_cred_hist_length):
    """Predict the default risk for a single applicant.

    Args:
        person_age: Applicant age.
        person_income: Monthly income; it is multiplied by 12 before being
            handed to the model.
        person_home_ownership: Home ownership category string.
        person_emp_length: Employment length.
        loan_intent: Loan intent category string.
        loan_grade: Loan grade category string.
        loan_amnt: Requested loan amount.
        loan_int_rate: Loan interest rate.
        cb_person_default_on_file: ``'Y'`` / ``'N'`` historical default flag.
        cb_person_cred_hist_length: Credit history length.

    Returns:
        ``"High risk of defaulting"`` when the model predicts class 1,
        otherwise ``"Low risk of defaulting"``.

    Raises:
        ValueError: If ``person_income`` is missing or zero, because the
            loan-to-income ratio cannot be computed.
    """
    if not person_income:
        raise ValueError("person_income must be a non-zero monthly income")

    # The model receives the annualised income, so the loan-to-income ratio
    # has to be computed from that same annual figure; dividing by the
    # monthly value would inflate the ratio twelvefold.
    # NOTE(review): presumably the training CSV stores annual income - confirm.
    annual_income = person_income * 12

    # Column order mirrors the order in which the scaler is fed at fit time.
    applicant = pd.DataFrame({
        'person_age': [person_age],
        'person_income': [annual_income],
        'person_home_ownership': [person_home_ownership],
        'person_emp_length': [person_emp_length],
        'loan_intent': [loan_intent],
        'loan_grade': [loan_grade],
        'loan_amnt': [loan_amnt],
        'loan_int_rate': [loan_int_rate],
        'loan_percent_income': [loan_amnt / annual_income],
        'cb_person_default_on_file': [cb_person_default_on_file],
        'cb_person_cred_hist_length': [cb_person_cred_hist_length],
    })

    out = gbm.predict(preprocess(applicant))
    return "High risk of defaulting" if out[0] == 1 else "Low risk of defaulting"
|
|
|
import gradio as gr |
|
# Gradio front end: collects the applicant fields, calls credit_run() when the
# button is clicked and shows the returned risk label.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened - confirm the Row/Column layout against the
# running app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # Logo image.
        with gr.Column(scale=1,min_width=600):
            gr.Image("Non_Payment_Logo.png")
        # Numeric sliders for age and employment length.
        with gr.Column(scale=1,min_width=600):
            person_age=gr.Slider(label="Customer Age(In Years)", minimum=18, maximum=90, step=1)
            Person_Emp_Length=gr.Slider(label="Customer Employement Length(In Years)", minimum=0, maximum=60, step=1)

        with gr.Column(scale=2,min_width=600):
            # Categorical choices; the option strings are passed unchanged to
            # the label encoders inside preprocess().
            with gr.Row():
                with gr.Column(scale=1,min_width=500):
                    Home_Ownership_Status=gr.Radio(['MORTGAGE', 'OTHER','OWN', 'RENT'],label="Home Ownership Status")
                with gr.Column(scale=2,min_width=100):
                    Person_Defaulted_in_History=gr.Radio(['N', 'Y'],label="Missed Payment in History")

            with gr.Row():
                with gr.Column(scale=3,min_width=300):
                    Credit_Intent=gr.Dropdown(['DEBTCONSOLIDATION', 'EDUCATION', 'HOMEIMPROVEMENT', 'MEDICAL', 'PERSONAL', 'VENTURE'],label="Intent")
                with gr.Column(scale=4,min_width=300):
                    Type_Of_Credit=gr.Dropdown(['A','B', 'C', 'D', 'E','F', 'G'],label="Type Of Credit")
            with gr.Row():
                # Income is entered per month; credit_run() multiplies it by 12.
                with gr.Column(scale=3,min_width=300):
                    Person_Income=gr.Number(label="Customer Income(per month)")
                with gr.Column(scale=4,min_width=300):
                    Loan_Amount=gr.Number(label="Premium Amount")
            with gr.Row():
                with gr.Column(scale=3,min_width=300):
                    Loan_Interest_Rate=gr.Number(label="Interest Rate")
                with gr.Column(scale=4,min_width=300):
                    Person_Credit_History_Length=gr.Number(label="Customers's Credit History Length")
            with gr.Row():
                with gr.Column():
                    # Output component: credit_run() returns one of these two
                    # choice strings.
                    default= gr.Radio(['Low risk of defaulting', 'High risk of defaulting'],label="Chances Of Defaulting")

    btn = gr.Button("PREDICT")
    # The inputs are listed in the positional order of credit_run's parameters.
    btn.click(fn=credit_run, inputs=[person_age,Person_Income,Home_Ownership_Status,Person_Emp_Length,Credit_Intent,Type_Of_Credit,Loan_Amount,Loan_Interest_Rate,Person_Defaulted_in_History,Person_Credit_History_Length], outputs=[default])

    # Sample applicants; each row follows the same order as the inputs list
    # passed to gr.Examples below.
    examples_data = [
        [23, 15000, 'RENT', 2, 'EDUCATION', 'A', 300000, 8.9, 'Y', 6],
        [32, 12000, 'RENT', 1, 'MEDICAL', 'B', 50000, 10.65, 'Y', 3],
        [42, 30000, "OTHER", 12, 'HOMEIMPROVEMENT', 'C', 800000, 7.9, 'Y', 8],
        [38, 20000, "MORTGAGE", 10, 'PERSONAL', 'F', 10000, 15.25, 'N', 5],
        [29, 30000, "OWN", 8, 'VENTURE', 'D', 13500, 12.25, 'N', 6]
    ]

    gr.Examples(examples=examples_data, inputs=[person_age, Person_Income, Home_Ownership_Status, Person_Emp_Length, Credit_Intent, Type_Of_Credit, Loan_Amount, Loan_Interest_Rate, Person_Defaulted_in_History, Person_Credit_History_Length], outputs=[default])

# Start the web server as soon as the script is executed.
demo.launch(debug=True)