# Spaces: Sleeping
import json
import pickle

import numpy as np
import pandas as pd
import streamlit as st
from joblib import load
# ---------------------------------------------------------------------------
# Artifacts loaded at import time and shared with `run()`.
# All paths are relative to the current working directory.
# ---------------------------------------------------------------------------

# Preprocessing pipeline persisted with joblib.
final_pipeline = load('final_pipeline_deploy_2.joblib')

# Reference data; every column except `TARGET` is what `run()` passes to
# `final_pipeline.fit`.
df = pd.read_csv('df_subset.csv')
training_data = df.drop(['TARGET'], axis=1)

# Trained estimator used for prediction.
# SECURITY: `pickle.load` can execute arbitrary code while deserialising;
# only ship a model file that comes from a trusted source.
with open('model_rnd_2.pkl', 'rb') as file_1:
    rnd_model = pickle.load(file_1)

# Column-name lists stored as JSON text. The encoding is pinned to UTF-8 so
# the result does not depend on the platform's default locale encoding.
with open('list_num_cols_2.txt', 'r', encoding='utf-8') as file_2:
    list_num_cols = json.load(file_2)

with open('list_cat_cols_2.txt', 'r', encoding='utf-8') as file_3:
    list_cat_cols = json.load(file_3)

with open('list_sig_cols_2.txt', 'r', encoding='utf-8') as file_4:
    significant_feature_names = json.load(file_4)
def run():
    """Render the input form and, on submit, show a late-payment prediction.

    The form collects six features (education type, contract type, region
    rating and the three FLOORSMAX values). The collected values are shown as
    a one-row table on every run. Once the ``Predict`` button is pressed, the
    row is transformed by ``final_pipeline``, reduced to the columns listed in
    ``significant_feature_names``, and passed to ``rnd_model.predict``. A
    prediction of at least 0.5 is reported as ``Late Payment: YES``, anything
    lower as ``Late Payment: NO``.

    Returns:
        None. All output is written to the Streamlit page.
    """
    with st.form(key='from_homecredit'):
        name_education_type = st.selectbox(
            'NAME EDUCATION TYPE',
            (
                'Secondary / secondary special',
                'Higher education',
                'Lower secondary',
                'Incomplete higher',
                'Academic degree',
            ),
            index=1,
        )
        name_contract_type = st.selectbox(
            'NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans')
        )
        region_rating_client = st.number_input(
            'REGION RATING CLIENT', min_value=1, max_value=3, value=1
        )
        floorsmax_avg = st.number_input(
            'FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        floorsmax_mode = st.number_input(
            'FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        floorsmax_medi = st.number_input(
            'FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        submitted = st.form_submit_button('Predict')

    # One-row frame whose column names are the keys the pipeline is fed with.
    data_inf = pd.DataFrame([{
        'NAME_EDUCATION_TYPE': name_education_type,
        'NAME_CONTRACT_TYPE': name_contract_type,
        'REGION_RATING_CLIENT': region_rating_client,
        'FLOORSMAX_AVG': floorsmax_avg,
        'FLOORSMAX_MODE': floorsmax_mode,
        'FLOORSMAX_MEDI': floorsmax_medi,
    }])
    st.dataframe(data_inf)

    if not submitted:
        return

    # NOTE(review): the pipeline is re-fitted on `training_data` at every
    # submission. If the loaded artifact is already fitted this is redundant
    # and may replace the state the model was trained against -- confirm
    # before removing or caching this step.
    final_pipeline.fit(training_data)
    data_inf_transformed = final_pipeline.transform(data_inf)

    # Rebuild column labels for the transformed array: numeric names first,
    # then the one-hot encoded categorical names.
    transformers = final_pipeline.named_transformers_
    num_feature_names_inf = (
        transformers['pipe_num']
        .named_steps['minmaxscaler']
        .get_feature_names_out(list_num_cols)
    )
    cat_feature_names_inf = (
        transformers['pipe_cat']
        .named_steps['onehotencoder']
        .get_feature_names_out(list_cat_cols)
    )
    feature_names = np.concatenate(
        (num_feature_names_inf, cat_feature_names_inf), axis=0
    )

    # Keep only the columns listed in `significant_feature_names`.
    data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)
    data_inf_final = data_inf_final[significant_feature_names].copy()

    # Predict with the loaded model. Exactly one row was submitted, so the
    # output is reduced to a single scalar before thresholding instead of
    # relying on the truthiness of a NumPy array.
    y_pred_inf = rnd_model.predict(data_inf_final)
    is_late = np.ravel(y_pred_inf)[0] >= 0.5

    if is_late:
        st.write('# Late Payment: YES')
    else:
        st.write('# Late Payment: NO')
# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    run()