# HOME-CREDIT-ANALYSIS / prediction.py
import streamlit as st
from joblib import load
import pandas as pd
import numpy as np
import pickle
import json
# Load the final pipeline
final_pipeline = load('final_pipeline_deploy_2.joblib')
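# Note (assumption, based on the attribute accesses further down): downstream code reads
# named_transformers_['pipe_num'] / ['pipe_cat'], so this saved object is assumed to be a
# ColumnTransformer whose numeric branch ends in a MinMaxScaler and whose categorical branch
# ends in a OneHotEncoder; its construction is not shown in this script.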
df = pd.read_csv('df_subset.csv')
# Keep only the feature columns (X); the target is not needed at inference time
training_data = df.drop(['TARGET'], axis=1)
# Load the trained model and the saved column lists (stored as JSON)
with open('model_rnd_2.pkl', 'rb') as file_1:
    rnd_model = pickle.load(file_1)

with open('list_num_cols_2.txt', 'r') as file_2:
    list_num_cols = json.load(file_2)

with open('list_cat_cols_2.txt', 'r') as file_3:
    list_cat_cols = json.load(file_3)

with open('list_sig_cols_2.txt', 'r') as file_4:
    significant_feature_names = json.load(file_4)
def run():
    with st.form(key='from_homecredit'):
        NAME_EDUCATION_TYPE = st.selectbox('NAME EDUCATION TYPE', ('Secondary / secondary special', 'Higher education', 'Lower secondary', 'Incomplete higher', 'Academic degree'), index=1)
        NAME_CONTRACT_TYPE = st.selectbox('NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans'))
        REGION_RATING_CLIENT = st.number_input('REGION RATING CLIENT', min_value=1, max_value=3, value=1)
        FLOORSMAX_AVG = st.number_input('FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
        FLOORSMAX_MODE = st.number_input('FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
        FLOORSMAX_MEDI = st.number_input('FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1)

        submitted = st.form_submit_button('Predict')
    # Collect the form inputs into a single-row DataFrame and echo it back to the user
    data_inf = {
        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
        'FLOORSMAX_AVG': FLOORSMAX_AVG,
        'FLOORSMAX_MODE': FLOORSMAX_MODE,
        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)
    if submitted:
        # Fit the pipeline with training data
        final_pipeline.fit(training_data)

        # Transform the inference data using the fitted pipeline
        data_inf_transformed = final_pipeline.transform(data_inf)

        # Get feature names from the fitted transformers
        num_feature_names_inf = final_pipeline.named_transformers_['pipe_num'].named_steps['minmaxscaler'].get_feature_names_out(list_num_cols)
        cat_feature_names_inf = final_pipeline.named_transformers_['pipe_cat'].named_steps['onehotencoder'].get_feature_names_out(list_cat_cols)

        # Combine numerical and categorical feature names
        feature_names = np.concatenate((num_feature_names_inf, cat_feature_names_inf), axis=0)

        # Convert the transformed data into a DataFrame
        data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)

        # Filter the transformed DataFrame down to the significant feature names
        data_inf_final = data_inf_final[significant_feature_names].copy()
        # Predict with the loaded model and binarise the output at 0.5
        y_pred_inf = rnd_model.predict(data_inf_final)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)

        if y_pred_inf[0] == 1:
            st.write('# Late Payment: YES')
        else:
            st.write('# Late Payment: NO')
if __name__ == '__main__':
    run()
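# A minimal way to try the form locally, assuming the .joblib/.pkl/.txt artifacts sit
# next to this script:
#   streamlit run prediction.py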