"""Streamlit page that collects six applicant features and predicts late payment.

At import time this module loads a preprocessing pipeline, a training-data
subset, a pickled model and three JSON column lists from the working
directory. ``run()`` renders the input form and, on submit, writes the
prediction to the page.
"""
import json
import pickle

import numpy as np
import pandas as pd
import streamlit as st
from joblib import load

# NOTE(security): joblib/pickle deserialisation can execute arbitrary code.
# Only load artifacts that were produced by a trusted build of this project.

# Load the final pipeline
final_pipeline = load('final_pipeline_deploy_2.joblib')

df = pd.read_csv('df_subset.csv')

# Splitting `X`
training_data = df.drop(['TARGET'], axis=1)

# Load the Models
with open('model_rnd_2.pkl', 'rb') as file_1:
    rnd_model = pickle.load(file_1)

# Column-name lists are stored as JSON arrays.
with open('list_num_cols_2.txt', 'r', encoding='utf-8') as file_2:
    list_num_cols = json.load(file_2)

with open('list_cat_cols_2.txt', 'r', encoding='utf-8') as file_3:
    list_cat_cols = json.load(file_3)

with open('list_sig_cols_2.txt', 'r', encoding='utf-8') as file_4:
    significant_feature_names = json.load(file_4)

# Tracks whether `final_pipeline` has been fitted in this process, so the
# fit on `training_data` is not repeated on every form submission.
_pipeline_is_fitted = False


def _ensure_pipeline_fitted() -> None:
    """Fit `final_pipeline` on `training_data` the first time it is needed."""
    global _pipeline_is_fitted
    if not _pipeline_is_fitted:
        final_pipeline.fit(training_data)
        _pipeline_is_fitted = True


def _transformed_feature_names() -> np.ndarray:
    """Return numeric then categorical output column names of the pipeline."""
    transformers = final_pipeline.named_transformers_
    num_feature_names = (
        transformers['pipe_num']
        .named_steps['minmaxscaler']
        .get_feature_names_out(list_num_cols)
    )
    cat_feature_names = (
        transformers['pipe_cat']
        .named_steps['onehotencoder']
        .get_feature_names_out(list_cat_cols)
    )
    # NOTE(review): assumes the pipeline emits numeric columns before
    # categorical ones; confirm against how the pipeline was built.
    return np.concatenate((num_feature_names, cat_feature_names), axis=0)


def _predict_late_payment(data_inf: pd.DataFrame) -> bool:
    """Return True when the model's output for the single input row is >= 0.5.

    Args:
        data_inf: One-row frame holding the raw form values.
    """
    _ensure_pipeline_fitted()

    # Transform data using the pipeline
    data_inf_transformed = final_pipeline.transform(data_inf)

    # Convert the transformed data into a DataFrame
    data_inf_final = pd.DataFrame(
        data_inf_transformed, columns=_transformed_feature_names()
    )

    # Keep only the columns listed in `significant_feature_names`
    data_inf_final = data_inf_final[significant_feature_names].copy()

    # Predict with the loaded model (`rnd_model`)
    y_pred_inf = rnd_model.predict(data_inf_final)

    # Exactly one row was submitted; read that prediction explicitly rather
    # than relying on the truth value of a one-element array.
    return bool(np.ravel(y_pred_inf)[0] >= 0.5)


def run() -> None:
    """Render the input form, echo the entered row, and show the prediction.

    The prediction is only computed and written after the form's
    'Predict' button has been pressed.
    """
    with st.form(key='from_homecredit'):
        NAME_EDUCATION_TYPE = st.selectbox(
            'NAME EDUCATION TYPE',
            (
                'Secondary / secondary special',
                'Higher education',
                'Lower secondary',
                'Incomplete higher',
                'Academic degree',
            ),
            index=1,
        )
        NAME_CONTRACT_TYPE = st.selectbox(
            'NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans')
        )
        REGION_RATING_CLIENT = st.number_input(
            'REGION RATING CLIENT', min_value=1, max_value=3, value=1
        )
        FLOORSMAX_AVG = st.number_input(
            'FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        FLOORSMAX_MODE = st.number_input(
            'FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        FLOORSMAX_MEDI = st.number_input(
            'FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )

        submitted = st.form_submit_button('Predict')

    data_inf = {
        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
        'FLOORSMAX_AVG': FLOORSMAX_AVG,
        'FLOORSMAX_MODE': FLOORSMAX_MODE,
        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
    }
    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        if _predict_late_payment(data_inf):
            st.write('# Late Payment: YES')
        else:
            st.write('# Late Payment: NO')


if __name__ == '__main__':
    run()