Spaces:
Sleeping
Sleeping
File size: 3,081 Bytes
bc15c43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import streamlit as st
from joblib import load
import pandas as pd
import numpy as np
import pickle
import json
# Artefacts needed for inference are read once, at import time.
# NOTE: joblib and pickle files execute arbitrary code while loading, so
# these files must come from a trusted source.

# Preprocessing pipeline used by run() to transform the form input.
final_pipeline = load('final_pipeline_deploy_2.joblib')

# Reference data set; every column except `TARGET` is kept as the feature
# frame that run() passes to the pipeline's fit().
df = pd.read_csv('df_subset.csv')
training_data = df.drop(columns=['TARGET'])

# Model whose predict() produces the late-payment prediction.
with open('model_rnd_2.pkl', 'rb') as file_1:
    rnd_model = pickle.load(file_1)

# Column-name lists stored as JSON documents inside .txt files:
# numerical columns, categorical columns, and the subset of transformed
# feature names that is fed to the model.
with open('list_num_cols_2.txt') as file_2:
    list_num_cols = json.loads(file_2.read())

with open('list_cat_cols_2.txt') as file_3:
    list_cat_cols = json.loads(file_3.read())

with open('list_sig_cols_2.txt') as file_4:
    significant_feature_names = json.loads(file_4.read())
def _predict_late_payment(data_inf):
    """Return True when the model predicts a late payment for *data_inf*.

    Args:
        data_inf: Single-row DataFrame holding the raw values entered in
            the form (one column per input widget).

    Returns:
        bool: True if the model's first prediction is >= 0.5, else False.
    """
    # NOTE(review): the pipeline is re-fitted on `training_data` for every
    # submission. This is kept to preserve existing behaviour; if the saved
    # pipeline is already fitted, the refit is redundant and may change the
    # scaling/encoding relative to what the model was trained on -- confirm.
    final_pipeline.fit(training_data)
    transformed = final_pipeline.transform(data_inf)

    # Recover the output column names from the fitted sub-pipelines.
    transformers = final_pipeline.named_transformers_
    num_feature_names = (
        transformers['pipe_num']
        .named_steps['minmaxscaler']
        .get_feature_names_out(list_num_cols)
    )
    cat_feature_names = (
        transformers['pipe_cat']
        .named_steps['onehotencoder']
        .get_feature_names_out(list_cat_cols)
    )
    # Assumes the pipeline emits the numerical columns first, followed by
    # the one-hot encoded columns -- TODO confirm against the pipeline.
    feature_names = np.concatenate(
        (num_feature_names, cat_feature_names), axis=0
    )

    # Label the transformed values, then keep only the features the model
    # expects.
    features = pd.DataFrame(transformed, columns=feature_names)
    features = features[significant_feature_names].copy()

    # Take the single prediction as a scalar instead of relying on the
    # truth value of a NumPy array, and apply the 0.5 cut-off.
    predictions = np.asarray(rnd_model.predict(features)).ravel()
    return bool(predictions[0] >= 0.5)


def run():
    """Render the input form and show the late-payment prediction.

    The entered values are always echoed back as a one-row table. The
    prediction is computed and displayed only after the 'Predict' button
    has been pressed.
    """
    with st.form(key='from_homecredit'):
        NAME_EDUCATION_TYPE = st.selectbox(
            'NAME EDUCATION TYPE',
            (
                'Secondary / secondary special',
                'Higher education',
                'Lower secondary',
                'Incomplete higher',
                'Academic degree',
            ),
            index=1,
        )
        NAME_CONTRACT_TYPE = st.selectbox(
            'NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans')
        )
        REGION_RATING_CLIENT = st.number_input(
            'REGION RATING CLIENT', min_value=1, max_value=3, value=1
        )
        FLOORSMAX_AVG = st.number_input(
            'FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        FLOORSMAX_MODE = st.number_input(
            'FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        FLOORSMAX_MEDI = st.number_input(
            'FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1
        )
        submitted = st.form_submit_button('Predict')

    # One-row frame whose column names match the raw feature names.
    data_inf = pd.DataFrame([{
        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
        'FLOORSMAX_AVG': FLOORSMAX_AVG,
        'FLOORSMAX_MODE': FLOORSMAX_MODE,
        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
    }])
    st.dataframe(data_inf)

    if not submitted:
        return

    if _predict_late_payment(data_inf):
        st.write('# Late Payment: YES')
    else:
        st.write('# Late Payment: NO')
# Start the app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    run()
|