import streamlit as st
from joblib import load
import pandas as pd
import numpy as np
import pickle
import json

# Load the final pipeline
final_pipeline = load('final_pipeline_deploy_2.joblib')
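# `final_pipeline` is expected to be a scikit-learn ColumnTransformer whose
# named transformers 'pipe_num' (ending in a MinMaxScaler) and 'pipe_cat'
# (ending in a OneHotEncoder) are referenced in run() below.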

df = pd.read_csv('df_subset.csv')

# Keep only the feature columns (drop the target); this subset is used to
# fit the preprocessing pipeline before transforming user input

training_data = df.drop(['TARGET'], axis=1)

# Load the trained model and the saved column-name lists

with open('model_rnd_2.pkl','rb') as file_1:
  rnd_model = pickle.load(file_1)

with open('list_num_cols_2.txt', 'r') as file_2:
  list_num_cols = json.load(file_2)

with open('list_cat_cols_2.txt', 'r') as file_3:
  list_cat_cols = json.load(file_3)

with open('list_sig_cols_2.txt', 'r') as file_4:
  significant_feature_names = json.load(file_4)
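
# Each *_2.txt file above is assumed to contain a plain JSON array of column
# names, e.g. ["NAME_EDUCATION_TYPE", "NAME_CONTRACT_TYPE"] for the
# categorical list (hypothetical contents).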

def run():
  with st.form(key='form_homecredit'):
      NAME_EDUCATION_TYPE = st.selectbox('NAME EDUCATION TYPE', ('Secondary / secondary special', 'Higher education', 'Lower secondary', 'Incomplete higher', 'Academic degree'), index=1)
      NAME_CONTRACT_TYPE = st.selectbox('NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans'))
      REGION_RATING_CLIENT = st.number_input('REGION RATING CLIENT', min_value=1, max_value=3, value=1)
      FLOORSMAX_AVG = st.number_input('FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
      FLOORSMAX_MODE = st.number_input('FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
      FLOORSMAX_MEDI = st.number_input('FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
      
      submitted = st.form_submit_button('Predict')
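      # Widgets inside st.form do not rerun the script on change; their
      # values are delivered together when the Predict button is pressed.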

  data_inf = {
      'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
      'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
      'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
      'FLOORSMAX_AVG': FLOORSMAX_AVG,
      'FLOORSMAX_MODE': FLOORSMAX_MODE,
      'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
  }

  data_inf = pd.DataFrame([data_inf])
  st.dataframe(data_inf)

  if submitted:
    # Refit the preprocessing pipeline on the training features so its scaler
    # and encoder parameters are available for the transform below
    final_pipeline.fit(training_data)

    # Transform data using the pipeline
    data_inf_transformed = final_pipeline.transform(data_inf)

    # Get feature names from the pipelines
    num_feature_names_inf = final_pipeline.named_transformers_['pipe_num'].named_steps['minmaxscaler'].get_feature_names_out(list_num_cols)
    cat_feature_names_inf = final_pipeline.named_transformers_['pipe_cat'].named_steps['onehotencoder'].get_feature_names_out(list_cat_cols)

    # Combine numerical and categorical feature names
    feature_names = np.concatenate((num_feature_names_inf, cat_feature_names_inf), axis=0)

    # Convert the transformed data into a DataFrame
    data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)
    
    # Filter the transformed_df based on the significant feature names
    data_inf_final = data_inf_final[significant_feature_names].copy()

    # Predict with the loaded model and map the output to a 0/1 label
    y_pred_inf = rnd_model.predict(data_inf_final)
    y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
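    # Note: if `rnd_model` is a scikit-learn classifier, predict() already
    # returns 0/1 labels and the threshold above is a no-op. A sketch of a
    # probability-based cut-off, assuming the estimator exposes predict_proba:
    #   y_pred_inf = (rnd_model.predict_proba(data_inf_final)[:, 1] >= 0.5).astype(int)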

    if y_pred_inf[0] == 1:
        st.write('# Late Payment: YES')
    else:
        st.write('# Late Payment: NO')

if __name__ == '__main__':
    run()
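
# Local usage (assuming this file is saved as app.py):
#   streamlit run app.py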