# HOME-CREDIT-ANALYSIS / prediction.py
import streamlit as st
from joblib import load
import pandas as pd
import numpy as np
import pickle
import json
# Load the final pipeline
final_pipeline = load('final_pipeline_deploy_2.joblib')
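# Note (assumption, based on the attribute accesses further down): downstream code reads
# named_transformers_['pipe_num'] / ['pipe_cat'], so this saved object is assumed to be a
# ColumnTransformer whose numeric branch ends in a MinMaxScaler and whose categorical branch
# ends in a OneHotEncoder; its construction is not shown in this script.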
df = pd.read_csv('df_subset.csv')
# Keep only the feature columns (X); the target is not needed at inference time
training_data = df.drop(['TARGET'], axis=1)
# Load the trained model and the saved column lists (stored as JSON)
with open('model_rnd_2.pkl', 'rb') as file_1:
    rnd_model = pickle.load(file_1)

with open('list_num_cols_2.txt', 'r') as file_2:
    list_num_cols = json.load(file_2)

with open('list_cat_cols_2.txt', 'r') as file_3:
    list_cat_cols = json.load(file_3)

with open('list_sig_cols_2.txt', 'r') as file_4:
    significant_feature_names = json.load(file_4)
def run():
    with st.form(key='from_homecredit'):
        NAME_EDUCATION_TYPE = st.selectbox('NAME EDUCATION TYPE', ('Secondary / secondary special', 'Higher education', 'Lower secondary', 'Incomplete higher', 'Academic degree'), index=1)
        NAME_CONTRACT_TYPE = st.selectbox('NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans'))
        REGION_RATING_CLIENT = st.number_input('REGION RATING CLIENT', min_value=1, max_value=3, value=1)
        FLOORSMAX_AVG = st.number_input('FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
        FLOORSMAX_MODE = st.number_input('FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
        FLOORSMAX_MEDI = st.number_input('FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1)

        submitted = st.form_submit_button('Predict')
    # Collect the form inputs into a single-row DataFrame and echo it back to the user
    data_inf = {
        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
        'FLOORSMAX_AVG': FLOORSMAX_AVG,
        'FLOORSMAX_MODE': FLOORSMAX_MODE,
        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)
    if submitted:
        # Fit the pipeline with training data
        final_pipeline.fit(training_data)

        # Transform the inference data using the fitted pipeline
        data_inf_transformed = final_pipeline.transform(data_inf)

        # Get feature names from the fitted transformers
        num_feature_names_inf = final_pipeline.named_transformers_['pipe_num'].named_steps['minmaxscaler'].get_feature_names_out(list_num_cols)
        cat_feature_names_inf = final_pipeline.named_transformers_['pipe_cat'].named_steps['onehotencoder'].get_feature_names_out(list_cat_cols)

        # Combine numerical and categorical feature names
        feature_names = np.concatenate((num_feature_names_inf, cat_feature_names_inf), axis=0)

        # Convert the transformed data into a DataFrame
        data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)

        # Filter the transformed DataFrame down to the significant feature names
        data_inf_final = data_inf_final[significant_feature_names].copy()
        # Predict with the loaded model and binarise the output at 0.5
        y_pred_inf = rnd_model.predict(data_inf_final)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)

        if y_pred_inf[0] == 1:
            st.write('# Late Payment: YES')
        else:
            st.write('# Late Payment: NO')
if __name__ == '__main__':
    run()
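# A minimal way to try the form locally, assuming the .joblib/.pkl/.txt artifacts sit
# next to this script:
#   streamlit run prediction.py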