imamzarkasie committed on
Commit
bc15c43
1 Parent(s): b0386f2

Upload 12 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+df_subset.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/eda.cpython-39.pyc ADDED
Binary file (3.1 kB).
 
__pycache__/prediction.cpython-39.pyc ADDED
Binary file (2.44 kB).
 
app.py ADDED
@@ -0,0 +1,10 @@
+import streamlit as st
+import eda
+import prediction
+
+navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict A Customer'))
+
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()
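Note: app.py only routes between pages, and each page module is expected to expose a run() entry point. A minimal sketch of how the same pattern extends to an extra page (the about module is hypothetical and not part of this commit):

import streamlit as st
import eda
import prediction
# import about  # hypothetical third page module that also exposes run()

pages = {
    'EDA': eda.run,
    'Predict A Customer': prediction.run,
    # 'About': about.run,  # hypothetical
}

choice = st.sidebar.selectbox('Select Page : ', list(pages.keys()))
pages[choice]()  # dispatch to the selected page's run()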
credit.jpeg ADDED
df_subset.csv ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb7af805110a43d12e97100ecc65fd433912ae737303e9deb21d0a9f4d07cbd
+size 99064252
eda.py ADDED
@@ -0,0 +1,130 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+
+st.set_page_config(
+    page_title='Home Credit Analysis - EDA',
+    layout='wide',
+    initial_sidebar_state='expanded'
+)
+
+def run():
+    # Page title
+    st.title('Home Credit Analysis')
+
+    # Sub header
+    st.subheader('EDA for Home Credit Analysis')
+
+    # Header image
+    image = Image.open('credit.jpeg')
+    st.image(image, caption='Home Credit')
+
+    # Description
+    st.write('This page was created by **Imam Zarkasie**')
+    st.write('### Hello!')
+    st.write('#### The competition in credit card services is heating up!')
+    st.write('##### On this page we can explore customer segmentation; beyond that, this website can predict a customer payment risk.')
+
+
+    # Horizontal divider
+    st.markdown('---')
+
+    # Magic syntax (Streamlit renders this bare string as markdown)
+    '''
+    On this page, the author will do a simple exploration.
+    The dataset used is the Home Credit dataset.
+    '''
+
+    # Show the DataFrame
+    df = pd.read_csv('df_subset.csv')
+    st.dataframe(df)
+
+    # Histogram of the target feature
+    # Payment (TARGET) distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='TARGET', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Payment", fontsize=12)
+    ax[0].set_ylabel("# of Payment", fontsize=12)
+    fig.suptitle('Payment Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Payment", fontsize=12)
+    plt.ylabel("# of Payment", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['TARGET'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Payment", fontsize=12)
+    st.pyplot(fig)
+
+    # Contract type distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='NAME_CONTRACT_TYPE', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Contract", fontsize=12)
+    ax[0].set_ylabel("# of Contract", fontsize=12)
+    fig.suptitle('Contract Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Contract", fontsize=12)
+    plt.ylabel("# of Contract", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['NAME_CONTRACT_TYPE'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Contract", fontsize=12)
+    plt.show()
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+    # Gender distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='CODE_GENDER', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Gender", fontsize=12)
+    ax[0].set_ylabel("# of Gender", fontsize=12)
+    fig.suptitle('Gender Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Gender", fontsize=12)
+    plt.ylabel("# of Gender", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['CODE_GENDER'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Gender", fontsize=12)
+    plt.show()
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+
+    # Scatter plot of annuity vs. credit amount
+    fig, ax = plt.subplots(figsize=(6, 3))
+    sns.scatterplot(x='AMT_ANNUITY', y='AMT_CREDIT', hue='TARGET', data=df, ax=ax)
+
+    # Axis labels and plot title
+    plt.xlabel('AMT_ANNUITY')
+    plt.ylabel('AMT_CREDIT')
+    plt.title('Scatter Plot: AMT_ANNUITY vs AMT_CREDIT')
+
+    # Legend
+    plt.legend(title='Target')
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+if __name__ == '__main__':
+    run()
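df_subset.csv weighs in at roughly 99 MB (see its LFS pointer below), and eda.py re-reads it with pd.read_csv on every Streamlit rerun. A minimal sketch of caching the load with st.cache_data, offered as a possible optimization rather than something in this commit (assumes a Streamlit version that provides st.cache_data):

import streamlit as st
import pandas as pd

@st.cache_data  # parse the CSV once and reuse the DataFrame across reruns
def load_data(path: str = 'df_subset.csv') -> pd.DataFrame:
    return pd.read_csv(path)

# inside run():
# df = load_data()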
final_pipeline_deploy_2.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40fef3a8aab0f688c1b6a9c847f3ee525d51d88907a75066878ed9d2bd24f511
+size 836
list_cat_cols_2.txt ADDED
@@ -0,0 +1 @@
+["NAME_EDUCATION_TYPE", "NAME_CONTRACT_TYPE"]
list_num_cols_2.txt ADDED
@@ -0,0 +1 @@
+["REGION_RATING_CLIENT", "FLOORSMAX_AVG", "FLOORSMAX_MODE", "FLOORSMAX_MEDI"]
list_sig_cols_2.txt ADDED
@@ -0,0 +1 @@
+["REGION_RATING_CLIENT", "FLOORSMAX_AVG", "FLOORSMAX_MODE", "FLOORSMAX_MEDI", "NAME_EDUCATION_TYPE_Academic degree", "NAME_EDUCATION_TYPE_Higher education", "NAME_EDUCATION_TYPE_Incomplete higher", "NAME_EDUCATION_TYPE_Lower secondary", "NAME_EDUCATION_TYPE_Secondary / secondary special", "NAME_CONTRACT_TYPE_Cash loans", "NAME_CONTRACT_TYPE_Revolving loans"]
model_rnd_2.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d46f4b55e8b402c13baf8487d6dc8383e75954872715aa131186068f3be664a
+size 16302509
prediction.py ADDED
@@ -0,0 +1,85 @@
+import streamlit as st
+from joblib import load
+import pandas as pd
+import numpy as np
+import pickle
+import json
+
+# Load the final pipeline
+final_pipeline = load('final_pipeline_deploy_2.joblib')
+
+df = pd.read_csv('df_subset.csv')
+
+# Features only (drop the target)
+
+training_data = df.drop(['TARGET'], axis=1)
+
+# Load the model and the column lists
+
+with open('model_rnd_2.pkl', 'rb') as file_1:
+    rnd_model = pickle.load(file_1)
+
+with open('list_num_cols_2.txt', 'r') as file_2:
+    list_num_cols = json.load(file_2)
+
+with open('list_cat_cols_2.txt', 'r') as file_3:
+    list_cat_cols = json.load(file_3)
+
+with open('list_sig_cols_2.txt', 'r') as file_4:
+    significant_feature_names = json.load(file_4)
+
+def run():
+    with st.form(key='form_homecredit'):
+        NAME_EDUCATION_TYPE = st.selectbox('NAME EDUCATION TYPE', ('Secondary / secondary special', 'Higher education', 'Lower secondary', 'Incomplete higher', 'Academic degree'), index=1)
+        NAME_CONTRACT_TYPE = st.selectbox('NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans'))
+        REGION_RATING_CLIENT = st.number_input('REGION RATING CLIENT', min_value=1, max_value=3, value=1)
+        FLOORSMAX_AVG = st.number_input('FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+        FLOORSMAX_MODE = st.number_input('FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+        FLOORSMAX_MEDI = st.number_input('FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+
+        submitted = st.form_submit_button('Predict')
+
+    data_inf = {
+        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
+        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
+        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
+        'FLOORSMAX_AVG': FLOORSMAX_AVG,
+        'FLOORSMAX_MODE': FLOORSMAX_MODE,
+        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
+
+    }
+
+    data_inf = pd.DataFrame([data_inf])
+    st.dataframe(data_inf)
+
+    if submitted:
+        # Fit the pipeline with training data
+        final_pipeline.fit(training_data)
+
+        # Transform the inference data using the pipeline
+        data_inf_transformed = final_pipeline.transform(data_inf)
+
+        # Get feature names from the numeric and categorical sub-pipelines
+        num_feature_names_inf = final_pipeline.named_transformers_['pipe_num'].named_steps['minmaxscaler'].get_feature_names_out(list_num_cols)
+        cat_feature_names_inf = final_pipeline.named_transformers_['pipe_cat'].named_steps['onehotencoder'].get_feature_names_out(list_cat_cols)
+
+        # Combine numerical and categorical feature names
+        feature_names = np.concatenate((num_feature_names_inf, cat_feature_names_inf), axis=0)
+
+        # Convert the transformed data into a DataFrame
+        data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)
+
+        # Keep only the significant features
+        data_inf_final = data_inf_final[significant_feature_names].copy()
+
+        # Predict with the loaded model
+        y_pred_inf = rnd_model.predict(data_inf_final)
+        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+
+        if y_pred_inf[0] == 1:
+            st.write('# Late Payment: YES')
+        else:
+            st.write('# Late Payment: NO')
+
+if __name__ == '__main__':
+    run()
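prediction.py fits final_pipeline on the full df_subset.csv inside the submit handler, so every prediction pays for reading the ~99 MB CSV and refitting the transformers. A sketch of one way to do that work once per server process with st.cache_resource; this is a possible refactor under the stated assumptions, not part of the commit:

import streamlit as st
import pandas as pd
from joblib import load

@st.cache_resource  # fit once per process, reuse across form submissions
def get_fitted_pipeline():
    pipeline = load('final_pipeline_deploy_2.joblib')
    training_data = pd.read_csv('df_subset.csv').drop(['TARGET'], axis=1)
    pipeline.fit(training_data)
    return pipeline

# inside run(), after the form is submitted:
# data_inf_transformed = get_fitted_pipeline().transform(data_inf)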