imamzarkasie committed on
Commit
bc15c43
1 Parent(s): b0386f2

Upload 12 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+df_subset.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/eda.cpython-39.pyc ADDED
Binary file (3.1 kB).
 
__pycache__/prediction.cpython-39.pyc ADDED
Binary file (2.44 kB).
 
app.py ADDED
@@ -0,0 +1,10 @@
+import streamlit as st
+import eda
+import prediction
+
+navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict A Customer'))
+
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()
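Note: app.py only routes between pages, and each page module is expected to expose a run() entry point. A minimal sketch of how the same pattern extends to an extra page (the about module is hypothetical and not part of this commit):

import streamlit as st
import eda
import prediction
# import about  # hypothetical third page module that also exposes run()

pages = {
    'EDA': eda.run,
    'Predict A Customer': prediction.run,
    # 'About': about.run,  # hypothetical
}

choice = st.sidebar.selectbox('Select Page : ', list(pages.keys()))
pages[choice]()  # dispatch to the selected page's run()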
credit.jpeg ADDED
df_subset.csv ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb7af805110a43d12e97100ecc65fd433912ae737303e9deb21d0a9f4d07cbd
+size 99064252
eda.py ADDED
@@ -0,0 +1,130 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+
+st.set_page_config(
+    page_title='Home Credit Analysis - EDA',
+    layout='wide',
+    initial_sidebar_state='expanded'
+)
+
+def run():
+    # Page title
+    st.title('Home Credit Analysis')
+
+    # Sub header
+    st.subheader('EDA for Home Credit Analysis')
+
+    # Header image
+    image = Image.open('credit.jpeg')
+    st.image(image, caption='Home Credit')
+
+    # Description
+    st.write('This page was created by **Imam Zarkasie**')
+    st.write('### Hello!')
+    st.write('#### The competition in credit card services is heating up!')
+    st.write('##### On this page we can explore customer segmentation; beyond that, this website can predict a customer payment risk.')
+
+
+    # Horizontal divider
+    st.markdown('---')
+
+    # Magic syntax (Streamlit renders this bare string as markdown)
+    '''
+    On this page, the author will do a simple exploration.
+    The dataset used is the Home Credit dataset.
+    '''
+
+    # Show the DataFrame
+    df = pd.read_csv('df_subset.csv')
+    st.dataframe(df)
+
+    # Histogram of the target feature
+    # Payment (TARGET) distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='TARGET', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Payment", fontsize=12)
+    ax[0].set_ylabel("# of Payment", fontsize=12)
+    fig.suptitle('Payment Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Payment", fontsize=12)
+    plt.ylabel("# of Payment", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['TARGET'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Payment", fontsize=12)
+    st.pyplot(fig)
+
+    # Contract type distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='NAME_CONTRACT_TYPE', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Contract", fontsize=12)
+    ax[0].set_ylabel("# of Contract", fontsize=12)
+    fig.suptitle('Contract Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Contract", fontsize=12)
+    plt.ylabel("# of Contract", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['NAME_CONTRACT_TYPE'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Contract", fontsize=12)
+    plt.show()
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+    # Gender distribution visualization
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+
+    sns.countplot(x='CODE_GENDER', data=df, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("Gender", fontsize=12)
+    ax[0].set_ylabel("# of Gender", fontsize=12)
+    fig.suptitle('Gender Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0, 300000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("Gender", fontsize=12)
+    plt.ylabel("# of Gender", fontsize=12)
+
+    for p in ax[0].patches:  # annotate each bar with its count
+        ax[0].annotate("%.0f" % (p.get_height()), (p.get_x() + p.get_width() / 2,
+                       p.get_height() + 205), ha='center', va='center', fontsize=11)
+
+    df['CODE_GENDER'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=ax[1])
+    ax[1].set_ylabel("% of Gender", fontsize=12)
+    plt.show()
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+
+    # Scatter plot of annuity vs. credit amount
+    fig, ax = plt.subplots(figsize=(6, 3))
+    sns.scatterplot(x='AMT_ANNUITY', y='AMT_CREDIT', hue='TARGET', data=df, ax=ax)
+
+    # Axis labels and plot title
+    plt.xlabel('AMT_ANNUITY')
+    plt.ylabel('AMT_CREDIT')
+    plt.title('Scatter Plot: AMT_ANNUITY vs AMT_CREDIT')
+
+    # Legend
+    plt.legend(title='Target')
+
+    # Render the plot in Streamlit
+    st.pyplot(fig)
+
+if __name__ == '__main__':
+    run()
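df_subset.csv weighs in at roughly 99 MB (see its LFS pointer below), and eda.py re-reads it with pd.read_csv on every Streamlit rerun. A minimal sketch of caching the load with st.cache_data, offered as a possible optimization rather than something in this commit (assumes a Streamlit version that provides st.cache_data):

import streamlit as st
import pandas as pd

@st.cache_data  # parse the CSV once and reuse the DataFrame across reruns
def load_data(path: str = 'df_subset.csv') -> pd.DataFrame:
    return pd.read_csv(path)

# inside run():
# df = load_data()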
final_pipeline_deploy_2.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40fef3a8aab0f688c1b6a9c847f3ee525d51d88907a75066878ed9d2bd24f511
+size 836
list_cat_cols_2.txt ADDED
@@ -0,0 +1 @@
+["NAME_EDUCATION_TYPE", "NAME_CONTRACT_TYPE"]
list_num_cols_2.txt ADDED
@@ -0,0 +1 @@
+["REGION_RATING_CLIENT", "FLOORSMAX_AVG", "FLOORSMAX_MODE", "FLOORSMAX_MEDI"]
list_sig_cols_2.txt ADDED
@@ -0,0 +1 @@
+["REGION_RATING_CLIENT", "FLOORSMAX_AVG", "FLOORSMAX_MODE", "FLOORSMAX_MEDI", "NAME_EDUCATION_TYPE_Academic degree", "NAME_EDUCATION_TYPE_Higher education", "NAME_EDUCATION_TYPE_Incomplete higher", "NAME_EDUCATION_TYPE_Lower secondary", "NAME_EDUCATION_TYPE_Secondary / secondary special", "NAME_CONTRACT_TYPE_Cash loans", "NAME_CONTRACT_TYPE_Revolving loans"]
model_rnd_2.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d46f4b55e8b402c13baf8487d6dc8383e75954872715aa131186068f3be664a
+size 16302509
prediction.py ADDED
@@ -0,0 +1,85 @@
+import streamlit as st
+from joblib import load
+import pandas as pd
+import numpy as np
+import pickle
+import json
+
+# Load the final pipeline
+final_pipeline = load('final_pipeline_deploy_2.joblib')
+
+df = pd.read_csv('df_subset.csv')
+
+# Features only (drop the target)
+
+training_data = df.drop(['TARGET'], axis=1)
+
+# Load the model and the column lists
+
+with open('model_rnd_2.pkl', 'rb') as file_1:
+    rnd_model = pickle.load(file_1)
+
+with open('list_num_cols_2.txt', 'r') as file_2:
+    list_num_cols = json.load(file_2)
+
+with open('list_cat_cols_2.txt', 'r') as file_3:
+    list_cat_cols = json.load(file_3)
+
+with open('list_sig_cols_2.txt', 'r') as file_4:
+    significant_feature_names = json.load(file_4)
+
+def run():
+    with st.form(key='form_homecredit'):
+        NAME_EDUCATION_TYPE = st.selectbox('NAME EDUCATION TYPE', ('Secondary / secondary special', 'Higher education', 'Lower secondary', 'Incomplete higher', 'Academic degree'), index=1)
+        NAME_CONTRACT_TYPE = st.selectbox('NAME CONTRACT TYPE', ('Cash loans', 'Revolving loans'))
+        REGION_RATING_CLIENT = st.number_input('REGION RATING CLIENT', min_value=1, max_value=3, value=1)
+        FLOORSMAX_AVG = st.number_input('FLOORSMAX AVG', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+        FLOORSMAX_MODE = st.number_input('FLOORSMAX MODE', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+        FLOORSMAX_MEDI = st.number_input('FLOORSMAX MEDI', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+
+        submitted = st.form_submit_button('Predict')
+
+    data_inf = {
+        'NAME_EDUCATION_TYPE': NAME_EDUCATION_TYPE,
+        'NAME_CONTRACT_TYPE': NAME_CONTRACT_TYPE,
+        'REGION_RATING_CLIENT': REGION_RATING_CLIENT,
+        'FLOORSMAX_AVG': FLOORSMAX_AVG,
+        'FLOORSMAX_MODE': FLOORSMAX_MODE,
+        'FLOORSMAX_MEDI': FLOORSMAX_MEDI,
+
+    }
+
+    data_inf = pd.DataFrame([data_inf])
+    st.dataframe(data_inf)
+
+    if submitted:
+        # Fit the pipeline with training data
+        final_pipeline.fit(training_data)
+
+        # Transform the inference data using the pipeline
+        data_inf_transformed = final_pipeline.transform(data_inf)
+
+        # Get feature names from the numeric and categorical sub-pipelines
+        num_feature_names_inf = final_pipeline.named_transformers_['pipe_num'].named_steps['minmaxscaler'].get_feature_names_out(list_num_cols)
+        cat_feature_names_inf = final_pipeline.named_transformers_['pipe_cat'].named_steps['onehotencoder'].get_feature_names_out(list_cat_cols)
+
+        # Combine numerical and categorical feature names
+        feature_names = np.concatenate((num_feature_names_inf, cat_feature_names_inf), axis=0)
+
+        # Convert the transformed data into a DataFrame
+        data_inf_final = pd.DataFrame(data_inf_transformed, columns=feature_names)
+
+        # Keep only the significant features
+        data_inf_final = data_inf_final[significant_feature_names].copy()
+
+        # Predict with the loaded model
+        y_pred_inf = rnd_model.predict(data_inf_final)
+        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+
+        if y_pred_inf[0] == 1:
+            st.write('# Late Payment: YES')
+        else:
+            st.write('# Late Payment: NO')
+
+if __name__ == '__main__':
+    run()
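prediction.py fits final_pipeline on the full df_subset.csv inside the submit handler, so every prediction pays for reading the ~99 MB CSV and refitting the transformers. A sketch of one way to do that work once per server process with st.cache_resource; this is a possible refactor under the stated assumptions, not part of the commit:

import streamlit as st
import pandas as pd
from joblib import load

@st.cache_resource  # fit once per process, reuse across form submissions
def get_fitted_pipeline():
    pipeline = load('final_pipeline_deploy_2.joblib')
    training_data = pd.read_csv('df_subset.csv').drop(['TARGET'], axis=1)
    pipeline.fit(training_data)
    return pipeline

# inside run(), after the form is submitted:
# data_inf_transformed = get_fitted_pipeline().transform(data_inf)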