initial commit
Browse files- __pycache__/eda.cpython-39.pyc +0 -0
- __pycache__/prediction.cpython-39.pyc +0 -0
- app.py +10 -0
- eda.py +65 -0
- model.pkl +3 -0
- prediction.py +98 -0
- requirements.txt +8 -0
- scaler.pkl +3 -0
__pycache__/eda.cpython-39.pyc
ADDED
Binary file (1.9 kB). View file
|
|
__pycache__/prediction.cpython-39.pyc
ADDED
Binary file (2.67 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar navigation: let the user pick which page to render.
page = st.sidebar.selectbox('Choose page : ', ('EDA', 'Prediction'))

# Any choice other than 'EDA' falls through to the prediction page.
selected_module = eda if page == 'EDA' else prediction
selected_module.run()
|
eda.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
|
8 |
+
@st.cache_data
def _load_data(url):
    """Read the CSV at *url* into a DataFrame, cached across Streamlit reruns."""
    return pd.read_csv(url)


def _render_figure(fig):
    """Show a matplotlib figure on the page, then close it.

    Closing releases the figure from pyplot's global registry; otherwise a
    new figure would be kept alive on every rerun of the script.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the exploratory data analysis page.

    Shows the raw dataset, a count plot of the target column, a histogram
    of ``limit_balance``, a count plot of a user-selected column, and a
    Plotly scatter of education level against the target.
    """
    # Page title
    st.title('Credit Default Predictor - Exploratory Data Analysis')

    # Subheader
    st.subheader('Exploratory Data Analysis of the model Credit Default Predictor')

    # Horizontal rule
    st.markdown('----')

    # Load the dataset (cached, so widget interactions do not re-download it)
    # and show it as a table
    df = _load_data('https://raw.githubusercontent.com/AdeWT/AdeWT-Hacktiv8-things/main/BigQuery_credit_card_default_27_12_2023.csv')
    st.dataframe(df)

    # Bar plot of the target classes
    st.write('### Spread of default and non-default in the dataset')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.countplot(x='default_payment_next_month', data=df, ax=ax)
    _render_figure(fig)

    # Histogram of the credit limit
    st.write('### Distribution plot of limit balance')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df['limit_balance'], bins=30, kde=True, ax=ax)
    _render_figure(fig)
    # TODO: add an explanation of the histogram

    # Count plot of a column chosen by the user
    st.write('### Choose which data to see spread of')
    option = st.selectbox('Choose data : ', ('sex', 'education_level', 'marital_status'))
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.countplot(x=option, data=df, ax=ax)
    _render_figure(fig)

    # Legend for the encoded categorical values
    st.write('#### sex = 1 is male, 2 is female')
    st.write('#### education_level =')
    st.write('#### 1 is graduate school')
    st.write('#### 2 is university')
    st.write('#### 3 is high school')
    st.write('#### 4 is others')
    st.write('#### 5 and 6 are unknown')
    st.write('#### marital_status =')
    st.write('#### 1 is married')
    st.write('#### 2 is single')
    st.write('#### 3 is others')

    # Plotly scatter plot
    st.write('### Plotly Plot - Education Level on Default Payment Next Month')
    fig = px.scatter(
        df,
        x='education_level',
        y='default_payment_next_month',
        hover_data=['limit_balance', 'education_level', 'marital_status', 'age'],
    )
    st.plotly_chart(fig)
    st.write('#### 1 is default while 0 is no default')


if __name__ == '__main__':
    run()
|
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbeffa7344ec24c9af95777df0a3ae533686af36282cc1b6bc45086cfb412f1a
|
3 |
+
size 184229
|
prediction.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Learning Streamlit: this module renders the prediction page
|
2 |
+
|
3 |
+
#import libs
|
4 |
+
import streamlit as st
|
5 |
+
import pandas as pd
|
6 |
+
import pickle
|
7 |
+
|
8 |
+
# Load the fitted scaler and the trained model.
from pathlib import Path

# Resolve the artifacts relative to this file rather than the current working
# directory, so the app also works when Streamlit is launched from elsewhere.
_ARTIFACT_DIR = Path(__file__).resolve().parent

# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only load artifacts that come from a trusted source.
with open(_ARTIFACT_DIR / 'scaler.pkl', 'rb') as file_1:
    scaler = pickle.load(file_1)

with open(_ARTIFACT_DIR / 'model.pkl', 'rb') as file_2:
    model = pickle.load(file_2)
|
14 |
+
|
15 |
+
|
16 |
+
def run():
    """Render the prediction page.

    Collects borrower attributes through a Streamlit form, echoes them back
    as a one-row table, and, once the form is submitted, scales the row with
    the module-level ``scaler`` and shows the output of the module-level
    ``model``.
    """
    # Input form for the borrower's data
    with st.form('Form_CreditDefaultPredictor'):
        # Field: limit balance
        limit_balance = st.number_input('limit_balance',min_value=10000, max_value=1000000)
        # Field: age
        age = st.number_input('age', min_value= 21, max_value = 70, step = 1, help = 'Age of borrower')
        # Field: education level
        education_level = st.slider('education_level', 1, 4, 2)
        st.write('#### - 1 is graduate school')
        st.write('#### - 2 is university')
        st.write('#### - 3 is high school')
        st.write('#### - 4 is others')
        # Field: marital status
        marital_status = st.slider('marital_status', 1, 3, 2)
        st.write('#### - 1 is married')
        st.write('#### - 2 is single')
        st.write('#### - 3 is others')
        # Field: pay_0
        pay_0 = st.slider('pay_0', -2, 9, -1 )
        st.write('### latest month payment status')
        st.write('#### - -2: pay early')
        st.write('#### - -1 = pay on deadline')
        st.write('#### - 0 : pay delayed for 0 month')
        st.write('#### - 1 = payment delayed for one month')
        st.write('#### - 2 = payment delayed for two months')
        st.write('#### ...')
        st.write('#### - 8 = payment delayed for 8 months')
        st.write('#### - 9 = payment delayed for 9 months')
        # Field: pay_2
        pay_2 = st.slider('pay_1', -2, 9, -1, key=2 )
        st.write('#### 1 months before latest month payment status, same scale as above')
        # Field: pay_3
        pay_3 = st.slider('pay_2', -2, 9, -1, key=3 )
        st.write('#### 2 months before latest month payment status, same scale as above')
        # Field: pay_4
        pay_4 = st.slider('pay_3', -2, 9, -1, key=4 )
        st.write('#### 3 months before latest month payment status, same scale as above')
        # Field: pay_5
        pay_5 = st.slider('pay_4', -2, 9, -1, key=5 )
        st.write('#### 4 months before latest month payment status, same scale as above')
        # Field: pay_6
        pay_6 = st.slider('pay_5', -2, 9, -1, key=6 )
        st.write('#### 5 months before latest month payment status, same scale as above')
        # Divider
        st.markdown('---------')
        # Submit button
        submitted = st.form_submit_button('Predict!')

    # Assemble the inputs into a single record for inference.
    # Column names must match the ones the model was trained with, and each
    # value comes from the form variable of the same name.
    record = {
        'limit_balance': limit_balance,
        'education_level': education_level,
        'marital_status': marital_status,
        'age': age,
        'pay_0': pay_0,
        'pay_2': pay_2,
        'pay_3': pay_3,
        'pay_4': pay_4,
        'pay_5': pay_5,
        'pay_6': pay_6,
    }

    # Turn the record into a one-row DataFrame for the scaler/model
    data_inf = pd.DataFrame([record])
    # Also show the user's input
    st.dataframe(data_inf)

    # Only predict once the form has been submitted
    if submitted:
        # Scale the features
        data_inf_scaled = scaler.transform(data_inf)

        # Predict with the loaded model
        y_pred_inf = model.predict(data_inf_scaled)

        # Report the result. Index the single prediction explicitly:
        # converting a whole 1-element array with int() is deprecated in
        # NumPy >= 1.25.
        # NOTE(review): assumes predict() returns an indexable 1-D result
        # for the single input row -- confirm against the pickled model.
        st.write('## Prediction of whether the borrower will default : ',str(int(y_pred_inf[0])))
        # A space after the hashes is required for Markdown to render a heading
        st.write('### 1 = will default, 0 = will not default')


if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.29.0
|
2 |
+
pandas==2.1.1
|
3 |
+
seaborn==0.12.2
|
4 |
+
matplotlib==3.8
|
5 |
+
numpy==1.26.2
|
6 |
+
plotly==5.9.0
|
7 |
+
Pillow==9.4.0
|
8 |
+
scikit-learn==1.3.0
|
scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40f37a2f6cc2ccb76aa8797beab0a1735236223787a5a483f198412cc7d1bd04
|
3 |
+
size 1073
|