Spaces:
Build error
Build error
Upload 5 files
Browse files- app.py +10 -0
- eda_m2.py +172 -0
- model_knn.pkl +3 -0
- prediction_m2.py +68 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda_m2
|
3 |
+
import prediction_m2
|
4 |
+
|
5 |
+
# Map each sidebar choice to the function that renders that page.
page_renderers = {
    'EDA': eda_m2.run,
    'Prediksi': prediction_m2.run,
}

# Sidebar selector; the chosen label decides which page is drawn.
navigation = st.sidebar.selectbox('Pilih Halaman: ', ('EDA', 'Prediksi'))

# Anything other than 'EDA' falls through to the prediction page.
page_renderers.get(navigation, prediction_m2.run)()
|
eda_m2.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
|
8 |
+
# Commentary shown under each chart. These are rendered explicitly with
# st.markdown: bare string literals ("magic") are not rendered by Streamlit
# when the module is imported from another script.
_NOTES_TARGET = '''
Pie chart tersebut merupakan perbandingan antara target yang layak dan tidak layak mendapatkan pinjaman,
yang mana hanya sekitar **68.73%** nasabah yang layak untuk mendapatkan pinjaman.
'''

_NOTES_APPLICANT_INCOME = '''
Berdasarkan visualisasi histogram dan boxplot menunjukkan bahwa:

- Persebaran data pendapatan nasabah mayoritas ada pada rentang lebih dari 0 dan kurang dari sama dengan 10000
- Terdapat banyak outlier yang terlihat dalam visualisasi boxplot
- Persebaran data cenderung miring kanan/ skew, sehingga mungkin diperlukan cek skewness pada fitur ini
'''

_NOTES_COAPPLICANT_INCOME = '''
Berdasarkan visualisasi histogram dan boxplot menunjukkan bahwa:

- Persebaran data pendapatan nasabah mayoritas ada pada rentang 0 hingga kurang dari 10000
- Terdapat banyak outlier yang terlihat dalam visualisasi boxplot
- Persebaran data cenderung miring kanan/ skew, sehingga mungkin diperlukan cek skewness pada fitur ini
- Jika dibandingkan dengan `ApplicantIncome`, pendapatan sampingan yang dimiliki oleh nasabah jauh lebih sedikit dibandingkan pendapatan utama
'''

_NOTES_LOAN_AMOUNT = '''
Berdasarkan visualisasi histogram dan boxplot menunjukkan bahwa:

- Persebaran data mayoritas pinjaman yang diajukan oleh nasabah adalah mulai rentang lebih dari sama dengan 100 hingga kurang dari 200
- Terdapat banyak outlier yang terlihat dalam visualisasi boxplot
- Persebaran data cenderung miring kanan/ skew, sehingga mungkin diperlukan cek skewness juga pada fitur ini
'''

_NOTES_CATEGORICAL = '''
Hasil visualisasi 4 fitur categorical `Gender`, `Married`, `Self_Employed`, dan `Credit_History` diperoleh beberapa hal berikut:

- Sekitar 81% nasabah berjenis kelamin pria
- Sekitar 65% nasabah sudah berkeluarga
- Sekitar 86% nasabah tidak bekerja secara mandiri (bekerja kepada badan atau orang lain, sehingga berkemungkinan memiliki pendapatan yang tetap)
- Sekitar 84% nasabah sudah pernah melakukan pinjaman sebelumnya
'''

_NOTES_DEPENDENTS = '''
Berdasarkan visualisasi bar perbandingan jumlah tanggungan yang dimiliki nasabah diketahui bahwa:

- Sebagian besar nasabah tidak memiliki tanggungan pembiayaan
'''

_NOTES_LOAN_TERM = '''
Berdasarkan visualisasi histagram diatas menunjukkan bahwa:

- Rentang waktu peminjaman paling sering adalah dalam rentang 300 hingga 400 atau lebih tepatnya pada **360 hari**
- Dalam rentang waktu tersebut dapat juga berarti nasabah sebagian besar memiliki rentang waktu peminjaman selama 1 tahun lamanya.
'''

# (column, pie labels, chart title) for the four categorical pie charts.
# The labels are hard-coded in the order value_counts() is expected to
# return them (most frequent first) -- NOTE(review): confirm against the data.
_CATEGORICAL_PIES = (
    ('Gender', ['Male', 'Female'], 'Perbandingan Jenis Kelamin Nasabah'),
    ('Married', ['Yes', 'No'], 'Perbandingan Status Nasabah'),
    ('Self_Employed', ['No', 'Yes'], 'Perbandingan Status Pekerjaan Nasabah'),
    ('Credit_History', ['Yes', 'No'], 'Perbandingan Rekam Pinjaman Nasabah'),
)


def _show_header():
    """Render the author / objective banner at the top of the EDA page."""
    st.write('### Nama : Kumala Cantika Ainun Maya')
    st.write('### Batch : SBY-004')
    st.write('### Objective : ')
    st.write('##### Pada bagian ini berisikan EDA data informasi nasabah dengan kelayakan diberikan pinjaman')
    st.markdown('---')


def _show_figure(fig):
    """Send a Matplotlib figure to the page, then close it.

    Closing matters because run() executes again on every rerun of the
    script; figures that are never closed stay registered with pyplot.
    """
    st.pyplot(fig)
    plt.close(fig)


def _end_section(notes):
    """Render the commentary for a chart followed by a horizontal rule."""
    st.markdown(notes)
    st.markdown('---')


def _draw_histogram(series, ax, color):
    """Draw a count histogram of ``series`` on ``ax``."""
    sns.histplot(series, color=color, ax=ax)


def _draw_scatter(series, ax, color):
    """Draw the values of ``series`` against its index on ``ax``."""
    sns.scatterplot(data=series, color=color, ax=ax)


def _draw_density_histogram(series, ax, color):
    """Draw a density histogram with a KDE curve (replaces ``sns.distplot``)."""
    sns.histplot(series, kde=True, stat='density', color=color, ax=ax)


def _plot_numeric_feature(df, column, *, heading, color, draw_left,
                          left_title, box_title, xlabel, notes):
    """Render a two-panel figure (distribution plot + box plot) for a column.

    Args:
        df: DataFrame holding the data.
        column: Name of the numeric column to plot.
        heading: Markdown heading written above the figure.
        color: Matplotlib colour used for both panels.
        draw_left: Callable ``(series, ax, color)`` that draws the left panel.
        left_title: Title of the left panel.
        box_title: Title of the box-plot panel.
        xlabel: X-axis label used on both panels.
        notes: Markdown commentary rendered under the figure.
    """
    st.write(heading)
    fig, (left_ax, box_ax) = plt.subplots(1, 2, figsize=(15, 6))

    draw_left(df[column], left_ax, color)
    left_ax.set_title(left_title)
    left_ax.set_xlabel(xlabel)
    left_ax.set_ylabel('Total')

    sns.boxplot(y=df[column], color=color, ax=box_ax)
    box_ax.set_title(box_title)
    box_ax.set_xlabel(xlabel)

    _show_figure(fig)
    _end_section(notes)


def _plot_loan_status(df):
    """Render the pie chart of the target column ``Loan_Status``."""
    st.write('### Pie Chart Loan Status Nasabah')
    fig, ax = plt.subplots(figsize=(5, 5))
    df['Loan_Status'].value_counts().plot(
        kind='pie', labels=['yes', 'no'], autopct='%.2f%%',
        ax=ax, colors=['lime', 'red'],
    )
    _show_figure(fig)
    _end_section(_NOTES_TARGET)


def _plot_categorical_pies(df):
    """Render one pie chart per entry of ``_CATEGORICAL_PIES`` side by side."""
    st.write('### Pie Chart Informasi Status Nasabah')
    fig, axes = plt.subplots(1, len(_CATEGORICAL_PIES), figsize=(20, 10))
    for ax, (column, labels, title) in zip(axes, _CATEGORICAL_PIES):
        ax.pie(df[column].value_counts(), labels=labels,
               autopct='%1.0f%%', colors=['orange', 'grey'])
        ax.set_title(title)
    _show_figure(fig)
    _end_section(_NOTES_CATEGORICAL)


def _plot_dependents(df):
    """Render the bar chart of how many dependents each customer has."""
    st.write('### Jumlah Tanggungan Nasabah')
    fig, ax = plt.subplots(figsize=(5, 5))
    # Pass ax explicitly instead of relying on pyplot's current axes.
    df['Dependents'].value_counts().sort_index().plot(
        kind='bar', rot=0, color='navy', ax=ax,
    )
    ax.set_xlabel('Tanggungan (orang)')
    ax.set_ylabel('Jumlah')
    _show_figure(fig)
    _end_section(_NOTES_DEPENDENTS)


def _plot_loan_term(df):
    """Render the scatter plot of ``Loan_Amount_Term`` against row index."""
    st.write('### Scatter Persebaran Rentang Waktu Peminjaman')
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.scatterplot(data=df['Loan_Amount_Term'], color='pink', ax=ax)
    ax.set_xlabel('Waktu(hari)')
    ax.set_ylabel('Jumlah')
    _show_figure(fig)
    _end_section(_NOTES_LOAN_TERM)


def run():
    """Render the EDA page: header, raw data table, charts and commentary.

    The data is read from ``train.csv`` in the current working directory.
    If that file is missing, an error message is shown and nothing else is
    rendered.
    """
    # The header is drawn here rather than at import time so it appears on
    # every rerun of the EDA page and only on that page.
    _show_header()

    # Load dataset
    st.write('### Data Loan Status Nasabah')
    try:
        df = pd.read_csv('train.csv')
    except FileNotFoundError:
        st.error("File 'train.csv' tidak ditemukan, EDA tidak dapat ditampilkan.")
        return
    st.dataframe(df)

    st.markdown('---')
    st.title('Exploratory Data Analysis(EDA) Data Layak Pinjam Nasabah')

    # Target
    _plot_loan_status(df)

    # ApplicantIncome
    _plot_numeric_feature(
        df, 'ApplicantIncome',
        heading='### Histogram dan Box Plot Pendapatan Nasabah',
        color='cyan',
        draw_left=_draw_histogram,
        left_title='Histogram Pendapatan Nasabah',
        box_title='Boxplot Pendapatan',
        xlabel='Pendapatan',
        notes=_NOTES_APPLICANT_INCOME,
    )

    # CoapplicantIncome
    # NOTE(review): the heading and title say "Histogram" but the original
    # code draws a scatter plot; the plot type is kept as-is -- confirm intent.
    _plot_numeric_feature(
        df, 'CoapplicantIncome',
        heading='### Histogram dan Box Plot Pendapatan Sampingan Nasabah',
        color='red',
        draw_left=_draw_scatter,
        left_title='Histogram Pendapatan Sampingan Nasabah',
        box_title='Boxplot Pendapatan Sampingan',
        xlabel='Pendapatan Sampingan',
        notes=_NOTES_COAPPLICANT_INCOME,
    )

    # LoanAmount
    _plot_numeric_feature(
        df, 'LoanAmount',
        heading='### Histogram dan Box Plot Pinjaman Nasabah',
        color='grey',
        draw_left=_draw_density_histogram,
        left_title='Histogram Besar Pinjaman',
        box_title='Boxplot Besar Pinjaman',
        xlabel='Pinjaman',
        notes=_NOTES_LOAN_AMOUNT,
    )

    # Categorical features
    _plot_categorical_pies(df)

    # Dependents
    _plot_dependents(df)

    # Loan_Amount_Term
    _plot_loan_term(df)


if __name__ == '__main__':
    run()
|
model_knn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf814de0f19feae73b0ddcd925bf66d37faf3f1debacd0b694c6bb8639163502
|
3 |
+
size 104719
|
prediction_m2.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Nama : Kumala Cantika Ainun Maya
|
3 |
+
Batch : SBY-004
|
4 |
+
Objective : This module renders the applicant-data input form and displays the loan-status prediction produced by the pickled model in model_knn.pkl.
|
6 |
+
'''
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import pickle
|
10 |
+
import json
|
11 |
+
import streamlit as st
|
12 |
+
import pandas as pd
|
13 |
+
|
14 |
+
|
15 |
+
# Load the pickled model once, when this module is imported.
# NOTE: pickle.load can execute arbitrary code from the file, so
# model_knn.pkl must only ever come from a trusted source.
with open('model_knn.pkl', 'rb') as model_file:
    model_knn = pickle.load(model_file)
|
17 |
+
|
18 |
+
def run():
    """Render the prediction page: an input form and, once submitted, the result.

    The form collects the applicant's attributes. When the user presses
    'Predict', the values are placed in a one-row DataFrame, shown on the
    page, passed to ``model_knn.predict`` and the outcome is written as
    'Yes' or 'No'. Nothing is predicted until the form has been submitted.
    """
    with st.form(key='Form Parameter'):
        gender = st.radio("Gender", ["Male", "Female"])
        married = st.radio("Marital Status", ['Yes', 'No'])
        dependents = st.selectbox("Dependents", ('0', '1', '2', '3+'))
        education = st.radio("Education", ['Graduate', 'Not Graduate'])
        self_employed = st.radio("Self Employed", ['Yes', 'No'])
        applicantincome = st.number_input("Applicant Income :", min_value=150, max_value=81000, step=1)
        coapplicantincome = st.number_input("CoApplicant Income :", min_value=0, max_value=41700, step=1)
        loanamount = st.number_input("Loan Amount :", min_value=9, max_value=700, step=1)
        loanamountterm = st.selectbox("Loan Amount Term", (12, 36, 60, 84, 120, 180, 240, 360, 480))
        credithistory = st.radio("Credit History", ['No', 'Yes'])
        propertyarea = st.radio("Property Area", ['Urban', 'Semiurban', 'Rural'])
        submitted = st.form_submit_button('Predict')

    # Only predict after the user has pressed the submit button; otherwise
    # a result for the default widget values would be shown immediately.
    if not submitted:
        return

    data_inf = {
        # The original code filled this with the return value of st.write(),
        # i.e. None. The column is kept so the frame has the same columns.
        # NOTE(review): confirm whether the model uses Loan_ID at all.
        'Loan_ID': None,
        'Gender': gender,
        'Married': married,
        'Dependents': dependents,
        'Education': education,
        'Self_Employed': self_employed,
        'ApplicantIncome': applicantincome,
        'CoapplicantIncome': coapplicantincome,
        'LoanAmount': loanamount,
        'Loan_Amount_Term': loanamountterm,
        # The radio yields 'No'/'Yes'; the frame carries it as 0/1.
        'Credit_History': 0 if credithistory == 'No' else 1,
        'Property_Area': propertyarea,
    }

    df = pd.DataFrame([data_inf])
    st.dataframe(df)

    # predict() returns one value per input row; there is exactly one row.
    # NOTE(review): assumes the model encodes the negative class as 0 --
    # confirm against the training labels.
    prediction = model_knn.predict(df)[0]
    y_pred_inf = 'No' if prediction == 0 else 'Yes'

    st.write('## Hasil Prediksi Klasifikasi Loan Status : ', str(y_pred_inf))


if __name__ == '__main__':
    run()
|
67 |
+
|
68 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit == 1.31.1
|
2 |
+
pandas == 2.2.1
|
3 |
+
numpy == 1.26.4
|
4 |
+
seaborn == 0.13.2
|
5 |
+
matplotlib == 3.8.3
|
6 |
+
scikit-learn == 1.4.1.post1
|
7 |
+
plotly == 5.19.0
|