Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- EDA.py +85 -0
- Model.py +61 -0
- P1G5_Set_1_Titan_Russo.csv +0 -0
- app.py +36 -0
- stl_knn.pkl +3 -0
- stl_logReg.pkl +3 -0
- stl_svc.pkl +3 -0
EDA.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
|
6 |
+
|
7 |
+
# Load the dataset once at import time; eda() reads this module-level frame.
data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")


def eda():
    """Render the exploratory data analysis page.

    Draws three sections into the Streamlit app, all based on the
    module-level ``data`` frame:

    1. Six scatter plots of ``limit_balance`` against ``bill_amt_1`` ..
       ``bill_amt_6``, coloured by ``default_payment_next_month``.
    2. An annotated heatmap of the correlation matrix.
    3. Histograms (with a kernel density estimate) of the numeric columns.

    Every figure is created explicitly, handed to ``st.pyplot`` and then
    closed, instead of relying on the implicit global pyplot figure (the
    usage the ``deprecation.showPyplotGlobalUse`` option used to silence).
    Closing the figures keeps them from piling up across script reruns.

    Returns:
        None. The function only writes to the Streamlit page.
    """
    st.title("Exploratory Data Analysis")
    st.write('Analyze the DataFrame for Better Understanding')

    # --- Section 1: limit balance vs. bill amount -------------------------
    st.markdown("<h2><b>Limit Balance vs. Bill Amount by Default Payment Next Month</b></h2>",
                unsafe_allow_html=True)

    palette = ["#FF0000", "#4129E1"]  # custom colors, one per hue level
    for i in range(1, 7):
        fig, ax = plt.subplots()
        sns.scatterplot(
            x="limit_balance",
            y=f"bill_amt_{i}",
            hue="default_payment_next_month",
            data=data,
            palette=palette,
            ax=ax,
        )
        # The y-axis is bill_amt_{i}, so the title must say "Bill Amount".
        ax.set_title(
            f"Limit Balance vs. Bill Amount {i} by Default Payment Next Month")
        st.pyplot(fig)
        plt.close(fig)

    st.write("**Explanation**:")
    st.markdown(
        "These plots indicate that a higher `limit_balance` means a higher "
        "likelihood of non defaulting on payments."
    )

    # --- Section 2: correlation heatmap ------------------------------------
    st.markdown("<h2><b>Heatmap of Correlation Matrix</b></h2>",
                unsafe_allow_html=True)

    # numeric_only=True keeps corr() from raising on non-numeric columns;
    # on an all-numeric frame the result is unchanged.
    corr_matrix = data.corr(numeric_only=True)
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    ax.set_title('Heatmap of Correlation Matrix')
    st.pyplot(fig)
    plt.close(fig)
    st.write("")  # Add a blank line

    st.write("**Explanation**:")
    st.markdown(
        "From the heatmap we can see the correlation between each columns. "
        "We can see the `pay_0`, `pay_2`, `pay_3`, `pay_4`, `pay_5`, `pay_6` "
        "have correlation each others from categorical columns meanwhile we can "
        "see `bill_amt_1`, `bill_amt_2`, `bill_amt_3`, `bill_amt_4`, "
        "`bill_amt_5`, `bill_amt_6` have correlation each others from numerical "
        "columns"
    )

    # --- Section 3: distributions ------------------------------------------
    st.markdown("<h2><b>Checking Distribution Data</b></h2>",
                unsafe_allow_html=True)

    # Columns to plot
    cols_num = ['limit_balance', 'age', 'bill_amt_1',
                'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
                'pay_amt_1', 'pay_amt_2', 'pay_amt_3', 'pay_amt_4', 'pay_amt_5',
                'pay_amt_6']

    # A 5x4 grid gives 20 slots; flatten it so slots can be paired with columns.
    fig, axes = plt.subplots(5, 4, figsize=(18, 15))
    axes = axes.flatten()

    # One histogram with a kernel density estimate per column.
    for ax, col in zip(axes, cols_num):
        sns.histplot(data[col], ax=ax, kde=True)
        ax.set_title(f'Distribution {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')

    # Remove the grid slots that were not used by any column.
    for unused_ax in axes[len(cols_num):]:
        unused_ax.remove()

    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)
    st.write("")  # Add a blank line

    st.write("**Explanation**:")
    st.write('Checking the distribution data from non categorical columns, we can say the data is positive skewed')
|
Model.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import joblib
|
5 |
+
|
6 |
+
|
7 |
+
def model():
    """Render the prediction page.

    Collects one row of feature values through ``user_input()``, echoes it
    back to the page, loads the model stored in ``stl_logReg.pkl`` and
    shows whether that model predicts a default for the entered values.

    Returns:
        None. The function only writes to the Streamlit page.
    """
    st.title("Model of Prediction Default Payment Next Month")
    st.write("Prediction of Customer Will Not Pay The Debt or Not")
    st.sidebar.header('User Input Features')

    input_data = user_input()

    st.subheader('User Input')
    st.write(input_data)

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only ship model files produced by a trusted pipeline.
    classifier = joblib.load("stl_logReg.pkl")

    # predict() returns one label per input row; there is exactly one row
    # here, so read it explicitly rather than relying on the truth value of
    # a whole array.
    predicted_label = classifier.predict(input_data)[0]

    if predicted_label == 1:
        outcome = 'Default on Payment'
    else:
        outcome = 'No Default on Payment'

    st.write('Based on User Input, The Placement Model Predicted: ')
    st.write(outcome)
|
28 |
+
|
29 |
+
|
30 |
+
def user_input(num_rows=1):
    """Collect the model features from sidebar widgets.

    Every feature, including ``limit_balance``, is read from a sidebar
    widget so that the same inputs always produce the same feature row.
    (Previously ``limit_balance`` was a random integer, which meant the
    value fed to the model was outside the user's control.)

    Args:
        num_rows: Kept for backward compatibility and ignored; the returned
            frame always has exactly one row.

    Returns:
        pandas.DataFrame: a single-row frame (index ``[0]``) with the
        columns ``limit_balance``, ``education_level``, ``pay_0``,
        ``pay_2`` .. ``pay_6`` and ``pay_amt_1`` .. ``pay_amt_6``, in that
        order.
    """
    # Bounds mirror the range the earlier random placeholder sampled from.
    # TODO confirm against the value range seen in the training data.
    limit_balance = st.sidebar.number_input('Limit Balance', 5000, 100000, 5000)
    education_level = st.sidebar.selectbox('Education Level', [1, 2, 3, 4])

    # Insertion order of this dict defines the column order of the frame.
    features = {
        'limit_balance': limit_balance,
        'education_level': education_level,
    }

    # Repayment-status fields: the column names skip 1 (pay_0, pay_2..pay_6).
    for month in (0, 2, 3, 4, 5, 6):
        features[f'pay_{month}'] = st.sidebar.number_input(
            f'Pay_{month}', -2, 8, 0)

    # Payment-amount fields pay_amt_1..pay_amt_6.
    for month in range(1, 7):
        features[f'pay_amt_{month}'] = st.sidebar.number_input(
            f'Pay_amt_{month}', 0, 30000, 0)

    return pd.DataFrame(features, index=[0])
|
P1G5_Set_1_Titan_Russo.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import joblib
|
5 |
+
|
6 |
+
from EDA import eda
|
7 |
+
from Model import model
|
8 |
+
|
9 |
+
# Load the dataset that is previewed at the top of every page.
data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")

# Page header shown above whichever page main() renders.
st.header('GC 5')
st.write("""
Created by Titan Russo - HCK015 """)

st.write('This is a program to predict credit card defaults based on multiple features using credit_card_default dataset obtained from ml.datasets in BigQuery')
st.write('Default Payment Next Month Data')
# Write the frame explicitly instead of leaving a bare `data` expression:
# a bare expression is only rendered when Streamlit's "magic" is enabled.
st.write(data)
|
19 |
+
|
20 |
+
|
21 |
+
def main():
    """Show the sidebar menu and render the page the user picked."""
    # Menu label -> function that renders the corresponding page.
    pages = {"EDA": eda, "Model": model}

    # Sidebar radio offering the menu labels in their declared order.
    choice = st.sidebar.radio("Menu", list(pages))

    # Render the selected page; do nothing if the choice is not a known label.
    render_page = pages.get(choice)
    if render_page is not None:
        render_page()


if __name__ == "__main__":
    main()
|
stl_knn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1a003a42c2764a56287d661e9a221ce363af2f34810ea139f8ee94b01f139d4
|
3 |
+
size 531958
|
stl_logReg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b7dc1a178b1a97fc93b74ed6e290905b2d0c39351a895c872cf543e97971100
|
3 |
+
size 959
|
stl_svc.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:717f13d87418703cc55926a21d0dc38d081133bd2ab4db011fd4d7c9214d7f94
|
3 |
+
size 121979
|