TRusso committed on
Commit
5a7ae2a
1 Parent(s): 94ec419

Upload 7 files

Browse files
Files changed (7) hide show
  1. EDA.py +85 -0
  2. Model.py +61 -0
  3. P1G5_Set_1_Titan_Russo.csv +0 -0
  4. app.py +36 -0
  5. stl_knn.pkl +3 -0
  6. stl_logReg.pkl +3 -0
  7. stl_svc.pkl +3 -0
EDA.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+
7
+ # Load data
8
+ data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")
9
+
10
+
11
def eda():
    """Render the exploratory data analysis page.

    Reads the module-level ``data`` DataFrame and draws three sections:

    1. Six scatter plots of ``limit_balance`` against ``bill_amt_1`` ..
       ``bill_amt_6``, coloured by ``default_payment_next_month``.
    2. A correlation heatmap over the numeric columns.
    3. A grid of histograms (with KDE) for the numeric feature columns.

    Every figure is created explicitly, handed to ``st.pyplot`` and then
    closed, instead of relying on the implicit global pyplot figure. That
    removes the need for the ``deprecation.showPyplotGlobalUse`` option
    and stops figures from piling up across Streamlit reruns.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Eksploratory Data Analysis")
    st.write('Analyze the DataFrame for Better Understanding')
    st.markdown(
        "<h2><b>Limit Balance vs. Bill Amount by Default Payment Next Month</b></h2>",
        unsafe_allow_html=True)

    palette = ["#FF0000", "#4129E1"]  # custom colors
    for i in range(1, 7):
        fig, ax = plt.subplots()
        sns.scatterplot(
            x="limit_balance", y=f"bill_amt_{i}",
            hue="default_payment_next_month", data=data,
            palette=palette, ax=ax)
        # The y axis is bill_amt_<i>, so the title says "Bill Amount"
        # (it previously said "Pay Amount", contradicting the section header).
        ax.set_title(
            f"Limit Balance vs. Bill Amount {i} by Default Payment Next Month")
        st.pyplot(fig)
        plt.close(fig)  # release the figure once Streamlit has rendered it

    st.write("**Explanation**:")
    st.markdown(
        "\nThese plots indicate that a higher `limit_balance` means a higher "
        "likelihood of non defaulting on payments.\n")

    st.markdown("<h2><b>Heatmap of Correlation Matrix</b></h2>",
                unsafe_allow_html=True)
    # Restrict to numeric columns so corr() cannot fail on a text column.
    corr_matrix = data.select_dtypes(include="number").corr()
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    ax.set_title('Heatmap of Correlation Matrix')
    st.pyplot(fig)
    plt.close(fig)
    st.write("")  # Add a blank line

    st.write("**Explanation**:")
    st.markdown(
        "\nFrom the heatmap we can see the correlation between each columns. "
        "We can see the `pay_0`, `pay_2`, `pay_3`, `pay_4`, `pay_5`, `pay_6` "
        "have correlation each others from categorical columns meanwhile we "
        "can see `bill_amt_1`, `bill_amt_2`, `bill_amt_3`, `bill_amt_4`, "
        "`bill_amt_5`, `bill_amt_6` have correlation each others from "
        "numerical columns\n")

    st.markdown("<h2><b>Checking Distribution Data</b></h2>",
                unsafe_allow_html=True)

    # Columns to plot
    cols_num = ['limit_balance', 'age', 'bill_amt_1',
                'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5',
                'bill_amt_6', 'pay_amt_1', 'pay_amt_2', 'pay_amt_3',
                'pay_amt_4', 'pay_amt_5', 'pay_amt_6']

    # One axes per column on a 5x4 grid; flatten for simple indexing.
    fig, axes = plt.subplots(5, 4, figsize=(18, 15))
    axes = axes.flatten()

    for ax, col in zip(axes, cols_num):
        # Histogram with a kernel density estimate overlay
        sns.histplot(data[col], ax=ax, kde=True)
        ax.set_title(f'Distribution {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')

    # Drop the grid cells that have no column assigned to them.
    for unused_ax in axes[len(cols_num):]:
        unused_ax.remove()

    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)
    st.write("")  # Add a blank line

    st.write("**Explanation**:")
    st.write('Checking the distribution data from non categorical columns, we can say the data is positive skewed')
Model.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import joblib
5
+
6
+
7
def model():
    """Render the prediction page.

    Collects feature values through ``user_input()``, echoes them back to
    the page, loads the pickled estimator from ``stl_logReg.pkl`` and shows
    its prediction for the entered row as a human-readable label.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Model of Prediction Default Payment Next Month")
    st.write("Prediction of Customer Will Not Pay The Debt or Not")
    st.sidebar.header('User Input Features')

    input_data = user_input()

    st.subheader('User Input')
    st.write(input_data)

    # NOTE: joblib.load unpickles arbitrary objects; only ship trusted files.
    load_model = joblib.load("stl_logReg.pkl")

    prediction = load_model.predict(input_data)

    # predict() returns one value per input row. Read the first element
    # explicitly rather than truth-testing the whole array, which only works
    # by accident when there is exactly one row.
    if prediction[0] == 1:
        label = 'Default on Payment'
    else:
        label = 'No Default on Payment'

    st.write('Based on User Input, The Placement Model Predicted: ')
    st.write(label)
28
+
29
+
30
def user_input(num_rows=1):
    """Collect model features from sidebar widgets.

    Previously ``limit_balance`` was filled with a random integer, so the
    prediction changed on every rerun regardless of what the user entered.
    It is now a sidebar input like every other feature.

    Args:
        num_rows: Number of identical rows to return. Defaults to 1. The
            entered values are repeated on every row.

    Returns:
        A ``pandas.DataFrame`` with ``num_rows`` rows and the columns
        ``limit_balance``, ``education_level``, ``pay_0``, ``pay_2`` ..
        ``pay_6`` and ``pay_amt_1`` .. ``pay_amt_6``, in that order.
    """
    # NOTE(review): the lower bound matches the old random range; the upper
    # bound and default are assumptions - confirm against the training data.
    limit_balance = st.sidebar.number_input(
        'Limit Balance', 5000, 1000000, 50000)
    education_level = st.sidebar.selectbox('Education Level', [1, 2, 3, 4])

    # Repayment status inputs. There is no pay_1 column. Widget labels are
    # the capitalised column names ('Pay_0', 'Pay_2', ...).
    pay_status = {
        name: st.sidebar.number_input(name.capitalize(), -2, 8, 0)
        for name in ('pay_0', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6')
    }

    # Payment amount inputs for the six periods.
    pay_amounts = {
        f'pay_amt_{i}': st.sidebar.number_input(f'Pay_amt_{i}', 0, 30000, 0)
        for i in range(1, 7)
    }

    data = {
        'limit_balance': limit_balance,
        'education_level': education_level,
        **pay_status,
        **pay_amounts,
    }
    # Scalars are broadcast over the index, so this works for any num_rows
    # (the old index=[0] raised whenever num_rows != 1).
    return pd.DataFrame(data, index=range(num_rows))
P1G5_Set_1_Titan_Russo.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import joblib
5
+
6
+ from EDA import eda
7
+ from Model import model
8
+
9
# Load the dataset that is previewed at the top of every page.
data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")

st.header('GC 5')
st.write("""
Created by Titan Russo - HCK015 """)

st.write('This is a program to predict credit card defaults based on multiple features using credit_card_default dataset obtained from ml.datasets in BigQuery')
st.write('Default Payment Next Month Data')
# Render the table explicitly. A bare `data` expression only displays when
# Streamlit's "magic" feature is enabled in the runner configuration.
st.dataframe(data)
19
+
20
+
21
def main():
    """Show the sidebar menu and render whichever page is selected."""
    # Map each menu label to the function that renders that page.
    pages = {"EDA": eda, "Model": model}

    # Sidebar radio buttons, one per page, in insertion order.
    choice = st.sidebar.radio("Menu", list(pages))

    render_page = pages.get(choice)
    if render_page is not None:
        render_page()
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
stl_knn.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a003a42c2764a56287d661e9a221ce363af2f34810ea139f8ee94b01f139d4
3
+ size 531958
stl_logReg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7dc1a178b1a97fc93b74ed6e290905b2d0c39351a895c872cf543e97971100
3
+ size 959
stl_svc.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:717f13d87418703cc55926a21d0dc38d081133bd2ab4db011fd4d7c9214d7f94
3
+ size 121979