Spaces:

TRusso
/

P1G5_Set_1_Titan_Russo

Sleeping

App Files Files Community

TRusso committed on Apr 27

Commit

5a7ae2a

•

1 Parent(s): 94ec419

Upload 7 files

Browse files

Files changed (7) hide show

EDA.py +85 -0
Model.py +61 -0
P1G5_Set_1_Titan_Russo.csv +0 -0
app.py +36 -0
stl_knn.pkl +3 -0
stl_logReg.pkl +3 -0
stl_svc.pkl +3 -0

EDA.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Load the dataset once at import time; eda() reads it through this
+# module-level name. The relative path is resolved against the process's
+# current working directory.
+data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")
+def eda():
+    st.title("Eksploratory Data Analysis")
+    st.write('Analyze the DataFrame for Better Understanding')
+    st.markdown("<h2><b>Limit Balance vs. Bill Amount by Default Payment Next Month</b></h2>",
+                unsafe_allow_html=True)
+    palette = ["#FF0000", "#4129E1"]  # custom colors
+    for i in range(1, 7):
+        plt.figure()
+        sns.scatterplot(
+            x="limit_balance", y=f"bill_amt_{i}", hue="default_payment_next_month", data=data, palette=palette)
+        plt.title(
+            f"Limit Balance vs. Pay Amount {i} by Default Payment Next Month")
+        st.set_option('deprecation.showPyplotGlobalUse', False)
+        st.pyplot()
+    st.write("**Explanation**:")
+    markdown_text = """
+    These plots indicate that a higher `limit_balance` means a higher likelihood of non defaulting on payments.
+    """
+    st.markdown(markdown_text)
+    st.markdown("<h2><b>Heatmap of Correlation Matrix</b></h2>",
+                unsafe_allow_html=True)
+    # Heatmap
+    corr_matrix = data.corr()
+    plt.figure(figsize=(15, 10))
+    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
+    plt.title('Heatmap of Correlation Matrix')
+    st.set_option('deprecation.showPyplotGlobalUse', False)
+    st.pyplot()
+    st.write("")  # Add a blank line
+    st.write("**Explanation**:")
+    markdown_text = """
+    From the heatmap we can see the correlation between each columns. We can see the `pay_0`, `pay_2`, `pay_3`, `pay_4`, `pay_5`, `pay_6` have correlation each others from categorical columns meanwhile we can see `bill_amt_1`, `bill_amt_2`, `bill_amt_3`, `bill_amt_4`, `bill_amt_5`, `bill_amt_6` have correlation each others from numerical columns
+    """
+    st.markdown(markdown_text)
+    st.markdown("<h2><b>Checking Distribution Data</b></h2>",
+                unsafe_allow_html=True)
+    # Columns to plot
+    cols_num = ['limit_balance', 'age', 'bill_amt_1',
+                'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5', 'bill_amt_6',
+                'pay_amt_1', 'pay_amt_2', 'pay_amt_3', 'pay_amt_4', 'pay_amt_5',
+                'pay_amt_6']
+    # creating subplots for histogram
+    fig, axes = plt.subplots(5, 4, figsize=(18, 15))
+    # Flatten axes array
+    axes = axes.flatten()
+    # p;ots for each column
+    for i, col in enumerate(cols_num):
+        # membuat histogram dengan kernel density estimate
+        sns.histplot(data[col], ax=axes[i], kde=True)
+        axes[i].set_title(f'Distribution {col}')
+        axes[i].set_xlabel(col)
+        axes[i].set_ylabel('Frequency')
+    # hapus figure
+    for j in range(len(cols_num), len(axes)):
+        axes[j].remove()
+    # display
+    plt.tight_layout()
+    st.set_option('deprecation.showPyplotGlobalUse', False)
+    st.pyplot()
+    st.write("")  # Add a blank line
+    st.write("**Explanation**:")
+    st.write('Checking the distribution data from non categorical columns, we can say the data is positive skewed')
+    st.write()

Model.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import joblib
+def model():
+    st.title("Model of Prediction Default Payment Next Month")
+    st.write("Prediction of Customer Will Not Pay The Debt or Not")
+    st.sidebar.header('User Input Features')
+    input_data = user_input()
+    st.subheader('User Input')
+    st.write(input_data)
+    load_model = joblib.load("stl_logReg.pkl")
+    prediction = load_model.predict(input_data)
+    if prediction == 1:
+        prediction = 'Default on Payment'
+    else:
+        prediction = 'No Default on Payment'
+    st.write('Based on User Input, The Placement Model Predicted: ')
+    st.write(prediction)
+def user_input(num_rows=1):
+    education_level = st.sidebar.selectbox('Education Level', [1, 2, 3, 4])
+    pay_0 = st.sidebar.number_input('Pay_0', -2, 8, 0)
+    pay_2 = st.sidebar.number_input('Pay_2', -2, 8, 0)
+    pay_3 = st.sidebar.number_input('Pay_3', -2, 8, 0)
+    pay_4 = st.sidebar.number_input('Pay_4', -2, 8, 0)
+    pay_5 = st.sidebar.number_input('Pay_5', -2, 8, 0)
+    pay_6 = st.sidebar.number_input('Pay_6', -2, 8, 0)
+    pay_amt_1 = st.sidebar.number_input('Pay_amt_1', 0, 30000, 0)
+    pay_amt_2 = st.sidebar.number_input('Pay_amt_2', 0, 30000, 0)
+    pay_amt_3 = st.sidebar.number_input('Pay_amt_3', 0, 30000, 0)
+    pay_amt_4 = st.sidebar.number_input('Pay_amt_4', 0, 30000, 0)
+    pay_amt_5 = st.sidebar.number_input('Pay_amt_5', 0, 30000, 0)
+    pay_amt_6 = st.sidebar.number_input('Pay_amt_6', 0, 30000, 0)
+    data = {
+        'limit_balance': np.random.randint(5000, 100000, num_rows),
+        'education_level': education_level,
+        'pay_0': pay_0,
+        'pay_2': pay_2,
+        'pay_3': pay_3,
+        'pay_4': pay_4,
+        'pay_5': pay_5,
+        'pay_6': pay_6,
+        'pay_amt_1': pay_amt_1,
+        'pay_amt_2': pay_amt_2,
+        'pay_amt_3': pay_amt_3,
+        'pay_amt_4': pay_amt_4,
+        'pay_amt_5': pay_amt_5,
+        'pay_amt_6': pay_amt_6}
+    features = pd.DataFrame(data, index=[0])
+    return features

P1G5_Set_1_Titan_Russo.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import joblib
+from EDA import eda
+from Model import model
+# Load the dataset that is displayed in the header section below.
+data = pd.read_csv("P1G5_Set_1_Titan_Russo.csv")
+# Header section: these statements run at script top level, before main()
+# is called, so they render above whichever page is selected.
+st.header('GC 5')
+st.write("""
+Created by Titan Russo - HCK015 """)
+st.write('This is a program to predict credit card defaults based on multiple features using credit_card_default dataset obtained from ml.datasets in BigQuery')
+st.write('Default Payment Next Month Data')
+# Bare expression: shown by Streamlit's "magic" feature when it is enabled
+# (the default); it has no effect when run as plain Python.
+data
+def main():
+    # Define menu options
+    menu_options = ["EDA", "Model"]
+    # Create sidebar menu
+    selected_option = st.sidebar.radio("Menu", menu_options)
+    # Display selected page
+    if selected_option == "EDA":
+        eda()
+    elif selected_option == "Model":
+        model()
+if __name__ == "__main__":
+    main()

stl_knn.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a003a42c2764a56287d661e9a221ce363af2f34810ea139f8ee94b01f139d4
+size 531958

stl_logReg.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7dc1a178b1a97fc93b74ed6e290905b2d0c39351a895c872cf543e97971100
+size 959

stl_svc.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:717f13d87418703cc55926a21d0dc38d081133bd2ab4db011fd4d7c9214d7f94
+size 121979