File size: 3,760 Bytes
110b1f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

# Page-level settings: browser tab title, wide layout, sidebar expanded on load.
st.set_page_config(
    page_title='Customer Churn Risk - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)

def _show_and_close(fig):
    """Render a Matplotlib figure in the app, then close it.

    Streamlit re-executes the script on every interaction, so figures that
    are left open in pyplot's global registry pile up across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the churn dataset.

    Reads ``churn.jpeg`` and ``churn.csv`` from the current working directory
    and writes, in order: the page header and banner image, the raw
    DataFrame, a histogram of ``churn_risk_score``, grouped bar charts of
    ``churn_risk_score`` by ``membership_category`` and by ``gender``, a
    histogram of a user-selected numeric column, and a Plotly scatter of
    ``points_in_wallet`` against ``avg_transaction_value``.

    Returns:
        None. All output is written to the Streamlit app.
    """
    # Title
    st.title('Customer Churn Prediction')

    # Sub header
    st.subheader('EDA for Customer Churn Risk')

    # Banner image; the context manager releases the file handle once the
    # image has been handed to Streamlit.
    with Image.open('churn.jpeg') as image:
        st.image(image, caption='Customer Churn')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The competition of product sales in the e-commerce industry in heating up!')
    st.write('##### In this page we can explore customer segmentation, more than that this website provides an ability to predict a customer churn risk.')

    # Horizontal rule
    st.markdown('---')

    # Written with an explicit st.write instead of a bare string literal:
    # Streamlit's "magic" only applies in the main script file, so the text
    # would be silently dropped if this module were imported by another page.
    st.write('''
    On this page, the author will do a simple exploration.
    The dataset used is the churn dataset.
    ''')

    # Show DataFrame
    df = pd.read_csv('churn.csv')
    st.dataframe(df)

    # Histogram of the target feature
    st.write('#### Histogram of Churn')
    fig = plt.figure(figsize=(15, 5))
    sns.histplot(df['churn_risk_score'], bins=30, kde=True)
    _show_and_close(fig)

    # Count of each churn risk score per membership category
    by_membership = (
        df.groupby('membership_category')['churn_risk_score']
        .value_counts()
        .unstack()
        .fillna(0)
    )
    fig, ax = plt.subplots(figsize=(8, 3))
    by_membership.plot(kind='bar', stacked=False, ax=ax)
    _show_and_close(fig)

    # Count of each churn risk score per gender
    by_gender = (
        df.groupby('gender')['churn_risk_score']
        .value_counts()
        .unstack()
        .fillna(0)
    )

    # Take the score labels from the table's own columns so that every tick
    # label lines up with the bar drawn at that position. Series.unique()
    # returns values in order of appearance, which need not match the column
    # order produced by unstack().
    score_labels = list(by_gender.columns)

    # Side-by-side bars: male at the base position, female shifted one bar width
    bar_width = 0.35
    male_positions = np.arange(len(score_labels))
    female_positions = male_positions + bar_width

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(male_positions, by_gender.loc['M'], width=bar_width, label='Male', color='lightblue')
    ax.bar(female_positions, by_gender.loc['F'], width=bar_width, label='Female', color='pink')

    # Centre each tick between the two bars of its score
    ax.set_xticks(male_positions + bar_width / 2)
    ax.set_xticklabels(score_labels)

    # Axis labels, title, legend and a light horizontal grid
    ax.set_xlabel('Churn Risk Score')
    ax.set_ylabel('Count')
    ax.set_title('Churn Risk Score by Gender')
    ax.legend()
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    _show_and_close(fig)

    # Histogram of a column chosen by the user
    st.write('#### Histogram based on input user')
    pilihan = st.selectbox('Pilih column : ', ('avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'))
    fig = plt.figure(figsize=(15, 5))
    sns.histplot(df[pilihan], bins=30, kde=True)
    _show_and_close(fig)

    # Plotly scatter plot
    st.write('#### Plotly Plot - points_in_wallet VS avg_transaction_value')
    fig = px.scatter(df, x='points_in_wallet', y='avg_transaction_value')
    st.plotly_chart(fig)

# Entry point: render the page when this file is run directly rather than imported.
if __name__ == '__main__':
    run()