File size: 3,760 Bytes
110b1f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-wide Streamlit settings for the EDA view: the page title, a wide
# layout, and a sidebar that starts expanded.
st.set_page_config(
    initial_sidebar_state='expanded',
    layout='wide',
    page_title='Customer Churn Risk - EDA',
)
def _churn_counts_by(df, column, categories=None):
    """Cross-tabulate ``churn_risk_score`` against another column.

    Args:
        df: DataFrame containing ``column`` and ``churn_risk_score``.
        column: Name of the column whose values become the row labels.
        categories: Optional row labels to keep, in this order. Labels that
            do not occur in ``df`` get an all-zero row; labels not listed
            are dropped.

    Returns:
        DataFrame of row counts with one row per ``column`` value and one
        column per churn risk score in ascending order; combinations that
        never occur are 0.
    """
    counts = (
        df.groupby(column)['churn_risk_score']
        .value_counts()
        .unstack(fill_value=0)
        .sort_index(axis=1)
    )
    if categories is not None:
        counts = counts.reindex(list(categories), fill_value=0)
    return counts


def _show_figure(fig):
    """Render a Matplotlib figure in the page, then close it."""
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; without
    # this they pile up each time the page code runs again.
    plt.close(fig)


def _gender_churn_figure(counts):
    """Build the side-by-side bar chart of churn risk score per gender.

    Args:
        counts: Table from ``_churn_counts_by`` with rows ``'M'`` and ``'F'``.

    Returns:
        The Matplotlib figure holding the chart.
    """
    bar_width = 0.35
    # Take the tick labels from the table's own columns so each label is
    # guaranteed to sit under the bars whose heights it describes.
    scores = counts.columns
    male_positions = np.arange(len(scores))
    female_positions = male_positions + bar_width

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(male_positions, counts.loc['M'], width=bar_width,
           label='Male', color='lightblue')
    ax.bar(female_positions, counts.loc['F'], width=bar_width,
           label='Female', color='pink')

    # Centre each tick between the two bars of its pair.
    ax.set_xticks(male_positions + bar_width / 2)
    ax.set_xticklabels(scores)
    ax.set_xlabel('Churn Risk Score')
    ax.set_ylabel('Count')
    ax.set_title('Churn Risk Score by Gender')
    ax.legend()
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    return fig


def run():
    """Render the customer-churn EDA page.

    Reads ``churn.jpeg`` and ``churn.csv`` via relative paths, then writes
    the page header, the raw data table, and a series of charts to the
    Streamlit page.
    """
    # Title and sub-header
    st.title('Customer Churn Prediction')
    st.subheader('EDA for Customer Churn Risk')

    # Banner image; the context manager releases the file handle afterwards.
    with Image.open('churn.jpeg') as image:
        st.image(image, caption='Customer Churn')

    # Introductory text
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The competition of product sales in the e-commerce industry in heating up!')
    st.write('##### In this page we can explore customer segmentation, more than that this website provides an ability to predict a customer churn risk.')

    # Horizontal rule
    st.markdown('---')

    # Written out explicitly instead of as a bare "magic" string literal:
    # Streamlit documents magic as working only in the main app file, so the
    # text would not render if another script imported this module.
    st.write(
        '\nOn this page, the author will do a simple exploration.\n'
        'The dataset used is the churn dataset.\n'
    )

    # Show the raw data
    df = pd.read_csv('churn.csv')
    st.dataframe(df)

    # Histogram of the target feature
    st.write('#### Histogram of Churn')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df['churn_risk_score'], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Churn risk score counts per membership category
    fig, ax = plt.subplots(figsize=(8, 3))
    membership_counts = _churn_counts_by(df, 'membership_category')
    membership_counts.plot(kind='bar', stacked=False, ax=ax)
    _show_figure(fig)

    # Churn risk score counts per gender (male vs. female)
    gender_counts = _churn_counts_by(df, 'gender', categories=('M', 'F'))
    _show_figure(_gender_churn_figure(gender_counts))

    # Histogram of a user-selected numeric column
    st.write('#### Histogram based on input user')
    pilihan = st.selectbox('Pilih column : ', ('avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'))
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df[pilihan], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Interactive Plotly scatter plot
    st.write('#### Plotly Plot - points_in_wallet VS avg_transaction_value')
    fig = px.scatter(df, x='points_in_wallet', y='avg_transaction_value')
    st.plotly_chart(fig)
# Render the page only when this file is executed as the script itself.
if __name__ == '__main__':
    run()