churn_P2M1 / eda.py
andinedwrn's picture
Update eda.py
ca09e7b
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level Streamlit settings: browser tab title, full-width layout,
# and the sidebar opened when the page first loads.
st.set_page_config(
    page_title='Customer Churn - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
def run():
    """Render the customer-churn EDA page.

    Displays, in order: the page title, the ``customchurn.jpg`` banner,
    an author credit, the full contents of ``churn.csv`` as a table, a
    bar chart of ``churn_risk_score`` value counts, and one annotated
    bar chart of value counts for each categorical feature listed below.

    Both files are read from the current working directory. Every
    matplotlib figure is closed once it has been handed to Streamlit so
    that figures do not pile up in pyplot's registry across reruns.
    """
    st.title('Customer Churn')
    image = Image.open('customchurn.jpg')
    st.image(image, caption='customer churn', use_column_width=True)
    st.write('Page written by **Geraldine Dewarani**')
    st.markdown('---')

    # Show the raw data frame.
    data = pd.read_csv('churn.csv')
    st.dataframe(data)

    # Bar plot of how many rows fall into each churn risk score.
    st.write('#### Distribution of churn risk score')
    churn_counts = data['churn_risk_score'].value_counts().sort_index()
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(x=churn_counts.index, y=churn_counts.values, ax=ax)
    ax.set_title('Distribution of Churn Risk Score')
    ax.set_xlabel('Churn Risk Score')
    ax.set_ylabel('Count')
    st.pyplot(fig)
    plt.close(fig)  # release the figure; pyplot keeps it alive otherwise

    # Categorical columns to summarise, one chart each.
    categorical_features = [
        "gender",
        "region_category",
        "membership_category",
        "joined_through_referral",
        "preferred_offer_types",
        "medium_of_operation",
        "internet_option",
        "used_special_discount",
        "offer_application_preference",
        "past_complaint",
        "complaint_status",
        "feedback",
    ]

    # Plot the value-count distribution of every categorical feature.
    for feature in categorical_features:
        # Count once and reuse for the bars and for the labels.
        counts = data[feature].value_counts()

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=counts.index, y=counts.values, ax=ax)
        ax.set_title(f'Distribution of {feature}')
        ax.set_xlabel(feature)
        ax.set_ylabel('Count')
        # Rotate category labels so long names do not overlap.
        ax.tick_params(axis='x', labelrotation=90)

        # Write the exact count above each bar.
        for i, v in enumerate(counts.values):
            ax.text(i, v, str(v), ha='center', va='bottom', fontweight='bold')

        st.pyplot(fig)
        plt.close(fig)
# Render the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()