# Source: Hugging Face Space "ardifarizky/milestone2".
# Reported Space status: Runtime error.
# File size: 5,104 bytes.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from PIL import Image

# Page-level settings: browser-tab title, wide layout, sidebar expanded on load.
# NOTE(review): the 'FIFA 2022' title does not match the hotel-booking data
# that run() loads -- confirm the intended page title.
st.set_page_config(page_title='FIFA 2022', layout='wide', initial_sidebar_state='expanded')

# Inline CSS that sets visibility: hidden on the #MainMenu and footer selectors.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """

# unsafe_allow_html=True is passed so the raw <style> block is not escaped.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)




def _render(fig):
    """Send *fig* to the Streamlit page, then close it to release it."""
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the hotel-booking exploratory charts on the Streamlit page.

    Reads ``hotel_bookings.csv`` from the working directory and draws, in
    order: a correlation heatmap, cancellation histograms, seasonal booking
    trends, a guest-demographics pie chart, a per-hotel comparison grid, and
    count plots for market segment and distribution channel.

    Each chart is drawn on its own explicitly created figure, and that same
    figure is what gets passed to ``st.pyplot``, so every call displays the
    chart that was just drawn.
    """
    # NOTE(review): this title does not match the hotel-booking dataset
    # loaded below; confirm the intended wording before changing the
    # user-facing string.
    st.title('Heart Failure Prediction')
    d = pd.read_csv('hotel_bookings.csv')

    # --- Correlation heatmap -------------------------------------------
    # Restrict to numeric/boolean columns before correlating. The plots
    # below treat columns such as 'hotel' and 'deposit_type' as categories
    # (presumably text), and DataFrame.corr() raises on non-numeric data in
    # pandas >= 2.0.
    corr = d.select_dtypes(include=['number', 'bool']).corr()
    # Mask the upper triangle (diagonal included): it mirrors the lower one.
    mask = np.triu(np.ones_like(corr, dtype=bool))

    sns.set(style='white')
    fig, ax = plt.subplots(figsize=(12, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    ax.set_title('Data Correlation')
    _render(fig)

    # --- Cancellation histograms ---------------------------------------
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

    sns.histplot(data=d, x='lead_time', hue='is_canceled',
                 kde=True, ax=axes[0][0], palette='Set1').set_title("distribution of Lead Time")

    sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                 ax=axes[0][1], palette='Set1').set_title("distribution of Booking Changes")

    sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                 ax=axes[1][0], palette='Set1').set_title("distribution of Deposit Type")

    # Only three panels are used; hide the fourth instead of showing an
    # empty frame.
    axes[1][1].set_visible(False)

    fig.tight_layout()
    _render(fig)

    # --- Seasonal booking trends ---------------------------------------
    booking_counts = (
        d.groupby(['arrival_date_year', 'arrival_date_month',
                   'arrival_date_week_number', 'hotel'])
        .size()
        .reset_index(name='booking_count')
    )
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )
    # NOTE(review): the pivot index is sorted by 'arrival_date_month'; if
    # that column holds month names the x-axis is alphabetical rather than
    # chronological -- verify against the data.

    fig, ax = plt.subplots(figsize=(12, 10))
    # Pass ax explicitly: without it pandas draws on a figure of its own,
    # and the figure handed to st.pyplot would not contain this chart.
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    _render(fig)

    # --- Guest demographics pie chart ----------------------------------
    demographics_counts = d[['babies', 'adults', 'children']].sum()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')
    _render(fig)

    # --- Per-hotel comparison grid -------------------------------------
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

    sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack',
                 bins=20, ax=axes[0, 0], palette='Set1')
    axes[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

    sns.barplot(data=d, x='hotel', y='is_canceled', ax=axes[0, 1], palette='Set1')
    axes[0, 1].set_title("Cancellation Rate by Hotel Type")

    sns.countplot(data=d, x='booking_changes', hue='hotel', ax=axes[1, 0], palette='Set1')
    axes[1, 0].set_title("Booking Changes by Hotel Type")

    sns.countplot(data=d, x='hotel', ax=axes[1, 1], palette='Set1')
    axes[1, 1].set_title("Total Bookings by Hotel Type")

    fig.tight_layout()
    _render(fig)

    # --- Market segmentation -------------------------------------------
    # Rendered through st.pyplot; the earlier plt.show() call never put
    # this chart on the Streamlit page.
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(data=d, x='market_segment', palette='Set3', ax=ax)
    ax.set_title('Distribution of Market Segmentation')
    ax.set_xlabel('Market Segment')
    ax.set_ylabel('Count')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    _render(fig)

    # --- Distribution channels -----------------------------------------
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=d, x='distribution_channel', palette='Set2', ax=ax)
    ax.set_title('Distribution of Distribution Channels')
    ax.set_xlabel('Distribution Channel')
    ax.set_ylabel('Count')
    fig.tight_layout()
    _render(fig)
    

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    run()