mukhlishr's picture
Update eda.py
7ee9d9c
raw
history blame
2.29 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Switch the page to the wide layout; this call sits right after the imports.
st.set_page_config(
    layout='wide',
    initial_sidebar_state='expanded',
    page_title='Hotel Reservation',
)
_DATASET_URL = 'https://raw.githubusercontent.com/mukhlishr/rasyidi/main/h8dsft_P1G3_mukhlish_rasyidi.csv'
_FIGURE_SIZE = (15, 5)


def _bin_to_codes(values, edges, labels):
    """Bucket numeric values into integer category codes.

    Args:
        values: pandas Series of numbers to bucket.
        edges: increasing bin edges handed to ``pd.cut`` (each bin includes
            its right edge and excludes its left edge).
        labels: integer code for each bin, one fewer than ``edges``.

    Returns:
        An integer Series holding the code of every value that landed in a
        bin. Values that are missing or outside all bins are dropped.
    """
    binned = pd.cut(values, bins=edges, labels=labels)
    # pd.cut yields NaN for missing / out-of-range values and astype(int)
    # raises on NaN, so those rows are removed before the conversion.
    return binned.dropna().astype(int)


def _show_countplot(data, column):
    """Draw a seaborn count plot of ``data[column]`` and render it in Streamlit."""
    fig, ax = plt.subplots(figsize=_FIGURE_SIZE)
    try:
        sns.countplot(x=column, data=data, ax=ax)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this each call would leave one more figure open.
        plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the hotel reservation data.

    Shows the title, a header image, the first ten rows of the dataset and
    five count plots (booking status, market segment, binned room price,
    room type and binned lead time).

    The dataset is downloaded from ``_DATASET_URL`` on every call and the
    image is read from ``hotel.jpg`` in the working directory; errors from
    either read are not caught here.
    """
    # Page title and sub header
    st.title('Hotel Reservation ')
    st.subheader('Cancel or No Cancel Reservation')

    # Header image; the context manager releases the file handle afterwards.
    with Image.open('hotel.jpg') as image:
        st.image(image, caption='Creepy Hotel')

    # Description and a preview of the data frame
    st.write('Exploratory Data dari dataset Hotel Reservation')
    st.write('Menampilkan 10 Data dari dataset')
    df = pd.read_csv(_DATASET_URL)
    st.dataframe(df.head(10))

    # Count plot: booking status
    st.write('###### Status Cancel Reservation')
    _show_countplot(df, 'booking_status')

    # Count plot: market segment
    st.write('###### Source of reservation')
    _show_countplot(df, 'market_segment_type')

    # Count plot: room price bucketed into three levels
    st.write('###### Price room categories (1 = low, 2 = medium, 3 = high)')
    # The top bucket is open-ended so a price above the old 1000 cap is
    # counted as "high" instead of breaking the integer conversion.
    price_codes = _bin_to_codes(
        df['avg_price_per_room'],
        [-1, 100, 200, float('inf')],
        [1, 2, 3],
    )
    _show_countplot(price_codes.to_frame('binned_price'), 'binned_price')

    # Count plot: reserved room type
    st.write('###### Room type reserved')
    _show_countplot(df, 'room_type_reserved')

    # Count plot: lead time bucketed into six ranges
    # NOTE(review): the first caption names three levels while six buckets
    # are plotted, and the second caption has an unmatched ')'. The text is
    # left as-is here; confirm the intended wording.
    st.write('###### lead time date reservation to date stay (1 = low, 2 = medium, 3 = high)')
    st.write('###### 1 = < 3 days, 2 = 3-7 days, 3 = 7-14 days, 4 = 14 -30 days, 5 = 30 - 90 days, 6 = > 90 days)')
    # Bucket 6 is captioned "> 90 days", so its upper edge is left open
    # rather than capped at 500.
    lead_time_codes = _bin_to_codes(
        df['lead_time'],
        [-1, 3, 7, 14, 30, 90, float('inf')],
        [1, 2, 3, 4, 5, 6],
    )
    _show_countplot(lead_time_codes.to_frame('binned_lead_time'), 'binned_lead_time')
# Render the page only when this file is executed as the entry script.
if __name__ == "__main__":
    run()