Spaces:

ardifarizky
/

milestone2

Runtime error

App Files Files Community

milestone2 / eda.py

ardifarizky

Update eda.py

f7e19bd 11 months ago

raw history blame

No virus

5.1 kB

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import seaborn as sns
	import streamlit as st
	from PIL import Image

	# Page-level Streamlit settings: browser-tab title, wide layout, sidebar open.
	# NOTE(review): the tab title 'FIFA 2022' does not match the hotel-bookings
	# data loaded in run() - confirm the intended wording.
	st.set_page_config(page_title='FIFA 2022', layout='wide', initial_sidebar_state='expanded')

	# CSS snippet that hides the elements matched by `#MainMenu` and `footer`.
	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	</style>
	"""
	# unsafe_allow_html=True is required for the raw <style> tag to be emitted.
	st.markdown(hide_streamlit_style, unsafe_allow_html=True)




	def _show(fig):
	    """Render *fig* in the Streamlit page, then close it to free its memory."""
	    st.pyplot(fig)
	    # Streamlit re-executes the script on every interaction; without an
	    # explicit close, pyplot keeps every figure alive across reruns.
	    plt.close(fig)


	def _correlation_heatmap(bookings):
	    """Return a figure with the lower-triangle correlation heatmap of the numeric columns."""
	    # Restrict to numeric columns explicitly: DataFrame.corr() raises on
	    # string columns in pandas >= 2.0.
	    corr = bookings.select_dtypes(include='number').corr()
	    # Boolean mask hiding the upper triangle (including the diagonal).
	    mask = np.triu(np.ones_like(corr, dtype=bool))

	    fig, ax = plt.subplots(figsize=(12, 9))
	    cmap = sns.diverging_palette(220, 10, as_cmap=True)
	    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
	                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
	    ax.set_title('Data Correlation')
	    return fig


	def _cancellation_histograms(bookings):
	    """Return a 2x2 grid with three histograms split by ``is_canceled``."""
	    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

	    sns.histplot(data=bookings, x='lead_time', hue='is_canceled',
	                 kde=True, ax=axes[0][0], palette='Set1').set_title("distribution of Lead Time")
	    sns.histplot(data=bookings, x='booking_changes', hue='is_canceled',
	                 ax=axes[0][1], palette='Set1').set_title("distribution of Booking Changes")
	    sns.histplot(data=bookings, x='deposit_type', hue='is_canceled',
	                 ax=axes[1][0], palette='Set1').set_title("distribution of Deposit Type")
	    # Only three panels are used; hide the fourth instead of showing an empty frame.
	    axes[1][1].set_visible(False)

	    fig.tight_layout()
	    return fig


	def _seasonal_trends(bookings):
	    """Return a line chart of booking counts per (month, week), one line per (year, hotel)."""
	    booking_counts = (
	        bookings
	        .groupby(['arrival_date_year', 'arrival_date_month',
	                  'arrival_date_week_number', 'hotel'])
	        .size()
	        .reset_index(name='booking_count')
	    )
	    # NOTE(review): if arrival_date_month holds month names, the index below
	    # sorts alphabetically rather than chronologically - confirm desired order.
	    pivot_table = booking_counts.pivot_table(
	        index=['arrival_date_month', 'arrival_date_week_number'],
	        columns=['arrival_date_year', 'hotel'],
	        values='booking_count',
	        fill_value=0,
	    )

	    fig, ax = plt.subplots(figsize=(12, 10))
	    # Pass ax explicitly; otherwise DataFrame.plot opens a separate figure
	    # and the one created above would stay empty.
	    pivot_table.plot(kind='line', ax=ax)
	    ax.set_title('Seasonal Booking Trends')
	    ax.set_xlabel('Month and Week Number')
	    ax.set_ylabel('Booking Count')
	    ax.legend(title='Hotel Type')
	    ax.tick_params(axis='x', labelrotation=45)
	    fig.tight_layout()
	    return fig


	def _guest_demographics(bookings):
	    """Return a pie chart of the summed ``babies``, ``adults`` and ``children`` columns."""
	    demographics_counts = bookings[['babies', 'adults', 'children']].sum()

	    fig, ax = plt.subplots(figsize=(8, 8))
	    ax.pie(demographics_counts, labels=demographics_counts.index,
	           autopct='%1.1f%%', startangle=140)
	    ax.set_title('Distribution of Guest Demographics')
	    ax.axis('equal')  # keep the pie circular
	    return fig


	def _hotel_type_comparison(bookings):
	    """Return a 2x2 grid comparing booking behaviour between hotel types."""
	    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

	    sns.histplot(data=bookings, x='lead_time', hue='hotel', multiple='stack',
	                 bins=20, ax=axes[0, 0], palette='Set1')
	    axes[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

	    sns.barplot(data=bookings, x='hotel', y='is_canceled', ax=axes[0, 1], palette='Set1')
	    axes[0, 1].set_title("Cancellation Rate by Hotel Type")

	    sns.countplot(data=bookings, x='booking_changes', hue='hotel', ax=axes[1, 0], palette='Set1')
	    axes[1, 0].set_title("Booking Changes by Hotel Type")

	    sns.countplot(data=bookings, x='hotel', ax=axes[1, 1], palette='Set1')
	    axes[1, 1].set_title("Total Bookings by Hotel Type")

	    fig.tight_layout()
	    return fig


	def _market_segments(bookings):
	    """Return a count plot of the ``market_segment`` column."""
	    fig, ax = plt.subplots(figsize=(12, 6))
	    sns.countplot(data=bookings, x='market_segment', palette='Set3', ax=ax)
	    ax.set_title('Distribution of Market Segmentation')
	    ax.set_xlabel('Market Segment')
	    ax.set_ylabel('Count')
	    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
	    fig.tight_layout()
	    return fig


	def _distribution_channels(bookings):
	    """Return a count plot of the ``distribution_channel`` column."""
	    fig, ax = plt.subplots(figsize=(10, 6))
	    sns.countplot(data=bookings, x='distribution_channel', palette='Set2', ax=ax)
	    ax.set_title('Distribution of Distribution Channels')
	    ax.set_xlabel('Distribution Channel')
	    ax.set_ylabel('Count')
	    fig.tight_layout()
	    return fig


	def run():
	    """Render the exploratory-data-analysis page for ``hotel_bookings.csv``.

	    Reads ``hotel_bookings.csv`` from the current working directory and
	    displays, in order: the correlation heatmap, cancellation histograms,
	    seasonal booking trends, guest demographics, the hotel-type comparison
	    grid, market segments and distribution channels. Every chart is drawn
	    on its own Matplotlib figure, passed to ``st.pyplot`` and then closed.

	    Raises:
	        FileNotFoundError: if ``hotel_bookings.csv`` is not present.
	        KeyError: if a column used by one of the charts is missing.
	    """
	    # NOTE(review): this title does not match the hotel-bookings data that
	    # is loaded below - confirm the intended wording before changing it.
	    st.title('Heart Failure Prediction')

	    bookings = pd.read_csv('hotel_bookings.csv')

	    # Global seaborn theme; applied once so every chart below shares it.
	    sns.set(style='white')

	    chart_builders = (
	        _correlation_heatmap,
	        _cancellation_histograms,
	        _seasonal_trends,
	        _guest_demographics,
	        _hotel_type_comparison,
	        _market_segments,
	        _distribution_channels,
	    )
	    for build_chart in chart_builders:
	        _show(build_chart(bookings))


	# Script entry point: render the page only when this file is executed directly.
	if __name__ == "__main__":
	    run()