# imamzarkasie's picture
# Upload 15 files
# 65e6719
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level Streamlit settings: browser tab title, wide layout, and a
# sidebar that starts expanded.
st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
def _render_intro():
    """Write the page title, banner image and introductory text."""
    # Create the title
    st.title('NYC Taxi Price Prediction')

    # Create the sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Add the banner image; the context manager releases the file handle
    # once Streamlit has consumed the image.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Add the description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Draw a horizontal rule
    st.markdown('---')

    # Written explicitly with st.write instead of relying on Streamlit's
    # "magic" bare-string rendering, so the text is written whenever run()
    # is called, not only when magic processing applies to this file.
    st.write(
        '\n'
        'On this page, the author will do a simple exploration.\n'
        'The dataset used is the NYC Yellow Taxi dataset.\n'
        'This dataset comes from the website Google BigQuery.\n'
    )


def _render_class_descriptions():
    """Write the legend explaining the coded categorical features."""
    st.write('#### Some description for features that have a class:')
    st.write('##### trip_type:')
    st.write('##### 1: Standard Rate')
    st.write('##### 2: JFK Airport and Others')
    st.write('##### payment_type:')
    st.write('##### 1: Credit Card')
    st.write('##### 2: Cash')


def _render_price_plots(data):
    """Show the trip_price histogram next to trip_distance vs trip_price."""
    fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(15, 5))

    sns.histplot(data['trip_price'], kde=True, bins=30, ax=hist_ax)
    hist_ax.set_title('Histogram of trip_price')

    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=scatter_ax)
    scatter_ax.set_title('trip_distance vs trip_price')

    st.pyplot(fig)
    # Close the figure so it does not stay registered with pyplot after
    # it has been handed to Streamlit.
    plt.close(fig)


def _render_share_pie(values, title, class_names, tight_layout=False):
    """Show a pie chart of how often each class occurs in ``values``.

    ``class_names`` maps a class code to its legend text. A code that is
    missing from the mapping is shown as the code itself instead of
    raising ``KeyError``.
    """
    counts = values.value_counts()

    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie',
                autopct='%1.1f%%',
                startangle=90,
                shadow=True,
                ax=ax)
    ax.set_title(title)
    ax.axis('equal')

    # Add a legend with a description for every class
    legend_labels = [class_names.get(code, str(code)) for code in counts.index]
    ax.legend(labels=legend_labels, loc='upper right')

    if tight_layout:
        fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)


def _render_passenger_histogram(passenger_count):
    """Show one histogram layer per distinct passenger count."""
    n_bins = 10
    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
    bar_width = 0.8

    fig, ax = plt.subplots(figsize=(8, 2))
    # NaN never compares equal to itself, so a NaN entry could only ever
    # select an empty subset; skip it up front.
    for i, count in enumerate(passenger_count.dropna().unique()):
        subset = passenger_count[passenger_count == count]
        # Cycle through the palette so more than len(colors) distinct
        # values cannot raise IndexError.
        ax.hist(subset, bins=n_bins, color=colors[i % len(colors)],
                alpha=0.7, width=bar_width)

    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the NYC Yellow Taxi EDA page.

    Loads ``taxi.jpeg`` and ``taxi_dataset.csv`` by relative path, then
    writes the introduction, the raw data table, the class descriptions
    and four charts (price plots, payment_type pie, trip_type pie,
    passenger_count histogram) to the Streamlit page.
    """
    _render_intro()

    # Show DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)
    _render_class_descriptions()

    # Price histogram and scatter plot side by side for comparison
    _render_price_plots(data)

    # payment_type pie plot. Missing values are dropped before the integer
    # cast because NaN cannot be converted to int; value_counts() ignores
    # NaN anyway, so the resulting shares are unaffected.
    payment_type = data['payment_type'].dropna().astype(int)
    _render_share_pie(
        payment_type,
        'Pie Plot User Payment Type',
        {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'},
        tight_layout=True,
    )

    # trip_type pie plot
    _render_share_pie(
        data['trip_type'],
        'Pie Plot User Trip Type',
        {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'},
    )

    # Passenger count histogram
    _render_passenger_histogram(data['passenger_count'])
# Render the page only when this file is executed as the entry script.
if __name__ == '__main__':
    run()