Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import plotly.express as px | |
from PIL import Image | |
# Page-wide Streamlit settings: page title, wide layout, sidebar expanded.
st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
# Legend text for the payment_type codes known to this page.
_PAYMENT_TYPE_LABELS = {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'}

# Legend text for the trip_type codes known to this page.
_TRIP_TYPE_LABELS = {
    1: 'Standard Rate',
    2: 'JFK Airport',
    5: 'Negotiated Fare',
    4: 'westchester',
    3: 'Newark',
}

# Colours used (and cycled through) by the passenger-count histogram.
_PASSENGER_COLORS = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']

# Introductory paragraph shown below the page header.
_DATASET_INTRO = '''
On this page, the author will do a simple exploration.
The dataset used is the NYC Yellow Taxi dataset.
This dataset comes from the website Google BigQuery.
'''


def _show_and_close(fig) -> None:
    """Render *fig* in the app, then close it so figures do not pile up across reruns."""
    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


def _render_price_plots(data: pd.DataFrame) -> None:
    """Show the trip_price histogram next to the trip_distance/trip_price scatter plot."""
    fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(15, 5))

    sns.histplot(data['trip_price'], kde=True, bins=30, ax=hist_ax)
    hist_ax.set_title('Histogram of trip_price')

    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=scatter_ax)
    scatter_ax.set_title('trip_distance vs trip_price')

    _show_and_close(fig)


def _render_category_pie(
    counts: pd.Series,
    title: str,
    code_labels: dict,
    tight_layout: bool = False,
) -> None:
    """Show a pie chart of *counts* with a legend describing each class code.

    Args:
        counts: Occurrences per class code (the result of ``value_counts()``).
        title: Title drawn above the pie.
        code_labels: Mapping from class code to legend text.
        tight_layout: Whether to call ``tight_layout()`` before rendering.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, shadow=True, ax=ax)
    ax.set_title(title)
    ax.axis('equal')

    # Add a legend with a description for every class; codes missing from the
    # mapping get a generic label instead of raising KeyError.
    legend_text = [code_labels.get(code, f'Code {code}') for code in counts.index]
    ax.legend(labels=legend_text, loc='upper right')

    if tight_layout:
        fig.tight_layout()
    _show_and_close(fig)


def _render_passenger_histogram(passenger_count: pd.Series) -> None:
    """Show one histogram layer per distinct passenger_count value."""
    n_bins = 10
    bar_width = 0.8

    fig, ax = plt.subplots(figsize=(8, 2))
    for position, value in enumerate(passenger_count.unique()):
        rows_with_value = passenger_count[passenger_count == value]
        # Wrap around the palette so more distinct values than colours
        # cannot raise IndexError.
        color = _PASSENGER_COLORS[position % len(_PASSENGER_COLORS)]
        ax.hist(rows_with_value, bins=n_bins, color=color, alpha=0.7, width=bar_width)
    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')

    _show_and_close(fig)


def run():
    """Render the NYC Yellow Taxi EDA page.

    Draws the page header and image, the introductory text, the raw
    dataset table read from ``taxi_dataset.csv``, and four charts: the
    price histogram/scatter pair, the payment_type pie, the trip_type pie
    and the passenger_count histogram.
    """
    # Title
    st.title('NYC Taxi Price Prediction')

    # Sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Image; the context manager releases the file handle after rendering.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Horizontal rule
    st.markdown('---')

    # Written explicitly instead of as a bare "magic" string so the text is
    # also shown when this module is imported rather than run as the main
    # script.
    st.write(_DATASET_INTRO)

    # Show DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    st.write('#### Some description for features that have a class:')
    st.write('##### trip_type:')
    st.write('##### 1: Standard Rate')
    st.write('##### 2: JFK Airport and Others')
    st.write('##### payment_type:')
    st.write('##### 1: Credit Card')
    st.write('##### 2: Cash')

    # Price histogram and scatter plot, side by side for comparison
    _render_price_plots(data)

    # payment_type pie plot; codes are cast to int, missing values dropped
    # first because NaN cannot be converted to int.
    payment_counts = data['payment_type'].dropna().astype(int).value_counts()
    _render_category_pie(
        payment_counts,
        'Pie Plot User Payment Type',
        _PAYMENT_TYPE_LABELS,
        tight_layout=True,
    )

    # trip_type pie plot
    trip_counts = data['trip_type'].value_counts()
    _render_category_pie(trip_counts, 'Pie Plot User Trip Type', _TRIP_TYPE_LABELS)

    # Passenger count histogram
    _render_passenger_histogram(data['passenger_count'])
# Render the page only when this file is the entry script.
if __name__ == '__main__':
    run()