Spaces:
Sleeping
Sleeping
File size: 4,275 Bytes
65e6719 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-wide Streamlit settings: browser-tab title, wide layout, and the
# sidebar expanded when the page first loads.
st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
def _show_figure(fig):
    """Render *fig* in the Streamlit page, then close it.

    Closing removes the figure from pyplot's global registry; without it
    every script rerun leaves one more open figure behind.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_pie(counts, title, code_names, tight_layout=False):
    """Draw a pie chart of *counts* with a legend naming each class code.

    Args:
        counts: Series of frequencies indexed by class code
            (the result of ``value_counts()``).
        title: Title shown above the chart.
        code_names: Mapping from class code to a human-readable name.
        tight_layout: When true, apply ``tight_layout`` before rendering.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(
        kind='pie',
        autopct='%1.1f%%',
        startangle=90,
        shadow=True,
        ax=ax,
    )
    ax.set_title(title)
    ax.axis('equal')
    # Codes missing from the mapping fall back to their own text instead of
    # raising KeyError and aborting the whole page.
    legend_labels = [code_names.get(code, str(code)) for code in counts.index]
    ax.legend(labels=legend_labels, loc='upper right')
    if tight_layout:
        fig.tight_layout()
    _show_figure(fig)


def run():
    """Render the NYC Yellow Taxi exploratory-data-analysis page.

    Reads ``taxi.jpeg`` and ``taxi_dataset.csv`` via relative paths, shows
    the raw table, and draws charts from the ``trip_price``,
    ``trip_distance``, ``payment_type``, ``trip_type`` and
    ``passenger_count`` columns.
    """
    # Title and sub header
    st.title('NYC Taxi Price Prediction')
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Banner image; the context manager releases the file handle promptly.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Introductory description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Horizontal rule
    st.markdown('---')

    # Written explicitly instead of as a bare "magic" string: magic is only
    # applied to the main app file, so the text would vanish whenever this
    # module is imported and run() is called from elsewhere.
    st.write(
        '\n'
        'On this page, the author will do a simple exploration.\n'
        'The dataset used is the NYC Yellow Taxi dataset.\n'
        'This dataset comes from the website Google BigQuery.\n'
    )

    # Show the raw DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    # Legend for the coded categorical features
    for line in (
        '#### Some description for features that have a class:',
        '##### trip_type:',
        '##### 1: Standard Rate',
        '##### 2: JFK Airport and Others',
        '##### payment_type:',
        '##### 1: Credit Card',
        '##### 2: Cash',
    ):
        st.write(line)

    # Price histogram next to a distance-vs-price scatter plot for comparison
    fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(15, 5))
    sns.histplot(data['trip_price'], kde=True, bins=30, ax=ax_hist)
    ax_hist.set_title('Histogram of trip_price')
    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=ax_scatter)
    ax_scatter.set_title('trip_distance vs trip_price')
    _show_figure(fig)

    # Payment-type pie chart; codes are cast to int before counting
    data['payment_type'] = data['payment_type'].astype(int)
    _render_pie(
        data['payment_type'].value_counts(),
        'Pie Plot User Payment Type',
        {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'},
        tight_layout=True,
    )

    # Trip-type pie chart
    _render_pie(
        data['trip_type'].value_counts(),
        'Pie Plot User Trip Type',
        {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'},
    )

    # Passenger-count histogram: one colored series per distinct count
    passenger_count = data['passenger_count']
    unique_counts = passenger_count.unique()
    n_bins = 10
    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
    bar_width = 0.8

    fig, ax = plt.subplots(figsize=(8, 2))
    for i, count in enumerate(unique_counts):
        counts = passenger_count[passenger_count == count]
        # Wrap around the palette so more distinct counts than colors
        # cannot raise IndexError.
        ax.hist(
            counts,
            bins=n_bins,
            color=colors[i % len(colors)],
            alpha=0.7,
            width=bar_width,
        )
    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')
    _show_figure(fig)
# Render the page only when this file is executed as a script, not when it
# is imported by another module.
if __name__ == '__main__':
    run()