# Streamlit page: simple exploratory data analysis of the NYC Yellow Taxi
# dataset (intro text, raw table, price charts, category pies, passenger
# histogram).

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from PIL import Image

st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded'
)

# Legend names for the coded categorical columns.
_PAYMENT_TYPE_LABELS = {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'}
_TRIP_TYPE_LABELS = {
    1: 'Standard Rate',
    2: 'JFK Airport',
    5: 'Negotiated Fare',
    4: 'westchester',
    3: 'Newark',
}

# Palette for the passenger-count histogram; reused cyclically when the data
# holds more distinct counts than there are colours.
_PASSENGER_COLORS = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    pyplot keeps a reference to every figure it creates until the figure is
    closed, so closing after rendering stops figures from piling up.
    """
    st.pyplot(fig)
    plt.close(fig)


def _legend_labels(codes, names):
    """Return one legend label per code, in order.

    Codes missing from *names* get an ``Unknown (<code>)`` label instead of
    raising ``KeyError``.
    """
    return [names.get(code, f'Unknown ({code})') for code in codes]


def _plot_category_pie(counts, names, title, *, tight_layout=False):
    """Draw a pie chart of *counts* with a legend built from *names*.

    Args:
        counts: Series of frequencies indexed by category code
            (the result of ``value_counts()``).
        names: dict mapping category code to legend label.
        title: Chart title.
        tight_layout: Apply ``tight_layout()`` to the figure before rendering.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, shadow=True, ax=ax)
    ax.set_title(title)
    ax.axis('equal')
    ax.legend(labels=_legend_labels(counts.index, names), loc='upper right')
    if tight_layout:
        fig.tight_layout()
    _show_figure(fig)


def _plot_price_overview(data):
    """Draw the trip_price histogram next to a trip_distance/trip_price scatter."""
    fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(15, 5))
    sns.histplot(data['trip_price'], kde=True, bins=30, ax=hist_ax)
    hist_ax.set_title('Histogram of trip_price')
    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=scatter_ax)
    scatter_ax.set_title('trip_distance vs trip_price')
    _show_figure(fig)


def _plot_passenger_histogram(passenger_count):
    """Draw one overlaid histogram per distinct value of *passenger_count*."""
    n_bins = 10
    bar_width = 0.8
    fig, ax = plt.subplots(figsize=(8, 2))
    for i, count in enumerate(passenger_count.unique()):
        same_count = passenger_count[passenger_count == count]
        # Cycle through the palette so extra distinct values cannot run past
        # the end of the colour list.
        color = _PASSENGER_COLORS[i % len(_PASSENGER_COLORS)]
        ax.hist(same_count, bins=n_bins, color=color, alpha=0.7, width=bar_width)
    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')
    _show_figure(fig)


def run():
    """Render the whole EDA page.

    Reads ``taxi.jpeg`` and ``taxi_dataset.csv`` from the working directory.
    The CSV is expected to provide the columns ``trip_price``,
    ``trip_distance``, ``payment_type``, ``trip_type`` and
    ``passenger_count``, which the charts below index directly.
    """
    # Title
    st.title('NYC Taxi Price Prediction')

    # Sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Header image; the context manager releases the file handle once the
    # image has been handed to Streamlit.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Horizontal rule
    st.markdown('---')

    # Written explicitly rather than as a bare "magic" string so the text is
    # rendered regardless of whether Streamlit magic is enabled.
    st.write(
        'On this page, the author will do a simple exploration. '
        'The dataset used is the NYC Yellow Taxi dataset. '
        'This dataset comes from the website Google BigQuery.'
    )

    # Show the raw DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    st.write('#### Some description for features that have a class:')
    st.write('##### trip_type:')
    st.write('##### 1: Standard Rate')
    st.write('##### 2: JFK Airport and Others')
    st.write('##### payment_type:')
    st.write('##### 1: Credit Card')
    st.write('##### 2: Cash')

    # Price histogram and distance/price scatter, side by side
    _plot_price_overview(data)

    # Payment-type pie. Missing values are dropped before the integer cast
    # because astype(int) raises on NaN; value_counts() ignores them anyway.
    payment_counts = data['payment_type'].dropna().astype(int).value_counts()
    _plot_category_pie(
        payment_counts,
        _PAYMENT_TYPE_LABELS,
        'Pie Plot User Payment Type',
        tight_layout=True,
    )

    # Trip-type pie
    trip_counts = data['trip_type'].value_counts()
    _plot_category_pie(trip_counts, _TRIP_TYPE_LABELS, 'Pie Plot User Trip Type')

    # Passenger-count histogram
    _plot_passenger_histogram(data['passenger_count'])


if __name__ == '__main__':
    run()