File size: 4,275 Bytes
65e6719
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

# Page-level configuration; Streamlit requires this to be issued before any
# other Streamlit command renders output on the page.
st.set_page_config(
    page_title='NYC TAXI - EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)

def _draw_category_pie(counts, title, label_map, tight_layout=False):
    """Render a pie chart of category counts with a descriptive legend.

    Args:
        counts: pandas Series as returned by ``value_counts()``; its index
            holds the category codes and its values the frequencies.
        title: Title shown above the pie chart.
        label_map: Mapping from category code to a human-readable label.
        tight_layout: When True, apply ``tight_layout`` before rendering.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    counts.plot(kind='pie',
                autopct='%1.1f%%',
                startangle=90,
                shadow=True,
                ax=ax)
    ax.set_title(title)
    ax.axis('equal')

    # Codes that are missing from label_map fall back to their raw value so
    # an unexpected category in the data cannot crash the page with KeyError.
    legend_labels = [label_map.get(code, str(code)) for code in counts.index]
    ax.legend(labels=legend_labels, loc='upper right')

    if tight_layout:
        fig.tight_layout()
    st.pyplot(fig)
    # Figures created through pyplot stay registered globally until closed;
    # close explicitly so they do not pile up across Streamlit reruns.
    plt.close(fig)


def run():
    """Render the NYC Yellow Taxi exploratory-data-analysis page.

    Reads ``taxi.jpeg`` and ``taxi_dataset.csv`` from the current working
    directory and renders, in order: the page header and banner image, an
    introduction, the raw dataframe, a trip-price histogram next to a
    distance-vs-price scatter plot, pie charts of ``payment_type`` and
    ``trip_type``, and a histogram of ``passenger_count``.
    """
    # Page title
    st.title('NYC Taxi Price Prediction')

    # Sub header
    st.subheader('EDA for NYC Yellow Taxi Analysis')

    # Banner image; the context manager releases the file handle once
    # Streamlit has consumed the image.
    with Image.open('taxi.jpeg') as image:
        st.image(image, caption='NYC Taxi')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The traffic in New York City makes residents choose taxis to travel around the city.')
    st.write('##### In this page we can explore some of the segmentation of taxi passengers in New York City')

    # Horizontal rule
    st.markdown('---')

    # Written explicitly instead of relying on Streamlit "magic": magic only
    # works in the main app file, so a bare string would be silently dropped
    # when this module is imported and run() is called from another script.
    st.write('''
    On this page, the author will do a simple exploration.
    The dataset used is the NYC Yellow Taxi dataset.
    This dataset comes from the website Google BigQuery.
    ''')

    # Show DataFrame
    data = pd.read_csv('taxi_dataset.csv')
    st.dataframe(data)

    st.write('####  Some description for features that have a class:')
    st.write('#####  trip_type:')
    st.write('#####  1: Standard Rate')
    st.write('#####  2: JFK Airport and Others')
    st.write('#####  payment_type:')
    st.write('#####  1: Credit Card')
    st.write('#####  2: Cash')

    # Price histogram and distance-vs-price scatter plot, side by side
    fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(15, 5))
    sns.histplot(data['trip_price'], kde=True, bins=30, ax=hist_ax)
    hist_ax.set_title('Histogram of trip_price')
    sns.scatterplot(x='trip_distance', y='trip_price', data=data, ax=scatter_ax)
    scatter_ax.set_title('trip_distance vs trip_price')
    st.pyplot(fig)
    plt.close(fig)

    # Payment type pie chart. Missing values are dropped before the integer
    # conversion (astype(int) raises on NaN), and the conversion is applied
    # to a copy so the displayed frame is not mutated.
    payment_counts = data['payment_type'].dropna().astype(int).value_counts()
    _draw_category_pie(
        payment_counts,
        'Pie Plot User Payment Type',
        {1: 'Credit Card', 2: 'Cash', 3: 'No Charge', 4: 'Dispute'},
        tight_layout=True,
    )

    # Trip type pie chart
    trip_counts = data['trip_type'].value_counts()
    _draw_category_pie(
        trip_counts,
        'Pie Plot User Trip Type',
        {1: 'Standard Rate', 2: 'JFK Airport', 5: 'Negotiated Fare', 4: 'westchester', 3: 'Newark'},
    )

    # Passenger count histogram: one coloured series per distinct count
    passenger_count = data['passenger_count']
    unique_counts = passenger_count.dropna().unique()

    n_bins = 10
    colors = ['steelblue', 'orange', 'green', 'red', 'purple', 'crimson', 'yellow']
    bar_width = 0.8

    fig, ax = plt.subplots(figsize=(8, 2))
    for i, count in enumerate(unique_counts):
        same_count = passenger_count[passenger_count == count]
        # Cycle through the palette so more distinct counts than colours
        # does not raise IndexError.
        ax.hist(same_count, bins=n_bins, color=colors[i % len(colors)],
                alpha=0.7, width=bar_width)

    ax.set_title('Histogram of Passenger Count')
    ax.set_xlabel('Passenger Count')
    ax.set_ylabel('Frequency')

    st.pyplot(fig)
    plt.close(fig)

# Render the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()