File size: 9,305 Bytes
03a0fd1
 
 
3aa78d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb08102
3aa78d9
cb08102
1772ebc
cb08102
 
3aa78d9
cb08102
 
3aa78d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03a0fd1
3aa78d9
 
 
 
03a0fd1
3aa78d9
 
03a0fd1
3aa78d9
 
 
 
 
 
 
03a0fd1
3aa78d9
03a0fd1
3aa78d9
 
 
 
03a0fd1
3aa78d9
 
03a0fd1
 
 
 
3aa78d9
 
 
 
03a0fd1
 
 
 
3aa78d9
 
03a0fd1
 
 
 
3aa78d9
 
 
 
 
 
 
cb08102
3aa78d9
 
 
 
03a0fd1
 
3aa78d9
03a0fd1
 
 
 
 
 
 
 
3aa78d9
03a0fd1
3aa78d9
 
 
 
 
 
 
 
c45f442
 
3aa78d9
 
03a0fd1
 
 
3aa78d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import pandas as pd
import panel as pn
import hvplot.pandas
import numpy as np
from math import radians, sin, cos, sqrt, asin
# Load the raw trip records. The code below relies on the CSV providing a
# 'Date/Time' column; 'Lat', 'Lon' and 'Base' are read further down the file.
uber_data = pd.read_csv(r'uber-raw-data-jul14.csv')

# Parse the pickup timestamps and floor each one to its 15-minute bin.
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'])
uber_data['BinnedHour'] = uber_data['Date/Time'].dt.floor('15min')

# Calendar helper columns derived from the binned timestamp.
DayMap = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
uber_data['Day'] = uber_data['BinnedHour'].dt.weekday.map(DayMap)
uber_data['Date'] = uber_data['BinnedHour'].dt.date
# Ordered categorical so weekday groupings sort Monday..Sunday, not alphabetically.
uber_data['Day'] = pd.Categorical(
    uber_data['Day'],
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
uber_data['Time'] = uber_data['BinnedHour'].dt.time

# Rides per (date, weekday, 15-minute slot).
# observed=True matters because 'Day' is categorical: without it pandas emits
# a row for every Date x Day x Time combination, and the impossible ones
# (a date paired with the wrong weekday) show up with a count of 0, which
# dropna() never removes because count() does not produce NaN.
weekly_data1 = (
    uber_data.groupby(['Date', 'Day', 'Time'], observed=True)
    .count()
    .rename(columns={'BinnedHour': 'Rides'})['Rides']
    .reset_index()
)

# Total rides per weekday. numeric_only=True is spelled out because the first
# positional argument of GroupBy.sum is numeric_only, not a column name.
daywise = weekly_data1.groupby('Day', observed=True).sum(numeric_only=True)
# Requires the 'uber_data' DataFrame (with its 'BinnedHour' column) built above.

# --- Code 1 ---
# Number of rides for each day of the month, ordered by day number.
day_of_month = uber_data['BinnedHour'].dt.day
value_counts = day_of_month.value_counts().sort_index()

# Two-column frame (day number, ride count) that feeds the bar chart.
df = pd.DataFrame({'Days': value_counts.index, 'Rides': value_counts.values})

# Panel configuration used by the dashboard.
pn.extension('plotly')
pn.config.sizing_mode = 'stretch_width'

# Bar chart of rides per day, built with hvplot.
daily_bar_options = dict(
    x='Days',
    y='Rides',
    color='black',
    xlabel='Days',
    ylabel='Rides',
    rot=0,
    title='Uber Rides per day in July 2014 at NYC',
    height=400,
    width=800,
)
uber_rides_graph = df.hvplot.bar(**daily_bar_options)

# --- Code 2 ---
# Number of rides in every 15-minute bin, in chronological order.
binned_timestamps = uber_data['BinnedHour']
value_counts = binned_timestamps.value_counts().sort_index()

# Two-column frame (bin timestamp, ride count) that feeds the line chart.
df = pd.DataFrame({'BinnedHour': value_counts.index, 'Rides': value_counts.values})

# Line chart of rides per 15-minute bin, built with hvplot.
quarter_hour_line_options = dict(
    x='BinnedHour',
    y='Rides',
    color='black',
    alpha=0.8,
    title='Uber Rides every 15 mins in the month of July at NYC',
    xlabel='Days',
    ylabel='No. of Rides',
    height=400,
    width=800,
)
interactive_df_figure = df.hvplot.line(**quarter_hour_line_options)

# Wrap the chart in a HoloViews pane so it can be slotted into the layout.
interactive_df_pane = pn.pane.HoloViews(interactive_df_figure)

# --- Code 3 ---
# Derive the calendar columns from the 15-minute bins. This repeats the
# preparation done right after loading, so the section does not depend on it.
uber_data['BinnedHour'] = pd.to_datetime(uber_data['BinnedHour'])
DayMap = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
uber_data['Day'] = uber_data['BinnedHour'].dt.weekday.map(DayMap)
uber_data['Date'] = uber_data['BinnedHour'].dt.date
# Ordered categorical so weekdays sort Monday..Sunday, not alphabetically.
uber_data['Day'] = pd.Categorical(
    uber_data['Day'],
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
uber_data['Time'] = uber_data['BinnedHour'].dt.time

# Rides per (date, weekday, 15-minute slot).
# observed=True matters because 'Day' is categorical: without it pandas emits
# a row for every Date x Day x Time combination, and the impossible ones
# (a date paired with the wrong weekday) appear with Rides == 0. count() never
# yields NaN, so dropna() did not remove them, and those zero rows would drag
# down any per-weekday average computed from this frame.
weekly_data = (
    uber_data.groupby(['Date', 'Day', 'Time'], observed=True)
    .count()
    .rename(columns={'BinnedHour': 'Rides'})['Rides']
    .reset_index()
)

# Total rides per weekday, then a plain two-column frame for plotting.
daywise = weekly_data.groupby('Day', observed=True)['Rides'].sum()
df_total_rides = pd.DataFrame({'Days': daywise.index, 'Rides': daywise.values})

# Bar chart of total rides per weekday, built with hvplot.
# NOTE(review): value_label=True is kept exactly as in the original; confirm
# that hvplot interprets it the way the author intended.
total_rides_graph = df_total_rides.hvplot.bar(x='Days', y='Rides', color='black', xlabel='Days', ylabel='Total Rides',
                                             rot=0, title='Total Rides per Day',
                                             height=400, width=800,
                                             value_label=True)

# --- Code 4 ---
# Mean rides per (weekday, 15-minute slot) across the dates in weekly_data.
# numeric_only=True is spelled out: the first positional argument of
# GroupBy.mean is numeric_only, so the previous mean('Rides') only worked
# because a non-empty string is truthy. observed=True keeps the result limited
# to weekday/slot pairs that actually occur.
weekly_data = weekly_data.groupby(['Day', 'Time'], observed=True).mean(numeric_only=True)

# Pivot the weekdays into columns, then average across them so each time slot
# ends up with one value.
weekly_data1 = weekly_data.unstack(level=0)
average_rides = weekly_data1.T.mean()

# Line chart of the per-slot average, built with hvplot.
rides_plot = average_rides.hvplot(c='black', xlabel='Date', ylabel='Average rides',
                                   xticks=10, title='Average Uber rides on any day in July 2014 at NYC',
                                   height=400, width=800)

# Wrap the plot in a Panel object so it can be slotted into the layout.
avg_rides_panel = pn.panel(rides_plot)
# --- Code 5 ---
# Map each dispatching base code to its display name, count the rides per
# base, and draw the counts as a bar chart with hvplot.
BaseMapper = {
    'B02512': 'Unter',
    'B02598': 'Hinter',
    'B02617': 'Weiter',
    'B02682': 'Schmecken',
    'B02764': 'Danach-NY',
}
rides_per_base = uber_data['Base'].map(BaseMapper).value_counts()
plot_top_rides_city = rides_per_base.hvplot(
    kind='bar',
    rot=0,
    xlabel='Base',
    ylabel='Total rides',
    color='black',
    title='CountPlot: Total uber rides vs Base - July 2014, NYC',
    height=400,
    width=800,
)

# --- Code 6 ---
# Reference landmarks as (latitude, longitude) pairs in degrees, the order in
# which haversine() unpacks its arguments. The dashboard text refers to them
# as the Metropolitan Museum (MM) and the Empire State Building (ESB).
metro_art_coordinates = (40.7794, -73.9632)
empire_state_building_coordinates = (40.7484, -73.9857)

def haversine(coordinates1, coordinates2, radius=3956):
    """Return the great-circle distance between two points on a sphere.

    Args:
        coordinates1: ``(latitude, longitude)`` of the first point, in degrees.
        coordinates2: ``(latitude, longitude)`` of the second point, in degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. The default of 3956 approximates the Earth's radius in
            miles, so the default result is in miles.

    Returns:
        The distance along the sphere's surface between the two points.
    """
    lat1, lon1 = map(radians, coordinates1)
    lat2, lon2 = map(radians, coordinates2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # Haversine term: the squared half-chord length between the points.
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin() raise ValueError.
    a = min(1.0, max(0.0, a))

    central_angle = 2 * asin(sqrt(a))
    return central_angle * radius

# Distance from every pickup ('Lat', 'Lon' columns of uber_data) to each
# landmark, using haversine() with its default radius (miles).
# The pickups are walked once with zip() instead of DataFrame.apply(axis=1),
# which builds a Series object per row and is far slower on large frames.
pickup_points = list(zip(uber_data['Lat'], uber_data['Lon']))
uber_data['Distance MM'] = [haversine(metro_art_coordinates, point) for point in pickup_points]
uber_data['Distance ESB'] = [haversine(empire_state_building_coordinates, point) for point in pickup_points]

# Threshold radii 0.1, 0.2, ..., 5.0. linspace fixes the number of points;
# arange with a float step can gain or lose the endpoint through rounding.
# Rounding to one decimal keeps the index labels clean (0.3, not 0.30000000000000004).
distance_range = np.round(np.linspace(0.1, 5.0, 50), 1)

# For each radius, count the rides whose distance to each landmark is below it.
landmark_distances = uber_data[['Distance MM', 'Distance ESB']]
rides_within_radius = [(landmark_distances < dist).sum() for dist in distance_range]
distance_data = pd.concat(rides_within_radius, axis=1).T
distance_data.index = distance_range
distance_data = distance_data.rename(columns={'Distance MM': 'CloserToMM', 'Distance ESB': 'CloserToESB'})

pn.extension('bokeh')

# Line chart with one curve per landmark: ride count against threshold radius.
fig = distance_data.hvplot(height=400, width=800, color=['black', 'grey']).opts(
    title='Number of Rides Closer to ESB and MM',
    xlabel='Threshold Radius(mi)',
    ylabel='Rides',
)

# Wrap the chart in a Panel object so it can be slotted into the layout.
fig_panel = pn.panel(fig)

# Chart selector: one button per chart offered by the dashboard.
# `inline` is a RadioBoxGroup option, not a RadioButtonGroup parameter, so it
# is not passed here; unknown constructor keywords are reported by param
# (a warning or a TypeError depending on the installed version).
yaxis_radio = pn.widgets.RadioButtonGroup(
    name='Y axis',
    options=['Rides vs Days', '15 min of Uber', 'Total Rides per Day', 'Avg Rides per Day', 'Top Rides City', 'Predicting Distance'],
    button_type='light',
    button_style='solid',
)

# Layout: the selector on top, the chart below. Index 1 is the slot that
# update_chart() replaces; it starts with the rides-per-day chart.
panel_layout = pn.Column(
    yaxis_radio,
    pn.pane.HoloViews(uber_rides_graph),
)

# Define the callback function for the radio button
def update_chart(event):
    """Replace the chart in ``panel_layout`` with the one matching the selection.

    ``event.new`` is looked up among the selector's option labels. If it
    matches none of them, the layout is left unchanged.
    """
    # Builders are lazy so only the chosen chart is touched, and a fresh
    # HoloViews pane is created each time 'Rides vs Days' is selected.
    chart_builders = {
        'Rides vs Days': lambda: pn.pane.HoloViews(uber_rides_graph),
        '15 min of Uber': lambda: interactive_df_pane,
        'Total Rides per Day': lambda: total_rides_graph,
        'Avg Rides per Day': lambda: avg_rides_panel,
        'Top Rides City': lambda: plot_top_rides_city,
        'Predicting Distance': lambda: fig_panel,
    }
    build_chart = chart_builders.get(event.new)
    if build_chart is not None:
        panel_layout[1] = build_chart()

# Call update_chart whenever the selector's value changes.
yaxis_radio.param.watch(update_chart, 'value')

# Display the Panel layout (a bare last expression renders in a notebook;
# it has no effect when the file is run as a script).
panel_layout
import panel as pn
pn.extension()  # load the Panel extension before building the template

# Sidebar text: a heading followed by one note per chart in the dashboard.
sidebar_notes = [
    "# Key Performance Indicators (KPIs) of the EDA",
    "1. Let us visualize the total uber rides per day in the month of July 2014",
    "2. Let us have a more closer look at it, say every 15 minutes from July 1 to July 31.",
    "3. Grouping weekly_data by days to plot total rides per week in july 2014.",
    "4. Finding average rides on any day.",
    "5. Now, let's try visualizing the relationship between Base and total number of rides in July 2014",
    "6. The number of rides predicted to Metropolitan Museum (MM) and Empire State Building (ESB)",
]
sidebar_items = [pn.pane.PNG('Uber2.png', sizing_mode='scale_both')]
sidebar_items.extend(pn.pane.Markdown(note) for note in sidebar_notes)

# Main area: the chart selector with its chart, then a footer line.
chart_row = pn.Row(pn.Column(panel_layout))
footer_row = pn.Row(pn.pane.Markdown("Designed and Developed with ❤️ by Chitranshu Nagdawane © 2023"))

# Assemble the dashboard from the sidebar and main-area pieces.
template = pn.template.FastListTemplate(
    title='Uber Analysis Dashboard',
    sidebar=sidebar_items,
    main=[chart_row, footer_row],
    accent_base_color="#000000",
    header_background="#000000",
)

template.servable()