Spaces:
Sleeping
Sleeping
import pandas as pd | |
import panel as pn | |
import hvplot.pandas | |
import numpy as np | |
from math import radians, sin, cos, sqrt, asin | |
# Load the raw pickup records and derive the time features used by the charts
# further down: a 15-minute bin plus that bin's weekday, date and time of day.
uber_data = pd.read_csv(r'uber-raw-data-jul14.csv')
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'])

# Floor every pickup timestamp to the start of its 15-minute slot.
uber_data['BinnedHour'] = uber_data['Date/Time'].dt.floor('15min')

# Weekday number (as returned by .dt.weekday) -> weekday name.
DayMap = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
uber_data['Day'] = uber_data['BinnedHour'].dt.weekday.map(DayMap)
uber_data['Date'] = uber_data['BinnedHour'].dt.date
# Ordered categorical so weekdays sort Monday..Sunday rather than alphabetically.
uber_data['Day'] = pd.Categorical(
    uber_data['Day'],
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
uber_data['Time'] = uber_data['BinnedHour'].dt.time

# The per-slot and per-weekday aggregates (`weekly_data1`, `daywise`) are not
# computed here: the "Code 3" / "Code 4" sections below rebuild both before
# anything reads them, so an early copy would only be discarded.
# --- Code 1 ---
# Requires the prepared `uber_data` frame (with its 'BinnedHour' column).
# Tally rides by day of the month and keep the days in ascending order.
rides_by_day_of_month = uber_data['BinnedHour'].dt.day.value_counts().sort_index()
daily_totals = pd.DataFrame({
    'Days': rides_by_day_of_month.index,
    'Rides': rides_by_day_of_month.values,
})

# Panel setup used by the rest of the dashboard.
pn.extension('plotly')
pn.config.sizing_mode = 'stretch_width'

# Bar chart: one bar per day of the month.
daily_bar_options = dict(
    x='Days',
    y='Rides',
    color='black',
    xlabel='Days',
    ylabel='Rides',
    rot=0,
    title='Uber Rides per day in July 2014 at NYC',
    height=400,
    width=800,
)
uber_rides_graph = daily_totals.hvplot.bar(**daily_bar_options)
# --- Code 2 ---
# Rides per 15-minute slot, in chronological order.
value_counts = uber_data['BinnedHour'].value_counts().sort_index()

# Two-column frame (slot start, ride count) for plotting.
df = pd.DataFrame({
    'BinnedHour': value_counts.index,
    'Rides': value_counts.to_numpy(),
})

# Line chart over the 15-minute slots.
slot_line_options = dict(
    x='BinnedHour',
    y='Rides',
    color='black',
    alpha=0.8,
    title='Uber Rides every 15 mins in the month of July at NYC',
    xlabel='Days',
    ylabel='No. of Rides',
    height=400,
    width=800,
)
interactive_df_figure = df.hvplot.line(**slot_line_options)

# Wrap the figure in a Panel pane so it can be placed in the layout.
interactive_df_pane = pn.pane.HoloViews(interactive_df_figure)
# --- Code 3 ---
# Total rides per weekday.
# Derive weekday, date and time of day from the 15-minute bins.
uber_data['BinnedHour'] = pd.to_datetime(uber_data['BinnedHour'])
DayMap = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
uber_data['Day'] = uber_data['BinnedHour'].dt.weekday.map(DayMap)
uber_data['Date'] = uber_data['BinnedHour'].dt.date
# Ordered categorical so weekdays sort Monday..Sunday rather than alphabetically.
uber_data['Day'] = pd.Categorical(
    uber_data['Day'],
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
uber_data['Time'] = uber_data['BinnedHour'].dt.time

# Rides per (date, weekday, 15-minute slot).
# observed=True matters because 'Day' is categorical: with observed=False
# pandas also emits the Date x Day x Time combinations that never occur (a
# date paired with the wrong weekday) and count() reports them as 0, so
# .dropna() cannot remove them and they drag down any later per-slot average.
weekly_data = (
    uber_data.groupby(['Date', 'Day', 'Time'], observed=True)['BinnedHour']
    .count()
    .rename('Rides')
    .reset_index()
)

# Summing up the rides per weekday. observed=False is spelled out so that all
# seven weekdays stay on the chart even if one of them had no rides.
daywise = weekly_data.groupby('Day', observed=False)['Rides'].sum()
df_total_rides = pd.DataFrame({'Days': daywise.index, 'Rides': daywise.values})

# Bar chart for the 'Total Rides per Day' view.
# NOTE(review): `value_label` is passed through unchanged; confirm that hvPlot
# actually recognises this option for bar plots.
total_rides_graph = df_total_rides.hvplot.bar(
    x='Days',
    y='Rides',
    color='black',
    xlabel='Days',
    ylabel='Total Rides',
    rot=0,
    title='Total Rides per Day',
    height=400,
    width=800,
    value_label=True,
)
# --- Code 4 ---
# Average rides per 15-minute slot of the day.
# First average each (weekday, time-of-day) slot over the dates it occurs on.
# The 'Rides' column is selected explicitly: the bare `.mean('Rides')` form
# hands the string to the positional `numeric_only` flag and only works
# because a non-empty string is truthy.
weekly_data = weekly_data.groupby(['Day', 'Time'], observed=True)[['Rides']].mean()

# Weekdays become columns, leaving time of day as the row index.
weekly_data1 = weekly_data.unstack(level=0)

# Average across the weekdays for every time of day and plot the curve.
# NOTE(review): the x axis is the time-of-day index, yet the label says
# 'Date'; it is left unchanged here, confirm the intended wording.
avg_rides_graph = pn.panel(
    weekly_data1.T.mean().hvplot(
        c='black',
        xlabel='Date',
        ylabel='Average rides',
        xticks=10,
        title='Average Uber rides on any day in July 2014 at NYC',
        height=400,
        width=800,
    )
)
# --- Code 5 ---
# Ride count per dispatching base, shown as a bar chart.
# Base code -> display name used on the chart.
BaseMapper = {
    'B02512': 'Unter',
    'B02598': 'Hinter',
    'B02617': 'Weiter',
    'B02682': 'Schmecken',
    'B02764': 'Danach-NY',
}
rides_per_base = uber_data['Base'].map(BaseMapper).value_counts()
plot_top_rides_city = rides_per_base.hvplot(
    kind='bar',
    rot=0,
    xlabel='Base',
    ylabel='Total rides',
    color='black',
    title='CountPlot: Total uber rides vs Base - July 2014, NYC',
    height=400,
    width=800,
)
# --- Code 6 ---
# Reference points as (latitude, longitude) pairs in decimal degrees; they are
# unpacked in that order by haversine().
metro_art_coordinates = (
    40.7794,   # latitude
    -73.9632,  # longitude
)
empire_state_building_coordinates = (
    40.7484,   # latitude
    -73.9857,  # longitude
)
def haversine(coordinates1, coordinates2, radius=3956):
    """Return the great-circle distance between two points on a sphere.

    Args:
        coordinates1: ``(latitude, longitude)`` of the first point, in
            decimal degrees.
        coordinates2: ``(latitude, longitude)`` of the second point, in
            decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 3956 (Earth's radius in miles), so existing
            two-argument calls keep returning miles.

    Returns:
        The distance along the surface of the sphere, as a float.
    """
    lat1, lon1 = map(radians, coordinates1)
    lat2, lon2 = map(radians, coordinates2)
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` a hair above 1 for (nearly) antipodal points,
    # which would make asin() raise ValueError; cap it at 1.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))
    return c * radius
# Requires `uber_data` to carry 'Lat' and 'Lon' columns.
# Distance from every pickup to each landmark, as computed by haversine().
pickup_points = list(zip(uber_data['Lat'], uber_data['Lon']))
uber_data['Distance MM'] = [
    haversine(metro_art_coordinates, point) for point in pickup_points
]
uber_data['Distance ESB'] = [
    haversine(empire_state_building_coordinates, point) for point in pickup_points
]

# For each threshold radius, count the pickups closer than that radius to each
# landmark: one row per radius, one column per landmark.
distance_columns = ['Distance MM', 'Distance ESB']
distance_range = np.arange(0.1, 5.1, 0.1)
rides_within_radius = [
    (uber_data[distance_columns] < radius).sum() for radius in distance_range
]
distance_data = pd.concat(rides_within_radius, axis=1).T
distance_data.index = distance_range
distance_data = distance_data.rename(
    columns={'Distance MM': 'CloserToMM', 'Distance ESB': 'CloserToESB'}
)

pn.extension('bokeh')

# Line chart of both counts against the threshold radius, in black and grey.
distance_plot = distance_data.hvplot(height=400, width=800, color=['black', 'grey'])
fig = distance_plot.opts(
    title='Number of Rides Closer to ESB and MM',
    xlabel='Threshold Radius(mi)',
    ylabel='Rides',
)

# Panel wrapper so the figure can be placed in the layout.
fig_panel = pn.panel(fig)
# Chart selector: one button per available chart.
chart_labels = [
    'Rides vs Days',
    '15 min of Uber',
    'Total Rides per Day',
    'Avg Rides per Day',
    'Top Rides City',
    'Predicting Distance',
]
yaxis_radio = pn.widgets.RadioButtonGroup(
    name='Y axis',
    options=chart_labels,
    button_type='light',
    button_style='solid',
    inline=True,
)

# Layout: the selector on top, the currently shown chart in slot 1 below it.
# The per-day bar chart is the initial view.
initial_chart = pn.pane.HoloViews(uber_rides_graph)
panel_layout = pn.Column(yaxis_radio, initial_chart)
# Callback for the chart selector
def update_chart(event):
    """Put the chart matching the newly selected label into the layout.

    Args:
        event: Change event delivered by the watcher; ``event.new`` holds the
            label that was just selected.

    Slot 1 of ``panel_layout`` is replaced with the matching chart. A label
    that matches none of the known options leaves the layout untouched.
    """
    choice = event.new

    # The per-day bar chart is wrapped in a fresh pane on every selection.
    if choice == 'Rides vs Days':
        panel_layout[1] = pn.pane.HoloViews(uber_rides_graph)
        return

    # Every other chart already exists as a module-level object.
    chart_for_label = {
        '15 min of Uber': interactive_df_pane,
        'Total Rides per Day': total_rides_graph,
        'Avg Rides per Day': avg_rides_graph,
        'Top Rides City': plot_top_rides_city,
        'Predicting Distance': fig_panel,
    }
    if choice in chart_for_label:
        panel_layout[1] = chart_for_label[choice]
# Re-run update_chart every time the selected radio option changes.
yaxis_radio.param.watch(update_chart, 'value')
# (A bare `panel_layout.append` used to follow here; it only looked the method
# up without calling it, so it has been dropped.)

# Display the Panel layout. As a bare expression this has no effect in a plain
# script; presumably kept for notebook use, confirm before removing.
panel_layout

import panel as pn
pn.extension()  # Load the Panel extension

# Page layout: sidebar with the logo and the list of analyses, main area with
# the chart selector/chart column and a footer line.
template = pn.template.FastListTemplate(
    title='Uber Analysis Dashboard',
    sidebar=[
        pn.pane.PNG('Uber2.png', sizing_mode='scale_both'),
        pn.pane.Markdown("# Key Performance Indicators (KPIs) of the EDA"),
        pn.pane.Markdown("1. Let us visualize the total uber rides per day in the month of July 2014"),
        pn.pane.Markdown("2. Let us have a more closer look at it, say every 15 minutes from July 1 to July 31."),
        pn.pane.Markdown("3. Grouping weekly_data by days to plot total rides per week in july 2014."),
        pn.pane.Markdown("4. Finding average rides on any day."),
        pn.pane.Markdown("5. Now, let's try visualizing the relationship between Base and total number of rides in July 2014"),
        pn.pane.Markdown("6. The number of rides predicted to Metropolitan Museum (MM) and Empire State Building (ESB)"),
    ],
    main=[
        pn.Row(pn.Column(panel_layout)),
        pn.Row(pn.pane.Markdown("Designed and Developed with ❤️ by Chitranshu Nagdawane © 2023")),
    ],
    accent_base_color="#000000",
    header_background="#000000",
)

# Mark the template as the app to serve.
template.servable()