"""Uber NYC July-2014 exploratory dashboard built with Panel and hvPlot.

The script reads ``uber-raw-data-jul14.csv`` from the working directory
(columns used: ``Date/Time``, ``Lat``, ``Lon``, ``Base``), bins every pickup
into 15-minute slots, builds six charts and exposes them through a radio
button selector inside a ``FastListTemplate`` that is marked ``servable``.
"""
import pandas as pd
import panel as pn
import hvplot.pandas  # noqa: F401  (import registers the ``.hvplot`` accessor)
import numpy as np
# Retained from the original file; ``haversine`` now uses NumPy instead.
from math import radians, sin, cos, sqrt, asin  # noqa: F401

# Load the Panel front-end once, before any chart is created.
pn.extension('plotly')
pn.config.sizing_mode = 'stretch_width'

# Every chart in the dashboard shares the same canvas size.
PLOT_SIZE = {'height': 400, 'width': 800}

# ---------------------------------------------------------------------------
# Load the raw pickups and derive the time columns used by all charts
# ---------------------------------------------------------------------------
uber_data = pd.read_csv(r'uber-raw-data-jul14.csv')
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'])
uber_data['BinnedHour'] = uber_data['Date/Time'].dt.floor('15min')

DayMap = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
          4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
# Ordered categorical so weekday axes sort Monday..Sunday, not alphabetically.
uber_data['Day'] = pd.Categorical(
    uber_data['BinnedHour'].dt.weekday.map(DayMap),
    categories=list(DayMap.values()),
    ordered=True,
)
uber_data['Date'] = uber_data['BinnedHour'].dt.date
uber_data['Time'] = uber_data['BinnedHour'].dt.time

# ---------------------------------------------------------------------------
# Chart 1: rides per calendar day
# ---------------------------------------------------------------------------
rides_per_day = uber_data['BinnedHour'].dt.day.value_counts().sort_index()
rides_per_day_df = pd.DataFrame(
    {'Days': rides_per_day.index, 'Rides': rides_per_day.values}
)
uber_rides_graph = rides_per_day_df.hvplot.bar(
    x='Days', y='Rides', color='black', xlabel='Days', ylabel='Rides', rot=0,
    title='Uber Rides per day in July 2014 at NYC', **PLOT_SIZE,
)

# ---------------------------------------------------------------------------
# Chart 2: rides per 15-minute slot across the whole month
# ---------------------------------------------------------------------------
rides_per_slot = uber_data['BinnedHour'].value_counts().sort_index()
rides_per_slot_df = pd.DataFrame(
    {'BinnedHour': rides_per_slot.index, 'Rides': rides_per_slot.values}
)
interactive_df_figure = rides_per_slot_df.hvplot.line(
    x='BinnedHour', y='Rides', color='black', alpha=0.8,
    title='Uber Rides every 15 mins in the month of July at NYC',
    xlabel='Days', ylabel='No. of Rides', **PLOT_SIZE,
)
interactive_df_pane = pn.pane.HoloViews(interactive_df_figure)

# ---------------------------------------------------------------------------
# Chart 3: total rides per weekday
# ---------------------------------------------------------------------------
# ``observed=True`` is essential: ``Day`` is categorical, and with the default
# ``observed=False`` pandas emits the full Date x Day x Time product, adding
# zero-count rows for impossible (date, weekday) pairs that would later drag
# the per-weekday averages down.
weekly_data = (
    uber_data.groupby(['Date', 'Day', 'Time'], observed=True)
    .size()
    .reset_index(name='Rides')
)

# Keep all seven weekdays on the axis even if one had no rides.
daywise = weekly_data.groupby('Day', observed=False)['Rides'].sum()
df_total_rides = pd.DataFrame({'Days': daywise.index, 'Rides': daywise.values})
# NOTE(review): ``value_label`` is passed through unchanged from the original;
# confirm that hvPlot honours it for bar charts.
total_rides_graph = df_total_rides.hvplot.bar(
    x='Days', y='Rides', color='black', xlabel='Days', ylabel='Total Rides',
    rot=0, title='Total Rides per Day', value_label=True, **PLOT_SIZE,
)

# ---------------------------------------------------------------------------
# Chart 4: average rides per 15-minute slot of the day
# ---------------------------------------------------------------------------
# Mean over dates for each (weekday, slot), then one column per weekday.
weekly_data = weekly_data.groupby(['Day', 'Time'], observed=True)[['Rides']].mean()
weekly_data1 = weekly_data.unstack(level=0)
# Averaging across the weekday columns leaves one value per time-of-day slot,
# so the x axis is a time of day rather than a date.
avg_rides_graph = pn.panel(
    weekly_data1.mean(axis=1).hvplot(
        c='black', xlabel='Time of day', ylabel='Average rides', xticks=10,
        title='Average Uber rides on any day in July 2014 at NYC', **PLOT_SIZE,
    )
)

# ---------------------------------------------------------------------------
# Chart 5: total rides per dispatching base
# ---------------------------------------------------------------------------
BaseMapper = {'B02512': 'Unter', 'B02598': 'Hinter', 'B02617': 'Weiter',
              'B02682': 'Schmecken', 'B02764': 'Danach-NY'}
plot_top_rides_city = uber_data['Base'].map(BaseMapper).value_counts().hvplot(
    kind='bar', rot=0, xlabel='Base', ylabel='Total rides', color='black',
    title='CountPlot: Total uber rides vs Base - July 2014, NYC', **PLOT_SIZE,
)

# ---------------------------------------------------------------------------
# Chart 6: rides within a given radius of two landmarks
# ---------------------------------------------------------------------------
metro_art_coordinates = (40.7794, -73.9632)
empire_state_building_coordinates = (40.7484, -73.9857)

EARTH_RADIUS_MILES = 3956


def haversine(coordinates1, coordinates2):
    """Return the great-circle distance in miles between two points.

    Each argument is a ``(latitude, longitude)`` pair in decimal degrees.
    The components may be plain numbers or array-likes (NumPy arrays, pandas
    Series); array-likes are broadcast element-wise, so a whole column of
    points can be measured against one reference point in a single call.

    Returns a NumPy float for scalar input, otherwise an array/Series of
    distances shaped like the broadcast inputs.
    """
    lat1, lon1 = coordinates1
    lat2, lon2 = coordinates2
    lat1, lon1, lat2, lon2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))

    half_dlat = (lat2 - lat1) / 2
    half_dlon = (lon2 - lon1) / 2
    a = np.sin(half_dlat) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(half_dlon) ** 2
    # Rounding can push ``a`` marginally outside [0, 1]; clamp it so the
    # square root / arcsine stay within their domains.
    a = np.clip(a, 0.0, 1.0)
    return 2 * EARTH_RADIUS_MILES * np.arcsin(np.sqrt(a))


# Vectorised: one call per landmark instead of one Python call per ride.
pickup_coordinates = (uber_data['Lat'], uber_data['Lon'])
uber_data['Distance MM'] = haversine(metro_art_coordinates, pickup_coordinates)
uber_data['Distance ESB'] = haversine(empire_state_building_coordinates,
                                      pickup_coordinates)

# Thresholds 0.1, 0.2, ..., 5.0 miles. ``linspace`` fixes the number of points
# explicitly; ``arange`` with a float step can gain or lose the last element.
distance_range = np.round(np.linspace(0.1, 5.0, 50), 1)

# One row per threshold: number of rides strictly closer than that radius.
ride_distances = uber_data[['Distance MM', 'Distance ESB']]
distance_data = pd.concat(
    [(ride_distances < dist).sum() for dist in distance_range], axis=1
).T
distance_data.index = distance_range
distance_data = distance_data.rename(
    columns={'Distance MM': 'CloserToMM', 'Distance ESB': 'CloserToESB'}
)

fig = distance_data.hvplot(color=['black', 'grey'], **PLOT_SIZE).opts(
    title='Number of Rides Closer to ESB and MM',
    xlabel='Threshold Radius(mi)', ylabel='Rides',
)
fig_panel = pn.panel(fig)

# ---------------------------------------------------------------------------
# Chart selector
# ---------------------------------------------------------------------------
# Single source of truth: the button labels and the chart each one shows.
chart_views = {
    'Rides vs Days': pn.panel(uber_rides_graph),
    '15 min of Uber': interactive_df_pane,
    'Total Rides per Day': pn.panel(total_rides_graph),
    'Avg Rides per Day': avg_rides_graph,
    'Top Rides City': pn.panel(plot_top_rides_city),
    'Predicting Distance': fig_panel,
}

# NOTE(review): ``inline`` is kept from the original call; it is documented for
# RadioBoxGroup, so confirm RadioButtonGroup accepts it without a warning.
yaxis_radio = pn.widgets.RadioButtonGroup(
    name='Y axis',
    options=list(chart_views),
    button_type='light',
    button_style='solid',
    inline=True,
)

# Slot 0 holds the selector, slot 1 the currently selected chart.
panel_layout = pn.Column(
    yaxis_radio,
    chart_views['Rides vs Days'],
)


def update_chart(event):
    """Swap the displayed chart when the radio selection changes.

    ``event`` is the param change event for ``yaxis_radio.value``; its
    ``new`` attribute is the selected label. Unknown labels are ignored.
    """
    selected_view = chart_views.get(event.new)
    if selected_view is not None:
        panel_layout[1] = selected_view


yaxis_radio.param.watch(update_chart, 'value')

# ---------------------------------------------------------------------------
# Page template
# ---------------------------------------------------------------------------
template = pn.template.FastListTemplate(
    title='Uber Analysis Dashboard',
    sidebar=[
        pn.pane.PNG('Uber2.png', sizing_mode='scale_both'),
        pn.pane.Markdown("# Key Performance Indicators (KPIs) of the EDA"),
        pn.pane.Markdown("1. Let us visualize the total uber rides per day in the month of July 2014"),
        pn.pane.Markdown("2. Let us have a more closer look at it, say every 15 minutes from July 1 to July 31."),
        pn.pane.Markdown("3. Grouping weekly_data by days to plot total rides per week in july 2014."),
        pn.pane.Markdown("4. Finding average rides on any day."),
        pn.pane.Markdown("5. Now, let's try visualizing the relationship between Base and total number of rides in July 2014"),
        pn.pane.Markdown("6. The number of rides predicted to Metropolitan Museum (MM) and Empire State Building (ESB)"),
    ],
    main=[pn.Row(pn.Column(panel_layout))],
    accent_base_color="#000000",
    header_background="#000000",
)
template.servable()