NoCommentsElder's picture
Create app.py
ad908aa
raw
history blame
No virus
1.47 kB
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
# Plot the hourly distribution of US accidents: one histogram for the whole
# dataset followed by one histogram per weekday (Monday..Sunday), laid out on
# a 4x2 grid of subplots.

# Ensure seaborn is installed in your environment
sns.set(color_codes=True)

# Load the dataset
df = pd.read_csv('/input/us-accidents/US_Accidents_March23.csv')

# Convert 'Start_Time' to datetime format so the .dt accessors can be used
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# Derive hour-of-day and weekday once instead of on every loop iteration.
# pandas numbers weekdays Monday=0 .. Sunday=6.
hours = df['Start_Time'].dt.hour
weekdays = df['Start_Time'].dt.dayofweek

# Create a 4x2 subplot grid: 1 overall panel + 7 weekday panels = 8 axes
fig, axes = plt.subplots(4, 2, figsize=(18, 10))
# hspace is the vertical spacing between subplot rows; note that the
# tight_layout() call at the end recomputes the subplot spacing.
plt.subplots_adjust(hspace=0.5)

# Create a gradient blue color palette, one shade per panel
n_colors = 8
blue_palette = sns.light_palette("blue", n_colors=n_colors, reverse=True)

# Panel labels. 2023-01-02 is a Monday, so days 2..8 yield Monday..Sunday and
# day_names[i] lines up with dayofweek == i - 1 for i in 1..7.
day_names = ['Overall'] + [
    pd.Timestamp(f'2023-01-{day:02d}').day_name() for day in range(2, 9)
]

# Plot overall distribution and for each day of the week.
# axes.flat walks the grid row by row, i.e. the same order as
# axes[i // 2, i % 2].
for i, ax in enumerate(axes.flat):
    if i == 0:
        panel_hours = hours
        title = "Overall Hourly Accident Distribution"
    else:
        panel_hours = hours[weekdays == i - 1]
        title = f"Hourly Accident Distribution: {day_names[i]}"

    # binrange pins the 24 bins to [0, 1), [1, 2), ..., [23, 24] so every
    # panel uses identical whole-hour bins regardless of which hours occur.
    sns.histplot(panel_hours, bins=24, binrange=(0, 24), ax=ax,
                 color=blue_palette[i])
    ax.set_title(title)
    ax.set_xlabel(f"Hour of {day_names[i]}")
    ax.set_ylabel("No. of Accidents")

plt.tight_layout()