"""Plot hourly accident histograms: one overall panel plus one per weekday."""

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor

from sklearn.linear_model import LinearRegression


sns.set(color_codes=True)

# Load the accident records and parse the start timestamp so the `.dt`
# accessors (hour, dayofweek) are available below.
df = pd.read_csv('/input/us-accidents/US_Accidents_March23.csv')
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# 4 x 2 grid: panel 0 is the overall distribution, panels 1-7 are the weekdays.
fig, axes = plt.subplots(4, 2, figsize=(18, 10))
plt.subplots_adjust(hspace=0.5)

# One shade per panel, darkest first.
n_colors = 8
blue_palette = sns.light_palette("blue", n_colors=n_colors, reverse=True)

# 2023-01-01 is a Sunday, so this list is ['Overall', 'Sunday', ..., 'Saturday'].
day_names = ['Overall'] + [pd.Timestamp('2023-01-0' + str(i)).day_name() for i in range(1, 8)]

# `Series.dt.dayofweek` counts Monday as 0, so rotate the Sunday-first names
# into Monday-first order. Panel k (k >= 1) then shows dayofweek == k - 1 and
# is labelled with the matching name in both the title and the x-axis label.
panel_names = [day_names[0]] + day_names[2:] + [day_names[1]]

# Extract these once instead of recomputing them for every panel.
start_hour = df['Start_Time'].dt.hour
start_weekday = df['Start_Time'].dt.dayofweek

# Iterate over all eight panels so Sunday is plotted as well and no subplot
# is left empty.
for i, (ax, panel_name) in enumerate(zip(axes.flat, panel_names)):
    if i == 0:
        panel_hours = start_hour
        panel_title = "Overall Hourly Accident Distribution"
    else:
        panel_hours = start_hour[start_weekday == i - 1]
        panel_title = f"Hourly Accident Distribution: {panel_name}"

    sns.histplot(panel_hours, bins=24, ax=ax, color=blue_palette[i])
    ax.set_title(panel_title)
    ax.set_xlabel(f"Hour of {panel_name}")
    ax.set_ylabel("No. of Accidents")

plt.tight_layout()