import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

# Requires seaborn; apply its default theme to the plots created below
sns.set(color_codes=True)

# Load the dataset
df = pd.read_csv('/input/us-accidents/US_Accidents_March23.csv')

# Convert 'Start_Time' to datetime format
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# Derive hour-of-day and day-of-week once, rather than recomputing them (and
# copying a filtered DataFrame) for every panel.
accident_hour = df['Start_Time'].dt.hour
accident_weekday = df['Start_Time'].dt.dayofweek  # pandas: Monday=0 ... Sunday=6

# Panel labels: index 0 is the overall panel; index k (1..7) is the weekday
# whose pandas dayofweek value is k - 1, so Monday comes first.
day_names = ['Overall', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
             'Friday', 'Saturday', 'Sunday']

# Create a 4x2 subplot grid: one overall panel plus one per day of the week
fig, axes = plt.subplots(4, 2, figsize=(18, 10))
plt.subplots_adjust(hspace=0.5)  # Adjust vertical space between subplot rows

# Create a gradient blue color palette with one shade per panel
n_colors = len(day_names)
blue_palette = sns.light_palette("blue", n_colors=n_colors, reverse=True)

# Plot the overall distribution and then each day of the week.
# axes.flat walks the grid row by row, i.e. the same order as axes[i//2, i%2].
for i, (ax, panel_name) in enumerate(zip(axes.flat, day_names)):
    if i == 0:
        panel_hours = accident_hour
        title = "Overall Hourly Accident Distribution"
    else:
        panel_hours = accident_hour[accident_weekday == i - 1]
        title = f"Hourly Accident Distribution: {panel_name}"

    # discrete=True draws one bar per integer hour, centered on that hour,
    # even if some hours are absent from the subset being plotted.
    sns.histplot(panel_hours, discrete=True, ax=ax, color=blue_palette[i])
    ax.set_title(title)
    ax.set_xlabel(f"Hour of {panel_name}")
    ax.set_ylabel("No. of Accidents")

plt.tight_layout()