Formula1-race-debriefing / db /formula1_databases.py
Draichi's picture
chore: organize the code into `/backup, /db, /notebooks, /regression-models` and `/SQL`
b4f5da6 unverified
raw
history blame
25.9 kB
from typing import Any, cast
import sqlite3
import pandas as pd
from datetime import datetime
from fastf1.core import Session
import fastf1
from rich.console import Console
console = Console(style="chartreuse1 on grey7")
class FastF1ToSQL:
"""
A class to convert FastF1 data into a SQLite database.
"""
def __init__(self, db_path: str) -> None:
"""
Initialize the FastF1ToSQL class.
Args:
db_path (str): Path to the SQLite database file.
"""
self.db_path = db_path
self.conn = sqlite3.connect(db_path, timeout=20)
self.cursor = self.conn.cursor()
self.__create_tables()
def __create_tables(self) -> None:
"""Create all necessary tables and indexes if they don't exist."""
self.cursor.executescript('''
CREATE TABLE IF NOT EXISTS Drivers (
driver_id INTEGER PRIMARY KEY,
driver_name TEXT NOT NULL,
team TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS Tracks (
track_id INTEGER PRIMARY KEY,
track_name TEXT NOT NULL,
country TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS Event (
event_id INTEGER PRIMARY KEY,
round_number INTEGER,
country TEXT,
location TEXT,
event_date DATE,
event_name TEXT,
session_1_date_utc DATETIME,
session_1_name TEXT,
session_2_date_utc DATETIME,
session_2_name TEXT,
session_3_date_utc DATETIME,
session_3_name TEXT,
session_4_date_utc DATETIME,
session_4_name TEXT,
session_5_date_utc DATETIME,
session_5_name TEXT
);
CREATE TABLE IF NOT EXISTS Sessions (
session_id INTEGER PRIMARY KEY,
event_id INTEGER,
track_id INTEGER,
session_type TEXT NOT NULL,
date DATETIME NOT NULL,
FOREIGN KEY (event_id) REFERENCES Event(event_id),
FOREIGN KEY (track_id) REFERENCES Tracks(track_id)
);
CREATE TABLE IF NOT EXISTS Weather (
weather_id INTEGER PRIMARY KEY,
session_id INTEGER,
datetime DATETIME,
air_temperature_in_celsius REAL,
relative_air_humidity_in_percentage REAL,
air_pressure_in_mbar REAL,
is_raining BOOLEAN,
track_temperature_in_celsius REAL,
wind_direction_in_grads REAL,
wind_speed_in_meters_per_seconds REAL,
FOREIGN KEY (session_id) REFERENCES Sessions(session_id)
);
CREATE TABLE IF NOT EXISTS Laps (
lap_id INTEGER PRIMARY KEY,
session_id INTEGER,
driver_name TEXT NOT NULL,
lap_number INTEGER NOT NULL,
stint INTEGER,
sector_1_speed_trap_in_km REAL,
sector_2_speed_trap_in_km REAL,
finish_line_speed_trap_in_km REAL,
longest_strait_speed_trap_in_km REAL,
is_personal_best BOOLEAN,
tyre_compound TEXT,
tyre_life_in_laps INTEGER,
is_fresh_tyre BOOLEAN,
position INTEGER,
lap_time_in_seconds REAL,
sector_1_time_in_seconds REAL,
sector_2_time_in_seconds REAL,
sector_3_time_in_seconds REAL,
lap_start_time_in_datetime DATETIME,
pin_in_time_in_datetime DATETIME,
pin_out_time_in_datetime DATETIME,
FOREIGN KEY (session_id) REFERENCES Sessions(session_id),
UNIQUE (session_id, driver_name, lap_number)
);
CREATE TABLE IF NOT EXISTS Telemetry (
telemetry_id INTEGER PRIMARY KEY,
lap_id INTEGER,
driver_name TEXT NOT NULL,
speed_in_km REAL,
RPM INTEGER,
gear_number INTEGER,
throttle_input REAL,
is_brake_pressed BOOLEAN,
is_DRS_open BOOLEAN,
x_position REAL,
y_position REAL,
z_position REAL,
is_off_track BOOLEAN,
datetime DATETIME,
FOREIGN KEY (lap_id) REFERENCES Laps(lap_id),
FOREIGN KEY (driver_name) REFERENCES Drivers(driver_name)
);
CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
CREATE INDEX IF NOT EXISTS idx_laps_session_id ON Laps(session_id);
CREATE INDEX IF NOT EXISTS idx_telemetry_lap_id ON Telemetry(lap_id);
CREATE INDEX IF NOT EXISTS idx_telemetry_datetime ON Telemetry(datetime);
CREATE INDEX IF NOT EXISTS idx_weather_session_id ON Weather(session_id);
CREATE INDEX IF NOT EXISTS idx_weather_datetime ON Weather(datetime);
CREATE INDEX IF NOT EXISTS idx_event_date ON Event(event_date);
''')
self.conn.commit()
def process_session(self, session: Session) -> None:
"""
Process a session and insert the data into the database.
Args:
session (Session): The session to process.
"""
console.print(
f"> Processing session: {session.event.EventName} - {session.name}. This may take a while...")
# Load session data
session.load()
# Save session start date
self._session_start_date = session.t0_date
# Insert data into tables
self.insert_event(session)
self.insert_session(session)
self.insert_drivers(session)
self.insert_laps(session)
self.insert_telemetry(session)
self.insert_weather(session)
# Create data analysis views
self.__create_data_analysis_views()
# Commit changes and close connection
self.conn.commit()
self.conn.close()
def insert_event(self, session: Session) -> None:
"""
Insert the event data into the database.
Args:
session (Session): The FastF1 session object.
"""
event_data: dict[str, Any] = {
'round_number': int(session.event.RoundNumber),
'country': session.event.Country,
'location': session.event.Location,
'event_date': str(session.event.EventDate.date()),
'event_name': session.event.EventName,
'session_1_date_utc': str(session.event.Session1DateUtc),
'session_1_name': session.event.Session1.lower(),
'session_2_date_utc': str(session.event.Session2DateUtc),
'session_2_name': session.event.Session2.lower(),
'session_3_date_utc': str(session.event.Session3DateUtc),
'session_3_name': session.event.Session3.lower(),
'session_4_date_utc': str(session.event.Session4DateUtc),
'session_4_name': session.event.Session4.lower(),
'session_5_date_utc': str(session.event.Session5DateUtc),
'session_5_name': session.event.Session5.lower(),
}
columns = ', '.join(event_data.keys())
placeholders = ', '.join(['?' for _ in event_data])
query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, list(event_data.values()))
self._event_id = self.cursor.lastrowid
def insert_session(self, session: Session) -> None:
"""
Insert the session data into the database.
Args:
session (Session): The FastF1 session object.
"""
session_data: dict[str, Any] = {
# Assuming this is called right after insert_event
'event_id': self._event_id,
'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
'session_type': session.name,
'date': str(session.date),
}
columns = ', '.join(session_data.keys())
placeholders = ':' + ', :'.join(session_data.keys())
query = f"INSERT INTO Sessions ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, session_data)
self._session_id = self.cursor.lastrowid
def insert_drivers(self, session: Session) -> None:
"""
Insert the drivers data into the database.
Args:
session (Session): The FastF1 session object.
"""
for driver in session.drivers:
driver_info = session.get_driver(driver)
driver_data = {
'driver_name': driver_info['FullName'],
'team': driver_info['TeamName']
}
columns = ', '.join(driver_data.keys())
placeholders = ':' + ', :'.join(driver_data.keys())
query = f"INSERT OR IGNORE INTO Drivers ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, driver_data)
def insert_laps(self, session: Session) -> None:
"""
Insert the laps data into the database.
Args:
session (Session): The FastF1 session object.
"""
console.print("> Inserting laps data...")
laps_df = session.laps.copy()
laps_df['session_id'] = self._session_id
laps_df['lap_start_time_in_datetime'] = pd.to_datetime(
laps_df['LapStartDate'])
laps_df['pin_in_time_in_datetime'] = self._session_start_date + \
laps_df['PitInTime']
laps_df['pin_out_time_in_datetime'] = self._session_start_date + \
laps_df['PitOutTime']
for _, lap in laps_df.iterrows():
lap_data: dict[str, Any] = {
'session_id': lap['session_id'],
'driver_name': lap['Driver'],
'lap_number': lap['LapNumber'],
'sector_1_time_in_seconds': lap['Sector1Time'].total_seconds() if pd.notnull(lap['Sector1Time']) else None,
'sector_2_time_in_seconds': lap['Sector2Time'].total_seconds() if pd.notnull(lap['Sector2Time']) else None,
'sector_3_time_in_seconds': lap['Sector3Time'].total_seconds() if pd.notnull(lap['Sector3Time']) else None,
'lap_time_in_seconds': lap['LapTime'].total_seconds() if pd.notnull(lap['LapTime']) else None,
'finish_line_speed_trap_in_km': lap['SpeedFL'],
'longest_strait_speed_trap_in_km': lap['SpeedST'],
'is_personal_best': lap['IsPersonalBest'],
'tyre_compound': lap['Compound'],
'tyre_life_in_laps': lap['TyreLife'],
'is_fresh_tyre': lap['FreshTyre'],
'position': lap['Position'],
'lap_start_time_in_datetime': str(lap['lap_start_time_in_datetime']),
'pin_in_time_in_datetime': str(lap['pin_in_time_in_datetime']),
'pin_out_time_in_datetime': str(lap['pin_out_time_in_datetime']),
}
columns = ', '.join(lap_data.keys())
placeholders = ':' + ', :'.join(lap_data.keys())
query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, lap_data)
self.conn.commit()
def insert_telemetry(self, session: Session) -> None:
"""
Insert the telemetry data into the database.
Args:
session (Session): The FastF1 session object.
"""
console.print('> Inserting telemetry data...')
telemetry_data_list = []
for driver in session.drivers:
laps_per_driver = session.laps.pick_driver(driver)
driver_name = session.get_driver(driver)['Abbreviation']
console.print(f"> Processing telemetry for driver: {driver_name}")
for _, lap in laps_per_driver.iterrows():
lap_number = lap['LapNumber']
console.print(f"> Processing telemetry for lap: {lap_number}")
telemetry = lap.get_telemetry()
telemetry['datetime'] = self._session_start_date + \
telemetry['SessionTime']
# Sort telemetry data by datetime
telemetry_sorted = telemetry.sort_values('datetime')
# Floor the 'datetime' to the specified decimal of a second
telemetry_sorted['floored_datetime'] = telemetry_sorted['datetime'].apply(
lambda x: x.floor(f'{0.1}s')
)
# Keep only the first occurrence for each floored_datetime
telemetry_unique = telemetry_sorted.groupby(
'floored_datetime', as_index=False).first()
for _, sample in telemetry_unique.iterrows():
telemetry_data: dict[str, Any] = {
'lap_id': self.__get_lap_id(session, driver_name, sample['datetime']),
'driver_name': driver_name,
'speed_in_km': sample['Speed'],
'RPM': sample['RPM'],
'gear_number': sample['nGear'],
'throttle_input': sample['Throttle'],
'is_brake_pressed': sample['Brake'],
'is_DRS_open': sample['DRS'],
'x_position': round(sample['X'], 2),
'y_position': round(sample['Y'], 2),
'z_position': round(sample['Z'], 2),
'is_off_track': sample['Status'] == 'OffTrack',
'datetime': str(sample['datetime']),
}
telemetry_data_list.append(telemetry_data)
if telemetry_data_list:
columns = ', '.join(telemetry_data_list[0].keys())
placeholders = ':' + ', :'.join(telemetry_data_list[0].keys())
query = f"INSERT INTO Telemetry ({columns}) VALUES ({placeholders})"
self.cursor.executemany(query, telemetry_data_list)
def insert_weather(self, session: Session) -> None:
"""
Insert weather data into the Weather table.
Args:
session (Session): The FastF1 session containing weather data.
"""
weather_data = cast(pd.DataFrame, session.weather_data)
weather_data['session_id'] = self._session_id
weather_data['datetime'] = self._session_start_date + \
weather_data['Time']
for _, sample in weather_data.iterrows():
weather_sample: dict[str, Any] = {
'session_id': sample['session_id'],
'air_temperature_in_celsius': sample['AirTemp'],
'track_temperature_in_celsius': sample['TrackTemp'],
'wind_speed_in_meters_per_seconds': sample['WindSpeed'],
'wind_direction_in_grads': sample['WindDirection'],
'relative_air_humidity_in_percentage': sample['Humidity'],
'air_pressure_in_mbar': sample['Pressure'],
'is_raining': sample['Rainfall'],
'datetime': str(sample['datetime']),
}
columns = ', '.join(weather_sample.keys())
placeholders = ':' + ', :'.join(weather_sample.keys())
query = f"INSERT INTO Weather ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, weather_sample)
def get_or_create_track(self, track_name: str, country: str) -> int:
"""
Get the track_id for a given track, or create a new track if it doesn't exist.
Args:
track_name (str): The name of the track.
country (str): The country where the track is located.
Returns:
int: The track_id of the existing or newly created track.
"""
self.cursor.execute(
"SELECT track_id FROM Tracks WHERE track_name = ? AND country = ?", (track_name, country))
result = self.cursor.fetchone()
if result:
return result[0]
else:
self.cursor.execute(
"INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
return self.cursor.lastrowid or 0
def __get_lap_id(self, session: Session, driver_name: str, time: datetime) -> int:
"""
Get the lap_id for a given driver and time.
Args:
session (fastf1.core.Session): The FastF1 session.
driver (str): The driver's name or abbreviation.
time (pd.Timestamp): The timestamp to find the corresponding lap.
Returns:
int: The lap_id of the found lap.
"""
laps = session.laps.pick_driver(driver_name).copy()
# Convert LapStartDate to pd.Timestamp for proper comparison
laps['LapStartTime'] = pd.to_datetime(laps['LapStartDate'])
# Find the lap where the given time falls between LapStartTime and LapStartTime of the next lap
matching_laps = laps.loc[(laps['LapStartTime'] <= time) & (
laps['LapStartTime'].shift(-1) > time)]
if matching_laps.empty:
# Handle the case when no matching lap is found
print(
f"No matching lap found for driver {driver_name} at time {time}")
return 999 # or some default value, or raise a custom exception
lap = matching_laps.iloc[0]
if self._session_id is None:
raise ValueError("No ID was generated")
self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
(self._session_id, driver_name, lap['LapNumber']))
return self.cursor.fetchone()[0]
def __update_laps(self) -> None:
"""Update the laps table with the new data."""
console.print('> Updating laps table...')
self.cursor.execute('''
UPDATE Laps
SET
pin_in_time_in_datetime = CASE
WHEN pin_in_time_in_datetime = 'NaT' THEN NULL
ELSE pin_in_time_in_datetime
END,
pin_out_time_in_datetime = CASE
WHEN pin_out_time_in_datetime = 'NaT' THEN NULL
ELSE pin_out_time_in_datetime
END
WHERE
pin_in_time_in_datetime = 'NaT'
OR pin_out_time_in_datetime = 'NaT';
''')
def __create_data_analysis_views(self) -> None:
"""Create data analysis views in the database."""
console.print('> Creating data analysis views...')
self.cursor.executescript('''
-- 1. Driver Performance Summary with Weather
CREATE VIEW IF NOT EXISTS DriverPerformanceSummaryWithWeather AS
SELECT
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
COUNT(l.lap_id) AS total_laps,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time,
AVG(l.sector_1_time_in_seconds) AS avg_sector1_time,
AVG(l.sector_2_time_in_seconds) AS avg_sector2_time,
AVG(l.sector_3_time_in_seconds) AS avg_sector3_time,
AVG(l.finish_line_speed_trap_in_km) AS avg_finish_line_speed,
COUNT(CASE WHEN l.is_personal_best THEN 1 END) AS personal_best_laps,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.driver_name, e.event_id, s.session_id;
-- 2. Tyre Performance Analysis with Weather
CREATE VIEW IF NOT EXISTS TyrePerformanceAnalysisWithWeather AS
SELECT
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
l.tyre_compound,
AVG(l.tyre_life_in_laps) AS avg_tyre_life,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
AVG(l.longest_strait_speed_trap_in_km) AS avg_top_speed,
COUNT(CASE WHEN l.is_fresh_tyre THEN 1 END) AS fresh_tyre_laps,
COUNT(CASE WHEN NOT l.is_fresh_tyre THEN 1 END) AS used_tyre_laps,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.air_temperature_in_celsius) AS avg_air_temp
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.driver_name, e.event_id, s.session_id, l.tyre_compound;
-- 3. Weather Impact Analysis
CREATE VIEW IF NOT EXISTS WeatherImpactAnalysis AS
SELECT
e.event_name,
s.session_type,
t.track_name,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.relative_air_humidity_in_percentage) AS avg_humidity,
AVG(w.wind_speed_in_meters_per_seconds) AS avg_wind_speed,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time
FROM Weather w
JOIN Sessions s ON w.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
JOIN Laps l ON s.session_id = l.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY e.event_id, s.session_id;
-- 4. Event Performance Overview
CREATE VIEW IF NOT EXISTS EventPerformanceOverview AS
SELECT
e.event_name,
e.country,
e.location,
s.session_type,
COUNT(DISTINCT l.driver_name) AS driver_count,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time,
MAX(l.finish_line_speed_trap_in_km) AS max_finish_line_speed,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage
FROM Event e
JOIN Sessions s ON e.event_id = s.event_id
JOIN Laps l ON s.session_id = l.session_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY e.event_id, s.session_id;
-- 5. Telemetry Analysis with Weather (Optimized)
CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
WITH SampledTelemetry AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY lap_id ORDER BY RANDOM()) as rn
FROM Telemetry
)
SELECT
l.lap_id,
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
l.lap_number,
l.lap_time_in_seconds,
AVG(tel.speed_in_km) AS avg_speed,
MAX(tel.speed_in_km) AS max_speed,
AVG(tel.RPM) AS avg_RPM,
MAX(tel.RPM) AS max_RPM,
AVG(tel.throttle_input) AS avg_throttle,
SUM(CASE WHEN tel.is_brake_pressed THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS brake_percentage,
SUM(CASE WHEN tel.is_DRS_open THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS drs_usage_percentage,
SUM(CASE WHEN tel.is_off_track THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS off_track_percentage,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.wind_speed_in_meters_per_seconds) AS avg_wind_speed
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
JOIN SampledTelemetry tel ON l.lap_id = tel.lap_id AND tel.rn <= 100
LEFT JOIN Weather w ON s.session_id = w.session_id
AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.lap_id;
''')
self.conn.commit()
# Usage example:
session = fastf1.get_session(2023, 'Bahrain', 'Q')
converter = FastF1ToSQL('Bahrain_2023_Q.db')
converter.process_session(session)