|
|
import csv |
|
|
import os |
|
|
|
|
|
# Directory containing the SNCF data files. The path is relative, so it is
# resolved against the current working directory of the process.
BASE_PATH = os.path.join(".", "data", "sncf")
|
|
|
|
|
|
|
|
|
|
|
def charger_infos_gares(fichier_stations):
    """
    Load station information from the 'sncf_stations_database' CSV file.

    The file is parsed as a semicolon-delimited CSV whose header row must
    provide the ``LIBELLE``, ``COMMUNE``, ``Y_WGS84`` and ``X_WGS84`` columns.

    Args:
        fichier_stations (str): Path to the station database file.

    Returns:
        Dict[str, Dict[str, str]]: A dictionary mapping each station name
        (the stripped ``LIBELLE`` value) to its details under the keys
        ``"commune"``, ``"latitude"`` (from ``Y_WGS84``) and ``"longitude"``
        (from ``X_WGS84``). All values are kept as stripped strings; a field
        missing from a short row becomes an empty string. Rows without a
        station name are skipped, and when a name appears several times the
        last row wins.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a required column is absent from the header row.
    """
    infos_gares = {}

    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
    # would otherwise stay glued to the first column name of the header.
    with open(fichier_stations, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.DictReader(csvfile, delimiter=";")
        for row in reader:
            # DictReader fills the missing fields of a short row with None,
            # hence the `or ""` before stripping.
            nom_gare = (row["LIBELLE"] or "").strip()
            if not nom_gare:
                # A record without a name can never be looked up.
                continue
            infos_gares[nom_gare] = {
                "commune": (row["COMMUNE"] or "").strip(),
                "latitude": (row["Y_WGS84"] or "").strip(),
                "longitude": (row["X_WGS84"] or "").strip(),
            }

    return infos_gares
|
|
|
|
|
|
|
|
|
|
|
def trouver_gare_par_nom(nom_gare, infos_gares):
    """
    Find a station in the loaded station information by name.

    An exact match on the station label is preferred. Otherwise the first
    label (in dictionary order) that contains ``nom_gare`` as a substring is
    used. The comparison is case-sensitive.

    Args:
        nom_gare (str): The name of the station to search for.
        infos_gares (Dict[str, Dict[str, str]]): A dictionary of station
            information keyed by station label.

    Returns:
        Optional[Dict[str, str]]: The station information if a match is
        found, or None otherwise (including when ``nom_gare`` is empty).
    """
    # An empty string is a substring of every label, so it would otherwise
    # "match" whichever station happens to come first.
    if not nom_gare:
        return None

    # Exact label first, so that e.g. "Lyon" is not shadowed by an earlier
    # label that merely contains it.
    if nom_gare in infos_gares:
        return infos_gares[nom_gare]

    for libelle, details in infos_gares.items():
        if nom_gare in libelle:
            return details

    return None
|
|
|
|
|
|
|
|
|
|
|
def creer_nouveau_fichier(fichier_timetables, fichier_stations, fichier_sortie):
    """
    Generate a new CSV file containing station information by matching names from the timetables
    with those in the station database.

    Each timetable row provides a ``trajet`` value of the form
    ``"<departure> - <arrival>"``. Both station names are looked up in the
    station database after removing the ``"Gare de "`` text, and every station
    that is found is written once to the output file.

    Args:
        fichier_timetables (str): Path to the tab-delimited 'timetables.csv' file, which must have a
            ``trajet`` column.
        fichier_stations (str): Path to the station database file with station details.
        fichier_sortie (str): Path to the output file where the matched station details will be written.
            It is overwritten if it already exists.

    Returns:
        None: The function writes the output directly to the specified file and prints any stations for which
        no information was found, followed by their count.

    Raises:
        OSError: If one of the files cannot be opened.
        KeyError: If the timetable file has no ``trajet`` column.
    """
    infos_gares = charger_infos_gares(fichier_stations)

    gares_deja_inscrites = set()
    gares_sans_info = set()

    # The timetable is opened before the output file so that a missing or
    # unreadable input does not leave behind a freshly truncated output.
    # "utf-8-sig" also accepts a file that starts with a BOM.
    with open(fichier_timetables, newline="", encoding="utf-8-sig") as timetablefile:
        with open(fichier_sortie, mode="w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Nom de la gare", "Commune", "Latitude", "Longitude"])

            reader = csv.DictReader(timetablefile, delimiter="\t")
            for row in reader:
                # Only the first two " - "-separated parts are used (departure
                # and arrival). Slicing instead of indexing keeps a row with a
                # missing separator or an empty field from raising.
                morceaux = (row["trajet"] or "").split(" - ")[:2]

                for gare in (morceau.strip() for morceau in morceaux):
                    if not gare:
                        # Malformed row: no station name to look up.
                        continue
                    if gare in gares_deja_inscrites or gare in gares_sans_info:
                        # Already resolved; the lookup result would be the same.
                        continue

                    nom_reduit = gare.replace("Gare de ", "").strip()
                    info = trouver_gare_par_nom(nom_reduit, infos_gares)

                    if info:
                        writer.writerow(
                            [gare, info["commune"], info["latitude"], info["longitude"]]
                        )
                        gares_deja_inscrites.add(gare)
                    else:
                        gares_sans_info.add(gare)

    # Sorted so that the report is the same from one run to the next.
    for gare in sorted(gares_sans_info):
        print(f"Infos non trouvées pour la gare : {gare}")
    print("Nombre de gares sans informations : ", len(gares_sans_info))
|
|
|
|
|
|
|
|
|
|
|
# Input and output files, all located under BASE_PATH.
fichier_timetables = os.path.join(BASE_PATH, "timetables.csv")
fichier_stations = os.path.join(BASE_PATH, "sncf_stations_database.csv")
fichier_sortie = os.path.join(BASE_PATH, "gares_info.csv")

# NOTE(review): this call runs at module level, so it is executed on import as
# well as when the file is run as a script. Consider moving it under an
# `if __name__ == "__main__":` guard once no caller relies on the import-time run.
creer_nouveau_fichier(fichier_timetables, fichier_stations, fichier_sortie)
|
|
|