| from __future__ import annotations | |
| import argparse | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
| PROJECT_ROOT = Path(__file__).resolve().parent | |
| PYTHON = sys.executable | |
| def run_step(cmd: list[str], desc: str) -> None: | |
| print(f"\n=== {desc} ===") | |
| result = subprocess.run(cmd, check=False) | |
| if result.returncode != 0: | |
| raise SystemExit(f"Echec de l'étape '{desc}' (code {result.returncode}). Commande: {' '.join(cmd)}") | |
| def main() -> None: | |
| parser = argparse.ArgumentParser( | |
| description="Pipeline orchestration: preprocess -> features -> train -> predict", | |
| ) | |
| parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.") | |
| parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.") | |
| parser.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).") | |
| parser.add_argument("--target-year", type=int, default=2026, help="Année cible.") | |
| parser.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).") | |
| parser.add_argument("--skip-preprocess", action="store_true", help="Ne pas relancer le prétraitement.") | |
| parser.add_argument("--skip-features", action="store_true", help="Ne pas reconstruire le panel.") | |
| parser.add_argument("--skip-train", action="store_true", help="Ne pas réentraîner le modèle.") | |
| parser.add_argument("--skip-predict", action="store_true", help="Ne pas générer les prédictions CSV.") | |
| args = parser.parse_args() | |
| interim_path = PROJECT_ROOT / "data" / "interim" / "elections_long.parquet" | |
| panel_path = PROJECT_ROOT / "data" / "processed" / "panel.parquet" | |
| model_path = PROJECT_ROOT / "models" / "hist_gradient_boosting.joblib" | |
| if not args.skip_preprocess: | |
| run_step( | |
| [ | |
| PYTHON, | |
| "-m", | |
| "src.data.preprocess", | |
| "--raw-dir", | |
| str(args.raw_dir), | |
| "--output-dir", | |
| str(PROJECT_ROOT / "data" / "interim"), | |
| ], | |
| "Prétraitement (format long)", | |
| ) | |
| if not args.skip_features: | |
| run_step( | |
| [ | |
| PYTHON, | |
| "-m", | |
| "src.features.build_features", | |
| "--elections-long", | |
| str(interim_path), | |
| "--mapping", | |
| str(args.mapping), | |
| "--output", | |
| str(panel_path), | |
| "--output-csv", | |
| str(PROJECT_ROOT / "data" / "processed" / "panel.csv"), | |
| ], | |
| "Construction du panel features+cibles", | |
| ) | |
| if not args.skip_train: | |
| run_step( | |
| [ | |
| PYTHON, | |
| "-m", | |
| "src.model.train", | |
| "--panel", | |
| str(panel_path), | |
| "--reports-dir", | |
| str(PROJECT_ROOT / "reports"), | |
| "--models-dir", | |
| str(PROJECT_ROOT / "models"), | |
| ], | |
| "Entraînement / évaluation des modèles", | |
| ) | |
| if not args.skip_predict: | |
| run_step( | |
| [ | |
| PYTHON, | |
| "-m", | |
| "src.model.predict", | |
| "--model-path", | |
| str(model_path), | |
| "--feature-columns", | |
| str(PROJECT_ROOT / "models" / "feature_columns.json"), | |
| "--elections-long", | |
| str(interim_path), | |
| "--mapping", | |
| str(args.mapping), | |
| "--target-election-type", | |
| args.target_election, | |
| "--target-year", | |
| str(args.target_year), | |
| "--commune-code", | |
| args.commune_code, | |
| "--output-dir", | |
| str(PROJECT_ROOT / "predictions"), | |
| ], | |
| "Génération des prédictions CSV", | |
| ) | |
| print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.") | |
| if __name__ == "__main__": | |
| main() | |