"""Collect rules for a CRA window, summarize them, and optionally save CSVs."""

from datetime import date
from pathlib import Path

from pandas import DataFrame

from modules import (
    METADATA,
    get_date_range,
    get_rules_in_window,
    groupby_agency,
    groupby_date,
)

# Windows that start before this date are always processed with
# ``significant=False`` (see ``main``).
# NOTE(review): presumably significance data is not usable before this
# date -- confirm against the data source.
_SIGNIFICANT_CUTOFF = date(2023, 4, 6)


def save_csv(
    path: Path,
    df_all: DataFrame,
    df_agency: DataFrame,
    df_ym: DataFrame,
    transition_year: int,
) -> None:
    """Write the three result tables to CSV files inside *path*.

    The file names embed the span ``{transition_year - 1}_{transition_year}``:
    ``rules_<span>.csv``, ``rules_by_agency_<span>.csv`` and
    ``rules_by_month_<span>.csv``. Row indexes are not written.

    Args:
        path: Directory that receives the files; created if it is missing.
        df_all: Table written to ``rules_<span>.csv``.
        df_agency: Table written to ``rules_by_agency_<span>.csv``.
        df_ym: Table written to ``rules_by_month_<span>.csv``.
        transition_year: Later year of the two-year span used in file names.
    """
    # Build the names first so a bad ``transition_year`` fails before any I/O.
    span = f"{transition_year - 1}_{transition_year}"
    outputs = (
        (df_all, f"rules_{span}.csv"),
        (df_agency, f"rules_by_agency_{span}.csv"),
        (df_ym, f"rules_by_month_{span}.csv"),
    )

    # ``to_csv`` refuses to write into a directory that does not exist.
    path.mkdir(parents=True, exist_ok=True)

    for frame, file_name in outputs:
        frame.to_csv(path / file_name, index=False)


def main(
    start_date: str,
    save_data: bool = True,
    path: Path | None = None,
    metadata: dict | None = None,
    significant: bool = True,
) -> tuple[DataFrame, DataFrame, DataFrame]:
    """Retrieve the rules in the window starting at *start_date* and summarize them.

    Args:
        start_date: ISO-format date string (``yyyy-mm-dd``) for the window start.
        save_data: When true, write the three tables with ``save_csv``.
        path: Output directory; defaults to the directory containing this file.
        metadata: Passed through to ``groupby_agency``.
        significant: Passed to the retrieval and grouping helpers. Forced to
            ``False`` when *start_date* is earlier than 2023-04-06.

    Returns:
        A tuple ``(df, df_agency, df_ym)``: the retrieved rules, the result of
        ``groupby_agency`` and the result of ``groupby_date``.

    Raises:
        ValueError: If *start_date* is not a valid ISO-format date.
    """
    if date.fromisoformat(start_date) < _SIGNIFICANT_CUTOFF:
        significant = False

    date_range = get_date_range(start_date)
    transition_year = date_range.get("transition_year")

    # The helper returns a pair; only the first element is used here.
    df, _ = get_rules_in_window(start_date, get_significant=significant)
    df_agency = groupby_agency(df, metadata=metadata, significant=significant)
    df_ym = groupby_date(df, significant=significant)

    if save_data:
        if path is None:
            path = Path(__file__).parent
        save_csv(path, df, df_agency, df_ym, transition_year)

    return df, df_agency, df_ym


if __name__ == "__main__":
    main_path = Path(__file__).parent
    data_path = main_path.joinpath("data")
    # ``exist_ok=True`` already covers the case where the directory exists.
    data_path.mkdir(parents=True, exist_ok=True)

    # Strip stray whitespace so an otherwise valid date is not rejected.
    start = input("Enter beginning of CRA window [yyyy-mm-dd]: ").strip()
    df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)

    print(f"Rules in CRA window: {len(df)}")
    print("\nRules by agency\n", agency.head(10))
    print("\nRules by month\n", ym)