| from datetime import date | |
| from pathlib import Path | |
| from pandas import DataFrame | |
| from modules import ( | |
| METADATA, | |
| get_date_range, | |
| get_rules_in_window, | |
| groupby_agency, | |
| groupby_date, | |
| ) | |
def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
    """Save output as CSV files.

    Three files are written into ``path``, each suffixed with the year span
    ``{transition_year - 1}_{transition_year}``: ``rules_<span>.csv``,
    ``rules_by_agency_<span>.csv`` and ``rules_by_month_<span>.csv``.
    The target directory (and any missing parents) is created when absent.
    Row indexes are not written.

    Args:
        path (Path): Save data here.
        df_all (DataFrame): Data at the rule level.
        df_agency (DataFrame): Data grouped by agency.
        df_ym (DataFrame): Data grouped by publication year and month.
        transition_year (int): Presidential transition year.
    """
    # Build the shared suffix first so a bad transition_year fails before
    # anything touches the filesystem.
    year_span = f"{transition_year - 1}_{transition_year}"
    outputs = (
        (f"rules_{year_span}.csv", df_all),
        (f"rules_by_agency_{year_span}.csv", df_agency),
        (f"rules_by_month_{year_span}.csv", df_ym),
    )

    # Callers may hand over a directory that does not exist yet; create it
    # rather than letting to_csv fail with an OSError.
    out_dir = Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)

    for file_name, data in outputs:
        data.to_csv(out_dir / file_name, index=False)
def main(
    start_date: str,
    save_data: bool = True,
    path: Path | None = None,
    metadata: dict | None = None,
    significant: bool = True,
):
    """Retrieve rules in CRA window and save resulting data.

    Args:
        start_date (str): Beginning of the window as an ISO-format date
            string; it is parsed with ``date.fromisoformat``.
        save_data (bool, optional): When true, write the three result tables
            to CSV via ``save_csv``. Defaults to True.
        path (Path | None, optional): Directory for the CSV files. Defaults
            to None, which means the directory containing this file.
        metadata (dict | None, optional): Forwarded to ``groupby_agency``.
            Defaults to None.
        significant (bool, optional): Forwarded to ``get_rules_in_window``
            (as ``get_significant``) and to both grouping helpers. Forced to
            False when ``start_date`` is earlier than 2023-04-06.
            Defaults to True.

    Returns:
        tuple: The rule-level data, the data grouped by agency, and the data
        grouped by date, in that order.
    """
    # NOTE(review): windows opening before this date never request
    # significance -- presumably that data is unavailable earlier; confirm.
    cutoff = date(2023, 4, 6)
    if date.fromisoformat(start_date) < cutoff:
        significant = False

    transition_year = get_date_range(start_date).get("transition_year")

    rules, _ = get_rules_in_window(start_date, get_significant=significant)
    by_agency = groupby_agency(rules, metadata=metadata, significant=significant)
    by_month = groupby_date(rules, significant=significant)
    results = (rules, by_agency, by_month)

    if not save_data:
        return results

    # With no explicit destination, write next to this source file.
    target_dir = Path(__file__).parent if path is None else path
    save_csv(target_dir, *results, transition_year)
    return results
if __name__ == "__main__":
    # CSV output goes to a "data" folder next to this script.
    main_path = Path(__file__).parent
    data_path = main_path / "data"
    # mkdir(exist_ok=True) is already a no-op for an existing directory, so
    # no separate exists() check is needed.
    data_path.mkdir(parents=True, exist_ok=True)

    # Strip stray whitespace so a padded entry still parses as an ISO date.
    start = input("Enter beginning of CRA window [yyyy-mm-dd]: ").strip()
    df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)

    # Brief console summary of what was retrieved.
    print(f"Rules in CRA window: {len(df)}")
    print("\nRules by agency\n", agency.head(10))
    print("\nRules by month\n", ym)