|
from datetime import date |
|
from pathlib import Path |
|
|
|
from pandas import DataFrame |
|
|
|
from modules import ( |
|
METADATA, |
|
get_date_range, |
|
get_rules_in_window, |
|
groupby_agency, |
|
groupby_date, |
|
) |
|
|
|
|
|
def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
    """Save output as CSV files.

    Three files are written into ``path``. Each name ends with the year
    before the transition year and the transition year itself, e.g. for
    ``transition_year=2025``: ``rules_2024_2025.csv``,
    ``rules_by_agency_2024_2025.csv`` and ``rules_by_month_2024_2025.csv``.
    The DataFrame index is not written to the files.

    Args:
        path (Path): Save data here. The directory (and any missing
            parents) is created if it does not exist yet.
        df_all (DataFrame): Data at the rule level.
        df_agency (DataFrame): Data grouped by agency.
        df_ym (DataFrame): Data grouped by publication year and month.
        transition_year (int): Presidential transition year.
    """
    # Accept plain strings as well as Path objects, and make sure the target
    # directory exists so that to_csv does not fail on a fresh checkout.
    out_dir = Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)

    # All three files share the same "<previous year>_<transition year>" suffix.
    year_suffix = f"{transition_year - 1}_{transition_year}.csv"
    outputs = (
        (f"rules_{year_suffix}", df_all),
        (f"rules_by_agency_{year_suffix}", df_agency),
        (f"rules_by_month_{year_suffix}", df_ym),
    )
    for file_name, data in outputs:
        data.to_csv(out_dir / file_name, index=False)
|
|
|
|
|
def main(start_date: str, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
    """Retrieve rules in CRA window and save resulting data.

    Args:
        start_date (str): Beginning of the window as an ISO date string
            (parsed with ``date.fromisoformat``).
        save_data (bool): When True, write the results with ``save_csv``.
        path (Path | None): Output directory. When None, the directory
            containing this file is used.
        metadata (dict | None): Passed through to ``groupby_agency``.
        significant (bool): Passed to the retrieval and grouping helpers.
            Forced to False when ``start_date`` is earlier than 2023-04-06.

    Returns:
        tuple: The rule-level data returned by ``get_rules_in_window``,
        followed by the results of ``groupby_agency`` and ``groupby_date``.
    """
    # NOTE(review): the 2023-04-06 cutoff presumably marks when the
    # "significant" information became available -- confirm with the
    # data source before changing it.
    cutoff = date(2023, 4, 6)
    window_start = date.fromisoformat(start_date)
    if window_start < cutoff:
        significant = False

    window_info = get_date_range(start_date)
    transition_year = window_info.get("transition_year")

    # Only the first element of the returned pair is used here.
    rules, _ = get_rules_in_window(start_date, get_significant=significant)

    by_agency = groupby_agency(rules, metadata=metadata, significant=significant)
    by_month = groupby_date(rules, significant=significant)

    if save_data:
        # Default to the directory that holds this script.
        out_dir = Path(__file__).parent if path is None else path
        save_csv(out_dir, rules, by_agency, by_month, transition_year)

    return rules, by_agency, by_month
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: prompt for the window start, run the retrieval,
    # write the CSV output to a "data" directory next to this file, and
    # print a short summary.
    main_path = Path(__file__).parent
    data_path = main_path.joinpath("data")
    # exist_ok=True already tolerates an existing directory, so a separate
    # exists() check is unnecessary (and would be race-prone).
    data_path.mkdir(parents=True, exist_ok=True)

    # Strip surrounding whitespace so a stray space or newline does not make
    # the ISO date parsing inside main() fail.
    start = input("Enter beginning of CRA window [yyyy-mm-dd]: ").strip()
    df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)
    print(f"Rules in CRA window: {len(df)}")
    print("\nRules by agency\n", agency.head(10))
    print("\nRules by month\n", ym)
|
|