cra-window-rules / cra_window_rules.py
Mark Febrizio
Documentation (#24)
fe4f734 unverified
raw
history blame
2.29 kB
from datetime import date
from pathlib import Path
from pandas import DataFrame
from modules import (
METADATA,
get_date_range,
get_rules_in_window,
groupby_agency,
groupby_date,
)
def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int) -> None:
    """Save output as CSV files.

    Three files are written into ``path``, each named with the year before
    the transition year and the transition year itself, e.g. for 2025:
    ``rules_2024_2025.csv``, ``rules_by_agency_2024_2025.csv`` and
    ``rules_by_month_2024_2025.csv``. The DataFrame index is not written.

    Args:
        path (Path): Save data here. The directory, including any missing
            parents, is created when it does not exist yet.
        df_all (DataFrame): Data at the rule level.
        df_agency (DataFrame): Data grouped by agency.
        df_ym (DataFrame): Data grouped by publication year and month.
        transition_year (int): Presidential transition year.
    """
    # to_csv raises OSError for a missing directory, so create it up front
    # rather than relying on every caller to have done so.
    out_dir = Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)

    year_span = f"{transition_year - 1}_{transition_year}"
    outputs = (
        (f"rules_{year_span}.csv", df_all),
        (f"rules_by_agency_{year_span}.csv", df_agency),
        (f"rules_by_month_{year_span}.csv", df_ym),
    )
    for file_name, data in outputs:
        data.to_csv(out_dir / file_name, index=False)
def main(start_date: str, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
    """Retrieve rules in CRA window and save resulting data.

    Args:
        start_date (str): Beginning of the window as an ISO date (yyyy-mm-dd).
        save_data (bool, optional): When True, write the results with
            `save_csv`. Defaults to True.
        path (Path | None, optional): Output directory. When None, the
            directory containing this file is used. Defaults to None.
        metadata (dict | None, optional): Passed through to `groupby_agency`.
            Defaults to None.
        significant (bool, optional): Forwarded to the retrieval and grouping
            helpers. Overridden to False when `start_date` is earlier than
            2023-04-06. Defaults to True.

    Returns:
        tuple: The rule-level data, the data grouped by agency, and the data
        grouped by date, in that order.
    """
    # NOTE(review): presumably significance data is not usable before this
    # date -- confirm against the `modules` package.
    cutoff = date(2023, 4, 6)
    significant = False if date.fromisoformat(start_date) < cutoff else significant

    transition_year = get_date_range(start_date).get("transition_year")

    # Only the first element of the returned pair is used here.
    rules, _ = get_rules_in_window(start_date, get_significant=significant)
    by_agency = groupby_agency(rules, metadata=metadata, significant=significant)
    by_month = groupby_date(rules, significant=significant)

    if save_data:
        out_dir = Path(__file__).parent if path is None else path
        save_csv(out_dir, rules, by_agency, by_month, transition_year)

    return rules, by_agency, by_month
if __name__ == "__main__":
    # Output goes to a "data" folder next to this script; mkdir with
    # exist_ok=True is a no-op when the folder is already there.
    main_path = Path(__file__).parent
    data_path = main_path.joinpath("data")
    data_path.mkdir(parents=True, exist_ok=True)

    # Strip surrounding whitespace so input such as "2024-08-01 " still
    # parses as an ISO date inside main().
    start = input("Enter beginning of CRA window [yyyy-mm-dd]: ").strip()
    df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)

    # Brief console summary of the retrieved data.
    print(f"Rules in CRA window: {len(df)}")
    print("\nRules by agency\n", agency.head(10))
    print("\nRules by month\n", ym)