Mark Febrizio
committed on
Commit
•
a0ce115
1
Parent(s):
7439517
Update get_rules_in_window.py
Browse files
modules/get_rules_in_window.py
CHANGED
@@ -47,6 +47,8 @@ def get_date_range(start_date: str, end_mmdd: str = "01-03"):
|
|
47 |
|
48 |
|
49 |
def get_rules(date_range: dict) -> list[dict]:
|
|
|
|
|
50 |
results, _ = get_documents_by_date(
|
51 |
start_date=date_range.get("start"),
|
52 |
end_date=date_range.get("end"),
|
@@ -90,7 +92,16 @@ def filter_new_admin_rules(
|
|
90 |
transition_year: int,
|
91 |
date_col: str = "publication_date",
|
92 |
):
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
admin_transitions = {
|
95 |
2001: "george-w-bush",
|
96 |
2009: "barack-obama",
|
@@ -139,7 +150,19 @@ def filter_corrections(df: DataFrame):
|
|
139 |
raise SearchError(f"{len(df)} != {len(df_no_corrections)} + {len(df_corrections)}")
|
140 |
|
141 |
|
142 |
-
def get_significant_rules(df, start_date):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
process_columns = ("significant", "3f1_significant", )
|
144 |
if date.fromisoformat(start_date) < date(2023, 4, 6):
|
145 |
raise DataAvailabilityError("This program does not calculate significant rule counts prior to Executive Order 14094 of April 6, 2023.")
|
@@ -159,14 +182,24 @@ def get_significant_rules(df, start_date):
|
|
159 |
return df, last_updated
|
160 |
|
161 |
|
162 |
-
def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
date_range = get_date_range(start_date)
|
164 |
transition_year = date_range.get("transition_year")
|
165 |
results = get_rules(date_range)
|
166 |
df = format_documents(results)
|
167 |
df, _ = filter_corrections(df)
|
168 |
df = filter_new_admin_rules(df, transition_year)
|
169 |
-
df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=
|
170 |
if get_significant:
|
171 |
df, last_updated = get_significant_rules(df, start_date)
|
172 |
else:
|
@@ -174,12 +207,23 @@ def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=
|
|
174 |
return df, last_updated
|
175 |
|
176 |
|
177 |
-
def get_list_agencies(start_date, agency_column: str = "parent_slug", significant: bool = True):
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
df_ex = df.explode(agency_column, ignore_index=True)
|
180 |
return sorted(df_ex[agency_column].value_counts().index.to_list())
|
181 |
|
182 |
|
|
|
183 |
DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
|
184 |
AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
|
185 |
|
|
|
47 |
|
48 |
|
49 |
def get_rules(date_range: dict) -> list[dict]:
|
50 |
+
"""Get rules within a date range.
|
51 |
+
"""
|
52 |
results, _ = get_documents_by_date(
|
53 |
start_date=date_range.get("start"),
|
54 |
end_date=date_range.get("end"),
|
|
|
92 |
transition_year: int,
|
93 |
date_col: str = "publication_date",
|
94 |
):
|
95 |
+
"""Remove rules issued by the new administration.
|
96 |
+
|
97 |
+
Args:
|
98 |
+
df (DataFrame): Input data.
|
99 |
+
transition_year (int): The year of the presidential transition.
|
100 |
+
date_col (str, optional): Column containing date information. Defaults to "publication_date".
|
101 |
+
|
102 |
+
Returns:
|
103 |
+
DataFrame: Filtered data.
|
104 |
+
"""
|
105 |
admin_transitions = {
|
106 |
2001: "george-w-bush",
|
107 |
2009: "barack-obama",
|
|
|
150 |
raise SearchError(f"{len(df)} != {len(df_no_corrections)} + {len(df_corrections)}")
|
151 |
|
152 |
|
153 |
+
def get_significant_rules(df: DataFrame, start_date: str) -> tuple[DataFrame, date]:
|
154 |
+
"""Get significant rules and merge with FR data.
|
155 |
+
|
156 |
+
Args:
|
157 |
+
df (DataFrame): Input data.
|
158 |
+
start_date (str): Start date of significant rule data.
|
159 |
+
|
160 |
+
Raises:
|
161 |
+
DataAvailabilityError: Raised when requesting significant rule counts prior to Executive Order 14094 of April 6, 2023.
|
162 |
+
|
163 |
+
Returns:
|
164 |
+
tuple[DataFrame, datetime.date]: Data with significant rules, last updated date for significant data
|
165 |
+
"""
|
166 |
process_columns = ("significant", "3f1_significant", )
|
167 |
if date.fromisoformat(start_date) < date(2023, 4, 6):
|
168 |
raise DataAvailabilityError("This program does not calculate significant rule counts prior to Executive Order 14094 of April 6, 2023.")
|
|
|
182 |
return df, last_updated
|
183 |
|
184 |
|
185 |
+
def get_rules_in_window(start_date: str, get_significant: bool = True, metadata: dict = METADATA):
|
186 |
+
"""Retrieve and process rules in a given CRA window.
|
187 |
+
|
188 |
+
Args:
|
189 |
+
start_date (str): Start date of window.
|
190 |
+
get_significant (bool, optional): Get significant rule data. Defaults to True.
|
191 |
+
metadata (dict, optional): Agency metadata. Defaults to METADATA.
|
192 |
+
|
193 |
+
Returns:
|
194 |
+
tuple[DataFrame, datetime.date]: Data with significant rules, last updated date for significant data
|
195 |
+
"""
|
196 |
date_range = get_date_range(start_date)
|
197 |
transition_year = date_range.get("transition_year")
|
198 |
results = get_rules(date_range)
|
199 |
df = format_documents(results)
|
200 |
df, _ = filter_corrections(df)
|
201 |
df = filter_new_admin_rules(df, transition_year)
|
202 |
+
df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=metadata, metadata_value="acronym")
|
203 |
if get_significant:
|
204 |
df, last_updated = get_significant_rules(df, start_date)
|
205 |
else:
|
|
|
207 |
return df, last_updated
|
208 |
|
209 |
|
210 |
+
def get_list_agencies(start_date: str, agency_column: str = "parent_slug", significant: bool = True, **kwargs):
|
211 |
+
"""Get list of agencies with rules in dataset.
|
212 |
+
|
213 |
+
Args:
|
214 |
+
start_date (str): Start date of window.
|
215 |
+
agency_column (str, optional): Column containing agency values. Defaults to "parent_slug".
|
216 |
+
significant (bool, optional): Get significant rule data. Defaults to True.
|
217 |
+
|
218 |
+
Returns:
|
219 |
+
list: List of agencies
|
220 |
+
"""
|
221 |
+
df, _ = get_rules_in_window(start_date, get_significant=significant, **kwargs)
|
222 |
df_ex = df.explode(agency_column, ignore_index=True)
|
223 |
return sorted(df_ex[agency_column].value_counts().index.to_list())
|
224 |
|
225 |
|
226 |
+
# create objects to import in app
|
227 |
DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
|
228 |
AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
|
229 |
|