Mark Febrizio committed on
Commit
a0ce115
1 Parent(s): 7439517

Update get_rules_in_window.py

Browse files
Files changed (1) hide show
  1. modules/get_rules_in_window.py +50 -6
modules/get_rules_in_window.py CHANGED
@@ -47,6 +47,8 @@ def get_date_range(start_date: str, end_mmdd: str = "01-03"):
47
 
48
 
49
  def get_rules(date_range: dict) -> list[dict]:
 
 
50
  results, _ = get_documents_by_date(
51
  start_date=date_range.get("start"),
52
  end_date=date_range.get("end"),
@@ -90,7 +92,16 @@ def filter_new_admin_rules(
90
  transition_year: int,
91
  date_col: str = "publication_date",
92
  ):
93
-
 
 
 
 
 
 
 
 
 
94
  admin_transitions = {
95
  2001: "george-w-bush",
96
  2009: "barack-obama",
@@ -139,7 +150,19 @@ def filter_corrections(df: DataFrame):
139
  raise SearchError(f"{len(df)} != {len(df_no_corrections)} + {len(df_corrections)}")
140
 
141
 
142
- def get_significant_rules(df, start_date):
 
 
 
 
 
 
 
 
 
 
 
 
143
  process_columns = ("significant", "3f1_significant", )
144
  if date.fromisoformat(start_date) < date(2023, 4, 6):
145
  raise DataAvailabilityError("This program does not calculate significant rule counts prior to Executive Order 14094 of April 6, 2023.")
@@ -159,14 +182,24 @@ def get_significant_rules(df, start_date):
159
  return df, last_updated
160
 
161
 
162
- def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
 
 
 
 
 
 
 
 
 
 
163
  date_range = get_date_range(start_date)
164
  transition_year = date_range.get("transition_year")
165
  results = get_rules(date_range)
166
  df = format_documents(results)
167
  df, _ = filter_corrections(df)
168
  df = filter_new_admin_rules(df, transition_year)
169
- df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=METADATA, metadata_value="acronym")
170
  if get_significant:
171
  df, last_updated = get_significant_rules(df, start_date)
172
  else:
@@ -174,12 +207,23 @@ def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=
174
  return df, last_updated
175
 
176
 
177
- def get_list_agencies(start_date, agency_column: str = "parent_slug", significant: bool = True):
178
- df, _ = get_rules_in_window(start_date, get_significant=significant)
 
 
 
 
 
 
 
 
 
 
179
  df_ex = df.explode(agency_column, ignore_index=True)
180
  return sorted(df_ex[agency_column].value_counts().index.to_list())
181
 
182
 
 
183
  DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
184
  AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
185
 
 
47
 
48
 
49
  def get_rules(date_range: dict) -> list[dict]:
50
+ """Get rules within a date range.
51
+ """
52
  results, _ = get_documents_by_date(
53
  start_date=date_range.get("start"),
54
  end_date=date_range.get("end"),
 
92
  transition_year: int,
93
  date_col: str = "publication_date",
94
  ):
95
+ """Remove rules issued by the new administration.
96
+
97
+ Args:
98
+ df (DataFrame): Input data.
99
+ transition_year (int): The year of the presidential transition.
100
+ date_col (str, optional): Column containing date information. Defaults to "publication_date".
101
+
102
+ Returns:
103
+ DataFrame: Filtered data.
104
+ """
105
  admin_transitions = {
106
  2001: "george-w-bush",
107
  2009: "barack-obama",
 
150
  raise SearchError(f"{len(df)} != {len(df_no_corrections)} + {len(df_corrections)}")
151
 
152
 
153
+ def get_significant_rules(df: DataFrame, start_date: str) -> tuple[DataFrame, date]:
154
+ """Get significant rules and merge with FR data.
155
+
156
+ Args:
157
+ df (DataFrame): Input data.
158
+ start_date (str): Start date of significant rule data.
159
+
160
+ Raises:
161
+ DataAvailabilityError: Raised when requesting significant rule counts prior to Executive Order 14094 of April 6, 2023.
162
+
163
+ Returns:
164
+ tuple[DataFrame, datetime.date]: Data with significant rules, last updated date for significant data
165
+ """
166
  process_columns = ("significant", "3f1_significant", )
167
  if date.fromisoformat(start_date) < date(2023, 4, 6):
168
  raise DataAvailabilityError("This program does not calculate significant rule counts prior to Executive Order 14094 of April 6, 2023.")
 
182
  return df, last_updated
183
 
184
 
185
+ def get_rules_in_window(start_date: str, get_significant: bool = True, metadata: dict = METADATA):
186
+ """Retrieve and process rules in a given CRA window.
187
+
188
+ Args:
189
+ start_date (str): Start date of window.
190
+ get_significant (bool, optional): Get significant rule data. Defaults to True.
191
+ metadata (dict, optional): Agency metadata. Defaults to METADATA.
192
+
193
+ Returns:
194
+ tuple[DataFrame, datetime.date]: Data with significant rules, last updated date for significant data
195
+ """
196
  date_range = get_date_range(start_date)
197
  transition_year = date_range.get("transition_year")
198
  results = get_rules(date_range)
199
  df = format_documents(results)
200
  df, _ = filter_corrections(df)
201
  df = filter_new_admin_rules(df, transition_year)
202
+ df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=metadata, metadata_value="acronym")
203
  if get_significant:
204
  df, last_updated = get_significant_rules(df, start_date)
205
  else:
 
207
  return df, last_updated
208
 
209
 
210
+ def get_list_agencies(start_date: str, agency_column: str = "parent_slug", significant: bool = True, **kwargs):
211
+ """Get list of agencies with rules in dataset.
212
+
213
+ Args:
214
+ start_date (str): Start date of window.
215
+ agency_column (str, optional): Column containing agency values. Defaults to "parent_slug".
216
+ significant (bool, optional): Get significant rule data. Defaults to True.
217
+
218
+ Returns:
219
+ list: List of agencies
220
+ """
221
+ df, _ = get_rules_in_window(start_date, get_significant=significant, **kwargs)
222
  df_ex = df.explode(agency_column, ignore_index=True)
223
  return sorted(df_ex[agency_column].value_counts().index.to_list())
224
 
225
 
226
+ # create objects to import in app
227
  DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
228
  AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
229