Mark Febrizio committed on
Commit
393578a
1 Parent(s): 4158978

Select agencies (#23)

Browse files

* move function to utils

* adjust input agencies

* use acronyms instead of slugs

for selection menu

* remove acronyms

keeps the improved `get_agency_metadata_values` function; values in the menu are now scrollable via CSS

app.py CHANGED
@@ -271,6 +271,8 @@ with ui.accordion(open=False):
271
  This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
272
 
273
  Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
 
 
274
  """
275
  )
276
 
@@ -284,7 +286,7 @@ ui.markdown(
284
 
285
 
286
  @reactive.calc
287
- def filtered_df():
288
  filt_df = DF
289
 
290
  # filter dates
@@ -295,7 +297,7 @@ def filtered_df():
295
 
296
  # filter agencies
297
  if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
298
- bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df["parent_slug"]]
299
  filt_df = filt_df.loc[bool_agency]
300
 
301
  # return filtered dataframe
 
271
  This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
272
 
273
  Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
274
+
275
+ A list of common agency acronyms is available from the [U.S. Government Manual](https://www.govinfo.gov/content/pkg/GOVMAN-2022-12-31/pdf/GOVMAN-2022-12-31-Commonly-Used-Acronyms-105.pdf).
276
  """
277
  )
278
 
 
286
 
287
 
288
  @reactive.calc
289
+ def filtered_df(agency_column: str = "parent_slug"):
290
  filt_df = DF
291
 
292
  # filter dates
 
297
 
298
  # filter agencies
299
  if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
300
+ bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[agency_column]]
301
  filt_df = filt_df.loc[bool_agency]
302
 
303
  # return filtered dataframe
modules/__init__.py CHANGED
@@ -3,6 +3,7 @@ from .grouping import *
3
  from .plotting import *
4
  from .search_columns import *
5
  from .significant import *
 
6
 
7
 
8
  # see: https://docs.python.org/3.11/tutorial/modules.html#packages
@@ -12,4 +13,5 @@ __all__ = [
12
  "plotting",
13
  "search_columns",
14
  "significant",
 
15
  ]
 
3
  from .plotting import *
4
  from .search_columns import *
5
  from .significant import *
6
+ from .utils import *
7
 
8
 
9
  # see: https://docs.python.org/3.11/tutorial/modules.html#packages
 
13
  "plotting",
14
  "search_columns",
15
  "significant",
16
+ "utils",
17
  ]
modules/get_rules_in_window.py CHANGED
@@ -8,9 +8,11 @@ from pandas import DataFrame, to_datetime
8
  try:
9
  from search_columns import search_columns, SearchError
10
  from significant import get_significant_info
 
11
  except (ModuleNotFoundError, ImportError):
12
  from .search_columns import search_columns, SearchError
13
  from .significant import get_significant_info
 
14
 
15
 
16
  METADATA, _ = AgencyMetadata().get_agency_metadata()
@@ -20,14 +22,16 @@ GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) els
20
 
21
 
22
  class DataAvailabilityError(Exception):
 
23
  pass
24
 
25
 
26
- def get_date_range(start_date: str):
27
  """Define date range of documents returned by the app.
28
 
29
  Args:
30
  start_date (str): The start date for retrieving the documents.
 
31
 
32
  Returns:
33
  dict: Dictionary containing start date, end date, and transition year.
@@ -36,7 +40,7 @@ def get_date_range(start_date: str):
36
  end_year = start_year + 1
37
  date_range = {
38
  "start": start_date,
39
- "end": f"{end_year}-01-03",
40
  "transition_year": end_year,
41
  }
42
  return date_range
@@ -155,13 +159,14 @@ def get_significant_rules(df, start_date):
155
  return df, last_updated
156
 
157
 
158
- def get_rules_in_window(start_date: str, get_significant: bool = True):
159
  date_range = get_date_range(start_date)
160
  transition_year = date_range.get("transition_year")
161
  results = get_rules(date_range)
162
  df = format_documents(results)
163
  df, _ = filter_corrections(df)
164
  df = filter_new_admin_rules(df, transition_year)
 
165
  if get_significant:
166
  df, last_updated = get_significant_rules(df, start_date)
167
  else:
@@ -184,3 +189,4 @@ if __name__ == "__main__":
184
  print(DF.columns)
185
  print(LAST_UPDATED)
186
  print(AGENCIES)
 
 
8
  try:
9
  from search_columns import search_columns, SearchError
10
  from significant import get_significant_info
11
+ from utils import get_agency_metadata_values
12
  except (ModuleNotFoundError, ImportError):
13
  from .search_columns import search_columns, SearchError
14
  from .significant import get_significant_info
15
+ from .utils import get_agency_metadata_values
16
 
17
 
18
  METADATA, _ = AgencyMetadata().get_agency_metadata()
 
22
 
23
 
24
  class DataAvailabilityError(Exception):
25
+ """Raised when data is not available for the requested inputs."""
26
  pass
27
 
28
 
29
+ def get_date_range(start_date: str, end_mmdd: str = "01-03"):
30
  """Define date range of documents returned by the app.
31
 
32
  Args:
33
  start_date (str): The start date for retrieving the documents.
34
+ end_mmdd (str, optional): The month and day for the end date in MM-DD format. Defaults to "01-03".
35
 
36
  Returns:
37
  dict: Dictionary containing start date, end date, and transition year.
 
40
  end_year = start_year + 1
41
  date_range = {
42
  "start": start_date,
43
+ "end": f"{end_year}-{end_mmdd}",
44
  "transition_year": end_year,
45
  }
46
  return date_range
 
159
  return df, last_updated
160
 
161
 
162
+ def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
163
  date_range = get_date_range(start_date)
164
  transition_year = date_range.get("transition_year")
165
  results = get_rules(date_range)
166
  df = format_documents(results)
167
  df, _ = filter_corrections(df)
168
  df = filter_new_admin_rules(df, transition_year)
169
+ df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=METADATA, metadata_value="acronym")
170
  if get_significant:
171
  df, last_updated = get_significant_rules(df, start_date)
172
  else:
 
189
  print(DF.columns)
190
  print(LAST_UPDATED)
191
  print(AGENCIES)
192
+ print(len(METADATA.keys()))
modules/grouping.py CHANGED
@@ -5,29 +5,7 @@ from datetime import datetime, date, timedelta
5
  from dateutil.relativedelta import *
6
  from pandas import DataFrame, Timestamp, to_datetime
7
 
8
-
9
- def _get_agency_metadata_values(
10
- df: DataFrame,
11
- agency_column: str,
12
- metadata: dict,
13
- metadata_value: str,
14
- ):
15
- """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
16
-
17
- Args:
18
- df (DataFrame): Input data.
19
- agency_column (str): Column containing agency identifier.
20
- metadata (dict): Agency metadata.
21
- metadata_value (str): Value of interest from agency metadata.
22
-
23
- Returns:
24
- pd.Series: Pandas Series of new values for adding to DataFrame.
25
- """
26
- if metadata_value == "acronym":
27
- metadata_value = "short_name"
28
- return df.loc[:, agency_column].apply(
29
- lambda x: metadata.get(x, {}).get(metadata_value)
30
- )
31
 
32
 
33
  def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
@@ -229,7 +207,7 @@ def groupby_agency(
229
  }, errors="ignore"
230
  )
231
  if metadata is not None:
232
- grouped.loc[:, metadata_value] = _get_agency_metadata_values(
233
  grouped,
234
  agency_column="agency",
235
  metadata=metadata,
 
5
  from dateutil.relativedelta import *
6
  from pandas import DataFrame, Timestamp, to_datetime
7
 
8
+ from .utils import get_agency_metadata_values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
 
207
  }, errors="ignore"
208
  )
209
  if metadata is not None:
210
+ grouped.loc[:, metadata_value] = get_agency_metadata_values(
211
  grouped,
212
  agency_column="agency",
213
  metadata=metadata,
modules/utils.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import DataFrame
2
+
3
+
4
+ def _get_nested_metadata(metadata_key: str, metadata: dict[dict], metadata_value: str):
5
+ getter = metadata.get(metadata_key, {})
6
+ return getter.get(metadata_value, metadata_key)
7
+
8
+
9
+ def get_agency_metadata_values(
10
+ df: DataFrame,
11
+ agency_column: str,
12
+ metadata: dict,
13
+ metadata_value: str,
14
+ ):
15
+ """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
16
+
17
+ Args:
18
+ df (DataFrame): Input data.
19
+ agency_column (str): Column containing agency identifier.
20
+ metadata (dict): Agency metadata.
21
+ metadata_value (str): Value of interest from agency metadata.
22
+
23
+ Returns:
24
+ pd.Series: Pandas Series of new values for adding to DataFrame.
25
+ """
26
+ if metadata_value == "acronym":
27
+ metadata_value = "short_name"
28
+ return df.loc[:, agency_column].apply(
29
+ lambda x: [
30
+ _get_nested_metadata(x_item, metadata=metadata, metadata_value=metadata_value)
31
+ for x_item
32
+ in x
33
+ ] if isinstance(x, list) else _get_nested_metadata(x, metadata=metadata, metadata_value=metadata_value)
34
+ )
www/style.css CHANGED
@@ -23,3 +23,7 @@
23
  #frequency {
24
  margin-bottom: 5% !important;
25
  }
 
 
 
 
 
23
  #frequency {
24
  margin-bottom: 5% !important;
25
  }
26
+
27
+ #menu_agency {
28
+ overflow: visible;
29
+ }