Mark Febrizio
committed on
Commit
•
393578a
1
Parent(s):
4158978
Select agencies (#23)
Browse files
* move function to utils
* adjust input agencies
* use acronyms instead of slugs
for selection menu
* remove acronyms
keeps improved get metadata values function; values in menu now scrollable via css
- app.py +4 -2
- modules/__init__.py +2 -0
- modules/get_rules_in_window.py +9 -3
- modules/grouping.py +2 -24
- modules/utils.py +34 -0
- www/style.css +4 -0
app.py
CHANGED
@@ -271,6 +271,8 @@ with ui.accordion(open=False):
|
|
271 |
This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
|
272 |
|
273 |
Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
|
|
|
|
|
274 |
"""
|
275 |
)
|
276 |
|
@@ -284,7 +286,7 @@ ui.markdown(
|
|
284 |
|
285 |
|
286 |
@reactive.calc
|
287 |
-
def filtered_df():
|
288 |
filt_df = DF
|
289 |
|
290 |
# filter dates
|
@@ -295,7 +297,7 @@ def filtered_df():
|
|
295 |
|
296 |
# filter agencies
|
297 |
if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
|
298 |
-
bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[
|
299 |
filt_df = filt_df.loc[bool_agency]
|
300 |
|
301 |
# return filtered dataframe
|
|
|
271 |
This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
|
272 |
|
273 |
Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
|
274 |
+
|
275 |
+
A list of common agency acronyms is available from the [U.S. Government Manual](https://www.govinfo.gov/content/pkg/GOVMAN-2022-12-31/pdf/GOVMAN-2022-12-31-Commonly-Used-Acronyms-105.pdf).
|
276 |
"""
|
277 |
)
|
278 |
|
|
|
286 |
|
287 |
|
288 |
@reactive.calc
|
289 |
+
def filtered_df(agency_column: str = "parent_slug"):
|
290 |
filt_df = DF
|
291 |
|
292 |
# filter dates
|
|
|
297 |
|
298 |
# filter agencies
|
299 |
if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
|
300 |
+
bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[agency_column]]
|
301 |
filt_df = filt_df.loc[bool_agency]
|
302 |
|
303 |
# return filtered dataframe
|
modules/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from .grouping import *
|
|
3 |
from .plotting import *
|
4 |
from .search_columns import *
|
5 |
from .significant import *
|
|
|
6 |
|
7 |
|
8 |
# see: https://docs.python.org/3.11/tutorial/modules.html#packages
|
@@ -12,4 +13,5 @@ __all__ = [
|
|
12 |
"plotting",
|
13 |
"search_columns",
|
14 |
"significant",
|
|
|
15 |
]
|
|
|
3 |
from .plotting import *
|
4 |
from .search_columns import *
|
5 |
from .significant import *
|
6 |
+
from .utils import *
|
7 |
|
8 |
|
9 |
# see: https://docs.python.org/3.11/tutorial/modules.html#packages
|
|
|
13 |
"plotting",
|
14 |
"search_columns",
|
15 |
"significant",
|
16 |
+
"utils",
|
17 |
]
|
modules/get_rules_in_window.py
CHANGED
@@ -8,9 +8,11 @@ from pandas import DataFrame, to_datetime
|
|
8 |
try:
|
9 |
from search_columns import search_columns, SearchError
|
10 |
from significant import get_significant_info
|
|
|
11 |
except (ModuleNotFoundError, ImportError):
|
12 |
from .search_columns import search_columns, SearchError
|
13 |
from .significant import get_significant_info
|
|
|
14 |
|
15 |
|
16 |
METADATA, _ = AgencyMetadata().get_agency_metadata()
|
@@ -20,14 +22,16 @@ GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) els
|
|
20 |
|
21 |
|
22 |
class DataAvailabilityError(Exception):
|
|
|
23 |
pass
|
24 |
|
25 |
|
26 |
-
def get_date_range(start_date: str):
|
27 |
"""Define date range of documents returned by the app.
|
28 |
|
29 |
Args:
|
30 |
start_date (str): The start date for retrieving the documents.
|
|
|
31 |
|
32 |
Returns:
|
33 |
dict: Dictionary containing start date, end date, and transition year.
|
@@ -36,7 +40,7 @@ def get_date_range(start_date: str):
|
|
36 |
end_year = start_year + 1
|
37 |
date_range = {
|
38 |
"start": start_date,
|
39 |
-
"end": f"{end_year}-
|
40 |
"transition_year": end_year,
|
41 |
}
|
42 |
return date_range
|
@@ -155,13 +159,14 @@ def get_significant_rules(df, start_date):
|
|
155 |
return df, last_updated
|
156 |
|
157 |
|
158 |
-
def get_rules_in_window(start_date: str, get_significant: bool = True):
|
159 |
date_range = get_date_range(start_date)
|
160 |
transition_year = date_range.get("transition_year")
|
161 |
results = get_rules(date_range)
|
162 |
df = format_documents(results)
|
163 |
df, _ = filter_corrections(df)
|
164 |
df = filter_new_admin_rules(df, transition_year)
|
|
|
165 |
if get_significant:
|
166 |
df, last_updated = get_significant_rules(df, start_date)
|
167 |
else:
|
@@ -184,3 +189,4 @@ if __name__ == "__main__":
|
|
184 |
print(DF.columns)
|
185 |
print(LAST_UPDATED)
|
186 |
print(AGENCIES)
|
|
|
|
8 |
try:
|
9 |
from search_columns import search_columns, SearchError
|
10 |
from significant import get_significant_info
|
11 |
+
from utils import get_agency_metadata_values
|
12 |
except (ModuleNotFoundError, ImportError):
|
13 |
from .search_columns import search_columns, SearchError
|
14 |
from .significant import get_significant_info
|
15 |
+
from .utils import get_agency_metadata_values
|
16 |
|
17 |
|
18 |
METADATA, _ = AgencyMetadata().get_agency_metadata()
|
|
|
22 |
|
23 |
|
24 |
class DataAvailabilityError(Exception):
|
25 |
+
"""Raised when data is not available for the requested inputs."""
|
26 |
pass
|
27 |
|
28 |
|
29 |
+
def get_date_range(start_date: str, end_mmdd: str = "01-03"):
|
30 |
"""Define date range of documents returned by the app.
|
31 |
|
32 |
Args:
|
33 |
start_date (str): The start date for retrieving the documents.
|
34 |
+
end_mmdd (str, optional): The month and day for the end date in MM-DD format. Defaults to "01-03".
|
35 |
|
36 |
Returns:
|
37 |
dict: Dictionary containing start date, end date, and transition year.
|
|
|
40 |
end_year = start_year + 1
|
41 |
date_range = {
|
42 |
"start": start_date,
|
43 |
+
"end": f"{end_year}-{end_mmdd}",
|
44 |
"transition_year": end_year,
|
45 |
}
|
46 |
return date_range
|
|
|
159 |
return df, last_updated
|
160 |
|
161 |
|
162 |
+
def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
|
163 |
date_range = get_date_range(start_date)
|
164 |
transition_year = date_range.get("transition_year")
|
165 |
results = get_rules(date_range)
|
166 |
df = format_documents(results)
|
167 |
df, _ = filter_corrections(df)
|
168 |
df = filter_new_admin_rules(df, transition_year)
|
169 |
+
df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=METADATA, metadata_value="acronym")
|
170 |
if get_significant:
|
171 |
df, last_updated = get_significant_rules(df, start_date)
|
172 |
else:
|
|
|
189 |
print(DF.columns)
|
190 |
print(LAST_UPDATED)
|
191 |
print(AGENCIES)
|
192 |
+
print(len(METADATA.keys()))
|
modules/grouping.py
CHANGED
@@ -5,29 +5,7 @@ from datetime import datetime, date, timedelta
|
|
5 |
from dateutil.relativedelta import *
|
6 |
from pandas import DataFrame, Timestamp, to_datetime
|
7 |
|
8 |
-
|
9 |
-
def _get_agency_metadata_values(
|
10 |
-
df: DataFrame,
|
11 |
-
agency_column: str,
|
12 |
-
metadata: dict,
|
13 |
-
metadata_value: str,
|
14 |
-
):
|
15 |
-
"""Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
|
16 |
-
|
17 |
-
Args:
|
18 |
-
df (DataFrame): Input data.
|
19 |
-
agency_column (str): Column containing agency identifier.
|
20 |
-
metadata (dict): Agency metadata.
|
21 |
-
metadata_value (str): Value of interest from agency metadata.
|
22 |
-
|
23 |
-
Returns:
|
24 |
-
pd.Series: Pandas Series of new values for adding to DataFrame.
|
25 |
-
"""
|
26 |
-
if metadata_value == "acronym":
|
27 |
-
metadata_value = "short_name"
|
28 |
-
return df.loc[:, agency_column].apply(
|
29 |
-
lambda x: metadata.get(x, {}).get(metadata_value)
|
30 |
-
)
|
31 |
|
32 |
|
33 |
def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
|
@@ -229,7 +207,7 @@ def groupby_agency(
|
|
229 |
}, errors="ignore"
|
230 |
)
|
231 |
if metadata is not None:
|
232 |
-
grouped.loc[:, metadata_value] =
|
233 |
grouped,
|
234 |
agency_column="agency",
|
235 |
metadata=metadata,
|
|
|
5 |
from dateutil.relativedelta import *
|
6 |
from pandas import DataFrame, Timestamp, to_datetime
|
7 |
|
8 |
+
from .utils import get_agency_metadata_values
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
|
|
|
207 |
}, errors="ignore"
|
208 |
)
|
209 |
if metadata is not None:
|
210 |
+
grouped.loc[:, metadata_value] = get_agency_metadata_values(
|
211 |
grouped,
|
212 |
agency_column="agency",
|
213 |
metadata=metadata,
|
modules/utils.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pandas import DataFrame
|
2 |
+
|
3 |
+
|
4 |
+
def _get_nested_metadata(metadata_key: str, metadata: dict[str, dict], metadata_value: str):
    """Look up one field of one agency's metadata entry.

    Args:
        metadata_key (str): Agency identifier used as the key into `metadata`.
        metadata (dict[str, dict]): Mapping of agency identifier to that agency's metadata dict.
        metadata_value (str): Name of the field to read from the agency's entry.

    Returns:
        The stored field value. Falls back to `metadata_key` itself when the
        agency has no usable entry or the entry lacks the requested field.
        A field that is present but stored as None is returned as None.
    """
    # `or {}` also covers an agency whose entry exists but is None, which
    # would otherwise raise AttributeError on the `.get` below.
    entry = metadata.get(metadata_key) or {}
    return entry.get(metadata_value, metadata_key)


def get_agency_metadata_values(
    df: DataFrame,
    agency_column: str,
    metadata: dict,
    metadata_value: str,
):
    """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).

    Each cell of `agency_column` may hold either a single agency identifier or a
    list of identifiers; a list cell is mapped element by element and yields a list.
    Identifiers without a matching metadata entry or field are returned unchanged.

    Args:
        df (DataFrame): Input data.
        agency_column (str): Column containing agency identifier.
        metadata (dict): Agency metadata, keyed by agency identifier.
        metadata_value (str): Value of interest from agency metadata.
            "acronym" is accepted as an alias for the "short_name" field.

    Returns:
        pd.Series: Pandas Series of new values for adding to DataFrame.
    """
    if metadata_value == "acronym":
        metadata_value = "short_name"

    def _lookup(agency):
        # List cells are resolved element-wise so the result keeps the cell's shape.
        if isinstance(agency, list):
            return [
                _get_nested_metadata(item, metadata=metadata, metadata_value=metadata_value)
                for item in agency
            ]
        return _get_nested_metadata(agency, metadata=metadata, metadata_value=metadata_value)

    return df.loc[:, agency_column].apply(_lookup)
|
www/style.css
CHANGED
@@ -23,3 +23,7 @@
|
|
23 |
#frequency {
|
24 |
margin-bottom: 5% !important;
|
25 |
}
|
|
|
|
|
|
|
|
|
|
23 |
#frequency {
|
24 |
margin-bottom: 5% !important;
|
25 |
}
|
26 |
+
|
27 |
+
#menu_agency {
|
28 |
+
overflow: visible;
|
29 |
+
}
|