Mark Febrizio committed on
Commit
58bb4c7
1 Parent(s): 072c060

Revisions 2024 06 (#11)


* update name of function

* create venv for development

* Update requirements-dev.txt

* Update app.py

minor cleanup

* Create test_plots.py

* Update get_rules_in_window.py

* Update app.py

* modularize

* Update app.py

* add frequency selector
* combine groupby date reactive calcs
* convert panels to expandable cards

* Delete test_plots.py

* minor updates to tests

* update main requirements

use pip-tools to manage dependencies

* handles plots with minimal data

now shows a default filler plot instead of a confusing plot when there is little to no data
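A minimal sketch of this fallback pattern (the real check lives in `plot_over_time()` in app.py below; the threshold values and helper name here are illustrative):

```python
from plotnine import ggplot, labs

def plot_or_filler(grouped, plot_func, value_col="rules"):
    # fall back to a bare, labeled plot when there is too little data to read
    values = grouped[value_col].to_numpy()
    if (max(values, default=0) < 2) or (sum(1 for v in values if v > 0) < 2):
        return ggplot() + labs(title="Not enough data available to visualize.")
    return plot_func(grouped)
```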

* dev requirements

* update dev req

* Update significant.py

pass coalesce=True to join; ensures a polars breaking change doesn't affect the code
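A minimal sketch of the pinned behavior, assuming a polars version where `join` accepts the `coalesce` keyword (the frames are illustrative):

```python
import polars as pl

left = pl.DataFrame({"document_number": ["a", "b"], "title": ["x", "y"]})
right = pl.DataFrame({"document_number": ["a"], "3f1_significant": [True]})

# coalesce=True merges the join keys into a single column, preserving the
# behavior that older polars releases applied to left joins by default
joined = left.join(right, on="document_number", how="left", coalesce=True)
```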

* mac req

* Update get_rules_in_window.py

update start of window to May 23

* update app notes

* add logo to header

* Update app.py

* add weekly

* Update app.py

clean up logo info

* updates to weekly calcs

* Update app.py

add asterisks to value boxes

* weekly data viz

improve grouping by week; fix issue where line plots weren't showing

* Update grouping.py

* Update grouping.py

added function to pad dates in grouped data; still need to implement in output

* Update grouping.py

* Update plotting.py

fix geom_line grouping

* weekly and daily plots

implemented grouping, plotting, etc. for aggregating daily and weekly data
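A minimal sketch of the week-bucketing idea behind the new grouping code, using dateutil as the module does (the sample date is illustrative):

```python
from datetime import date
from dateutil.relativedelta import relativedelta, MO

d = date(2024, 6, 5)                         # a Wednesday
week_of = d + relativedelta(weekday=MO(-1))  # snap back to the preceding Monday
print(week_of)                               # 2024-06-03
```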

.gitignore CHANGED
@@ -1,8 +1,12 @@
1
  # ----- Project Specific ----- #
2
3
  # text files except for requirements
4
  *.txt
5
- !requirements.txt
6
 
7
  # HF space folder
8
  .huggingface
 
1
  # ----- Project Specific ----- #
2
 
3
+ # development venv
4
+ .dev/
5
+ dev/
6
+
7
  # text files except for requirements
8
  *.txt
9
+ !requirements*.txt
10
 
11
  # HF space folder
12
  .huggingface
app.py CHANGED
@@ -1,46 +1,74 @@
1
  import asyncio
2
  from datetime import datetime, date, time
 
3
 
4
  from faicons import icon_svg
 
 
5
 
6
- from modules.get_rules_in_window import (
7
  DF,
8
  LAST_UPDATED,
9
  START_DATE,
 
10
  GET_SIGNIFICANT,
11
  METADATA,
12
- AGENCIES,
13
  groupby_agency,
14
- groupby_ym,
 
 
15
  plot_agency,
16
- plot_month,
17
  )
18
 
19
  from shiny import reactive
20
  from shiny.express import input, render, ui
21
22
  FOOTER = f"""
23
  -----
24
 
25
- Developed by the [GW Regulatory Studies Center](https://go.gwu.edu/regstudies). See our page on the [Congressional Review Act](https://regulatorystudies.columbian.gwu.edu/congressional-review-act) for more information.
26
  """
27
 
28
- ui.page_opts(
29
- title="Rules in the Congressional Review Act (CRA) Window", #fillable=True,
30
  )
31
 
32
- with ui.sidebar(title="Settings"):
33
- ui.input_date("start_date", "Start of window", value=START_DATE, min=START_DATE, max=date.today())
34
-
35
- ui.input_switch("switch", "Show significant rules in plots", False)
36
-
37
  ui.input_select("menu_agency", "Select agencies", choices=["all"] + AGENCIES, selected="all", multiple=True)
38
 
39
- #ui.input_checkbox_group(
40
- # "significant",
41
- # "EO 12866 Significance",
42
- # ["Section 3(f)(1)", "Other"],
43
- #)
44
 
45
  with ui.layout_column_wrap():
46
  with ui.value_box(showcase=icon_svg("book")):
@@ -51,7 +79,7 @@ with ui.layout_column_wrap():
51
  ui.input_action_button("filter_all", "View", ) #class_="btn-success")
52
 
53
  with ui.value_box(showcase=icon_svg("book")):
54
- "Section 3(f)(1) Significant rules"
55
  @render.text
56
  def count_3f1_significant():
57
  output = "Not available"
@@ -61,7 +89,7 @@ with ui.layout_column_wrap():
61
  ui.input_action_button("filter_3f1", "View", ) #class_="btn-success")
62
 
63
  with ui.value_box(showcase=icon_svg("book")):
64
- "Other Significant rules"
65
  @render.text
66
  def count_other_significant():
67
  output = "Not available"
@@ -70,14 +98,18 @@ with ui.layout_column_wrap():
70
  return output
71
  ui.input_action_button("filter_other", "View", )
72
73
  with ui.navset_card_underline(title=""):
74
 
75
  with ui.nav_panel("Rules in detail"):
76
  @render.data_frame
77
  def table_rule_detail():
78
- df = filtered_sig()
79
- #print(df.columns)
80
- #df.loc[:, "date"] = df.apply(lambda x: f"{x['publication_year']}-{x['publication_month']}-{x['publication_day']}", axis=1)
81
  df.loc[:, "date"] = df.loc[:, "publication_date"].apply(lambda x: f"{x.date()}")
82
  char, limit = " ", 10
83
  df.loc[:, "title"] = df["title"].apply(lambda x: x if len(x.split(char)) < (limit + 1) else f"{char.join(x.split(char)[:limit])}...")
@@ -89,58 +121,82 @@ with ui.navset_card_underline(title=""):
89
  "3f1_significant",
90
  "other_significant",
91
  ]
92
- return render.DataGrid(df.loc[:, [c for c in cols if c in df.columns]], width="100%") #filters=True)
93
 
94
- with ui.nav_panel("By month"):
95
 
96
  with ui.layout_columns():
97
 
98
- @render.plot
99
- def plot_by_month():
100
- grouped = grouped_df_month()
101
- return plot_month(
102
- grouped
103
- )
104
-
105
- @render.data_frame
106
- def table_by_month():
107
- grouped = grouped_df_month()
108
- cols = [
109
- "publication_year",
110
- "publication_month",
111
- "rules",
112
- "3f1_significant",
113
- "other_significant",
114
- ]
115
- return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
116
 
117
  with ui.nav_panel("By agency"):
118
 
119
  with ui.layout_columns():
120
 
121
- @render.plot
122
- def plot_by_agency():
123
- grouped = grouped_df_agency()
124
- if input.switch():
125
- pass
126
- # placeholder for stacked bar chart
127
- else:
128
  plot = plot_agency(
129
- grouped.head(10),
130
- )
131
  return plot
132
 
133
- @render.data_frame
134
- def table_by_agency():
135
- grouped = grouped_df_agency()
136
- cols = [
137
- "agency",
138
- "acronym",
139
- "rules",
140
- "3f1_significant",
141
- "other_significant",
142
- ]
143
- return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
 
144
 
145
  with ui.accordion(open=False):
146
 
@@ -183,7 +239,7 @@ with ui.accordion(open=False):
183
  f"""
184
  Rule data retrieved from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1).
185
 
186
- Executive Order 12866 significance data last updated **{LAST_UPDATED}**.
187
  """
188
  )
189
 
@@ -191,8 +247,6 @@ ui.markdown(
191
  FOOTER
192
  )
193
 
194
- #ui.tags.footer()
195
-
196
 
197
  # ----- REACTIVE CALCULATIONS ----- #
198
 
@@ -214,6 +268,7 @@ def filtered_df():
214
 
215
  return filt_df
216
 
 
217
  @reactive.calc
218
  def filtered_sig():
219
  filt_df = filtered_df()
@@ -228,10 +283,48 @@ def filtered_sig():
228
 
229
  return filt_df
230
 
 
231
  @reactive.calc
232
  def grouped_df_month():
233
  filt_df = filtered_sig()
234
- grouped = groupby_ym(filt_df, significant=GET_SIGNIFICANT)
235
  return grouped
236
 
237
 
@@ -242,6 +335,19 @@ def grouped_df_agency():
242
  return grouped
243
 
244
245
  # ----- REACTIVE VALUES ----- #
246
 
247
 
 
1
  import asyncio
2
  from datetime import datetime, date, time
3
+ from pathlib import Path
4
 
5
  from faicons import icon_svg
6
+ from pandas import DataFrame, to_datetime
7
+ from plotnine import ggplot, labs
8
 
9
+ from modules import (
10
  DF,
11
  LAST_UPDATED,
12
  START_DATE,
13
+ WINDOW_OPEN_DATE,
14
  GET_SIGNIFICANT,
15
  METADATA,
16
+ AGENCIES,
17
  groupby_agency,
18
+ groupby_date,
19
+ add_weeks_to_data,
20
+ pad_missing_dates,
21
  plot_agency,
22
+ plot_tf,
23
  )
24
 
25
  from shiny import reactive
26
  from shiny.express import input, render, ui
27
 
28
+ TITLE = "CRA Window Tracker - GW Regulatory Studies Center"
29
+
30
+ HEADER = "Rules in the Congressional Review Act (CRA) Window"
31
+
32
  FOOTER = f"""
33
  -----
34
 
35
+ &copy; 2024 [GW Regulatory Studies Center](https://go.gwu.edu/regstudies). See our page on the [Congressional Review Act](https://regulatorystudies.columbian.gwu.edu/congressional-review-act) for more information.
36
  """
37
 
38
+ ui.include_css(Path(__file__).parent / "www" / "style.css")
39
+
40
+ ui.tags.title(TITLE)
41
+
42
+ sidebar_logo = ui.HTML(
43
+ f"""
44
+ <div class="header">
45
+ <a href="https://go.gwu.edu/regstudies" target="_blank">
46
+ <img src="logo.png" alt="Regulatory Studies Center logo"/>
47
+ </a>
48
+ </div>
49
+ """
50
  )
51
 
52
+ page_header = ui.HTML(
53
+ f"""
54
+ <div class="header">
55
+ <span>{HEADER}</span>
56
+ </div>
57
+ """
58
+ )
59
+
60
+ page_header
61
+
62
+ with ui.sidebar(open={"desktop": "open", "mobile": "closed"}):
63
+ sidebar_logo
64
+
65
+ ui.input_date("start_date", "Select start of window", value=WINDOW_OPEN_DATE, min=START_DATE, max=date.today())
66
+
67
  ui.input_select("menu_agency", "Select agencies", choices=["all"] + AGENCIES, selected="all", multiple=True)
68
 
69
+ ui.input_select("frequency", "Select frequency", choices=["daily", "monthly", "weekly"], selected="daily")
70
+
71
+ #ui.input_switch("switch", "Stack significant rules in plots", False)
 
 
72
 
73
  with ui.layout_column_wrap():
74
  with ui.value_box(showcase=icon_svg("book")):
 
79
  ui.input_action_button("filter_all", "View", ) #class_="btn-success")
80
 
81
  with ui.value_box(showcase=icon_svg("book")):
82
+ "Section 3(f)(1) Significant rules *"
83
  @render.text
84
  def count_3f1_significant():
85
  output = "Not available"
 
89
  ui.input_action_button("filter_3f1", "View", ) #class_="btn-success")
90
 
91
  with ui.value_box(showcase=icon_svg("book")):
92
+ "Other Significant rules *"
93
  @render.text
94
  def count_other_significant():
95
  output = "Not available"
 
98
  return output
99
  ui.input_action_button("filter_other", "View", )
100
 
101
+ ui.markdown(
102
+ f"""
103
+ \* *Executive Order 12866 significance data last updated **{LAST_UPDATED}***.
104
+ """
105
+ )
106
+
107
  with ui.navset_card_underline(title=""):
108
 
109
  with ui.nav_panel("Rules in detail"):
110
  @render.data_frame
111
  def table_rule_detail():
112
+ df = filtered_sig().copy()
 
 
113
  df.loc[:, "date"] = df.loc[:, "publication_date"].apply(lambda x: f"{x.date()}")
114
  char, limit = " ", 10
115
  df.loc[:, "title"] = df["title"].apply(lambda x: x if len(x.split(char)) < (limit + 1) else f"{char.join(x.split(char)[:limit])}...")
 
121
  "3f1_significant",
122
  "other_significant",
123
  ]
124
+ return render.DataGrid(df.loc[:, [c for c in cols if c in df.columns]], width="100%")
125
 
126
+ with ui.nav_panel("Over time"):
127
 
128
  with ui.layout_columns():
129
 
130
+ with ui.card(full_screen=True):
131
+
132
+ @render.plot
133
+ def plot_over_time(value_col: str = "rules"):
134
+ grouped = get_grouped_data_over_time()
135
+ values = grouped.loc[:, value_col].to_numpy()
136
+ count_gte_zero = sum(1 if g > 0 else 0 for g in values)
137
+ max_val = max(values, default=0)
138
+ if (max_val < 2) or (count_gte_zero < 2):
139
+ return (
140
+ ggplot()
141
+ + labs(title="Not enough data available to visualize.")
142
+ )
143
+ else:
144
+ return plot_tf(
145
+ grouped,
146
+ input.frequency()
147
+ )
148
+
149
+ with ui.card(full_screen=True):
150
+ @render.data_frame
151
+ def table_over_time():
152
+ grouped = get_grouped_data_over_time()
153
+ date_cols = ["publication_date", "week_of", ]
154
+ if any(d in grouped.columns for d in date_cols):
155
+ grouped = grouped.astype({d: "str" for d in date_cols if d in grouped.columns}, errors="ignore")
156
+ grouped = grouped.rename(columns={
157
+ "publication_year": "year",
158
+ "publication_month": "month",
159
+ "publication_date": "date",
160
+ }, errors="ignore")
161
+ cols = [
162
+ "date",
163
+ "year",
164
+ "month",
165
+ "week_of",
166
+ "rules",
167
+ "3f1_significant",
168
+ "other_significant",
169
+ ]
170
+ return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
171
 
172
  with ui.nav_panel("By agency"):
173
 
174
  with ui.layout_columns():
175
 
176
+ with ui.card(full_screen=True):
177
+ @render.plot
178
+ def plot_by_agency():
179
+ grouped = grouped_df_agency()
180
+ #if input.switch():
181
+ # pass
182
+ # # placeholder for stacked bar chart
183
  plot = plot_agency(
184
+ grouped.head(10),
185
+ )
186
  return plot
187
 
188
+ with ui.card(full_screen=True):
189
+ @render.data_frame
190
+ def table_by_agency():
191
+ grouped = grouped_df_agency()
192
+ cols = [
193
+ "agency",
194
+ "acronym",
195
+ "rules",
196
+ "3f1_significant",
197
+ "other_significant",
198
+ ]
199
+ return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
200
 
201
  with ui.accordion(open=False):
202
 
 
239
  f"""
240
  Rule data retrieved from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1).
241
 
242
+ The window for the CRA lookback period is [estimated](https://www.huntonak.com/the-nickel-report/federal-agencies-face-looming-congressional-review-act-deadline) to open on May 23, 2024.
243
  """
244
  )
245
 
 
247
  FOOTER
248
  )
249
 
 
 
250
 
251
  # ----- REACTIVE CALCULATIONS ----- #
252
 
 
268
 
269
  return filt_df
270
 
271
+
272
  @reactive.calc
273
  def filtered_sig():
274
  filt_df = filtered_df()
 
283
 
284
  return filt_df
285
 
286
+
287
  @reactive.calc
288
  def grouped_df_month():
289
  filt_df = filtered_sig()
290
+ grouped = groupby_date(filt_df, significant=GET_SIGNIFICANT)
291
+ return grouped
292
+
293
+
294
+ @reactive.calc
295
+ def grouped_df_day():
296
+ filt_df = filtered_sig()
297
+ date_col = "publication_date"
298
+ grouped = groupby_date(filt_df, group_col=date_col, significant=GET_SIGNIFICANT)
299
+ grouped = pad_missing_dates(
300
+ grouped,
301
+ date_col,
302
+ "days",
303
+ fill_padded_values={
304
+ "rules": 0,
305
+ "3f1_significant": 0,
306
+ "other_significant": 0,
307
+ })
308
+ return grouped
309
+
310
+
311
+ @reactive.calc
312
+ def grouped_df_week():
313
+ filt_df = filtered_sig()
314
+ filt_df = add_weeks_to_data(filt_df)
315
+ try:
316
+ grouped = groupby_date(filt_df, group_col=("week_number", "week_of"), significant=GET_SIGNIFICANT)
317
+ grouped = pad_missing_dates(
318
+ grouped,
319
+ "week_of",
320
+ how="weeks",
321
+ fill_padded_values={
322
+ "rules": 0,
323
+ "3f1_significant": 0,
324
+ "other_significant": 0,
325
+ })
326
+ except KeyError:
327
+ grouped = DataFrame(columns=["week_number", "week_of", "rules", "3f1_significant", "other_significant"])
328
  return grouped
329
 
330
 
 
335
  return grouped
336
 
337
 
338
+ @reactive.calc
339
+ def get_grouped_data_over_time():
340
+ if input.frequency() == "daily":
341
+ grouped = grouped_df_day()
342
+ elif input.frequency() == "monthly":
343
+ grouped = grouped_df_month()
344
+ elif input.frequency() == "weekly":
345
+ grouped = grouped_df_week()
346
+ else:
347
+ raise ValueError("Only 'daily', 'monthly', or 'weekly' are valid inputs.")
348
+ return grouped
349
+
350
+
351
  # ----- REACTIVE VALUES ----- #
352
 
353
 
cra_window_rules.py CHANGED
@@ -1,6 +1,44 @@
 
1
  from pathlib import Path
2
 
3
- from modules.get_rules_in_window import main, METADATA
4
 
5
 
6
  if __name__ == "__main__":
@@ -14,4 +52,4 @@ if __name__ == "__main__":
14
  df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)
15
  print(f"Rules in CRA window: {len(df)}")
16
  print("\nRules by agency\n", agency.head(10))
17
- print("\nRules by month\n", ym)
 
1
+ from datetime import date
2
  from pathlib import Path
3
 
4
+ from pandas import DataFrame
5
+
6
+ from modules import (
7
+ METADATA,
8
+ get_date_range,
9
+ get_rules_in_window,
10
+ groupby_agency,
11
+ groupby_date,
12
+ )
13
+
14
+
15
+ def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
16
+ files = (
17
+ f"rules_{transition_year - 1}_{transition_year}.csv",
18
+ f"rules_by_agency_{transition_year - 1}_{transition_year}.csv",
19
+ f"rules_by_month_{transition_year - 1}_{transition_year}.csv"
20
+ )
21
+ dataframes = (df_all, df_agency, df_ym)
22
+ for data, file in zip(dataframes, files):
23
+ data.to_csv(path / file, index=False)
24
+
25
+
26
+ def main(start_date, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
27
+ if date.fromisoformat(start_date) < date(2023, 4, 6):
28
+ significant = False
29
+ date_range = get_date_range(start_date)
30
+ transition_year = date_range.get("transition_year")
31
+ df, _ = get_rules_in_window(start_date, get_significant=significant)
32
+
33
+ df_agency = groupby_agency(df, metadata=metadata, significant=significant)
34
+ df_ym = groupby_date(df, significant=significant)
35
+
36
+ if save_data:
37
+ if path is None:
38
+ path = Path(__file__).parent
39
+ save_csv(path, df, df_agency, df_ym, transition_year)
40
+
41
+ return df, df_agency, df_ym
42
 
43
 
44
  if __name__ == "__main__":
 
52
  df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)
53
  print(f"Rules in CRA window: {len(df)}")
54
  print("\nRules by agency\n", agency.head(10))
55
+ print("\nRules by month\n", ym)
modules/__init__.py CHANGED
@@ -1,6 +1,15 @@
1
  # see: https://docs.python.org/3.11/tutorial/modules.html#packages
2
  __all__ = [
3
  "get_rules_in_window",
 
 
4
  "search_columns",
5
  "significant",
6
  ]
 
1
+ from .get_rules_in_window import *
2
+ from .grouping import *
3
+ from .plotting import *
4
+ from .search_columns import *
5
+ from .significant import *
6
+
7
+
8
  # see: https://docs.python.org/3.11/tutorial/modules.html#packages
9
  __all__ = [
10
  "get_rules_in_window",
11
+ "grouping",
12
+ "plotting",
13
  "search_columns",
14
  "significant",
15
  ]
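With the submodules re-exported through star imports, callers can now pull helpers straight from the package, as app.py does above; a hedged usage sketch:

```python
# assumes this repo's `modules` package is on the import path
from modules import groupby_date, add_weeks_to_data, pad_missing_dates, plot_tf
```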
modules/get_rules_in_window.py CHANGED
@@ -1,19 +1,9 @@
1
  from datetime import date
2
- from pathlib import Path
3
 
4
  from fr_toolbelt.api_requests import get_documents_by_date
5
  from fr_toolbelt.preprocessing import process_documents, AgencyMetadata
6
  from numpy import array
7
  from pandas import DataFrame, to_datetime
8
- from plotnine import (
9
- ggplot,
10
- aes,
11
- geom_col,
12
- labs,
13
- coord_flip,
14
- scale_x_discrete,
15
- theme_light,
16
- )
17
 
18
  try:
19
  from search_columns import search_columns, SearchError
@@ -25,6 +15,7 @@ except (ModuleNotFoundError, ImportError):
25
 
26
  METADATA, _ = AgencyMetadata().get_agency_metadata()
27
  START_DATE = "2024-01-01"
 
28
  GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) else False
29
 
30
 
@@ -156,129 +147,6 @@ def get_significant_rules(df, start_date):
156
  return df, last_updated
157
 
158
 
159
- def get_agency_metadata_values(
160
- df: DataFrame,
161
- agency_column: str,
162
- metadata: dict,
163
- metadata_value: str,
164
- ):
165
- if metadata_value == "acronym":
166
- metadata_value = "short_name"
167
- return df.loc[:, agency_column].apply(
168
- lambda x: metadata.get(x, {}).get(metadata_value)
169
- )
170
-
171
-
172
- def groupby_agency(
173
- df: DataFrame,
174
- group_col: str = "parent_slug",
175
- value_col: str = "document_number",
176
- aggfunc: str = "count",
177
- significant: bool = True,
178
- metadata: dict | None = None,
179
- metadata_value: str = "acronym",
180
- ):
181
- aggfunc_dict = {value_col: aggfunc, }
182
- if significant:
183
- aggfunc_dict.update({
184
- "3f1_significant": "sum",
185
- "other_significant": "sum",
186
- })
187
- df_ex = df.explode(group_col, ignore_index=True)
188
- grouped = df_ex.groupby(
189
- by=group_col
190
- ).agg(
191
- aggfunc_dict
192
- ).reset_index()
193
- grouped = grouped.sort_values(value_col, ascending=False).rename(
194
- columns={
195
- group_col: "agency",
196
- value_col: "rules",
197
- }, errors="ignore"
198
- )
199
- if metadata is not None:
200
- grouped.loc[:, metadata_value] = get_agency_metadata_values(
201
- grouped,
202
- agency_column="agency",
203
- metadata=metadata,
204
- metadata_value=metadata_value
205
- )
206
- cols = ["agency", metadata_value, "rules", "3f1_significant", "other_significant"]
207
- grouped = grouped.loc[:, [c for c in cols if c in grouped.columns]]
208
- return grouped
209
-
210
-
211
- def groupby_ym(
212
- df: DataFrame,
213
- group_col: tuple | list = ("publication_year", "publication_month", ),
214
- value_col: str = "document_number",
215
- aggfunc: str = "count",
216
- significant: bool = True
217
- ):
218
- aggfunc_dict = {value_col: aggfunc, }
219
- if significant:
220
- aggfunc_dict.update({
221
- "3f1_significant": "sum",
222
- "other_significant": "sum",
223
- })
224
- grouped = df.groupby(
225
- by=list(group_col)
226
- ).agg(
227
- aggfunc_dict
228
- ).reset_index()
229
- grouped = grouped.rename(columns={
230
- value_col: "rules",
231
- }, errors="ignore")
232
- return grouped
233
-
234
-
235
- def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
236
- files = (
237
- f"rules_{transition_year - 1}_{transition_year}.csv",
238
- f"rules_by_agency_{transition_year - 1}_{transition_year}.csv",
239
- f"rules_by_month_{transition_year - 1}_{transition_year}.csv"
240
- )
241
- dataframes = (df_all, df_agency, df_ym)
242
- for data, file in zip(dataframes, files):
243
- data.to_csv(path / file, index=False)
244
-
245
-
246
- def plot_agency(df, group_col = "acronym", value_col = "rules"):
247
-
248
- order_list = df.loc[:, group_col].to_list()[::-1]
249
-
250
- plot = (
251
- ggplot(
252
- df,
253
- aes(x=group_col, y=value_col),
254
- )
255
- + geom_col()
256
- + coord_flip()
257
- + scale_x_discrete(limits=order_list)
258
- + labs(y="", x="", title="Number of Rules Published by Agency")
259
- + theme_light()
260
- )
261
- return plot
262
-
263
-
264
- def plot_month(df, group_cols = ("publication_year", "publication_month"), value_col = "rules"):
265
-
266
- df.loc[:, "ym"] = df[group_cols[0]].astype(str) + "-" + df[group_cols[1]].astype(str).str.pad(2, fillchar="0")
267
- order_list = df.loc[:, "ym"].to_list()
268
-
269
- plot = (
270
- ggplot(
271
- df,
272
- aes(x="ym", y=value_col),
273
- )
274
- + geom_col()
275
- + scale_x_discrete(limits=order_list)
276
- + labs(y="", x="", title="Number of Rules Published by Month")
277
- + theme_light()
278
- )
279
- return plot
280
-
281
-
282
  def get_rules_in_window(start_date: str, get_significant: bool = True):
283
  date_range = get_date_range(start_date)
284
  transition_year = date_range.get("transition_year")
@@ -293,35 +161,18 @@ def get_rules_in_window(start_date: str, get_significant: bool = True):
293
  return df, last_updated
294
 
295
 
296
- def get_list_agencies(start_date, agency_column: str = "agency", metadata: dict | None = None, significant: bool = True):
297
  df, _ = get_rules_in_window(start_date, get_significant=significant)
298
- df_agency = groupby_agency(df, metadata=metadata, significant=significant)
299
- return sorted(list(set(df_agency.loc[df_agency[agency_column].notna(), agency_column].to_list())))
300
-
301
-
302
- def main(start_date, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
303
- if date.fromisoformat(start_date) < date(2023, 4, 6):
304
- significant = False
305
- date_range = get_date_range(start_date)
306
- transition_year = date_range.get("transition_year")
307
- df, _ = get_rules_in_window(start_date, get_significant=significant)
308
-
309
- df_agency = groupby_agency(df, metadata=metadata, significant=significant)
310
- df_ym = groupby_ym(df, significant=significant)
311
-
312
- if save_data:
313
- if path is None:
314
- path = Path(__file__).parent
315
- save_csv(path, df, df_agency, df_ym, transition_year)
316
-
317
- return df, df_agency, df_ym
318
 
319
 
320
  DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
321
- AGENCIES = get_list_agencies(START_DATE, metadata=METADATA, significant=GET_SIGNIFICANT)
322
 
323
 
324
  if __name__ == "__main__":
325
 
326
  print(DF.columns)
327
  print(LAST_UPDATED)
 
 
1
  from datetime import date
 
2
 
3
  from fr_toolbelt.api_requests import get_documents_by_date
4
  from fr_toolbelt.preprocessing import process_documents, AgencyMetadata
5
  from numpy import array
6
  from pandas import DataFrame, to_datetime
 
 
 
 
 
 
 
 
 
7
 
8
  try:
9
  from search_columns import search_columns, SearchError
 
15
 
16
  METADATA, _ = AgencyMetadata().get_agency_metadata()
17
  START_DATE = "2024-01-01"
18
+ WINDOW_OPEN_DATE = "2024-05-23"
19
  GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) else False
20
 
21
 
 
147
  return df, last_updated
148
 
149
150
  def get_rules_in_window(start_date: str, get_significant: bool = True):
151
  date_range = get_date_range(start_date)
152
  transition_year = date_range.get("transition_year")
 
161
  return df, last_updated
162
 
163
 
164
+ def get_list_agencies(start_date, agency_column: str = "parent_slug", significant: bool = True):
165
  df, _ = get_rules_in_window(start_date, get_significant=significant)
166
+ df_ex = df.explode(agency_column, ignore_index=True)
167
+ return sorted(df_ex[agency_column].value_counts().index.to_list())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
 
170
  DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
171
+ AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
172
 
173
 
174
  if __name__ == "__main__":
175
 
176
  print(DF.columns)
177
  print(LAST_UPDATED)
178
+ print(AGENCIES)
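A self-contained sketch of the explode-then-count idiom that the new `get_list_agencies()` relies on (the sample data is illustrative):

```python
from pandas import DataFrame

df = DataFrame({"parent_slug": [["epa"], ["epa", "dot"]]})
exploded = df.explode("parent_slug", ignore_index=True)  # one row per agency
print(sorted(exploded["parent_slug"].value_counts().index.to_list()))  # ['dot', 'epa']
```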
modules/grouping.py ADDED
@@ -0,0 +1,295 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, date, timedelta
4
+
5
+ from dateutil.relativedelta import relativedelta, weekday, MO, TU, WE, TH, FR, SA, SU
6
+ from pandas import DataFrame, Timestamp, to_datetime
7
+
8
+
9
+ def _get_agency_metadata_values(
10
+ df: DataFrame,
11
+ agency_column: str,
12
+ metadata: dict,
13
+ metadata_value: str,
14
+ ):
15
+ """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
16
+
17
+ Args:
18
+ df (DataFrame): Input data.
19
+ agency_column (str): Column containing agency identifier.
20
+ metadata (dict): Agency metadata.
21
+ metadata_value (str): Value of interest from agency metadata.
22
+
23
+ Returns:
24
+ pd.Series: Pandas Series of new values for adding to DataFrame.
25
+ """
26
+ if metadata_value == "acronym":
27
+ metadata_value = "short_name"
28
+ return df.loc[:, agency_column].apply(
29
+ lambda x: metadata.get(x, {}).get(metadata_value)
30
+ )
31
+
32
+
33
+ def _get_first_week_start(dates: list[date], week_start: int | str | weekday = MO):
34
+ """Get the start date of the first week from a list of dates.
35
+ Pass "week_start" to select a different start date for each week (defaults to Monday).
36
+ """
37
+ if week_start in (MO, TU, WE, TH, FR, SA, SU):
38
+ pass
39
+ elif isinstance(week_start, str):
40
+ weekdays = {
41
+ "monday": MO,
42
+ "tuesday": TU,
43
+ "wednesday": WE,
44
+ "thursday": TH,
45
+ "friday": FR,
46
+ "saturday": SA,
47
+ "sunday": SU,
48
+ }
49
+ week_start = weekdays.get(week_start.lower(), MO)
50
+ elif isinstance(week_start, int):
51
+ weekdays = {
52
+ 0: MO,
53
+ 1: TU,
54
+ 2: WE,
55
+ 3: TH,
56
+ 4: FR,
57
+ 5: SA,
58
+ 6: SU,
59
+ }
60
+ week_start = weekdays.get(week_start, MO)
61
+ else:
62
+ raise TypeError("Parameter 'week_start' must be type `str`, `int`, or a dateutil weekday instance.")
63
+
64
+ first_day = next(d for d in dates)
65
+ return first_day + relativedelta(weekday=week_start(-1))
66
+
67
+
68
+ def _get_week_start_dates(first_week_start: date | Timestamp, end_date: date | None = None):
69
+ """Get the index and start date for each week.
70
+
71
+ Args:
72
+ first_week_start (date | Timestamp): Start date of the first week in the data.
73
+ end_date (date | None, optional): End date for data. If None is passed (the default), the end date is `date.today()`.
74
+
75
+ Returns:
76
+ list[tuple]: List of tuples containing the week number and the start date.
77
+ """
78
+ if end_date is None:
79
+ end_date = date.today()
80
+ try:
81
+ week_start_dates = [first_week_start.date()]
82
+ except AttributeError:
83
+ week_start_dates = [first_week_start]
84
+ while week_start_dates[-1] < end_date:
85
+ next_start_date = week_start_dates[-1] + relativedelta(weeks=1)
86
+ week_start_dates.append(next_start_date)
87
+ week_start_dates = [day for day in week_start_dates if day <= end_date]
88
+ week_start_dates = [d.date() if isinstance(d, (Timestamp, datetime)) else d for d in week_start_dates]
89
+ return [(idx, w) for idx, w in enumerate(week_start_dates)]
90
+
91
+
92
+ def _get_weeks(dates: list[date], end_date: date | None = None, **kwargs) -> list[tuple]:
93
+ """Takes a list, array, or other iterable of `datetime.date` values and returns a list of tuples containing (week_number, week_start_date) pairs.
94
+ Pass keyword arg "week_start" - ranging from 0 (Monday) to 6 (Sunday) - to choose a different start date than Monday for the week.
95
+ """
96
+ # get the start date for the first week
97
+ first_week_start = _get_first_week_start(dates, **kwargs)
98
+
99
+ # get start date for each week in the input values
100
+ weeks = _get_week_start_dates(first_week_start, end_date=end_date)
101
+
102
+ # iterate over inputs, append tuple of week number and start date for each week
103
+ results = []
104
+ for d in dates:
105
+ if isinstance(d, Timestamp):
106
+ d = d.date()
107
+ week_gen = ((idx, start_date) for idx, start_date in weeks if (start_date <= d < (start_date + timedelta(weeks=1))))
108
+ results.append(next(week_gen, (0, first_week_start)))
109
+ return results
110
+
111
+
112
+ def add_weeks_to_data(df: DataFrame, date_column: str = "publication_date", new_columns: tuple[str] = ("week_number", "week_of")):
113
+ """Add week number and week start date to input data.
114
+
115
+ Args:
116
+ df (DataFrame): Input data.
117
+ date_column (str, optional): Name of column containing publication dates. Defaults to "publication_date".
118
+ new_columns (tuple[str], optional): New column names. Defaults to ("week_number", "week_of").
119
+
120
+ Returns:
121
+ DataFrame: Data containing week information.
122
+ """
123
+ df_c = df.copy()
124
+ data = df_c[date_column].to_list()
125
+ if len(data) > 0:
126
+ week_numbers, week_starts = list(zip(*_get_weeks(data)))
127
+ df_c.loc[:, new_columns[0]] = week_numbers
128
+ df_c.loc[:, new_columns[1]] = to_datetime(week_starts)
129
+ return df_c
130
+
131
+
132
+ def _pad_missing_weeks(timeframe_list: list[date], **kwargs):
133
+
134
+ # get the start date for the first week
135
+ first_week_start = _get_first_week_start(timeframe_list)
136
+
137
+ # get start date for each week in the input values
138
+ return _get_week_start_dates(first_week_start, **kwargs)
139
+
140
+
141
+ def _pad_missing_days(timeframe_list: list[date], end_date: date | None = None):
142
+
143
+ start_date = min(timeframe_list)
144
+ if end_date is None:
145
+ end_date = date.today()
146
+
147
+ # create list of weekdays from start to end dates
148
+ # remember that range() objects are exclusive of the stop
149
+ return [
150
+ start_date + relativedelta(days=n)
151
+ for n in range((end_date - start_date).days + 1)
152
+ if (start_date + relativedelta(days=n)).weekday() in range(0, 5)
153
+ ]
154
+
155
+
156
+ def pad_missing_dates(df: DataFrame, pad_column: str, how: str, fill_padded_values: dict | None = None, **kwargs):
157
+
158
+ df_copy = df.copy()
159
+ timeframe_list = [d.date() if isinstance(d, (Timestamp, datetime)) else d for d in df_copy[pad_column].to_list()]
160
+ df_copy = df_copy.astype({pad_column: "object"})
161
+ df_copy.loc[:, pad_column] = timeframe_list
162
+
163
+ # pad dates if dataframe isn't empty
164
+ if len(timeframe_list) > 0:
165
+
166
+ # choose which time frequency needs padding
167
+ if how == "days":
168
+ week_numbers = None
169
+ padded_timeframes = _pad_missing_days(timeframe_list, **kwargs)
170
+ elif how == "weeks":
171
+ week_numbers, padded_timeframes = zip(*_pad_missing_weeks(timeframe_list, **kwargs))
172
+ else:
173
+ raise ValueError("Parameter 'how' must be 'days' or 'weeks'.")
174
+
175
+ # incorporate extended dates into dataframe
176
+ df_merge = DataFrame({pad_column: padded_timeframes})
177
+ pad_cols = [pad_column]
178
+ if week_numbers is not None:
179
+ df_merge.loc[:, "week_number"] = week_numbers
180
+ pad_cols.append("week_number")
181
+ df_copy = df_copy.merge(df_merge, on=pad_cols, how="outer", indicator=True)
182
+ if fill_padded_values is not None:
183
+ for col, val in fill_padded_values.items():
184
+ bool_ = df_copy["_merge"] == "right_only"
185
+ df_copy.loc[bool_, col] = val
186
+
187
+ return df_copy.drop(columns=["_merge"], errors="ignore")
188
+
189
+
190
+ def groupby_agency(
191
+ df: DataFrame,
192
+ group_col: str = "parent_slug",
193
+ value_col: str = "document_number",
194
+ aggfunc: str = "count",
195
+ significant: bool = True,
196
+ metadata: dict | None = None,
197
+ metadata_value: str = "acronym",
198
+ ):
199
+ """Group rules by agency and aggregate counts of documents.
200
+
201
+ Args:
202
+ df (DataFrame): Input data.
203
+ group_col (str, optional): Column containing agency identifiers. Defaults to "parent_slug".
204
+ value_col (str, optional): Column of values to aggregate. Defaults to "document_number".
205
+ aggfunc (str, optional): Aggregation function to apply. Defaults to "count".
206
+ significant (bool, optional): Whether to sum the significance columns. Defaults to True.
207
+ metadata (dict | None, optional): Agency metadata for adding values such as acronyms. Defaults to None.
208
+ metadata_value (str, optional): Value of interest from agency metadata. Defaults to "acronym".
209
+
210
+ Returns:
211
+ DataFrame: Grouped and aggregated data.
212
+ """
213
+ aggfunc_dict = {value_col: aggfunc, }
214
+ if significant:
215
+ aggfunc_dict.update({
216
+ "3f1_significant": "sum",
217
+ "other_significant": "sum",
218
+ })
219
+ df_ex = df.explode(group_col, ignore_index=True)
220
+ grouped = df_ex.groupby(
221
+ by=group_col
222
+ ).agg(
223
+ aggfunc_dict
224
+ ).reset_index()
225
+ grouped = grouped.sort_values(value_col, ascending=False).rename(
226
+ columns={
227
+ group_col: "agency",
228
+ value_col: "rules",
229
+ }, errors="ignore"
230
+ )
231
+ if metadata is not None:
232
+ grouped.loc[:, metadata_value] = _get_agency_metadata_values(
233
+ grouped,
234
+ agency_column="agency",
235
+ metadata=metadata,
236
+ metadata_value=metadata_value
237
+ )
238
+ cols = ["agency", metadata_value, "rules", "3f1_significant", "other_significant"]
239
+ grouped = grouped.loc[:, [c for c in cols if c in grouped.columns]]
240
+ return grouped
241
+
242
+
243
+ def groupby_date(
244
+ df: DataFrame,
245
+ group_col: str | tuple | list = ("publication_year", "publication_month", ),
246
+ value_col: str = "document_number",
247
+ aggfunc: str = "count",
248
+ significant: bool = True
249
+ ):
250
+ if isinstance(group_col, str):
251
+ group_col = [group_col]
252
+ elif isinstance(group_col, (list, tuple)):
253
+ group_col = list(group_col)
254
+ else:
255
+ raise TypeError("Parameter 'group_col' must be a str, tuple, or list.")
256
+
257
+ aggfunc_dict = {value_col: aggfunc, }
258
+ if significant:
259
+ aggfunc_dict.update({
260
+ "3f1_significant": "sum",
261
+ "other_significant": "sum",
262
+ })
263
+ grouped = df.groupby(
264
+ by=group_col
265
+ ).agg(
266
+ aggfunc_dict
267
+ ).reset_index()
268
+ grouped = grouped.rename(columns={
269
+ value_col: "rules",
270
+ }, errors="ignore")
271
+ return grouped
272
+
273
+
274
+ if __name__ == "__main__":
275
+
276
+ from datetime import date, timedelta
277
+ from pandas import to_datetime
278
+
279
+ TODAY = date.today()
280
+ WEEKS_AGO = TODAY - timedelta(weeks=10)
281
+
282
+ dates = [(WEEKS_AGO - timedelta(days=r)) for r in range(21) if (r % 3 != 0)][::-1] + [(TODAY - timedelta(days=r)) for r in range(21)][::-1]
283
+ df = DataFrame({"dates": dates, "values": [idx for idx, _ in enumerate(dates)]})
284
+
285
+ df_a = pad_missing_dates(df, "dates", "days", fill_padded_values={"values": 0})
286
+ print(df_a.head(10))
287
+
288
+ df = add_weeks_to_data(df, date_column="dates")
289
+ print(df.head(10))
290
+
291
+ grouped = groupby_date(df, group_col=("week_number", "week_of"), value_col="values", significant=False)
292
+ print(grouped)
293
+
294
+ df_b = pad_missing_dates(grouped, "week_of", how="weeks", fill_padded_values={"rules": 0})
295
+ print(df_b)
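For reference, a condensed sketch of the outer-merge padding pattern that `pad_missing_dates()` implements (the frames are illustrative):

```python
import pandas as pd

observed = pd.DataFrame({"day": [1, 3], "rules": [3, 1]})   # days that had rules
full = pd.DataFrame({"day": [1, 2, 3, 4]})                  # every day in the window
padded = observed.merge(full, on="day", how="outer", indicator=True)
padded.loc[padded["_merge"] == "right_only", "rules"] = 0   # zero-fill padded rows
padded = padded.drop(columns="_merge").sort_values("day")
```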
modules/plotting.py ADDED
@@ -0,0 +1,167 @@
1
+ from pandas import DataFrame
2
+ from plotnine import (
3
+ ggplot,
4
+ aes,
5
+ geom_col,
6
+ geom_line,
7
+ labs,
8
+ coord_flip,
9
+ scale_x_discrete,
10
+ scale_x_datetime,
11
+ scale_y_continuous,
12
+ theme_light,
13
+ )
14
+
15
+
16
+ class DataAvailabilityError(Exception):
17
+ """Raised when not enough data is available to visualize."""
18
+ pass
19
+
20
+
21
+ def plot_agency(df, group_col = "acronym", value_col = "rules"):
22
+ """Plot rules by agency.
23
+
24
+ Args:
25
+ df (DataFrame): Input data.
26
+ group_col (str, optional): Column on which the data are grouped. Defaults to "acronym".
27
+ value_col (str, optional): Column of values to be plotted. Defaults to "rules".
28
+
29
+ Returns:
30
+ ggplot: Plotted data.
31
+ """
32
+ order_list = df.loc[:, group_col].to_list()[::-1]
33
+
34
+ plot = (
35
+ ggplot(
36
+ df,
37
+ aes(x=group_col, y=value_col),
38
+ )
39
+ + geom_col()
40
+ + coord_flip()
41
+ + scale_x_discrete(limits=order_list)
42
+ + labs(y="", x="", title="Number of Rules Published by Agency")
43
+ + theme_light()
44
+ )
45
+ return plot
46
+
47
+
48
+ def plot_month(df: DataFrame, group_cols: tuple = ("publication_year", "publication_month"), value_col: str = "rules"):
49
+ """Plot rules by month.
50
+
51
+ Args:
52
+ df (DataFrame): Input data.
53
+ group_cols (tuple, optional): Columns on which the data are grouped. Defaults to ("publication_year", "publication_month").
54
+ value_col (str, optional): Column of values to be plotted. Defaults to "rules".
55
+
56
+ Returns:
57
+ ggplot: Plotted data.
58
+ """
59
+ df.loc[:, "ym"] = df[group_cols[0]].astype(str) + "-" + df[group_cols[1]].astype(str).str.pad(2, fillchar="0")
60
+ order_list = df.loc[:, "ym"].to_list()
61
+
62
+ plot = (
63
+ ggplot(
64
+ df,
65
+ aes(x="ym", y=value_col),
66
+ )
67
+ + geom_col()
68
+ + scale_x_discrete(limits=order_list)
69
+ + labs(y="", x="", title="Number of Rules Published by Month")
70
+ + theme_light()
71
+ )
72
+ return plot
73
+
74
+
75
+ def plot_day(df: DataFrame, group_col: str = "publication_date", value_col: str = "rules"):
76
+ """Plot rules by day.
77
+
78
+ Args:
79
+ df (DataFrame): Input data.
80
+ group_col (str, optional): Column on which the data are grouped. Defaults to "publication_date".
81
+ value_col (str, optional): Column of values to be plotted. Defaults to "rules".
82
+
83
+ Returns:
84
+ ggplot: Plotted data.
85
+ """
86
+ min_date = df.loc[:, group_col].min()
87
+ max_date = df.loc[:, group_col].max()
88
+ diff = (max_date - min_date).days
89
+ if diff in range(0, 61):
90
+ freq = "1 week"
91
+ elif diff in range(61, 91):
92
+ freq = "2 weeks"
93
+ else:
94
+ freq = "1 month"
95
+
96
+ max_value = df.loc[:, value_col].max()
97
+
98
+ plot = (
99
+ ggplot(
100
+ df,
101
+ aes(x=group_col, y=value_col),
102
+ )
103
+ + geom_line(group=1)
104
+ + scale_x_datetime(date_breaks=freq, date_labels="%m-%d")
105
+ + scale_y_continuous(limits=(0, max_value), expand=(0, 0, 0.1, 0))
106
+ + labs(y="", x="", title="Number of Rules Published by Date")
107
+ + theme_light()
108
+ )
109
+ return plot
110
+
111
+
112
+ def plot_week(df: DataFrame, group_col: str = "week_of", value_col: str = "rules", ):
113
+ max_value = df.loc[:, value_col].max()
114
+
115
+ date_values = df[group_col].to_list()
116
+ num_weeks = len(date_values)
117
+
118
+ if num_weeks in range(8, 16):
119
+ reduce_by = 2
120
+ elif num_weeks in range(16, 24):
121
+ reduce_by = 3
122
+ elif num_weeks in range(24, 32):
123
+ reduce_by = 4
124
+ elif num_weeks >= 32:
125
+ reduce_by = 5
126
+ else:
127
+ reduce_by = 1
128
+
129
+ breaks = [val for idx, val in enumerate(date_values) if idx % reduce_by == 0]
130
+
131
+ plot = (
132
+ ggplot(
133
+ df,
134
+ aes(x=group_col, y=value_col),
135
+ )
136
+ + geom_line(group=1)
137
+ + scale_x_datetime(breaks=breaks, labels=[f"{w.strftime('%m-%d')}" for w in breaks])
138
+ + scale_y_continuous(limits=(0, max_value), expand=(0, 0, 0.1, 0))
139
+ + labs(y="", x="", title="Number of Rules Published by Week")
140
+ + theme_light()
141
+ )
142
+ return plot
143
+
144
+
145
+ def plot_tf(df: DataFrame, frequency: str, **kwargs) -> ggplot:
146
+ """Plot rules over time by given frequency.
147
+
148
+ Args:
149
+ df (DataFrame): Input data.
150
+ frequency (str): Frequency of time for aggregating rules. Accepts "daily", "monthly", or "weekly".
151
+
152
+ Raises:
153
+ ValueError: Frequency parameter received invalid value.
154
+
155
+ Returns:
156
+ ggplot: Plotted data.
157
+ """
158
+ freq_options = {
159
+ "monthly": plot_month,
160
+ "daily": plot_day,
161
+ "weekly": plot_week,
162
+ }
163
+
164
+ plot_freq = freq_options.get(frequency, None)
165
+ if plot_freq is None:
166
+ raise ValueError(f"Frequency must be one of: {', '.join(freq_options.keys())}")
167
+ return plot_freq(df, **kwargs)
modules/significant.py CHANGED
@@ -89,7 +89,7 @@ def merge_with_api_results(pd_df: pd_DataFrame,
89
  ):
90
 
91
  main_df = pl.from_pandas(pd_df)
92
- df = main_df.join(pl_df, on="document_number", how="left", validate="1:1")
93
  return df.to_pandas()
94
 
95
 
 
89
  ):
90
 
91
  main_df = pl.from_pandas(pd_df)
92
+ df = main_df.join(pl_df, on="document_number", how="left", validate="1:1", coalesce=True)
93
  return df.to_pandas()
94
 
95
 
requirements-dev-mac.txt ADDED
@@ -0,0 +1,541 @@
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile --output-file=requirements-dev-mac.txt requirements-dev.in requirements.in
6
+ #
7
+ anyio==4.3.0
8
+ # via
9
+ # -c requirements.txt
10
+ # httpx
11
+ # jupyter-server
12
+ # starlette
13
+ # watchfiles
14
+ appdirs==1.4.4
15
+ # via
16
+ # -c requirements.txt
17
+ # shiny
18
+ appnope==0.1.4
19
+ # via ipykernel
20
+ argon2-cffi==23.1.0
21
+ # via jupyter-server
22
+ argon2-cffi-bindings==21.2.0
23
+ # via argon2-cffi
24
+ arrow==1.3.0
25
+ # via isoduration
26
+ asgiref==3.8.1
27
+ # via
28
+ # -c requirements.txt
29
+ # shiny
30
+ asttokens==2.4.1
31
+ # via stack-data
32
+ async-lru==2.0.4
33
+ # via jupyterlab
34
+ attrs==23.2.0
35
+ # via
36
+ # jsonschema
37
+ # referencing
38
+ babel==2.15.0
39
+ # via jupyterlab-server
40
+ beautifulsoup4==4.12.3
41
+ # via nbconvert
42
+ bleach==6.1.0
43
+ # via nbconvert
44
+ certifi==2024.2.2
45
+ # via
46
+ # -c requirements.txt
47
+ # httpcore
48
+ # httpx
49
+ # requests
50
+ cffi==1.16.0
51
+ # via argon2-cffi-bindings
52
+ charset-normalizer==3.3.2
53
+ # via
54
+ # -c requirements.txt
55
+ # requests
56
+ click==8.1.7
57
+ # via
58
+ # -c requirements.txt
59
+ # shiny
60
+ # uvicorn
61
+ comm==0.2.2
62
+ # via
63
+ # ipykernel
64
+ # ipywidgets
65
+ contourpy==1.2.1
66
+ # via
67
+ # -c requirements.txt
68
+ # matplotlib
69
+ cursor==1.3.5
70
+ # via
71
+ # -c requirements.txt
72
+ # fr-toolbelt
73
+ cycler==0.12.1
74
+ # via
75
+ # -c requirements.txt
76
+ # matplotlib
77
+ debugpy==1.8.2
78
+ # via ipykernel
79
+ decorator==5.1.1
80
+ # via ipython
81
+ defusedxml==0.7.1
82
+ # via nbconvert
83
+ executing==2.0.1
84
+ # via stack-data
85
+ faicons==0.2.2
86
+ # via
87
+ # -c requirements.txt
88
+ # -r requirements.in
89
+ fastjsonschema==2.20.0
90
+ # via nbformat
91
+ fonttools==4.51.0
92
+ # via
93
+ # -c requirements.txt
94
+ # matplotlib
95
+ fqdn==1.5.1
96
+ # via jsonschema
97
+ fr-toolbelt==0.1.3
98
+ # via
99
+ # -c requirements.txt
100
+ # -r requirements.in
101
+ h11==0.14.0
102
+ # via
103
+ # -c requirements.txt
104
+ # httpcore
105
+ # uvicorn
106
+ htmltools==0.5.1
107
+ # via
108
+ # -c requirements.txt
109
+ # faicons
110
+ # shiny
111
+ httpcore==1.0.5
112
+ # via httpx
113
+ httpx==0.27.0
114
+ # via jupyterlab
115
+ idna==3.7
116
+ # via
117
+ # -c requirements.txt
118
+ # anyio
119
+ # httpx
120
+ # jsonschema
121
+ # requests
122
+ iniconfig==2.0.0
123
+ # via pytest
124
+ ipykernel==6.29.4
125
+ # via
126
+ # -r requirements-dev.in
127
+ # jupyter
128
+ # jupyter-console
129
+ # jupyterlab
130
+ # qtconsole
131
+ ipython==8.18.0
132
+ # via
133
+ # -r requirements-dev.in
134
+ # ipykernel
135
+ # ipywidgets
136
+ # jupyter-console
137
+ ipywidgets==8.1.3
138
+ # via jupyter
139
+ isoduration==20.11.0
140
+ # via jsonschema
141
+ jedi==0.19.1
142
+ # via ipython
143
+ jinja2==3.1.4
144
+ # via
145
+ # jupyter-server
146
+ # jupyterlab
147
+ # jupyterlab-server
148
+ # nbconvert
149
+ json5==0.9.25
150
+ # via jupyterlab-server
151
+ jsonpointer==3.0.0
152
+ # via jsonschema
153
+ jsonschema[format-nongpl]==4.22.0
154
+ # via
155
+ # jupyter-events
156
+ # jupyterlab-server
157
+ # nbformat
158
+ jsonschema-specifications==2023.12.1
159
+ # via jsonschema
160
+ jupyter==1.0.0
161
+ # via -r requirements-dev.in
162
+ jupyter-client==8.6.2
163
+ # via
164
+ # ipykernel
165
+ # jupyter-console
166
+ # jupyter-server
167
+ # nbclient
168
+ # qtconsole
169
+ jupyter-console==6.6.3
170
+ # via jupyter
171
+ jupyter-core==5.7.2
172
+ # via
173
+ # ipykernel
174
+ # jupyter-client
175
+ # jupyter-console
176
+ # jupyter-server
177
+ # jupyterlab
178
+ # nbclient
179
+ # nbconvert
180
+ # nbformat
181
+ # qtconsole
182
+ jupyter-events==0.10.0
183
+ # via jupyter-server
184
+ jupyter-lsp==2.2.5
185
+ # via jupyterlab
186
+ jupyter-server==2.14.1
187
+ # via
188
+ # jupyter-lsp
189
+ # jupyterlab
190
+ # jupyterlab-server
191
+ # notebook
192
+ # notebook-shim
193
+ jupyter-server-terminals==0.5.3
194
+ # via jupyter-server
195
+ jupyterlab==4.2.3
196
+ # via notebook
197
+ jupyterlab-pygments==0.3.0
198
+ # via nbconvert
199
+ jupyterlab-server==2.27.2
200
+ # via
201
+ # jupyterlab
202
+ # notebook
203
+ jupyterlab-widgets==3.0.11
204
+ # via ipywidgets
205
+ kiwisolver==1.4.5
206
+ # via
207
+ # -c requirements.txt
208
+ # matplotlib
209
+ linkify-it-py==2.0.3
210
+ # via
211
+ # -c requirements.txt
212
+ # shiny
213
+ markdown-it-py==3.0.0
214
+ # via
215
+ # -c requirements.txt
216
+ # mdit-py-plugins
217
+ # shiny
218
+ markupsafe==2.1.5
219
+ # via
220
+ # jinja2
221
+ # nbconvert
222
+ matplotlib==3.8.4
223
+ # via
224
+ # -c requirements.txt
225
+ # plotnine
226
+ matplotlib-inline==0.1.7
227
+ # via
228
+ # ipykernel
229
+ # ipython
230
+ mdit-py-plugins==0.4.1
231
+ # via
232
+ # -c requirements.txt
233
+ # shiny
234
+ mdurl==0.1.2
235
+ # via
236
+ # -c requirements.txt
237
+ # markdown-it-py
238
+ mistune==3.0.2
239
+ # via nbconvert
240
+ mizani==0.11.3
241
+ # via
242
+ # -c requirements.txt
243
+ # plotnine
244
+ nbclient==0.10.0
245
+ # via nbconvert
246
+ nbconvert==7.16.4
247
+ # via
248
+ # jupyter
249
+ # jupyter-server
250
+ nbformat==5.10.4
251
+ # via
252
+ # jupyter-server
253
+ # nbclient
254
+ # nbconvert
255
+ nest-asyncio==1.6.0
256
+ # via ipykernel
257
+ notebook==7.2.1
258
+ # via jupyter
259
+ notebook-shim==0.2.4
260
+ # via
261
+ # jupyterlab
262
+ # notebook
263
+ numpy==1.26.4
264
+ # via
265
+ # -c requirements.txt
266
+ # -r requirements.in
267
+ # contourpy
268
+ # fr-toolbelt
269
+ # matplotlib
270
+ # mizani
271
+ # pandas
272
+ # patsy
273
+ # plotnine
274
+ # pyarrow
275
+ # scipy
276
+ # statsmodels
277
+ overrides==7.7.0
278
+ # via jupyter-server
279
+ packaging==24.0
280
+ # via
281
+ # -c requirements.txt
282
+ # htmltools
283
+ # ipykernel
284
+ # jupyter-server
285
+ # jupyterlab
286
+ # jupyterlab-server
287
+ # matplotlib
288
+ # nbconvert
289
+ # pytest
290
+ # qtconsole
291
+ # qtpy
292
+ # shiny
293
+ # statsmodels
294
+ pandas==2.2.2
295
+ # via
296
+ # -c requirements.txt
297
+ # -r requirements.in
298
+ # fr-toolbelt
299
+ # mizani
300
+ # plotnine
301
+ # statsmodels
302
+ pandocfilters==1.5.1
303
+ # via nbconvert
304
+ parso==0.8.4
305
+ # via jedi
306
+ patsy==0.5.6
307
+ # via
308
+ # -c requirements.txt
309
+ # statsmodels
310
+ pexpect==4.9.0
311
+ # via ipython
312
+ pillow==10.3.0
313
+ # via
314
+ # -c requirements.txt
315
+ # matplotlib
316
+ platformdirs==4.2.2
317
+ # via jupyter-core
318
+ plotnine==0.13.6
319
+ # via
320
+ # -c requirements.txt
321
+ # -r requirements.in
322
+ pluggy==1.5.0
323
+ # via pytest
324
+ polars==0.20.31
325
+ # via
326
+ # -c requirements.txt
327
+ # -r requirements.in
328
+ progress==1.6
329
+ # via
330
+ # -c requirements.txt
331
+ # fr-toolbelt
332
+ prometheus-client==0.20.0
333
+ # via jupyter-server
334
+ prompt-toolkit==3.0.36
335
+ # via
336
+ # -c requirements.txt
337
+ # ipython
338
+ # jupyter-console
339
+ # questionary
340
+ psutil==6.0.0
341
+ # via ipykernel
342
+ ptyprocess==0.7.0
343
+ # via
344
+ # pexpect
345
+ # terminado
346
+ pure-eval==0.2.2
347
+ # via stack-data
348
+ pyarrow==16.1.0
349
+ # via
350
+ # -c requirements.txt
351
+ # -r requirements.in
352
+ pycparser==2.22
353
+ # via cffi
354
+ pygments==2.18.0
355
+ # via
356
+ # ipython
357
+ # jupyter-console
358
+ # nbconvert
359
+ # qtconsole
360
+ pyparsing==3.1.2
361
+ # via
362
+ # -c requirements.txt
363
+ # matplotlib
364
+ pytest==8.2.2
365
+ # via -r requirements-dev.in
366
+ python-dateutil==2.9.0.post0
367
+ # via
368
+ # -c requirements.txt
369
+ # -r requirements.in
370
+ # arrow
371
+ # jupyter-client
372
+ # matplotlib
373
+ # pandas
374
+ python-json-logger==2.0.7
375
+ # via jupyter-events
376
+ python-multipart==0.0.9
377
+ # via
378
+ # -c requirements.txt
379
+ # shiny
380
+ pytz==2024.1
381
+ # via
382
+ # -c requirements.txt
383
+ # pandas
384
+ pyyaml==6.0.1
385
+ # via jupyter-events
386
+ pyzmq==26.0.3
387
+ # via
388
+ # ipykernel
389
+ # jupyter-client
390
+ # jupyter-console
391
+ # jupyter-server
392
+ # qtconsole
393
+ qtconsole==5.5.2
394
+ # via jupyter
395
+ qtpy==2.4.1
396
+ # via qtconsole
397
+ questionary==2.0.1
398
+ # via
399
+ # -c requirements.txt
400
+ # shiny
401
+ referencing==0.35.1
402
+ # via
403
+ # jsonschema
404
+ # jsonschema-specifications
405
+ # jupyter-events
406
+ requests==2.32.3
407
+ # via
408
+ # -c requirements.txt
409
+ # -r requirements.in
410
+ # fr-toolbelt
411
+ # jupyterlab-server
412
+ rfc3339-validator==0.1.4
413
+ # via
414
+ # jsonschema
415
+ # jupyter-events
416
+ rfc3986-validator==0.1.1
417
+ # via
418
+ # jsonschema
419
+ # jupyter-events
420
+ rpds-py==0.18.1
421
+ # via
422
+ # jsonschema
423
+ # referencing
424
+ scipy==1.13.0
425
+ # via
426
+ # -c requirements.txt
427
+ # mizani
428
+ # plotnine
429
+ # statsmodels
430
+ send2trash==1.8.3
431
+ # via jupyter-server
432
+ shiny==0.10.2
433
+ # via
434
+ # -c requirements.txt
435
+ # -r requirements.in
436
+ six==1.16.0
437
+ # via
438
+ # -c requirements.txt
439
+ # asttokens
440
+ # bleach
441
+ # patsy
442
+ # python-dateutil
443
+ # rfc3339-validator
444
+ sniffio==1.3.1
445
+ # via
446
+ # -c requirements.txt
447
+ # anyio
448
+ # httpx
449
+ soupsieve==2.5
450
+ # via beautifulsoup4
451
+ stack-data==0.6.3
452
+ # via ipython
453
+ starlette==0.37.2
454
+ # via
455
+ # -c requirements.txt
456
+ # shiny
457
+ statsmodels==0.14.2
458
+ # via
459
+ # -c requirements.txt
460
+ # plotnine
461
+ terminado==0.18.1
462
+ # via
463
+ # jupyter-server
464
+ # jupyter-server-terminals
465
+ tinycss2==1.3.0
466
+ # via nbconvert
467
+ tornado==6.4.1
468
+ # via
469
+ # ipykernel
470
+ # jupyter-client
471
+ # jupyter-server
472
+ # jupyterlab
473
+ # notebook
474
+ # terminado
475
+ traitlets==5.14.3
476
+ # via
477
+ # comm
478
+ # ipykernel
479
+ # ipython
480
+ # ipywidgets
481
+ # jupyter-client
482
+ # jupyter-console
483
+ # jupyter-core
484
+ # jupyter-events
485
+ # jupyter-server
486
+ # jupyterlab
487
+ # matplotlib-inline
488
+ # nbclient
489
+ # nbconvert
490
+ # nbformat
491
+ # qtconsole
492
+ types-python-dateutil==2.9.0.20240316
493
+ # via arrow
494
+ typing-extensions==4.11.0
495
+ # via
496
+ # -c requirements.txt
497
+ # htmltools
498
+ # shiny
499
+ tzdata==2024.1
500
+ # via
501
+ # -c requirements.txt
502
+ # pandas
503
+ uc-micro-py==1.0.3
504
+ # via
505
+ # -c requirements.txt
506
+ # linkify-it-py
507
+ uri-template==1.3.0
508
+ # via jsonschema
509
+ urllib3==2.2.1
510
+ # via
511
+ # -c requirements.txt
512
+ # requests
513
+ uvicorn==0.29.0
514
+ # via
515
+ # -c requirements.txt
516
+ # shiny
517
+ watchfiles==0.21.0
518
+ # via
519
+ # -c requirements.txt
520
+ # shiny
521
+ wcwidth==0.2.13
522
+ # via
523
+ # -c requirements.txt
524
+ # prompt-toolkit
525
+ webcolors==24.6.0
526
+ # via jsonschema
527
+ webencodings==0.5.1
528
+ # via
529
+ # bleach
530
+ # tinycss2
531
+ websocket-client==1.8.0
532
+ # via jupyter-server
533
+ websockets==12.0
534
+ # via
535
+ # -c requirements.txt
536
+ # shiny
537
+ widgetsnbextension==4.0.11
538
+ # via ipywidgets
539
+
540
+ # The following packages are considered to be unsafe in a requirements file:
541
+ # setuptools
requirements-dev.in ADDED
@@ -0,0 +1,6 @@
1
+ # requirements-dev.in
2
+ -c requirements.txt
3
+ ipykernel>=6.29
4
+ ipython>=8.18
5
+ jupyter>=1.0.0
6
+ pytest>=8.2.2
requirements-dev.txt ADDED
@@ -0,0 +1,547 @@
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile --output-file=requirements-dev.txt requirements-dev.in requirements.in
6
+ #
7
+ anyio==4.3.0
8
+ # via
9
+ # -c requirements.txt
10
+ # httpx
11
+ # jupyter-server
12
+ # starlette
13
+ # watchfiles
14
+ appdirs==1.4.4
15
+ # via
16
+ # -c requirements.txt
17
+ # shiny
18
+ argon2-cffi==23.1.0
19
+ # via jupyter-server
20
+ argon2-cffi-bindings==21.2.0
21
+ # via argon2-cffi
22
+ arrow==1.3.0
23
+ # via isoduration
24
+ asgiref==3.8.1
25
+ # via
26
+ # -c requirements.txt
27
+ # shiny
28
+ asttokens==2.4.1
29
+ # via stack-data
30
+ async-lru==2.0.4
31
+ # via jupyterlab
32
+ attrs==23.2.0
33
+ # via
34
+ # jsonschema
35
+ # referencing
36
+ babel==2.15.0
37
+ # via jupyterlab-server
38
+ beautifulsoup4==4.12.3
39
+ # via nbconvert
40
+ bleach==6.1.0
41
+ # via nbconvert
42
+ certifi==2024.2.2
43
+ # via
44
+ # -c requirements.txt
45
+ # httpcore
46
+ # httpx
47
+ # requests
48
+ cffi==1.16.0
49
+ # via argon2-cffi-bindings
50
+ charset-normalizer==3.3.2
51
+ # via
52
+ # -c requirements.txt
53
+ # requests
54
+ click==8.1.7
55
+ # via
56
+ # -c requirements.txt
57
+ # shiny
58
+ # uvicorn
59
+ colorama==0.4.6
60
+ # via
61
+ # -c requirements.txt
62
+ # click
63
+ # ipython
64
+ # pytest
65
+ comm==0.2.2
66
+ # via
67
+ # ipykernel
68
+ # ipywidgets
69
+ contourpy==1.2.1
70
+ # via
71
+ # -c requirements.txt
72
+ # matplotlib
73
+ cursor==1.3.5
74
+ # via
75
+ # -c requirements.txt
76
+ # fr-toolbelt
77
+ cycler==0.12.1
78
+ # via
79
+ # -c requirements.txt
80
+ # matplotlib
81
+ debugpy==1.8.1
82
+ # via ipykernel
83
+ decorator==5.1.1
84
+ # via ipython
85
+ defusedxml==0.7.1
86
+ # via nbconvert
87
+ executing==2.0.1
88
+ # via stack-data
89
+ faicons==0.2.2
90
+ # via
91
+ # -c requirements.txt
92
+ # -r requirements.in
93
+ fastjsonschema==2.20.0
94
+ # via nbformat
95
+ fonttools==4.51.0
96
+ # via
97
+ # -c requirements.txt
98
+ # matplotlib
99
+ fqdn==1.5.1
100
+ # via jsonschema
101
+ fr-toolbelt==0.1.3
102
+ # via
103
+ # -c requirements.txt
104
+ # -r requirements.in
105
+ h11==0.14.0
106
+ # via
107
+ # -c requirements.txt
108
+ # httpcore
109
+ # uvicorn
110
+ htmltools==0.5.1
111
+ # via
112
+ # -c requirements.txt
113
+ # faicons
114
+ # shiny
115
+ httpcore==1.0.5
116
+ # via httpx
117
+ httpx==0.27.0
118
+ # via jupyterlab
119
+ idna==3.7
120
+ # via
121
+ # -c requirements.txt
122
+ # anyio
123
+ # httpx
124
+ # jsonschema
125
+ # requests
126
+ iniconfig==2.0.0
127
+ # via pytest
128
+ ipykernel==6.29.0
129
+ # via
130
+ # -r requirements-dev.in
131
+ # jupyter
132
+ # jupyter-console
133
+ # jupyterlab
134
+ # qtconsole
135
+ ipython==8.18.0
136
+ # via
137
+ # -r requirements-dev.in
138
+ # ipykernel
139
+ # ipywidgets
140
+ # jupyter-console
141
+ ipywidgets==8.1.3
142
+ # via jupyter
143
+ isoduration==20.11.0
144
+ # via jsonschema
145
+ jedi==0.19.1
146
+ # via ipython
147
+ jinja2==3.1.4
148
+ # via
149
+ # jupyter-server
150
+ # jupyterlab
151
+ # jupyterlab-server
152
+ # nbconvert
153
+ json5==0.9.25
154
+ # via jupyterlab-server
155
+ jsonpointer==3.0.0
156
+ # via jsonschema
157
+ jsonschema[format-nongpl]==4.22.0
158
+ # via
159
+ # jupyter-events
160
+ # jupyterlab-server
161
+ # nbformat
162
+ jsonschema-specifications==2023.12.1
163
+ # via jsonschema
164
+ jupyter==1.0.0
165
+ # via -r requirements-dev.in
166
+ jupyter-client==8.6.2
167
+ # via
168
+ # ipykernel
169
+ # jupyter-console
170
+ # jupyter-server
171
+ # nbclient
172
+ # qtconsole
173
+ jupyter-console==6.6.3
174
+ # via jupyter
175
+ jupyter-core==5.7.2
176
+ # via
177
+ # ipykernel
178
+ # jupyter-client
179
+ # jupyter-console
180
+ # jupyter-server
181
+ # jupyterlab
182
+ # nbclient
183
+ # nbconvert
184
+ # nbformat
185
+ # qtconsole
186
+ jupyter-events==0.10.0
187
+ # via jupyter-server
188
+ jupyter-lsp==2.2.5
189
+ # via jupyterlab
190
+ jupyter-server==2.14.1
191
+ # via
192
+ # jupyter-lsp
193
+ # jupyterlab
194
+ # jupyterlab-server
195
+ # notebook
196
+ # notebook-shim
197
+ jupyter-server-terminals==0.5.3
198
+ # via jupyter-server
199
+ jupyterlab==4.2.2
200
+ # via notebook
201
+ jupyterlab-pygments==0.3.0
202
+ # via nbconvert
203
+ jupyterlab-server==2.27.2
204
+ # via
205
+ # jupyterlab
206
+ # notebook
207
+ jupyterlab-widgets==3.0.11
208
+ # via ipywidgets
209
+ kiwisolver==1.4.5
210
+ # via
211
+ # -c requirements.txt
212
+ # matplotlib
213
+ linkify-it-py==2.0.3
214
+ # via
215
+ # -c requirements.txt
216
+ # shiny
217
+ markdown-it-py==3.0.0
218
+ # via
219
+ # -c requirements.txt
220
+ # mdit-py-plugins
221
+ # shiny
222
+ markupsafe==2.1.5
223
+ # via
224
+ # jinja2
225
+ # nbconvert
226
+ matplotlib==3.8.4
227
+ # via
228
+ # -c requirements.txt
229
+ # plotnine
230
+ matplotlib-inline==0.1.7
231
+ # via
232
+ # ipykernel
233
+ # ipython
234
+ mdit-py-plugins==0.4.1
235
+ # via
236
+ # -c requirements.txt
237
+ # shiny
238
+ mdurl==0.1.2
239
+ # via
240
+ # -c requirements.txt
241
+ # markdown-it-py
242
+ mistune==3.0.2
243
+ # via nbconvert
244
+ mizani==0.11.3
245
+ # via
246
+ # -c requirements.txt
247
+ # plotnine
248
+ nbclient==0.10.0
249
+ # via nbconvert
250
+ nbconvert==7.16.4
251
+ # via
252
+ # jupyter
253
+ # jupyter-server
254
+ nbformat==5.10.4
255
+ # via
256
+ # jupyter-server
257
+ # nbclient
258
+ # nbconvert
259
+ nest-asyncio==1.6.0
260
+ # via ipykernel
261
+ notebook==7.2.1
262
+ # via jupyter
263
+ notebook-shim==0.2.4
264
+ # via
265
+ # jupyterlab
266
+ # notebook
267
+ numpy==1.26.4
268
+ # via
269
+ # -c requirements.txt
270
+ # -r requirements.in
271
+ # contourpy
272
+ # fr-toolbelt
273
+ # matplotlib
274
+ # mizani
275
+ # pandas
276
+ # patsy
277
+ # plotnine
278
+ # pyarrow
279
+ # scipy
280
+ # statsmodels
281
+ overrides==7.7.0
282
+ # via jupyter-server
283
+ packaging==24.0
284
+ # via
285
+ # -c requirements.txt
286
+ # htmltools
287
+ # ipykernel
288
+ # jupyter-server
289
+ # jupyterlab
290
+ # jupyterlab-server
291
+ # matplotlib
292
+ # nbconvert
293
+ # pytest
294
+ # qtconsole
295
+ # qtpy
296
+ # shiny
297
+ # statsmodels
298
+ pandas==2.2.2
299
+ # via
300
+ # -c requirements.txt
301
+ # -r requirements.in
302
+ # fr-toolbelt
303
+ # mizani
304
+ # plotnine
305
+ # statsmodels
306
+ pandocfilters==1.5.1
307
+ # via nbconvert
308
+ parso==0.8.4
309
+ # via jedi
310
+ patsy==0.5.6
311
+ # via
312
+ # -c requirements.txt
313
+ # statsmodels
314
+ pillow==10.3.0
315
+ # via
316
+ # -c requirements.txt
317
+ # matplotlib
318
+ platformdirs==4.2.2
319
+ # via jupyter-core
320
+ plotnine==0.13.6
321
+ # via
322
+ # -c requirements.txt
323
+ # -r requirements.in
324
+ pluggy==1.5.0
325
+ # via pytest
326
+ polars==0.20.31
327
+ # via
328
+ # -c requirements.txt
329
+ # -r requirements.in
330
+ progress==1.6
331
+ # via
332
+ # -c requirements.txt
333
+ # fr-toolbelt
334
+ prometheus-client==0.20.0
335
+ # via jupyter-server
336
+ prompt-toolkit==3.0.36
337
+ # via
338
+ # -c requirements.txt
339
+ # ipython
340
+ # jupyter-console
341
+ # questionary
342
+ psutil==5.9.8
343
+ # via ipykernel
344
+ pure-eval==0.2.2
345
+ # via stack-data
346
+ pyarrow==16.1.0
347
+ # via
348
+ # -c requirements.txt
349
+ # -r requirements.in
350
+ pycparser==2.22
351
+ # via cffi
352
+ pygments==2.18.0
353
+ # via
354
+ # ipython
355
+ # jupyter-console
356
+ # nbconvert
357
+ # qtconsole
358
+ pyparsing==3.1.2
359
+ # via
360
+ # -c requirements.txt
361
+ # matplotlib
362
+ pytest==8.2.2
363
+ # via -r requirements-dev.in
364
+ python-dateutil==2.9.0.post0
365
+ # via
366
+ # -c requirements.txt
367
+ # -r requirements.in
368
+ # arrow
369
+ # jupyter-client
370
+ # matplotlib
371
+ # pandas
372
+ python-json-logger==2.0.7
373
+ # via jupyter-events
374
+ python-multipart==0.0.9
375
+ # via
376
+ # -c requirements.txt
377
+ # shiny
378
+ pytz==2024.1
379
+ # via
380
+ # -c requirements.txt
381
+ # pandas
382
+ pywin32==306
383
+ # via jupyter-core
384
+ pywinpty==2.0.13
385
+ # via
386
+ # jupyter-server
387
+ # jupyter-server-terminals
388
+ # terminado
389
+ pyyaml==6.0.1
390
+ # via jupyter-events
391
+ pyzmq==26.0.3
392
+ # via
393
+ # ipykernel
394
+ # jupyter-client
395
+ # jupyter-console
396
+ # jupyter-server
397
+ # qtconsole
398
+ qtconsole==5.5.2
399
+ # via jupyter
400
+ qtpy==2.4.1
401
+ # via qtconsole
402
+ questionary==2.0.1
403
+ # via
404
+ # -c requirements.txt
405
+ # shiny
406
+ referencing==0.35.1
407
+ # via
408
+ # jsonschema
409
+ # jsonschema-specifications
410
+ # jupyter-events
411
+ requests==2.32.3
412
+ # via
413
+ # -c requirements.txt
414
+ # -r requirements.in
415
+ # fr-toolbelt
416
+ # jupyterlab-server
417
+ rfc3339-validator==0.1.4
418
+ # via
419
+ # jsonschema
420
+ # jupyter-events
421
+ rfc3986-validator==0.1.1
422
+ # via
423
+ # jsonschema
424
+ # jupyter-events
425
+ rpds-py==0.18.1
426
+ # via
427
+ # jsonschema
428
+ # referencing
429
+ scipy==1.13.0
430
+ # via
431
+ # -c requirements.txt
432
+ # mizani
433
+ # plotnine
434
+ # statsmodels
435
+ send2trash==1.8.3
436
+ # via jupyter-server
437
+ shiny==0.10.2
438
+ # via
439
+ # -c requirements.txt
440
+ # -r requirements.in
441
+ six==1.16.0
442
+ # via
443
+ # -c requirements.txt
444
+ # asttokens
445
+ # bleach
446
+ # patsy
447
+ # python-dateutil
448
+ # rfc3339-validator
449
+ sniffio==1.3.1
450
+ # via
451
+ # -c requirements.txt
452
+ # anyio
453
+ # httpx
454
+ soupsieve==2.5
455
+ # via beautifulsoup4
456
+ stack-data==0.6.3
457
+ # via ipython
458
+ starlette==0.37.2
459
+ # via
460
+ # -c requirements.txt
461
+ # shiny
462
+ statsmodels==0.14.2
463
+ # via
464
+ # -c requirements.txt
465
+ # plotnine
466
+ terminado==0.18.1
467
+ # via
468
+ # jupyter-server
469
+ # jupyter-server-terminals
470
+ tinycss2==1.3.0
471
+ # via nbconvert
472
+ tornado==6.4.1
473
+ # via
474
+ # ipykernel
475
+ # jupyter-client
476
+ # jupyter-server
477
+ # jupyterlab
478
+ # notebook
479
+ # terminado
480
+ traitlets==5.14.3
481
+ # via
482
+ # comm
483
+ # ipykernel
484
+ # ipython
485
+ # ipywidgets
486
+ # jupyter-client
487
+ # jupyter-console
488
+ # jupyter-core
489
+ # jupyter-events
490
+ # jupyter-server
491
+ # jupyterlab
492
+ # matplotlib-inline
493
+ # nbclient
494
+ # nbconvert
495
+ # nbformat
496
+ # qtconsole
497
+ types-python-dateutil==2.9.0.20240316
498
+ # via arrow
499
+ typing-extensions==4.11.0
500
+ # via
501
+ # -c requirements.txt
502
+ # htmltools
503
+ # shiny
504
+ tzdata==2024.1
505
+ # via
506
+ # -c requirements.txt
507
+ # mizani
508
+ # pandas
509
+ uc-micro-py==1.0.3
510
+ # via
511
+ # -c requirements.txt
512
+ # linkify-it-py
513
+ uri-template==1.3.0
514
+ # via jsonschema
515
+ urllib3==2.2.1
516
+ # via
517
+ # -c requirements.txt
518
+ # requests
519
+ uvicorn==0.29.0
520
+ # via
521
+ # -c requirements.txt
522
+ # shiny
523
+ watchfiles==0.21.0
524
+ # via
525
+ # -c requirements.txt
526
+ # shiny
527
+ wcwidth==0.2.13
528
+ # via
529
+ # -c requirements.txt
530
+ # prompt-toolkit
531
+ webcolors==24.6.0
532
+ # via jsonschema
533
+ webencodings==0.5.1
534
+ # via
535
+ # bleach
536
+ # tinycss2
537
+ websocket-client==1.8.0
538
+ # via jupyter-server
539
+ websockets==12.0
540
+ # via
541
+ # -c requirements.txt
542
+ # shiny
543
+ widgetsnbextension==4.0.11
544
+ # via ipywidgets
545
+
546
+ # The following packages are considered to be unsafe in a requirements file:
547
+ # setuptools
requirements.in ADDED
@@ -0,0 +1,12 @@
+ # requirements.in
+ # required dependencies
+ faicons>=0.2.2, <1.0
+ fr-toolbelt>=0.1.2, <1.0
+ numpy>=1.26, <2.0
+ pandas>=2.2, <3.0
+ plotnine>=0.13.6, <1.0
+ polars>=0.20.26, <1.0
+ pyarrow>=16.1.0, <17.0
+ python-dateutil>=2.9.0.post0, <3.0
+ requests>=2.32.2, <3.0
+ shiny>=0.9.0, <1.0
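
Each direct dependency above gets a floor at the tested version and a ceiling below the next major release, so pip-compile can take patch and minor updates without crossing a breaking release. A quick, hypothetical check that an installed environment satisfies these ranges (the `packaging` library is assumed to be available in the dev environment):

from importlib.metadata import version
from packaging.specifiers import SpecifierSet

# A few of the pins from requirements.in, copied verbatim.
PINS = {
    "pandas": ">=2.2, <3.0",
    "polars": ">=0.20.26, <1.0",
    "shiny": ">=0.9.0, <1.0",
}

for name, spec in PINS.items():
    installed = version(name)
    status = "ok" if installed in SpecifierSet(spec) else f"violates {spec}"
    print(f"{name} {installed}: {status}")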
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
tests/__init__.py ADDED
File without changes
tests/test_process_data.py CHANGED
@@ -1,4 +1,4 @@
- import pytest
  from fr_toolbelt.api_requests import get_documents_by_date
  from pandas import DataFrame

+ #import pytest
  from fr_toolbelt.api_requests import get_documents_by_date
  from pandas import DataFrame

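The change above drops the unused `import pytest`; pytest collects plain `test_*` functions without it. A hypothetical sketch of a test in this module's style (the document fields below are stand-ins, not the repo's actual fixtures):

from pandas import DataFrame

def test_results_convert_to_dataframe():
    # Hypothetical stand-in for results returned by get_documents_by_date.
    results = [{"document_number": "2024-00001", "type": "Rule"}]
    df = DataFrame(results)
    assert not df.empty
    assert "document_number" in df.columns
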
www/logo.png ADDED
www/style.css ADDED
@@ -0,0 +1,17 @@
+ .header img {
+ float: left;
+ width: 200px;
+ height: 66px;
+ vertical-align: middle;
+ }
+
+ .header span {
+ position: right; /* "right" is not a valid position value, so browsers ignore this declaration */
+ line-height: 66px;
+ padding-left: 2.5%;
+ padding-top: 0%;
+ padding-right: 0%;
+ padding-bottom: 7.5%;
+ font-size: 30px;
+ vertical-align: middle;
+ }
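
These selectors assume a header container with class `header` wrapping the logo image and a title span. A hypothetical sketch of the matching markup in Shiny terms (the real header construction lives in app.py; files under www/ are served from the app root):

from shiny import ui

header = ui.div(
    {"class": "header"},
    ui.tags.img(src="logo.png"),       # styled by `.header img`
    ui.tags.span("Dashboard title"),   # placeholder text; styled by `.header span`
)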