Revisions 2024 06 (#11)
* update name of function
* create venv for development
* Update requirements-dev.txt
* Update app.py
minor cleanup
* Create test_plots.py
* Update get_rules_in_window.py
* Update app.py
* modularize
* Update app.py
* add frequency selector
* combine groupby date reactive calcs
* convert panels to expandable cards
* Delete test_plots.py
* minor updates to tests
* update main requirements
use pip-tools to manage dependencies
* handles plots with minimal data
now shows default filler plot instead of confusing plot with little to no data
* dev requirements
* update dev req
* Update significant.py
pass coalesce=True to join; ensures polars breaking change doesn't affect code
* mac req
* Update get_rules_in_window.py
update start of window to May 23
* update app notes
* add logo to header
* Update app.py
* add weekly
* Update app.py
clean up logo info
* updates to weekly calcs
* Update app.py
add asterisks to value boxes
* weekly data viz
improve grouping by week; fix issue where line plots weren't showing
* Update grouping.py
* Update grouping.py
added function to pad dates in grouped data; still need to implement in output
* Update grouping.py
* Update plotting.py
fix geom_line grouping
* weekly and daily plots
implemented grouping, plotting, etc. for aggregating daily and weekly data
- .gitignore +5 -1
- app.py +172 -66
- cra_window_rules.py +40 -2
- modules/__init__.py +9 -0
- modules/get_rules_in_window.py +6 -155
- modules/grouping.py +295 -0
- modules/plotting.py +167 -0
- modules/significant.py +1 -1
- requirements-dev-mac.txt +541 -0
- requirements-dev.in +6 -0
- requirements-dev.txt +547 -0
- requirements.in +12 -0
- requirements.txt +0 -0
- tests/__init__.py +0 -0
- tests/test_process_data.py +1 -1
- www/logo.png +0 -0
- www/style.css +17 -0
.gitignore

@@ -1,8 +1,12 @@
 # ----- Project Specific ----- #
 
+# development venv
+.dev/
+dev/
+
 # text files except for requirements
 *.txt
-!requirements
+!requirements*.txt
 
 # HF space folder
 .huggingface
app.py

@@ -1,46 +1,74 @@
 import asyncio
 from datetime import datetime, date, time
+from pathlib import Path
 
 from faicons import icon_svg
+from pandas import DataFrame, to_datetime
+from plotnine import ggplot, labs
 
-from modules
+from modules import (
    DF,
    LAST_UPDATED,
    START_DATE,
+    WINDOW_OPEN_DATE,
    GET_SIGNIFICANT,
    METADATA,
-    AGENCIES,
+    AGENCIES,
    groupby_agency,
-
+    groupby_date,
+    add_weeks_to_data,
+    pad_missing_dates,
    plot_agency,
-
+    plot_tf,
 )
 
 from shiny import reactive
 from shiny.express import input, render, ui
 
+TITLE = "CRA Window Tracker - GW Regulatory Studies Center"
+
+HEADER = "Rules in the Congressional Review Act (CRA) Window"
+
 FOOTER = f"""
 -----
 
-
+© 2024 [GW Regulatory Studies Center](https://go.gwu.edu/regstudies). See our page on the [Congressional Review Act](https://regulatorystudies.columbian.gwu.edu/congressional-review-act) for more information.
 """
 
-ui.
-
+ui.include_css( Path(__file__).parent.joinpath("www") / "style.css")
+
+ui.tags.title(TITLE)
+
+sidebar_logo = ui.HTML(
+    f"""
+    <div class="header">
+        <a href="https://go.gwu.edu/regstudies" target="_blank">
+            <img src="logo.png" alt="Regulatory Studies Center logo"/>
+        </a>
+    </div>
+    """
 )
 
-
-
-
-
-
+page_header = ui.HTML(
+    f"""
+    <div class="header">
+        <span>{HEADER}</span>
+    </div>
+    """
+)
+
+page_header
+
+with ui.sidebar(open={"desktop": "open", "mobile": "closed"}):
+    sidebar_logo
+
+    ui.input_date("start_date", "Select start of window", value=WINDOW_OPEN_DATE, min=START_DATE, max=date.today())
+
    ui.input_select("menu_agency", "Select agencies", choices=["all"] + AGENCIES, selected="all", multiple=True)
 
-
-
-    #
-    # ["Section 3(f)(1)", "Other"],
-    #)
+    ui.input_select("frequency", "Select frequency", choices=["daily", "monthly", "weekly"], selected="daily")
+
+    #ui.input_switch("switch", "Stack significant rules in plots", False)
 
 with ui.layout_column_wrap():
     with ui.value_box(showcase=icon_svg("book")):
@@ -51,7 +79,7 @@ with ui.layout_column_wrap():
         ui.input_action_button("filter_all", "View", ) #class_="btn-success")
 
     with ui.value_box(showcase=icon_svg("book")):
-        "Section 3(f)(1) Significant rules"
+        "Section 3(f)(1) Significant rules *"
         @render.text
         def count_3f1_significant():
             output = "Not available"
@@ -61,7 +89,7 @@ with ui.layout_column_wrap():
         ui.input_action_button("filter_3f1", "View", ) #class_="btn-success")
 
     with ui.value_box(showcase=icon_svg("book")):
-        "Other Significant rules"
+        "Other Significant rules *"
         @render.text
         def count_other_significant():
             output = "Not available"
@@ -70,14 +98,18 @@ with ui.layout_column_wrap():
             return output
         ui.input_action_button("filter_other", "View", )
 
+ui.markdown(
+    f"""
+    \* *Executive Order 12866 significance data last updated **{LAST_UPDATED}***.
+    """
+)
+
 with ui.navset_card_underline(title=""):
 
    with ui.nav_panel("Rules in detail"):
        @render.data_frame
        def table_rule_detail():
-            df = filtered_sig()
-            #print(df.columns)
-            #df.loc[:, "date"] = df.apply(lambda x: f"{x['publication_year']}-{x['publication_month']}-{x['publication_day']}", axis=1)
+            df = filtered_sig().copy()
            df.loc[:, "date"] = df.loc[:, "publication_date"].apply(lambda x: f"{x.date()}")
            char, limit = " ", 10
            df.loc[:, "title"] = df["title"].apply(lambda x: x if len(x.split(char)) < (limit + 1) else f"{char.join(x.split(char)[:limit])}...")
@@ -89,58 +121,82 @@ with ui.navset_card_underline(title=""):
                "3f1_significant",
                "other_significant",
            ]
-            return render.DataGrid(df.loc[:, [c for c in cols if c in df.columns]], width="100%")
+            return render.DataGrid(df.loc[:, [c for c in cols if c in df.columns]], width="100%")
 
-    with ui.nav_panel("
+    with ui.nav_panel("Over time"):
 
        with ui.layout_columns():
 
-
-
-
-
-            grouped
-            )
-
-
-
-
-
-
-
-
-
+            with ui.card(full_screen=True):
+
+                @render.plot
+                def plot_over_time(value_col: str = "rules"):
+                    grouped = get_grouped_data_over_time()
+                    values = grouped.loc[:, value_col].to_numpy()
+                    count_gte_zero = sum(1 if g > 0 else 0 for g in values)
+                    max_val = max(values, default=0)
+                    if (max_val < 2) or (count_gte_zero < 2):
+                        return (
+                            ggplot()
+                            + labs(title="Not enough data available to visualize.")
+                        )
+                    else:
+                        return plot_tf(
+                            grouped,
+                            input.frequency()
+                        )
+
+            with ui.card(full_screen=True):
+                @render.data_frame
+                def table_over_time():
+                    grouped = get_grouped_data_over_time()
+                    date_cols = ["publication_date", "week_of", ]
+                    if any(d in grouped.columns for d in date_cols):
+                        grouped = grouped.astype({d: "str" for d in date_cols if d in grouped.columns}, errors="ignore")
+                    grouped = grouped.rename(columns={
+                        "publication_year": "year",
+                        "publication_month": "month",
+                        "publication_date": "date",
+                        }, errors="ignore")
+                    cols = [
+                        "date",
+                        "year",
+                        "month",
+                        "week_of",
+                        "rules",
+                        "3f1_significant",
+                        "other_significant",
+                    ]
+                    return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
 
    with ui.nav_panel("By agency"):
 
        with ui.layout_columns():
 
-
-
-
-
-            #
-
+            with ui.card(full_screen=True):
+                @render.plot
+                def plot_by_agency():
+                    grouped = grouped_df_agency()
+                    #if input.switch():
+                    #    pass
+                    #    # placeholder for stacked bar chart
                    plot = plot_agency(
-
-
+                        grouped.head(10),
+                    )
                    return plot
 
-
-
-
-
-
-
-
-
-
-
-
+            with ui.card(full_screen=True):
+                @render.data_frame
+                def table_by_agency():
+                    grouped = grouped_df_agency()
+                    cols = [
+                        "agency",
+                        "acronym",
+                        "rules",
+                        "3f1_significant",
+                        "other_significant",
+                    ]
+                    return render.DataTable(grouped.loc[:, [c for c in cols if c in grouped.columns]])
 
 with ui.accordion(open=False):
 
@@ -183,7 +239,7 @@ with ui.accordion(open=False):
    f"""
    Rule data retrieved from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1).
 
-
+    The window for the CRA lookback period is [estimated](https://www.huntonak.com/the-nickel-report/federal-agencies-face-looming-congressional-review-act-deadline) to open on May 23, 2024.
    """
 )
 
@@ -191,8 +247,6 @@ ui.markdown(
    FOOTER
 )
 
-#ui.tags.footer()
-
 
 # ----- REACTIVE CALCULATIONS ----- #
 
@@ -214,6 +268,7 @@ def filtered_df():
 
    return filt_df
 
+
 @reactive.calc
 def filtered_sig():
    filt_df = filtered_df()
@@ -228,10 +283,48 @@ def filtered_sig():
 
    return filt_df
 
+
 @reactive.calc
 def grouped_df_month():
    filt_df = filtered_sig()
-    grouped =
+    grouped = groupby_date(filt_df, significant=GET_SIGNIFICANT)
+    return grouped
+
+
+@reactive.calc
+def grouped_df_day():
+    filt_df = filtered_sig()
+    date_col = "publication_date"
+    grouped = groupby_date(filt_df, group_col=date_col, significant=GET_SIGNIFICANT)
+    grouped = pad_missing_dates(
+        grouped,
+        date_col,
+        "days",
+        fill_padded_values={
+            "rules": 0,
+            "3f1_significant": 0,
+            "other_significant": 0,
+        })
+    return grouped
+
+
+@reactive.calc
+def grouped_df_week():
+    filt_df = filtered_sig()
+    filt_df = add_weeks_to_data(filt_df)
+    try:
+        grouped = groupby_date(filt_df, group_col=("week_number", "week_of"), significant=GET_SIGNIFICANT)
+        grouped = pad_missing_dates(
+            grouped,
+            "week_of",
+            how="weeks",
+            fill_padded_values={
+                "rules": 0,
+                "3f1_significant": 0,
+                "other_significant": 0,
+            })
+    except KeyError as err:
+        grouped = DataFrame(columns=["week_number", "week_of", "rules", "3f1_significant", "other_significant"])
    return grouped
 
 
@@ -242,6 +335,19 @@ def grouped_df_agency():
    return grouped
 
 
+@reactive.calc
+def get_grouped_data_over_time():
+    if input.frequency() == "daily":
+        grouped = grouped_df_day()
+    elif input.frequency() == "monthly":
+        grouped = grouped_df_month()
+    elif input.frequency() == "weekly":
+        grouped = grouped_df_week()
+    else:
+        raise ValueError("Only 'daily', 'monthly', or 'weekly' are valid inputs.")
+    return grouped
+
+
 # ----- REACTIVE VALUES ----- #
 
 
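The `get_grouped_data_over_time` calc above is what lets one frequency selector drive both the plot and the table without running the groupby twice: `@reactive.calc` caches the result and re-runs only when its inputs change. A minimal, self-contained sketch of that pattern (hypothetical names and data, not code from this PR):

```python
from pandas import DataFrame
from shiny import reactive
from shiny.express import input, render, ui

ui.input_select("frequency", "Select frequency", choices=["daily", "weekly"], selected="daily")

@reactive.calc
def grouped_demo():
    # Recomputed only when input.frequency() changes; both outputs
    # below read the cached result instead of re-running the groupby.
    if input.frequency() == "daily":
        return DataFrame({"date": ["2024-05-23", "2024-05-24"], "rules": [3, 1]})
    return DataFrame({"week_of": ["2024-05-20"], "rules": [4]})

@render.data_frame
def demo_table():
    return render.DataGrid(grouped_demo())

@render.text
def demo_count():
    return f"{int(grouped_demo()['rules'].sum())} rules"
```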
cra_window_rules.py

@@ -1,6 +1,44 @@
+from datetime import date
 from pathlib import Path
 
-from
+from pandas import DataFrame
+
+from modules import (
+    METADATA,
+    get_date_range,
+    get_rules_in_window,
+    groupby_agency,
+    groupby_date,
+)
+
+
+def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
+    files = (
+        f"rules_{transition_year - 1}_{transition_year}.csv",
+        f"rules_by_agency_{transition_year - 1}_{transition_year}.csv",
+        f"rules_by_month_{transition_year - 1}_{transition_year}.csv"
+    )
+    dataframes = (df_all, df_agency, df_ym)
+    for data, file in zip(dataframes, files):
+        data.to_csv(path / file, index=False)
+
+
+def main(start_date, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
+    if date.fromisoformat(start_date) < date(2023, 4, 6):
+        significant = False
+    date_range = get_date_range(start_date)
+    transition_year = date_range.get("transition_year")
+    df, _ = get_rules_in_window(start_date, get_significant=significant)
+
+    df_agency = groupby_agency(df, metadata=metadata, significant=significant)
+    df_ym = groupby_date(df, significant=significant)
+
+    if save_data:
+        if path is None:
+            path = Path(__file__).parent
+        save_csv(path, df, df_agency, df_ym, transition_year)
+
+    return df, df_agency, df_ym
 
 
 if __name__ == "__main__":
@@ -14,4 +52,4 @@ if __name__ == "__main__":
     df, agency, ym = main(start, path=data_path, metadata=METADATA, significant=True)
     print(f"Rules in CRA window: {len(df)}")
     print("\nRules by agency\n", agency.head(10))
-    print("\nRules by month\n", ym)
+    print("\nRules by month\n", ym)
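Note the guard at the top of `main()`: start dates before April 6, 2023 force `significant=False`, presumably because the EO 12866 significance fields are only populated from the executive order of that date onward (the same cutoff appears in `GET_SIGNIFICANT` below). A toy check of that gate, with a hypothetical helper name:

```python
# Standalone sketch of the significance gate in main(); the helper
# name significance_available is made up for illustration.
from datetime import date

def significance_available(start_date: str) -> bool:
    return date.fromisoformat(start_date) >= date(2023, 4, 6)

print(significance_available("2024-01-01"))  # True
print(significance_available("2022-12-15"))  # False
```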
modules/__init__.py

@@ -1,6 +1,15 @@
+from .get_rules_in_window import *
+from .grouping import *
+from .plotting import *
+from .search_columns import *
+from .significant import *
+
+
 # see: https://docs.python.org/3.11/tutorial/modules.html#packages
 __all__ = [
     "get_rules_in_window",
+    "grouping",
+    "plotting",
     "search_columns",
     "significant",
 ]
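The star-imports are what actually re-export the helper functions at the package level; `__all__` here lists submodule names, so it only governs `from modules import *`. A quick interactive check (assuming the package is importable):

```python
import modules

# re-exported into the package namespace by "from .grouping import *"
# and "from .plotting import *" in modules/__init__.py
print(modules.groupby_date)
print(modules.plot_tf)
```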
modules/get_rules_in_window.py

@@ -1,19 +1,9 @@
 from datetime import date
-from pathlib import Path
 
 from fr_toolbelt.api_requests import get_documents_by_date
 from fr_toolbelt.preprocessing import process_documents, AgencyMetadata
 from numpy import array
 from pandas import DataFrame, to_datetime
-from plotnine import (
-    ggplot,
-    aes,
-    geom_col,
-    labs,
-    coord_flip,
-    scale_x_discrete,
-    theme_light,
-)
 
 try:
     from search_columns import search_columns, SearchError
@@ -25,6 +15,7 @@ except (ModuleNotFoundError, ImportError):
 
 METADATA, _ = AgencyMetadata().get_agency_metadata()
 START_DATE = "2024-01-01"
+WINDOW_OPEN_DATE = "2024-05-23"
 GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) else False
 
 
@@ -156,129 +147,6 @@ def get_significant_rules(df, start_date):
     return df, last_updated
 
 
-def get_agency_metadata_values(
-    df: DataFrame,
-    agency_column: str,
-    metadata: dict,
-    metadata_value: str,
-    ):
-    if metadata_value == "acronym":
-        metadata_value = "short_name"
-    return df.loc[:, agency_column].apply(
-        lambda x: metadata.get(x, {}).get(metadata_value)
-    )
-
-
-def groupby_agency(
-    df: DataFrame,
-    group_col: str = "parent_slug",
-    value_col: str = "document_number",
-    aggfunc: str = "count",
-    significant: bool = True,
-    metadata: dict | None = None,
-    metadata_value: str = "acronym",
-    ):
-    aggfunc_dict = {value_col: aggfunc, }
-    if significant:
-        aggfunc_dict.update({
-            "3f1_significant": "sum",
-            "other_significant": "sum",
-        })
-    df_ex = df.explode(group_col, ignore_index=True)
-    grouped = df_ex.groupby(
-        by=group_col
-    ).agg(
-        aggfunc_dict
-    ).reset_index()
-    grouped = grouped.sort_values(value_col, ascending=False).rename(
-        columns={
-            group_col: "agency",
-            value_col: "rules",
-        }, errors="ignore"
-    )
-    if metadata is not None:
-        grouped.loc[:, metadata_value] = get_agency_metadata_values(
-            grouped,
-            agency_column="agency",
-            metadata=metadata,
-            metadata_value=metadata_value
-        )
-        cols = ["agency", metadata_value, "rules", "3f1_significant", "other_significant"]
-        grouped = grouped.loc[:, [c for c in cols if c in grouped.columns]]
-    return grouped
-
-
-def groupby_ym(
-    df: DataFrame,
-    group_col: tuple | list = ("publication_year", "publication_month", ),
-    value_col: str = "document_number",
-    aggfunc: str = "count",
-    significant: bool = True
-    ):
-    aggfunc_dict = {value_col: aggfunc, }
-    if significant:
-        aggfunc_dict.update({
-            "3f1_significant": "sum",
-            "other_significant": "sum",
-        })
-    grouped = df.groupby(
-        by=list(group_col)
-    ).agg(
-        aggfunc_dict
-    ).reset_index()
-    grouped = grouped.rename(columns={
-        value_col: "rules",
-    }, errors="ignore")
-    return grouped
-
-
-def save_csv(path: Path, df_all: DataFrame, df_agency: DataFrame, df_ym: DataFrame, transition_year: int):
-    files = (
-        f"rules_{transition_year - 1}_{transition_year}.csv",
-        f"rules_by_agency_{transition_year - 1}_{transition_year}.csv",
-        f"rules_by_month_{transition_year - 1}_{transition_year}.csv"
-    )
-    dataframes = (df_all, df_agency, df_ym)
-    for data, file in zip(dataframes, files):
-        data.to_csv(path / file, index=False)
-
-
-def plot_agency(df, group_col = "acronym", value_col = "rules"):
-
-    order_list = df.loc[:, group_col].to_list()[::-1]
-
-    plot = (
-        ggplot(
-            df,
-            aes(x=group_col, y=value_col),
-        )
-        + geom_col()
-        + coord_flip()
-        + scale_x_discrete(limits=order_list)
-        + labs(y="", x="", title="Number of Rules Published by Agency")
-        + theme_light()
-    )
-    return plot
-
-
-def plot_month(df, group_cols = ("publication_year", "publication_month"), value_col = "rules"):
-
-    df.loc[:, "ym"] = df[group_cols[0]].astype(str) + "-" + df[group_cols[1]].astype(str).str.pad(2, fillchar="0")
-    order_list = df.loc[:, "ym"].to_list()
-
-    plot = (
-        ggplot(
-            df,
-            aes(x="ym", y=value_col),
-        )
-        + geom_col()
-        + scale_x_discrete(limits=order_list)
-        + labs(y="", x="", title="Number of Rules Published by Month")
-        + theme_light()
-    )
-    return plot
-
-
 def get_rules_in_window(start_date: str, get_significant: bool = True):
     date_range = get_date_range(start_date)
     transition_year = date_range.get("transition_year")
@@ -293,35 +161,18 @@ def get_rules_in_window(start_date: str, get_significant: bool = True):
     return df, last_updated
 
 
-def get_list_agencies(start_date, agency_column: str = "
+def get_list_agencies(start_date, agency_column: str = "parent_slug", significant: bool = True):
     df, _ = get_rules_in_window(start_date, get_significant=significant)
-
-    return sorted(
-
-
-def main(start_date, save_data: bool = True, path: Path | None = None, metadata: dict | None = None, significant: bool = True):
-    if date.fromisoformat(start_date) < date(2023, 4, 6):
-        significant = False
-    date_range = get_date_range(start_date)
-    transition_year = date_range.get("transition_year")
-    df, _ = get_rules_in_window(start_date, get_significant=significant)
-
-    df_agency = groupby_agency(df, metadata=metadata, significant=significant)
-    df_ym = groupby_ym(df, significant=significant)
-
-    if save_data:
-        if path is None:
-            path = Path(__file__).parent
-        save_csv(path, df, df_agency, df_ym, transition_year)
-
-    return df, df_agency, df_ym
+    df_ex = df.explode(agency_column, ignore_index=True)
+    return sorted(df_ex[agency_column].value_counts().index.to_list())
 
 
 DF, LAST_UPDATED = get_rules_in_window(START_DATE, get_significant=GET_SIGNIFICANT)
-AGENCIES = get_list_agencies(START_DATE,
+AGENCIES = get_list_agencies(START_DATE, significant=GET_SIGNIFICANT)
 
 
 if __name__ == "__main__":
 
     print(DF.columns)
     print(LAST_UPDATED)
+    print(AGENCIES)
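`get_list_agencies` works because each rule can list several parent agencies, so the column holds lists; `explode()` turns those into one row per (rule, agency) pair before counting. A toy illustration with made-up data:

```python
# Hypothetical data illustrating the explode + value_counts pattern
# used by get_list_agencies above.
from pandas import DataFrame

df = DataFrame({
    "document_number": ["2024-001", "2024-002"],
    "parent_slug": [["treasury-department"], ["justice-department", "treasury-department"]],
})
df_ex = df.explode("parent_slug", ignore_index=True)
print(sorted(df_ex["parent_slug"].value_counts().index.to_list()))
# ['justice-department', 'treasury-department']
```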
modules/grouping.py (new file)

@@ -0,0 +1,295 @@
from __future__ import annotations

from datetime import datetime, date, timedelta

from dateutil.relativedelta import *
from pandas import DataFrame, Timestamp, to_datetime


def _get_agency_metadata_values(
    df: DataFrame,
    agency_column: str,
    metadata: dict,
    metadata_value: str,
    ):
    """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).

    Args:
        df (DataFrame): Input data.
        agency_column (str): Column containing agency identifier.
        metadata (dict): Agency metadata.
        metadata_value (str): Value of interest from agency metadata.

    Returns:
        pd.Series: Pandas Series of new values for adding to DataFrame.
    """
    if metadata_value == "acronym":
        metadata_value = "short_name"
    return df.loc[:, agency_column].apply(
        lambda x: metadata.get(x, {}).get(metadata_value)
    )


def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
    """Get the start date of the first week from a list of dates.
    Pass "week_start" to select a different start date for each week (defaults to Monday).
    """
    if week_start in (MO, TU, WE, TH, FR, SA, SU):
        pass
    elif isinstance(week_start, str):
        weekdays = {
            "monday": MO,
            "tuesday": TU,
            "wednesday": WE,
            "thursday": TH,
            "friday": FR,
            "saturday": SA,
            "sunday": SU,
        }
        week_start = weekdays.get(week_start.lower(), MO)
    elif isinstance(week_start, int):
        weekdays = {
            0: MO,
            1: TU,
            2: WE,
            3: TH,
            4: FR,
            5: SA,
            6: SU,
        }
        week_start = weekdays.get(week_start, MO)
    else:
        raise TypeError("Parameter 'week_start' must be type `str`, `int`, or a dateutil weekday instance.")

    first_day = next(d for d in dates)
    return first_day + relativedelta(weekday=week_start(-1))


def _get_week_start_dates(first_week_start: date | Timestamp, end_date: date | None = None):
    """Get the index and start date for each week.

    Args:
        first_week_start (date | Timestamp): Start date of the first week in the data.
        end_date (date | None, optional): End date for data. If None is passed (the default), the end date is `date.today()`.

    Returns:
        list[tuple]: List of tuples containing the week number and the start date.
    """
    if end_date is None:
        end_date = date.today()
    try:
        week_start_dates = [first_week_start.date()]
    except AttributeError as err:
        week_start_dates = [first_week_start]
    while week_start_dates[-1] < end_date:
        next_start_date = week_start_dates[-1] + relativedelta(weeks=1)
        week_start_dates.append(next_start_date)
    week_start_dates = [day for day in week_start_dates if day <= end_date]
    week_start_dates = [d.date() if isinstance(d, (Timestamp, datetime)) else d for d in week_start_dates]
    return [(idx, w) for idx, w in enumerate(week_start_dates)]


def _get_weeks(dates: list[date], end_date: date | None = None, **kwargs) -> list[tuple]:
    """Takes a list, array, or other iterable of `datetime.date` values and returns a list of tuples containing (week_number, week_start_date) pairs.
    Pass keyword arg "week_start" - ranging from 0 (Monday) to 6 (Sunday) - to choose a different start date than Monday for the week.
    """
    # get the start date for the first week
    first_week_start = _get_first_week_start(dates, **kwargs)

    # get start date for each week in the input values
    weeks = _get_week_start_dates(first_week_start, end_date=end_date)

    # iterate over inputs, append tuple of week number and start date for each week
    results = []
    for d in dates:
        if isinstance(d, Timestamp):
            d = d.date()
        week_gen = ((idx, start_date) for idx, start_date in weeks if (start_date <= d < (start_date + timedelta(weeks=1))))
        results.append(next(week_gen, (0, first_week_start)))
    return results


def add_weeks_to_data(df: DataFrame, date_column: str = "publication_date", new_columns: tuple[str] = ("week_number", "week_of")):
    """Add week number and week start date to input data.

    Args:
        df (DataFrame): Input data.
        date_column (str, optional): Name of column containing publication dates. Defaults to "publication_date".
        new_columns (tuple[str], optional): New column names. Defaults to ("week_number", "week_start").

    Returns:
        DataFrame: Data containing week information.
    """
    df_c = df.copy()
    data = df_c[date_column].to_list()
    if len(data) > 0:
        week_numbers, week_starts = list(zip(*_get_weeks(data)))
        df_c.loc[:, new_columns[0]] = week_numbers
        df_c.loc[:, new_columns[1]] = to_datetime(week_starts)
    return df_c


def _pad_missing_weeks(timeframe_list: list[date], **kwargs):

    # get the start date for the first week
    first_week_start = _get_first_week_start(timeframe_list)

    # get start date for each week in the input values
    return _get_week_start_dates(first_week_start, **kwargs)


def _pad_missing_days(timeframe_list: list[date], end_date: date | None = None):

    start_date = min(timeframe_list)
    if end_date is None:
        end_date = date.today()

    # create list of weekdays from start to end dates
    # remember that range() objects are exclusive of the stop
    return [
        start_date + relativedelta(days=n)
        for n in range((end_date - start_date).days + 1)
        if (start_date + relativedelta(days=n)).weekday() in range(0, 5)
    ]


def pad_missing_dates(df: DataFrame, pad_column: str, how: str, fill_padded_values: dict | None = None, **kwargs):

    df_copy = df.copy()
    timeframe_list = [d.date() if isinstance(d, (Timestamp, datetime)) else d for d in df_copy[pad_column].to_list()]
    df_copy = df_copy.astype({pad_column: "object"})
    df_copy.loc[:, pad_column] = timeframe_list

    # pad dates if dataframe isn't empty
    if len(timeframe_list) > 0:

        # choose which time frequency needs padding
        if how == "days":
            week_numbers = None
            padded_timeframes = _pad_missing_days(timeframe_list, **kwargs)
        elif how == "weeks":
            week_numbers, padded_timeframes = zip(*_pad_missing_weeks(timeframe_list, **kwargs))
        else:
            raise ValueError

        # incorporate extended dates into dataframe
        df_merge = DataFrame({pad_column: padded_timeframes})
        pad_cols = [pad_column]
        if week_numbers is not None:
            df_merge.loc[:, "week_number"] = week_numbers
            pad_cols.append("week_number")
        df_copy = df_copy.merge(df_merge, on=pad_cols, how="outer", indicator=True)
        if fill_padded_values is not None:
            for col, val in fill_padded_values.items():
                bool_ = df_copy["_merge"] == "right_only"
                df_copy.loc[bool_, col] = val

    return df_copy.drop(columns=["_merge"], errors="ignore")


def groupby_agency(
    df: DataFrame,
    group_col: str = "parent_slug",
    value_col: str = "document_number",
    aggfunc: str = "count",
    significant: bool = True,
    metadata: dict | None = None,
    metadata_value: str = "acronym",
    ):
    """_summary_

    Args:
        df (DataFrame): _description_
        group_col (str, optional): _description_. Defaults to "parent_slug".
        value_col (str, optional): _description_. Defaults to "document_number".
        aggfunc (str, optional): _description_. Defaults to "count".
        significant (bool, optional): _description_. Defaults to True.
        metadata (dict | None, optional): _description_. Defaults to None.
        metadata_value (str, optional): _description_. Defaults to "acronym".

    Returns:
        _type_: _description_
    """
    aggfunc_dict = {value_col: aggfunc, }
    if significant:
        aggfunc_dict.update({
            "3f1_significant": "sum",
            "other_significant": "sum",
        })
    df_ex = df.explode(group_col, ignore_index=True)
    grouped = df_ex.groupby(
        by=group_col
    ).agg(
        aggfunc_dict
    ).reset_index()
    grouped = grouped.sort_values(value_col, ascending=False).rename(
        columns={
            group_col: "agency",
            value_col: "rules",
        }, errors="ignore"
    )
    if metadata is not None:
        grouped.loc[:, metadata_value] = _get_agency_metadata_values(
            grouped,
            agency_column="agency",
            metadata=metadata,
            metadata_value=metadata_value
        )
        cols = ["agency", metadata_value, "rules", "3f1_significant", "other_significant"]
        grouped = grouped.loc[:, [c for c in cols if c in grouped.columns]]
    return grouped


def groupby_date(
    df: DataFrame,
    group_col: str | tuple | list = ("publication_year", "publication_month", ),
    value_col: str = "document_number",
    aggfunc: str = "count",
    significant: bool = True
    ):
    if isinstance(group_col, str):
        group_col = [group_col]
    elif isinstance(group_col, (list, tuple)):
        group_col = list(group_col)
    else:
        raise TypeError

    aggfunc_dict = {value_col: aggfunc, }
    if significant:
        aggfunc_dict.update({
            "3f1_significant": "sum",
            "other_significant": "sum",
        })
    grouped = df.groupby(
        by=group_col
    ).agg(
        aggfunc_dict
    ).reset_index()
    grouped = grouped.rename(columns={
        value_col: "rules",
    }, errors="ignore")
    return grouped


if __name__ == "__main__":

    from datetime import date, timedelta
    from pandas import to_datetime

    TODAY = date.today()
    WEEKS_AGO = TODAY - timedelta(weeks=10)

    dates = [(WEEKS_AGO - timedelta(days=r)) for r in range(21) if (r % 3 != 0)][::-1] + [(TODAY - timedelta(days=r)) for r in range(21)][::-1]
    df = DataFrame({"dates": dates, "values": [idx for idx, _ in enumerate(dates)]})

    df_a = pad_missing_dates(df, "dates", "days", fill_padded_values={"values": 0})
    print(df_a.head(10))

    df = add_weeks_to_data(df, date_column="dates")
    print(df.head(10))

    grouped = groupby_date(df, group_col=("week_number", "week_of"), value_col="values", significant=False)
    print(grouped)

    df_b = pad_missing_dates(grouped, "week_of", how="weeks", fill_padded_values={"rules": 0})
    print(df_b)
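The week bucketing above hinges on a dateutil detail worth spelling out: `relativedelta(weekday=MO(-1))` resolves to the most recent Monday, counting the date itself, which is how `_get_first_week_start` anchors the first week. A standalone check:

```python
# dateutil semantics relied on by _get_first_week_start:
# weekday=MO(-1) means "the most recent Monday, including today".
from datetime import date
from dateutil.relativedelta import relativedelta, MO

print(date(2024, 6, 5) + relativedelta(weekday=MO(-1)))  # 2024-06-03 (Wednesday -> prior Monday)
print(date(2024, 6, 3) + relativedelta(weekday=MO(-1)))  # 2024-06-03 (already a Monday)
```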
modules/plotting.py (new file)

@@ -0,0 +1,167 @@
from pandas import DataFrame
from plotnine import (
    ggplot,
    aes,
    geom_col,
    geom_line,
    labs,
    coord_flip,
    scale_x_discrete,
    scale_x_datetime,
    scale_y_continuous,
    theme_light,
)


class DataAvailabilityError(Exception):
    """Raised when not enough data available to vizualize."""
    pass


def plot_agency(df, group_col = "acronym", value_col = "rules"):
    """Plot rules by agency.

    Args:
        df (DataFrame): Input data.
        group_col (str, optional): Column on which the data are grouped. Defaults to "acronym".
        value_col (str, optional): Column of values to be plotted. Defaults to "rules".

    Returns:
        ggplot: Plotted data.
    """
    order_list = df.loc[:, group_col].to_list()[::-1]

    plot = (
        ggplot(
            df,
            aes(x=group_col, y=value_col),
        )
        + geom_col()
        + coord_flip()
        + scale_x_discrete(limits=order_list)
        + labs(y="", x="", title="Number of Rules Published by Agency")
        + theme_light()
    )
    return plot


def plot_month(df: DataFrame, group_cols: tuple = ("publication_year", "publication_month"), value_col: str = "rules"):
    """Plot rules by month.

    Args:
        df (DataFrame): Input data.
        group_cols (tuple, optional): Columns on which the data are grouped. Defaults to ("publication_year", "publication_month").
        value_col (str, optional): Column of values to be plotted. Defaults to "rules".

    Returns:
        ggplot: Plotted data.
    """
    df.loc[:, "ym"] = df[group_cols[0]].astype(str) + "-" + df[group_cols[1]].astype(str).str.pad(2, fillchar="0")
    order_list = df.loc[:, "ym"].to_list()

    plot = (
        ggplot(
            df,
            aes(x="ym", y=value_col),
        )
        + geom_col()
        + scale_x_discrete(limits=order_list)
        + labs(y="", x="", title="Number of Rules Published by Month")
        + theme_light()
    )
    return plot


def plot_day(df: DataFrame, group_col: str = "publication_date", value_col: str = "rules"):
    """Plot rules by day.

    Args:
        df (DataFrame): Input data.
        group_col (str, optional): Column on which the data are grouped. Defaults to ("publication_year", "publication_month").
        value_col (str, optional): Column of values to be plotted. Defaults to "rules".

    Returns:
        ggplot: Plotted data.
    """
    min_date = df.loc[:, group_col].min()
    max_date = df.loc[:, group_col].max()
    diff = (max_date - min_date).days
    if diff in range(0, 61):
        freq = "1 week"
    elif diff in range(61, 91):
        freq = "2 weeks"
    else:
        freq = "1 month"

    max_value = df.loc[:, value_col].max()

    plot = (
        ggplot(
            df,
            aes(x=group_col, y=value_col),
        )
        + geom_line(group=1)
        + scale_x_datetime(date_breaks=freq, date_labels="%m-%d")
        + scale_y_continuous(limits=(0, max_value), expand=(0, 0, 0.1, 0))
        + labs(y="", x="", title="Number of Rules Published by Date")
        + theme_light()
    )
    return plot


def plot_week(df: DataFrame, group_col: str = "week_of", value_col: str = "rules", ):
    max_value = df.loc[:, value_col].max()

    date_values = df[group_col].to_list()
    num_weeks = len(date_values)

    if num_weeks in range(8, 16):
        reduce_by = 2
    elif num_weeks in range(16, 24):
        reduce_by = 3
    elif num_weeks in range(24, 32):
        reduce_by = 4
    elif num_weeks >= 32:
        reduce_by = 5
    else:
        reduce_by = 1

    breaks = [val for idx, val in enumerate(date_values) if idx % reduce_by == 0]

    plot = (
        ggplot(
            df,
            aes(x=group_col, y=value_col),
        )
        + geom_line(group=1)
        + scale_x_datetime(breaks=breaks, labels=[f"{w.strftime('%m-%d')}" for w in breaks])
        + scale_y_continuous(limits=(0, max_value), expand=(0, 0, 0.1, 0))
        + labs(y="", x="", title="Number of Rules Published by Week")
        + theme_light()
    )
    return plot


def plot_tf(df: DataFrame, frequency: str, **kwargs) -> ggplot:
    """Plot rules over time by given frequency.

    Args:
        df (DataFrame): Input data.
        frequency (str): Frequency of time for aggregating rules. Accepts "monthly" or "daily".

    Raises:
        ValueError: Frequency parameter received invalid value.

    Returns:
        ggplot: Plotted data.
    """
    freq_options = {
        "monthly": plot_month,
        "daily": plot_day,
        "weekly": plot_week,
    }

    plot_freq = freq_options.get(frequency, None)
    if plot_freq is None:
        raise ValueError(f"Frequency must be one of: {', '.join(freq_options.keys())}")
    return plot_freq(df, **kwargs)
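`plot_tf` is a plain dictionary dispatch over the three frequency-specific plotters. A hedged usage sketch with a toy weekly frame (assumes the `modules` package from this PR is importable; the data is made up):

```python
from pandas import DataFrame, to_datetime
from modules.plotting import plot_tf

grouped = DataFrame({
    "week_of": to_datetime(["2024-05-20", "2024-05-27", "2024-06-03"]),
    "rules": [4, 7, 2],
})
p = plot_tf(grouped, "weekly")  # dispatches to plot_week via the freq_options map
p.save("rules_by_week.png")     # or print(p) to render interactively
```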
modules/significant.py

@@ -89,7 +89,7 @@ def merge_with_api_results(pd_df: pd_DataFrame,
     ):
 
     main_df = pl.from_pandas(pd_df)
-    df = main_df.join(pl_df, on="document_number", how="left", validate="1:1")
+    df = main_df.join(pl_df, on="document_number", how="left", validate="1:1", coalesce=True)
     return df.to_pandas()
 
 
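Per the commit message, passing `coalesce=True` pins the key-merging behavior explicitly instead of relying on version-dependent defaults in newer polars releases. A standalone sketch with toy frames:

```python
# Toy frames demonstrating the join being pinned above: with
# coalesce=True the "document_number" key stays a single column,
# and unmatched left rows get null significance values.
import polars as pl

left = pl.DataFrame({"document_number": ["2024-001", "2024-002"], "title": ["Rule A", "Rule B"]})
right = pl.DataFrame({"document_number": ["2024-001"], "3f1_significant": [1]})

df = left.join(right, on="document_number", how="left", coalesce=True)
print(df.columns)  # ['document_number', 'title', '3f1_significant']
```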
requirements-dev-mac.txt (new file)

@@ -0,0 +1,541 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
#    pip-compile --output-file=requirements-dev-mac.txt requirements-dev.in requirements.in
#
anyio==4.3.0
appdirs==1.4.4
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asgiref==3.8.1
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
contourpy==1.2.1
cursor==1.3.5
cycler==0.12.1
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
faicons==0.2.2
fastjsonschema==2.20.0
fonttools==4.51.0
fqdn==1.5.1
fr-toolbelt==0.1.3
h11==0.14.0
htmltools==0.5.1
httpcore==1.0.5
httpx==0.27.0
idna==3.7
iniconfig==2.0.0
ipykernel==6.29.4
ipython==8.18.0
ipywidgets==8.1.3
isoduration==20.11.0
jedi==0.19.1
jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema[format-nongpl]==4.22.0
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-client==8.6.2
jupyter-console==6.6.3
jupyter-core==5.7.2
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter-server==2.14.1
jupyter-server-terminals==0.5.3
jupyterlab==4.2.3
jupyterlab-pygments==0.3.0
jupyterlab-server==2.27.2
jupyterlab-widgets==3.0.11
kiwisolver==1.4.5
linkify-it-py==2.0.3
markdown-it-py==3.0.0
markupsafe==2.1.5
matplotlib==3.8.4
matplotlib-inline==0.1.7
mdit-py-plugins==0.4.1
mdurl==0.1.2
mistune==3.0.2
mizani==0.11.3
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook==7.2.1
notebook-shim==0.2.4
numpy==1.26.4
overrides==7.7.0
packaging==24.0
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
patsy==0.5.6
pexpect==4.9.0
pillow==10.3.0
platformdirs==4.2.2
plotnine==0.13.6
pluggy==1.5.0
polars==0.20.31
progress==1.6
prometheus-client==0.20.0
prompt-toolkit==3.0.36
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==16.1.0
pycparser==2.22
pygments==2.18.0
pyparsing==3.1.2
pytest==8.2.2
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
python-multipart==0.0.9
pytz==2024.1
pyyaml==6.0.1
pyzmq==26.0.3
qtconsole==5.5.2
qtpy==2.4.1
questionary==2.0.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.18.1
scipy==1.13.0
send2trash==1.8.3
shiny==0.10.2
six==1.16.0
sniffio==1.3.1
445 |
+
# via
|
446 |
+
# -c requirements.txt
|
447 |
+
# anyio
|
448 |
+
# httpx
|
449 |
+
soupsieve==2.5
|
450 |
+
# via beautifulsoup4
|
451 |
+
stack-data==0.6.3
|
452 |
+
# via ipython
|
453 |
+
starlette==0.37.2
|
454 |
+
# via
|
455 |
+
# -c requirements.txt
|
456 |
+
# shiny
|
457 |
+
statsmodels==0.14.2
|
458 |
+
# via
|
459 |
+
# -c requirements.txt
|
460 |
+
# plotnine
|
461 |
+
terminado==0.18.1
|
462 |
+
# via
|
463 |
+
# jupyter-server
|
464 |
+
# jupyter-server-terminals
|
465 |
+
tinycss2==1.3.0
|
466 |
+
# via nbconvert
|
467 |
+
tornado==6.4.1
|
468 |
+
# via
|
469 |
+
# ipykernel
|
470 |
+
# jupyter-client
|
471 |
+
# jupyter-server
|
472 |
+
# jupyterlab
|
473 |
+
# notebook
|
474 |
+
# terminado
|
475 |
+
traitlets==5.14.3
|
476 |
+
# via
|
477 |
+
# comm
|
478 |
+
# ipykernel
|
479 |
+
# ipython
|
480 |
+
# ipywidgets
|
481 |
+
# jupyter-client
|
482 |
+
# jupyter-console
|
483 |
+
# jupyter-core
|
484 |
+
# jupyter-events
|
485 |
+
# jupyter-server
|
486 |
+
# jupyterlab
|
487 |
+
# matplotlib-inline
|
488 |
+
# nbclient
|
489 |
+
# nbconvert
|
490 |
+
# nbformat
|
491 |
+
# qtconsole
|
492 |
+
types-python-dateutil==2.9.0.20240316
|
493 |
+
# via arrow
|
494 |
+
typing-extensions==4.11.0
|
495 |
+
# via
|
496 |
+
# -c requirements.txt
|
497 |
+
# htmltools
|
498 |
+
# shiny
|
499 |
+
tzdata==2024.1
|
500 |
+
# via
|
501 |
+
# -c requirements.txt
|
502 |
+
# pandas
|
503 |
+
uc-micro-py==1.0.3
|
504 |
+
# via
|
505 |
+
# -c requirements.txt
|
506 |
+
# linkify-it-py
|
507 |
+
uri-template==1.3.0
|
508 |
+
# via jsonschema
|
509 |
+
urllib3==2.2.1
|
510 |
+
# via
|
511 |
+
# -c requirements.txt
|
512 |
+
# requests
|
513 |
+
uvicorn==0.29.0
|
514 |
+
# via
|
515 |
+
# -c requirements.txt
|
516 |
+
# shiny
|
517 |
+
watchfiles==0.21.0
|
518 |
+
# via
|
519 |
+
# -c requirements.txt
|
520 |
+
# shiny
|
521 |
+
wcwidth==0.2.13
|
522 |
+
# via
|
523 |
+
# -c requirements.txt
|
524 |
+
# prompt-toolkit
|
525 |
+
webcolors==24.6.0
|
526 |
+
# via jsonschema
|
527 |
+
webencodings==0.5.1
|
528 |
+
# via
|
529 |
+
# bleach
|
530 |
+
# tinycss2
|
531 |
+
websocket-client==1.8.0
|
532 |
+
# via jupyter-server
|
533 |
+
websockets==12.0
|
534 |
+
# via
|
535 |
+
# -c requirements.txt
|
536 |
+
# shiny
|
537 |
+
widgetsnbextension==4.0.11
|
538 |
+
# via ipywidgets
|
539 |
+
|
540 |
+
# The following packages are considered to be unsafe in a requirements file:
|
541 |
+
# setuptools
|
requirements-dev.in:
@@ -0,0 +1,6 @@
+# requirements-dev.in
+-c requirements.txt
+ipykernel>=6.29
+ipython>=8.18
+jupyter>=1.0.0
+pytest>=8.2.2
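The `-c requirements.txt` line here is a pip-tools constraint rather than a requirement: any package the development environment shares with the app is held to the exact version already pinned in requirements.txt, so the dev tooling cannot pull the app's dependencies out of sync. The pinned lockfile that follows is compiled from this file with the command recorded in its own header:

    pip-compile --output-file=requirements-dev.txt requirements-dev.in requirements.in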
requirements-dev.txt:
@@ -0,0 +1,547 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+#    pip-compile --output-file=requirements-dev.txt requirements-dev.in requirements.in
+#
+anyio==4.3.0
+    # via
+    #   -c requirements.txt
+    #   httpx
+    #   jupyter-server
+    #   starlette
+    #   watchfiles
+appdirs==1.4.4
+    # via
+    #   -c requirements.txt
+    #   shiny
+argon2-cffi==23.1.0
+    # via jupyter-server
+argon2-cffi-bindings==21.2.0
+    # via argon2-cffi
+arrow==1.3.0
+    # via isoduration
+asgiref==3.8.1
+    # via
+    #   -c requirements.txt
+    #   shiny
+asttokens==2.4.1
+    # via stack-data
+async-lru==2.0.4
+    # via jupyterlab
+attrs==23.2.0
+    # via
+    #   jsonschema
+    #   referencing
+babel==2.15.0
+    # via jupyterlab-server
+beautifulsoup4==4.12.3
+    # via nbconvert
+bleach==6.1.0
+    # via nbconvert
+certifi==2024.2.2
+    # via
+    #   -c requirements.txt
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==1.16.0
+    # via argon2-cffi-bindings
+charset-normalizer==3.3.2
+    # via
+    #   -c requirements.txt
+    #   requests
+click==8.1.7
+    # via
+    #   -c requirements.txt
+    #   shiny
+    #   uvicorn
+colorama==0.4.6
+    # via
+    #   -c requirements.txt
+    #   click
+    #   ipython
+    #   pytest
+comm==0.2.2
+    # via
+    #   ipykernel
+    #   ipywidgets
+contourpy==1.2.1
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+cursor==1.3.5
+    # via
+    #   -c requirements.txt
+    #   fr-toolbelt
+cycler==0.12.1
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+debugpy==1.8.1
+    # via ipykernel
+decorator==5.1.1
+    # via ipython
+defusedxml==0.7.1
+    # via nbconvert
+executing==2.0.1
+    # via stack-data
+faicons==0.2.2
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+fastjsonschema==2.20.0
+    # via nbformat
+fonttools==4.51.0
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+fqdn==1.5.1
+    # via jsonschema
+fr-toolbelt==0.1.3
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+h11==0.14.0
+    # via
+    #   -c requirements.txt
+    #   httpcore
+    #   uvicorn
+htmltools==0.5.1
+    # via
+    #   -c requirements.txt
+    #   faicons
+    #   shiny
+httpcore==1.0.5
+    # via httpx
+httpx==0.27.0
+    # via jupyterlab
+idna==3.7
+    # via
+    #   -c requirements.txt
+    #   anyio
+    #   httpx
+    #   jsonschema
+    #   requests
+iniconfig==2.0.0
+    # via pytest
+ipykernel==6.29.0
+    # via
+    #   -r requirements-dev.in
+    #   jupyter
+    #   jupyter-console
+    #   jupyterlab
+    #   qtconsole
+ipython==8.18.0
+    # via
+    #   -r requirements-dev.in
+    #   ipykernel
+    #   ipywidgets
+    #   jupyter-console
+ipywidgets==8.1.3
+    # via jupyter
+isoduration==20.11.0
+    # via jsonschema
+jedi==0.19.1
+    # via ipython
+jinja2==3.1.4
+    # via
+    #   jupyter-server
+    #   jupyterlab
+    #   jupyterlab-server
+    #   nbconvert
+json5==0.9.25
+    # via jupyterlab-server
+jsonpointer==3.0.0
+    # via jsonschema
+jsonschema[format-nongpl]==4.22.0
+    # via
+    #   jupyter-events
+    #   jupyterlab-server
+    #   nbformat
+jsonschema-specifications==2023.12.1
+    # via jsonschema
+jupyter==1.0.0
+    # via -r requirements-dev.in
+jupyter-client==8.6.2
+    # via
+    #   ipykernel
+    #   jupyter-console
+    #   jupyter-server
+    #   nbclient
+    #   qtconsole
+jupyter-console==6.6.3
+    # via jupyter
+jupyter-core==5.7.2
+    # via
+    #   ipykernel
+    #   jupyter-client
+    #   jupyter-console
+    #   jupyter-server
+    #   jupyterlab
+    #   nbclient
+    #   nbconvert
+    #   nbformat
+    #   qtconsole
+jupyter-events==0.10.0
+    # via jupyter-server
+jupyter-lsp==2.2.5
+    # via jupyterlab
+jupyter-server==2.14.1
+    # via
+    #   jupyter-lsp
+    #   jupyterlab
+    #   jupyterlab-server
+    #   notebook
+    #   notebook-shim
+jupyter-server-terminals==0.5.3
+    # via jupyter-server
+jupyterlab==4.2.2
+    # via notebook
+jupyterlab-pygments==0.3.0
+    # via nbconvert
+jupyterlab-server==2.27.2
+    # via
+    #   jupyterlab
+    #   notebook
+jupyterlab-widgets==3.0.11
+    # via ipywidgets
+kiwisolver==1.4.5
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+linkify-it-py==2.0.3
+    # via
+    #   -c requirements.txt
+    #   shiny
+markdown-it-py==3.0.0
+    # via
+    #   -c requirements.txt
+    #   mdit-py-plugins
+    #   shiny
+markupsafe==2.1.5
+    # via
+    #   jinja2
+    #   nbconvert
+matplotlib==3.8.4
+    # via
+    #   -c requirements.txt
+    #   plotnine
+matplotlib-inline==0.1.7
+    # via
+    #   ipykernel
+    #   ipython
+mdit-py-plugins==0.4.1
+    # via
+    #   -c requirements.txt
+    #   shiny
+mdurl==0.1.2
+    # via
+    #   -c requirements.txt
+    #   markdown-it-py
+mistune==3.0.2
+    # via nbconvert
+mizani==0.11.3
+    # via
+    #   -c requirements.txt
+    #   plotnine
+nbclient==0.10.0
+    # via nbconvert
+nbconvert==7.16.4
+    # via
+    #   jupyter
+    #   jupyter-server
+nbformat==5.10.4
+    # via
+    #   jupyter-server
+    #   nbclient
+    #   nbconvert
+nest-asyncio==1.6.0
+    # via ipykernel
+notebook==7.2.1
+    # via jupyter
+notebook-shim==0.2.4
+    # via
+    #   jupyterlab
+    #   notebook
+numpy==1.26.4
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+    #   contourpy
+    #   fr-toolbelt
+    #   matplotlib
+    #   mizani
+    #   pandas
+    #   patsy
+    #   plotnine
+    #   pyarrow
+    #   scipy
+    #   statsmodels
+overrides==7.7.0
+    # via jupyter-server
+packaging==24.0
+    # via
+    #   -c requirements.txt
+    #   htmltools
+    #   ipykernel
+    #   jupyter-server
+    #   jupyterlab
+    #   jupyterlab-server
+    #   matplotlib
+    #   nbconvert
+    #   pytest
+    #   qtconsole
+    #   qtpy
+    #   shiny
+    #   statsmodels
+pandas==2.2.2
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+    #   fr-toolbelt
+    #   mizani
+    #   plotnine
+    #   statsmodels
+pandocfilters==1.5.1
+    # via nbconvert
+parso==0.8.4
+    # via jedi
+patsy==0.5.6
+    # via
+    #   -c requirements.txt
+    #   statsmodels
+pillow==10.3.0
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+platformdirs==4.2.2
+    # via jupyter-core
+plotnine==0.13.6
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+pluggy==1.5.0
+    # via pytest
+polars==0.20.31
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+progress==1.6
+    # via
+    #   -c requirements.txt
+    #   fr-toolbelt
+prometheus-client==0.20.0
+    # via jupyter-server
+prompt-toolkit==3.0.36
+    # via
+    #   -c requirements.txt
+    #   ipython
+    #   jupyter-console
+    #   questionary
+psutil==5.9.8
+    # via ipykernel
+pure-eval==0.2.2
+    # via stack-data
+pyarrow==16.1.0
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+pycparser==2.22
+    # via cffi
+pygments==2.18.0
+    # via
+    #   ipython
+    #   jupyter-console
+    #   nbconvert
+    #   qtconsole
+pyparsing==3.1.2
+    # via
+    #   -c requirements.txt
+    #   matplotlib
+pytest==8.2.2
+    # via -r requirements-dev.in
+python-dateutil==2.9.0.post0
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+    #   arrow
+    #   jupyter-client
+    #   matplotlib
+    #   pandas
+python-json-logger==2.0.7
+    # via jupyter-events
+python-multipart==0.0.9
+    # via
+    #   -c requirements.txt
+    #   shiny
+pytz==2024.1
+    # via
+    #   -c requirements.txt
+    #   pandas
+pywin32==306
+    # via jupyter-core
+pywinpty==2.0.13
+    # via
+    #   jupyter-server
+    #   jupyter-server-terminals
+    #   terminado
+pyyaml==6.0.1
+    # via jupyter-events
+pyzmq==26.0.3
+    # via
+    #   ipykernel
+    #   jupyter-client
+    #   jupyter-console
+    #   jupyter-server
+    #   qtconsole
+qtconsole==5.5.2
+    # via jupyter
+qtpy==2.4.1
+    # via qtconsole
+questionary==2.0.1
+    # via
+    #   -c requirements.txt
+    #   shiny
+referencing==0.35.1
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+    #   jupyter-events
+requests==2.32.3
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+    #   fr-toolbelt
+    #   jupyterlab-server
+rfc3339-validator==0.1.4
+    # via
+    #   jsonschema
+    #   jupyter-events
+rfc3986-validator==0.1.1
+    # via
+    #   jsonschema
+    #   jupyter-events
+rpds-py==0.18.1
+    # via
+    #   jsonschema
+    #   referencing
+scipy==1.13.0
+    # via
+    #   -c requirements.txt
+    #   mizani
+    #   plotnine
+    #   statsmodels
+send2trash==1.8.3
+    # via jupyter-server
+shiny==0.10.2
+    # via
+    #   -c requirements.txt
+    #   -r requirements.in
+six==1.16.0
+    # via
+    #   -c requirements.txt
+    #   asttokens
+    #   bleach
+    #   patsy
+    #   python-dateutil
+    #   rfc3339-validator
+sniffio==1.3.1
+    # via
+    #   -c requirements.txt
+    #   anyio
+    #   httpx
+soupsieve==2.5
+    # via beautifulsoup4
+stack-data==0.6.3
+    # via ipython
+starlette==0.37.2
+    # via
+    #   -c requirements.txt
+    #   shiny
+statsmodels==0.14.2
+    # via
+    #   -c requirements.txt
+    #   plotnine
+terminado==0.18.1
+    # via
+    #   jupyter-server
+    #   jupyter-server-terminals
+tinycss2==1.3.0
+    # via nbconvert
+tornado==6.4.1
+    # via
+    #   ipykernel
+    #   jupyter-client
+    #   jupyter-server
+    #   jupyterlab
+    #   notebook
+    #   terminado
+traitlets==5.14.3
+    # via
+    #   comm
+    #   ipykernel
+    #   ipython
+    #   ipywidgets
+    #   jupyter-client
+    #   jupyter-console
+    #   jupyter-core
+    #   jupyter-events
+    #   jupyter-server
+    #   jupyterlab
+    #   matplotlib-inline
+    #   nbclient
+    #   nbconvert
+    #   nbformat
+    #   qtconsole
+types-python-dateutil==2.9.0.20240316
+    # via arrow
+typing-extensions==4.11.0
+    # via
+    #   -c requirements.txt
+    #   htmltools
+    #   shiny
+tzdata==2024.1
+    # via
+    #   -c requirements.txt
+    #   mizani
+    #   pandas
+uc-micro-py==1.0.3
+    # via
+    #   -c requirements.txt
+    #   linkify-it-py
+uri-template==1.3.0
+    # via jsonschema
+urllib3==2.2.1
+    # via
+    #   -c requirements.txt
+    #   requests
+uvicorn==0.29.0
+    # via
+    #   -c requirements.txt
+    #   shiny
+watchfiles==0.21.0
+    # via
+    #   -c requirements.txt
+    #   shiny
+wcwidth==0.2.13
+    # via
+    #   -c requirements.txt
+    #   prompt-toolkit
+webcolors==24.6.0
+    # via jsonschema
+webencodings==0.5.1
+    # via
+    #   bleach
+    #   tinycss2
+websocket-client==1.8.0
+    # via jupyter-server
+websockets==12.0
+    # via
+    #   -c requirements.txt
+    #   shiny
+widgetsnbextension==4.0.11
+    # via ipywidgets
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
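A note on reading the lockfile above: each `# via` annotation records why a package is pinned. `-r requirements-dev.in` marks a direct development dependency, `-c requirements.txt` marks a pin constrained to match the app lockfile, and a bare package name marks a transitive dependency of that package. pip-compile omits build-environment packages such as setuptools by default, which is why the file ends with a commented-out setuptools entry; if pins for those were wanted, the same command could be rerun with the `--allow-unsafe` flag:

    pip-compile --allow-unsafe --output-file=requirements-dev.txt requirements-dev.in requirements.in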
requirements.in:
@@ -0,0 +1,12 @@
+# requirements.in
+# required dependencies
+faicons>=0.2.2, <1.0
+fr-toolbelt>=0.1.2, <1.0
+numpy>=1.26, <2.0
+pandas>=2.2, <3.0
+plotnine>=0.13.6, <1.0
+polars>=0.20.26, <1.0
+pyarrow>=16.1.0, <17.0
+python-dateutil>=2.9.0.post0, <3.0
+requests>=2.32.2, <3.0
+shiny>=0.9.0, <1.0
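requirements.txt itself is the lockfile compiled from these twelve specifiers; the diff viewer reports it as binary below, so its contents are not shown. Assuming the same default pip-tools workflow used for the dev lockfile (an inference, since this command is not recorded in the commit), it would be regenerated with:

    pip-compile --output-file=requirements.txt requirements.in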
requirements.txt: Binary files a/requirements.txt and b/requirements.txt differ

tests/__init__.py: File without changes
tests/test_process_data.py:
@@ -1,4 +1,4 @@
-import pytest
+#import pytest
 from fr_toolbelt.api_requests import get_documents_by_date
 from pandas import DataFrame
 
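Commenting out the import is safe here: pytest collects and runs test modules without an explicit `import pytest`; the import is only needed when a module calls pytest's API directly (fixtures, `pytest.raises`, markers), which this file presumably no longer does.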
www/style.css:
@@ -0,0 +1,17 @@
+.header img {
+    float: left;
+    width: 200px;
+    height: 66px;
+    vertical-align: middle;
+}
+
+.header span {
+    position: right;
+    line-height: 66px;
+    padding-left: 2.5%;
+    padding-top: 0%;
+    padding-right: 0%;
+    padding-bottom: 7.5%;
+    font-size: 30px;
+    vertical-align: middle;
+}