github-actions[bot] committed on
Commit ·
e9832fd
1
Parent(s): 6e141d6
Deploy from GitHub Actions
Browse files
- app.py +0 -2
- pages.py +0 -6
- plots/base.py +0 -208
- plots/sector.py +174 -0
- plotting_style_guide.md +163 -0
- ui/pages/sector_compare.py +21 -2
app.py
CHANGED
|
@@ -7,7 +7,6 @@ from pages import (
|
|
| 7 |
calendar_heatmaps_page,
|
| 8 |
dissolved_oxygen_page,
|
| 9 |
do_temp_relationship_page,
|
| 10 |
-
grouped_bar_charts_page,
|
| 11 |
home_page,
|
| 12 |
nutrient_ratios_page,
|
| 13 |
parameter_correlations_page,
|
|
@@ -51,7 +50,6 @@ page_dict["Annual Report Draft Charts/Tables"] = [
|
|
| 51 |
seasonal_maps_page,
|
| 52 |
sector_compare_page,
|
| 53 |
scatter_plots_page,
|
| 54 |
-
grouped_bar_charts_page,
|
| 55 |
parameter_summary_tables_page,
|
| 56 |
]
|
| 57 |
|
|
|
|
| 7 |
calendar_heatmaps_page,
|
| 8 |
dissolved_oxygen_page,
|
| 9 |
do_temp_relationship_page,
|
|
|
|
| 10 |
home_page,
|
| 11 |
nutrient_ratios_page,
|
| 12 |
parameter_correlations_page,
|
|
|
|
| 50 |
seasonal_maps_page,
|
| 51 |
sector_compare_page,
|
| 52 |
scatter_plots_page,
|
|
|
|
| 53 |
parameter_summary_tables_page,
|
| 54 |
]
|
| 55 |
|
pages.py
CHANGED
|
@@ -669,11 +669,6 @@ scatter_plots_page = st.Page(
|
|
| 669 |
title="Scatter Plots",
|
| 670 |
icon=":material/scatter_plot:",
|
| 671 |
)
|
| 672 |
-
grouped_bar_charts_page = st.Page(
|
| 673 |
-
"ui/pages/grouped_bar_charts.py",
|
| 674 |
-
title="Grouped Bar Charts",
|
| 675 |
-
icon=":material/bar_chart:",
|
| 676 |
-
)
|
| 677 |
seasonal_line_charts_page = st.Page(
|
| 678 |
"ui/pages/seasonal_line_charts.py",
|
| 679 |
title="Seasonal Line Charts",
|
|
@@ -694,7 +689,6 @@ __all__ = [
|
|
| 694 |
"settings_page",
|
| 695 |
"parameter_summary_tables_page",
|
| 696 |
"scatter_plots_page",
|
| 697 |
-
"grouped_bar_charts_page",
|
| 698 |
"seasonal_line_charts_page",
|
| 699 |
"sector_compare_page",
|
| 700 |
]
|
|
|
|
| 669 |
title="Scatter Plots",
|
| 670 |
icon=":material/scatter_plot:",
|
| 671 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
seasonal_line_charts_page = st.Page(
|
| 673 |
"ui/pages/seasonal_line_charts.py",
|
| 674 |
title="Seasonal Line Charts",
|
|
|
|
| 689 |
"settings_page",
|
| 690 |
"parameter_summary_tables_page",
|
| 691 |
"scatter_plots_page",
|
|
|
|
| 692 |
"seasonal_line_charts_page",
|
| 693 |
"sector_compare_page",
|
| 694 |
]
|
plots/base.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import math
|
| 2 |
-
import textwrap
|
| 3 |
|
| 4 |
import matplotlib.dates as mdates
|
| 5 |
import matplotlib.pyplot as plt
|
|
@@ -8,8 +7,6 @@ import pandas as pd
|
|
| 8 |
import seaborn as sns
|
| 9 |
from matplotlib.figure import Figure
|
| 10 |
|
| 11 |
-
from utils.data_loading import timer
|
| 12 |
-
|
| 13 |
COLOR_SCALE = [
|
| 14 |
"#6D3E91",
|
| 15 |
"#C05917",
|
|
@@ -707,208 +704,3 @@ def plot_scatter(
|
|
| 707 |
|
| 708 |
plt.tight_layout()
|
| 709 |
return (fig, param_data)
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
@timer(include_params=True)
|
| 713 |
-
def plot_grouped_bars(
|
| 714 |
-
df: pd.DataFrame,
|
| 715 |
-
parameter: str,
|
| 716 |
-
year_range: tuple[int, int],
|
| 717 |
-
group_by: str = "sector",
|
| 718 |
-
) -> tuple[Figure, pd.DataFrame]:
|
| 719 |
-
"""
|
| 720 |
-
Create a grouped bar chart showing means by sector or year for a selected parameter.
|
| 721 |
-
|
| 722 |
-
Parameters:
|
| 723 |
-
-----------
|
| 724 |
-
df : pd.DataFrame
|
| 725 |
-
Input dataframe containing water quality measurements
|
| 726 |
-
parameter : str
|
| 727 |
-
Name of the parameter to plot
|
| 728 |
-
year_range : tuple[int, int]
|
| 729 |
-
Start and end years to include in plot
|
| 730 |
-
group_by : str
|
| 731 |
-
How to group the bars - either "sector" (default) or "year"
|
| 732 |
-
|
| 733 |
-
Returns:
|
| 734 |
-
--------
|
| 735 |
-
tuple[Figure, pd.DataFrame]
|
| 736 |
-
- Figure: Matplotlib figure containing the grouped bar chart
|
| 737 |
-
- DataFrame: Contains the plotted data points with means and standard errors
|
| 738 |
-
"""
|
| 739 |
-
# Filter data for parameter and year range
|
| 740 |
-
plot_df = df[
|
| 741 |
-
(df["Org_Analyte_Name"] == parameter)
|
| 742 |
-
& (df["Reporting_Year"] >= year_range[0])
|
| 743 |
-
& (df["Reporting_Year"] <= year_range[1])
|
| 744 |
-
].copy()
|
| 745 |
-
|
| 746 |
-
if plot_df.empty:
|
| 747 |
-
raise ValueError(
|
| 748 |
-
f"No data available for {parameter} between {year_range[0]}-{year_range[1]}"
|
| 749 |
-
)
|
| 750 |
-
|
| 751 |
-
# Calculate annual means by sector
|
| 752 |
-
means_df = (
|
| 753 |
-
plot_df.groupby(["Reporting_Year", "Sector"], observed=True)["Org_Result_Value"]
|
| 754 |
-
.agg(["mean", "sem"])
|
| 755 |
-
.reset_index()
|
| 756 |
-
)
|
| 757 |
-
|
| 758 |
-
# Get unique years and sectors for plotting
|
| 759 |
-
years = sorted(means_df["Reporting_Year"].unique())
|
| 760 |
-
sectors = sorted(means_df["Sector"].unique())
|
| 761 |
-
|
| 762 |
-
# Determine primary and secondary categories based on grouping
|
| 763 |
-
if group_by == "year":
|
| 764 |
-
primary_categories = sectors
|
| 765 |
-
secondary_categories = years
|
| 766 |
-
x_values = years
|
| 767 |
-
group_column = "Reporting_Year"
|
| 768 |
-
category_column = "Sector"
|
| 769 |
-
x_label = "Reporting Year"
|
| 770 |
-
legend_title = "Sector"
|
| 771 |
-
else: # group_by == "sector"
|
| 772 |
-
primary_categories = years
|
| 773 |
-
secondary_categories = sectors
|
| 774 |
-
x_values = sectors # noqa: F841
|
| 775 |
-
group_column = "Sector" # noqa: F841
|
| 776 |
-
category_column = "Reporting_Year"
|
| 777 |
-
x_label = "Sector"
|
| 778 |
-
legend_title = "Year" # noqa: F841
|
| 779 |
-
|
| 780 |
-
n_groups = len(primary_categories)
|
| 781 |
-
|
| 782 |
-
colors = [
|
| 783 |
-
"#E69F00", # Orange
|
| 784 |
-
"#56B4E9", # Sky Blue
|
| 785 |
-
"#009E73", # Bluish Green
|
| 786 |
-
"#F0E442", # Yellow
|
| 787 |
-
"#0072B2", # Blue
|
| 788 |
-
"#D55E00", # Vermilion
|
| 789 |
-
"#CC79A7", # Reddish Purple
|
| 790 |
-
"#999999", # Gray
|
| 791 |
-
"#F5C710", # Golden Yellow
|
| 792 |
-
"#93AA00", # Lime Green
|
| 793 |
-
"#482677", # Dark Purple
|
| 794 |
-
"#DA5724", # Rust
|
| 795 |
-
"#5082CF", # Steel Blue
|
| 796 |
-
"#CD9BCD", # Lavender
|
| 797 |
-
"#C1A43A", # Olive Green
|
| 798 |
-
]
|
| 799 |
-
|
| 800 |
-
# Create figure
|
| 801 |
-
fig, ax = plt.subplots(figsize=(12, 6))
|
| 802 |
-
|
| 803 |
-
# Calculate bar positions
|
| 804 |
-
bar_width = 0.8 / n_groups # Standard bar width
|
| 805 |
-
|
| 806 |
-
# Calculate center positions for x-axis labels
|
| 807 |
-
group_centers = (
|
| 808 |
-
np.arange(len(secondary_categories)) + (bar_width * (n_groups - 1)) / 2
|
| 809 |
-
)
|
| 810 |
-
|
| 811 |
-
# Plot bars for each primary category
|
| 812 |
-
for i, (category, color) in enumerate(zip(primary_categories, colors)):
|
| 813 |
-
category_data = means_df[means_df[category_column] == category]
|
| 814 |
-
|
| 815 |
-
# Create bars with simple offset calculation
|
| 816 |
-
bars = ax.bar( # noqa: F841
|
| 817 |
-
np.arange(len(secondary_categories)) + i * bar_width,
|
| 818 |
-
category_data["mean"],
|
| 819 |
-
bar_width,
|
| 820 |
-
label=str(category),
|
| 821 |
-
color=color,
|
| 822 |
-
alpha=0.7,
|
| 823 |
-
zorder=2,
|
| 824 |
-
)
|
| 825 |
-
|
| 826 |
-
# Add error bars
|
| 827 |
-
ax.errorbar(
|
| 828 |
-
np.arange(len(secondary_categories)) + i * bar_width,
|
| 829 |
-
category_data["mean"],
|
| 830 |
-
yerr=category_data["sem"],
|
| 831 |
-
fmt="none",
|
| 832 |
-
color="black",
|
| 833 |
-
capsize=3,
|
| 834 |
-
capthick=1,
|
| 835 |
-
linewidth=1,
|
| 836 |
-
alpha=0.5,
|
| 837 |
-
zorder=3,
|
| 838 |
-
)
|
| 839 |
-
|
| 840 |
-
# Customize plot
|
| 841 |
-
unit = plot_df["Org_Result_Unit"].iloc[0]
|
| 842 |
-
ax.set_xlabel(x_label)
|
| 843 |
-
title = f"{parameter} (Mean Annual{' ' + unit if unit else ''})"
|
| 844 |
-
ax.set_title(title)
|
| 845 |
-
|
| 846 |
-
# Function to wrap text
|
| 847 |
-
def wrap_labels(text, width=10):
|
| 848 |
-
"""Wrap text at specified width using textwrap."""
|
| 849 |
-
# Convert to string and wrap if needed
|
| 850 |
-
text_str = str(text)
|
| 851 |
-
if len(text_str) > width:
|
| 852 |
-
return textwrap.fill(text_str, width=width)
|
| 853 |
-
return text_str
|
| 854 |
-
|
| 855 |
-
# Set x-axis ticks and labels with wrapping using centered positions
|
| 856 |
-
ax.set_xticks(group_centers)
|
| 857 |
-
wrapped_labels = [wrap_labels(str(label)) for label in secondary_categories]
|
| 858 |
-
ax.set_xticklabels(
|
| 859 |
-
wrapped_labels,
|
| 860 |
-
ha="center",
|
| 861 |
-
va="top",
|
| 862 |
-
rotation=0,
|
| 863 |
-
)
|
| 864 |
-
|
| 865 |
-
# Remove x-axis tick marks
|
| 866 |
-
ax.tick_params(axis="x", length=0)
|
| 867 |
-
|
| 868 |
-
# Add error bar note with adjusted position
|
| 869 |
-
ax.text(
|
| 870 |
-
0.99,
|
| 871 |
-
-0.15,
|
| 872 |
-
"Error bars represent ±1 standard error of the mean",
|
| 873 |
-
ha="right",
|
| 874 |
-
va="top",
|
| 875 |
-
transform=ax.transAxes,
|
| 876 |
-
fontsize=9,
|
| 877 |
-
fontstyle="italic",
|
| 878 |
-
)
|
| 879 |
-
|
| 880 |
-
# Adjust layout with more vertical space for wrapped labels
|
| 881 |
-
plt.tight_layout(rect=(0, 0.2, 1, 1))
|
| 882 |
-
|
| 883 |
-
# Add grid
|
| 884 |
-
ax.grid(True, axis="y", alpha=0.2, linestyle="-", zorder=1)
|
| 885 |
-
|
| 886 |
-
# Customize spines
|
| 887 |
-
ax.spines["top"].set_visible(False)
|
| 888 |
-
ax.spines["right"].set_visible(False)
|
| 889 |
-
ax.spines["left"].set_visible(False)
|
| 890 |
-
|
| 891 |
-
# Remove tick marks but keep labels
|
| 892 |
-
ax.tick_params(axis="y", which="both", length=0)
|
| 893 |
-
|
| 894 |
-
ax.legend(
|
| 895 |
-
bbox_to_anchor=(1.02, 1), # Position at top-right
|
| 896 |
-
loc="upper left",
|
| 897 |
-
frameon=False,
|
| 898 |
-
ncol=1,
|
| 899 |
-
handletextpad=0.5,
|
| 900 |
-
fontsize=9,
|
| 901 |
-
)
|
| 902 |
-
|
| 903 |
-
# Determine if log scale should be used
|
| 904 |
-
if parameter in [
|
| 905 |
-
# "Turbidity",
|
| 906 |
-
"Fecal Coliform (MPN)",
|
| 907 |
-
"Total Nitrogen",
|
| 908 |
-
"Total Phosphorus",
|
| 909 |
-
]:
|
| 910 |
-
ax.set_yscale("log")
|
| 911 |
-
ax.yaxis.set_major_formatter(plt.ScalarFormatter()) # type: ignore
|
| 912 |
-
|
| 913 |
-
means_df.insert(0, "parameter", parameter)
|
| 914 |
-
return fig, means_df
|
|
|
|
| 1 |
import math
|
|
|
|
| 2 |
|
| 3 |
import matplotlib.dates as mdates
|
| 4 |
import matplotlib.pyplot as plt
|
|
|
|
| 7 |
import seaborn as sns
|
| 8 |
from matplotlib.figure import Figure
|
| 9 |
|
|
|
|
|
|
|
| 10 |
COLOR_SCALE = [
|
| 11 |
"#6D3E91",
|
| 12 |
"#C05917",
|
|
|
|
| 704 |
|
| 705 |
plt.tight_layout()
|
| 706 |
return (fig, param_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plots/sector.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import altair as alt
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
import numpy as np
|
|
@@ -809,3 +811,175 @@ def plot_sector_trends(
|
|
| 809 |
# Adjust layout with more vertical space between subplots
|
| 810 |
plt.tight_layout(rect=(0, 0, 0.85, 1), h_pad=2.0)
|
| 811 |
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import textwrap
|
| 2 |
+
|
| 3 |
import altair as alt
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import numpy as np
|
|
|
|
| 811 |
# Adjust layout with more vertical space between subplots
|
| 812 |
plt.tight_layout(rect=(0, 0, 0.85, 1), h_pad=2.0)
|
| 813 |
return fig
|
| 814 |
+
|
| 815 |
+
|
| 816 |
+
@timer(include_params=True)
|
| 817 |
+
def plot_grouped_bars(
|
| 818 |
+
df: pd.DataFrame,
|
| 819 |
+
parameter: str,
|
| 820 |
+
year_range: tuple[int, int],
|
| 821 |
+
group_by: str = "sector",
|
| 822 |
+
) -> tuple[Figure, pd.DataFrame]:
|
| 823 |
+
"""Create a grouped bar chart showing means by sector or year.
|
| 824 |
+
|
| 825 |
+
Parameters:
|
| 826 |
+
-----------
|
| 827 |
+
df : pd.DataFrame
|
| 828 |
+
Input dataframe containing water quality measurements
|
| 829 |
+
parameter : str
|
| 830 |
+
Name of the parameter to plot
|
| 831 |
+
year_range : tuple[int, int]
|
| 832 |
+
Start and end years to include in plot
|
| 833 |
+
group_by : str
|
| 834 |
+
How to group the bars - either "sector" (default) or "year"
|
| 835 |
+
|
| 836 |
+
Returns:
|
| 837 |
+
--------
|
| 838 |
+
tuple[Figure, pd.DataFrame]
|
| 839 |
+
- Figure: Matplotlib figure containing the grouped bar chart
|
| 840 |
+
- DataFrame: Contains the plotted data points with means and standard errors
|
| 841 |
+
"""
|
| 842 |
+
# Define consistent colors for styling
|
| 843 |
+
GREY30 = "#4d4d4d"
|
| 844 |
+
GREY40 = "#666666"
|
| 845 |
+
|
| 846 |
+
# Filter data for parameter and year range
|
| 847 |
+
plot_df = df[
|
| 848 |
+
(df["Org_Analyte_Name"] == parameter)
|
| 849 |
+
& (df["Reporting_Year"] >= year_range[0])
|
| 850 |
+
& (df["Reporting_Year"] <= year_range[1])
|
| 851 |
+
].copy()
|
| 852 |
+
|
| 853 |
+
if plot_df.empty:
|
| 854 |
+
raise ValueError(
|
| 855 |
+
f"No data available for {parameter} between {year_range[0]}-{year_range[1]}"
|
| 856 |
+
)
|
| 857 |
+
|
| 858 |
+
# Calculate annual means by sector
|
| 859 |
+
means_df = (
|
| 860 |
+
plot_df.groupby(["Reporting_Year", "Sector"], observed=True)["Org_Result_Value"]
|
| 861 |
+
.agg(["mean", "sem"])
|
| 862 |
+
.reset_index()
|
| 863 |
+
)
|
| 864 |
+
|
| 865 |
+
# Create figure with standard dimensions
|
| 866 |
+
fig, ax = plt.subplots(figsize=(12, 6))
|
| 867 |
+
|
| 868 |
+
# Setup grouping logic
|
| 869 |
+
years = sorted(means_df["Reporting_Year"].unique())
|
| 870 |
+
sectors = sorted(means_df["Sector"].unique())
|
| 871 |
+
|
| 872 |
+
if group_by == "year":
|
| 873 |
+
primary_cats = sectors
|
| 874 |
+
secondary_cats = years
|
| 875 |
+
x_label = "Reporting Year"
|
| 876 |
+
category_col = "Sector"
|
| 877 |
+
else:
|
| 878 |
+
primary_cats = years
|
| 879 |
+
secondary_cats = sectors
|
| 880 |
+
x_label = "Sector"
|
| 881 |
+
category_col = "Reporting_Year"
|
| 882 |
+
|
| 883 |
+
# Calculate bar positions
|
| 884 |
+
n_groups = len(primary_cats)
|
| 885 |
+
bar_width = 0.8 / n_groups
|
| 886 |
+
group_centers = np.arange(len(secondary_cats)) + (bar_width * (n_groups - 1)) / 2
|
| 887 |
+
|
| 888 |
+
# Plot bars for each primary category
|
| 889 |
+
for i, category in enumerate(primary_cats):
|
| 890 |
+
category_data = means_df[means_df[category_col] == category]
|
| 891 |
+
|
| 892 |
+
# Create bars
|
| 893 |
+
ax.bar(
|
| 894 |
+
np.arange(len(secondary_cats)) + i * bar_width,
|
| 895 |
+
category_data["mean"],
|
| 896 |
+
bar_width,
|
| 897 |
+
label=str(category),
|
| 898 |
+
color=COLOR_SCALE[i % len(COLOR_SCALE)],
|
| 899 |
+
alpha=0.7,
|
| 900 |
+
zorder=2,
|
| 901 |
+
)
|
| 902 |
+
|
| 903 |
+
# Add error bars
|
| 904 |
+
ax.errorbar(
|
| 905 |
+
np.arange(len(secondary_cats)) + i * bar_width,
|
| 906 |
+
category_data["mean"],
|
| 907 |
+
yerr=category_data["sem"],
|
| 908 |
+
fmt="none",
|
| 909 |
+
color=GREY40,
|
| 910 |
+
capsize=3,
|
| 911 |
+
capthick=1,
|
| 912 |
+
linewidth=1,
|
| 913 |
+
alpha=0.5,
|
| 914 |
+
zorder=3,
|
| 915 |
+
)
|
| 916 |
+
|
| 917 |
+
# Configure axes and spines
|
| 918 |
+
ax.spines["top"].set_visible(False)
|
| 919 |
+
ax.spines["right"].set_visible(False)
|
| 920 |
+
ax.spines["left"].set_visible(False)
|
| 921 |
+
ax.spines["bottom"].set_color(GREY40)
|
| 922 |
+
ax.spines["bottom"].set_linewidth(0.5)
|
| 923 |
+
|
| 924 |
+
# Configure grid
|
| 925 |
+
ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray", zorder=1)
|
| 926 |
+
|
| 927 |
+
# Configure ticks
|
| 928 |
+
ax.tick_params(axis="both", which="both", length=0, colors=GREY40)
|
| 929 |
+
|
| 930 |
+
# Set x-axis ticks and wrapped labels
|
| 931 |
+
ax.set_xticks(group_centers)
|
| 932 |
+
wrapped_labels = [textwrap.fill(str(label), width=10) for label in secondary_cats]
|
| 933 |
+
ax.set_xticklabels(wrapped_labels, ha="center", va="top", color=GREY40)
|
| 934 |
+
|
| 935 |
+
# Add titles and labels
|
| 936 |
+
unit = plot_df["Org_Result_Unit"].iloc[0] if not plot_df.empty else ""
|
| 937 |
+
ax.set_title(
|
| 938 |
+
f"{parameter} (Mean Annual{' ' + unit if unit else ''})",
|
| 939 |
+
pad=10,
|
| 940 |
+
fontsize=12,
|
| 941 |
+
color=GREY30,
|
| 942 |
+
)
|
| 943 |
+
ax.set_xlabel(x_label, color=GREY40, fontsize=10)
|
| 944 |
+
|
| 945 |
+
# Configure legend
|
| 946 |
+
ax.legend(
|
| 947 |
+
bbox_to_anchor=(1.02, 1),
|
| 948 |
+
loc="upper left",
|
| 949 |
+
frameon=False,
|
| 950 |
+
fontsize=9,
|
| 951 |
+
handletextpad=0.5,
|
| 952 |
+
)
|
| 953 |
+
|
| 954 |
+
# Add error bar note
|
| 955 |
+
ax.text(
|
| 956 |
+
0.99,
|
| 957 |
+
-0.15,
|
| 958 |
+
"Error bars represent ±1 standard error of the mean",
|
| 959 |
+
ha="right",
|
| 960 |
+
va="top",
|
| 961 |
+
transform=ax.transAxes,
|
| 962 |
+
fontsize=8,
|
| 963 |
+
fontstyle="italic",
|
| 964 |
+
color=GREY40,
|
| 965 |
+
)
|
| 966 |
+
|
| 967 |
+
# Set log scale if needed
|
| 968 |
+
if parameter in [
|
| 969 |
+
"Fecal Coliform (MPN)",
|
| 970 |
+
"Total Nitrogen",
|
| 971 |
+
"Total Phosphorus",
|
| 972 |
+
]:
|
| 973 |
+
ax.set_yscale("log")
|
| 974 |
+
ax.yaxis.set_major_formatter(plt.ScalarFormatter()) # type: ignore
|
| 975 |
+
# Special case for pH
|
| 976 |
+
elif parameter == "pH":
|
| 977 |
+
ax.set_ylim(5.5, 8.5)
|
| 978 |
+
ax.yaxis.set_major_locator(plt.MultipleLocator(1)) # type: ignore
|
| 979 |
+
ax.yaxis.set_minor_locator(plt.MultipleLocator(0.5)) # type: ignore
|
| 980 |
+
|
| 981 |
+
plt.tight_layout(rect=(0, 0.2, 1, 1))
|
| 982 |
+
|
| 983 |
+
# Prepare return data
|
| 984 |
+
means_df.insert(0, "parameter", parameter)
|
| 985 |
+
return fig, means_df
|
plotting_style_guide.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# State of the Bay Plotting Style Guide
|
| 2 |
+
|
| 3 |
+
This guide ensures consistency across all data visualizations in the State of the Bay project.
|
| 4 |
+
|
| 5 |
+
## 1. Layout and Sizing
|
| 6 |
+
|
| 7 |
+
### Figure Dimensions
|
| 8 |
+
- Standard plots: `figsize=(12, 8)`
|
| 9 |
+
- Panel/faceted plots: `figsize=(15, 2.5 * n_panels)`
|
| 10 |
+
- Use `plt.tight_layout()` for proper spacing
|
| 11 |
+
- Arrange multi-panel plots vertically for better comparison
|
| 12 |
+
|
| 13 |
+
## 2. Colors and Visual Elements
|
| 14 |
+
|
| 15 |
+
### Color Palette
|
| 16 |
+
- Use predefined `COLOR_SCALE` for consistency
|
| 17 |
+
- Grey tones:
|
| 18 |
+
```python
|
| 19 |
+
GREY30 = "#4d4d4d" # Dark grey for titles
|
| 20 |
+
GREY40 = "#666666" # Medium grey for axes and labels
|
| 21 |
+
```
|
| 22 |
+
- Use alpha transparency (0.5-0.7) for overlays
|
| 23 |
+
|
| 24 |
+
### Line Styles
|
| 25 |
+
- Trend lines: dashed (`--`), red, `alpha=0.7`, `linewidth=1.5`
|
| 26 |
+
- Grid lines: light grey, `alpha=0.15`
|
| 27 |
+
|
| 28 |
+
## 3. Axes and Spines
|
| 29 |
+
|
| 30 |
+
### Spine Configuration
|
| 31 |
+
```python
|
| 32 |
+
# Remove unnecessary spines
|
| 33 |
+
ax.spines["top"].set_visible(False)
|
| 34 |
+
ax.spines["right"].set_visible(False)
|
| 35 |
+
|
| 36 |
+
# Style bottom spine
|
| 37 |
+
ax.spines["bottom"].set_color(GREY40)
|
| 38 |
+
ax.spines["bottom"].set_linewidth(0.5)
|
| 39 |
+
|
| 40 |
+
# Remove tick marks but keep labels
|
| 41 |
+
ax.tick_params(axis="both", which="both", length=0, colors=GREY40)
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## 4. Grid Lines
|
| 45 |
+
|
| 46 |
+
### Configuration
|
| 47 |
+
```python
|
| 48 |
+
ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## 5. Text Elements
|
| 52 |
+
|
| 53 |
+
### Typography
|
| 54 |
+
- Main titles: centered, size 12
|
| 55 |
+
- Panel titles: size 10, `color=GREY30`, `pad=10`
|
| 56 |
+
- Axis labels: size 10, `color=GREY40`
|
| 57 |
+
|
| 58 |
+
### Statistics and Annotations
|
| 59 |
+
```python
|
| 60 |
+
ax.text(
|
| 61 |
+
0.02, 0.98,
|
| 62 |
+
stats_text,
|
| 63 |
+
transform=ax.transAxes,
|
| 64 |
+
verticalalignment="top",
|
| 65 |
+
fontsize=8,
|
| 66 |
+
bbox=dict(facecolor="white", alpha=0.8, edgecolor="none")
|
| 67 |
+
)
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
## 6. Data Visualization
|
| 71 |
+
|
| 72 |
+
### Line Charts
|
| 73 |
+
- Include confidence intervals (shaded regions)
|
| 74 |
+
- Solid lines for main trends
|
| 75 |
+
- Consistent line thickness
|
| 76 |
+
|
| 77 |
+
### Box Plots
|
| 78 |
+
```python
|
| 79 |
+
boxplot_props = {
|
| 80 |
+
"patch_artist": True,
|
| 81 |
+
"medianprops": dict(color="black"),
|
| 82 |
+
"flierprops": dict(
|
| 83 |
+
marker="o",
|
| 84 |
+
markerfacecolor=color_scale[idx],
|
| 85 |
+
alpha=0.5,
|
| 86 |
+
markersize=4
|
| 87 |
+
),
|
| 88 |
+
"boxprops": dict(facecolor=color_scale[idx], alpha=0.6),
|
| 89 |
+
"widths": 0.6
|
| 90 |
+
}
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## 7. Scales and Ranges
|
| 94 |
+
|
| 95 |
+
### Automatic Log Scaling
|
| 96 |
+
```python
|
| 97 |
+
use_log_scale = parameter in [
|
| 98 |
+
"Turbidity",
|
| 99 |
+
"Fecal Coliform (MPN)",
|
| 100 |
+
"Total Nitrogen",
|
| 101 |
+
"Total Phosphorus",
|
| 102 |
+
]
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
### Best Practices
|
| 106 |
+
- Add padding to axis limits
|
| 107 |
+
- Use consistent y-axis ranges across comparison panels
|
| 108 |
+
- Handle edge cases gracefully
|
| 109 |
+
|
| 110 |
+
## 8. Function Structure
|
| 111 |
+
|
| 112 |
+
### Return Values
|
| 113 |
+
```python
|
| 114 |
+
def plot_function(df: pd.DataFrame, parameter: str) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
|
| 115 |
+
"""
|
| 116 |
+
Create a visualization.
|
| 117 |
+
|
| 118 |
+
Parameters:
|
| 119 |
+
-----------
|
| 120 |
+
df : pd.DataFrame
|
| 121 |
+
Input dataframe
|
| 122 |
+
parameter : str
|
| 123 |
+
Parameter to plot
|
| 124 |
+
|
| 125 |
+
Returns:
|
| 126 |
+
--------
|
| 127 |
+
tuple[Figure, pd.DataFrame, pd.DataFrame]
|
| 128 |
+
- Figure: Matplotlib figure
|
| 129 |
+
- DataFrame: Raw data used in plot
|
| 130 |
+
- DataFrame: Processed data points
|
| 131 |
+
"""
|
| 132 |
+
# ... plotting code ...
|
| 133 |
+
return fig, raw_data, plot_data
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
## 9. Error Handling
|
| 137 |
+
|
| 138 |
+
### Guidelines
|
| 139 |
+
- Handle missing data gracefully
|
| 140 |
+
- Include data validation
|
| 141 |
+
- Provide appropriate fallbacks for edge cases
|
| 142 |
+
- Log warnings for potential issues
|
| 143 |
+
|
| 144 |
+
## 10. Optional Features
|
| 145 |
+
|
| 146 |
+
### Configurable Elements
|
| 147 |
+
```python
|
| 148 |
+
def plot_function(
|
| 149 |
+
df: pd.DataFrame,
|
| 150 |
+
parameter: str,
|
| 151 |
+
show_sem: bool = True,
|
| 152 |
+
show_trend: bool = True,
|
| 153 |
+
panel_chart: bool = False,
|
| 154 |
+
color_scale: list[str] = COLOR_SCALE,
|
| 155 |
+
) -> tuple[Figure, pd.DataFrame, pd.DataFrame]:
|
| 156 |
+
"""..."""
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
### Common Options
|
| 160 |
+
- `show_sem`: Toggle standard error margins
|
| 161 |
+
- `show_trend`: Toggle trend lines and statistics
|
| 162 |
+
- `panel_chart`: Toggle between single and multi-panel layouts
|
| 163 |
+
- `color_scale`: Override default color palette
|
ui/pages/sector_compare.py
CHANGED
|
@@ -6,12 +6,14 @@ import streamlit as st
|
|
| 6 |
from components import render_filtered_data_preview
|
| 7 |
from dashboard_analytics import log_visit
|
| 8 |
from plots.sector import (
|
|
|
|
| 9 |
plot_sector_box_charts,
|
| 10 |
plot_sector_heatmap,
|
| 11 |
plot_sector_line_charts,
|
| 12 |
)
|
| 13 |
|
| 14 |
st.title("Sector Comparison Charts")
|
|
|
|
| 15 |
log_visit("Sector Comparison Charts")
|
| 16 |
|
| 17 |
# Get data from session state
|
|
@@ -21,13 +23,13 @@ raw_df["Date"] = pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.date
|
|
| 21 |
# Create sidebar controls
|
| 22 |
chart_type = st.sidebar.radio(
|
| 23 |
"Chart Type:",
|
| 24 |
-
options=["Line Charts", "Box and Whisker", "Heatmap"],
|
| 25 |
help="Select the type of chart to display.",
|
| 26 |
key="sector_compare_chart_type",
|
| 27 |
)
|
| 28 |
|
| 29 |
# Only show relevant controls based on chart type
|
| 30 |
-
if chart_type in ["Line Charts", "Heatmap", "Box and Whisker"]:
|
| 31 |
st.sidebar.markdown("#### Chart Specific Options")
|
| 32 |
if chart_type == "Line Charts":
|
| 33 |
show_sem = st.sidebar.checkbox(
|
|
@@ -50,6 +52,14 @@ elif chart_type == "Box and Whisker":
|
|
| 50 |
help="Display trend line with R² and p-values",
|
| 51 |
key="sector_compare_show_trend",
|
| 52 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
selected_parameter = st.sidebar.selectbox(
|
| 55 |
"Parameter:",
|
|
@@ -99,6 +109,15 @@ try:
|
|
| 99 |
filtered_raw_df, selected_parameter, show_trend=show_trend
|
| 100 |
)
|
| 101 |
st.pyplot(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
else: # Heatmap
|
| 103 |
# Create heatmap
|
| 104 |
fig, param_data, plot_data = plot_sector_heatmap(
|
|
|
|
| 6 |
from components import render_filtered_data_preview
|
| 7 |
from dashboard_analytics import log_visit
|
| 8 |
from plots.sector import (
|
| 9 |
+
plot_grouped_bars,
|
| 10 |
plot_sector_box_charts,
|
| 11 |
plot_sector_heatmap,
|
| 12 |
plot_sector_line_charts,
|
| 13 |
)
|
| 14 |
|
| 15 |
st.title("Sector Comparison Charts")
|
| 16 |
+
st.info("👈 Select a chart type and parameter from the sidebar")
|
| 17 |
log_visit("Sector Comparison Charts")
|
| 18 |
|
| 19 |
# Get data from session state
|
|
|
|
| 23 |
# Create sidebar controls
|
| 24 |
chart_type = st.sidebar.radio(
|
| 25 |
"Chart Type:",
|
| 26 |
+
options=["Line Charts", "Box and Whisker", "Heatmap", "Grouped Bars"],
|
| 27 |
help="Select the type of chart to display.",
|
| 28 |
key="sector_compare_chart_type",
|
| 29 |
)
|
| 30 |
|
| 31 |
# Only show relevant controls based on chart type
|
| 32 |
+
if chart_type in ["Line Charts", "Heatmap", "Box and Whisker", "Grouped Bars"]:
|
| 33 |
st.sidebar.markdown("#### Chart Specific Options")
|
| 34 |
if chart_type == "Line Charts":
|
| 35 |
show_sem = st.sidebar.checkbox(
|
|
|
|
| 52 |
help="Display trend line with R² and p-values",
|
| 53 |
key="sector_compare_show_trend",
|
| 54 |
)
|
| 55 |
+
elif chart_type == "Grouped Bars":
|
| 56 |
+
group_by = st.sidebar.radio(
|
| 57 |
+
"Group bars by:",
|
| 58 |
+
options=["Sector", "Year"],
|
| 59 |
+
index=0,
|
| 60 |
+
key="sector_compare_group_by",
|
| 61 |
+
help="Choose how to group the bars.",
|
| 62 |
+
).lower()
|
| 63 |
|
| 64 |
selected_parameter = st.sidebar.selectbox(
|
| 65 |
"Parameter:",
|
|
|
|
| 109 |
filtered_raw_df, selected_parameter, show_trend=show_trend
|
| 110 |
)
|
| 111 |
st.pyplot(fig)
|
| 112 |
+
elif chart_type == "Grouped Bars":
|
| 113 |
+
# Create grouped bar charts
|
| 114 |
+
fig, plot_data = plot_grouped_bars(
|
| 115 |
+
filtered_raw_df, selected_parameter, year_range, group_by
|
| 116 |
+
)
|
| 117 |
+
st.pyplot(fig)
|
| 118 |
+
param_data = filtered_raw_df[
|
| 119 |
+
filtered_raw_df["Org_Analyte_Name"] == selected_parameter
|
| 120 |
+
].copy()
|
| 121 |
else: # Heatmap
|
| 122 |
# Create heatmap
|
| 123 |
fig, param_data, plot_data = plot_sector_heatmap(
|