EnFoBench-GasDemand / components.py
attila-balint-kul's picture
Upload components.py
1c6134e verified
raw
history blame contribute delete
No virus
20.8 kB
import pandas as pd
import plotly.express as px
import streamlit as st
from pandas.io.formats.style import Styler
from utils import get_leaderboard, get_model_ranks
def header(title: str) -> None:
    """Render the page title, a short EnFoBench blurb and a divider.

    Args:
        title: Text passed to ``st.title``.
    """
    # Markdown blurb: a link to the toolkit repository followed by a tagline.
    blurb = (
        "\n"
        "[EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)\n"
        "is a community driven benchmarking framework for energy forecasting models.\n"
    )

    st.title(title)
    st.markdown(blurb)
    st.divider()
def logos() -> None:
    """Show the KU Leuven and EnergyVille logo images side by side."""
    logo_files = (
        "./images/ku_leuven_logo.png",
        "./images/energyville_logo.png",
    )
    # One column per logo, filled left to right.
    for column, logo_file in zip(st.columns(2), logo_files):
        with column:
            st.image(logo_file)
def links(current: str) -> None:
    """Render the "Sources" and "Other Dashboards" link buttons.

    Args:
        current: Identifier of the dashboard being displayed
            ("ElectricityDemand", "GasDemand" or "PVGeneration"); the button
            pointing at that dashboard is left out.
    """
    # (button label, target URL)
    # NOTE(review): the dataset button targets the electricity-demand dataset;
    # confirm this is intended for every dashboard that uses this module.
    source_buttons = (
        (
            "GitHub Repository",
            "https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit",
        ),
        (
            "Documentation",
            "https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/",
        ),
        (
            "Electricity Demand Dataset",
            "https://huggingface.co/datasets/EDS-lab/electricity-demand",
        ),
        (
            "HuggingFace Organization",
            "https://huggingface.co/EDS-lab",
        ),
    )
    # (dashboard identifier, button label, target URL)
    dashboard_buttons = (
        (
            "ElectricityDemand",
            "Electricity Demand",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-ElectricityDemand",
        ),
        (
            "GasDemand",
            "Gas Demand",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-GasDemand",
        ),
        (
            "PVGeneration",
            "PVGeneration",
            "https://huggingface.co/spaces/EDS-lab/EnFoBench-PVGeneration",
        ),
    )

    st.header("Sources")
    for label, target in source_buttons:
        st.link_button(label, url=target, use_container_width=True)

    st.header("Other Dashboards")
    for dashboard_id, label, target in dashboard_buttons:
        if dashboard_id == current:
            # Do not link a dashboard to itself.
            continue
        st.link_button(label, url=target, use_container_width=True)
def _select_best_models(models: list[str], data: pd.DataFrame, metric: str) -> None:
    """Tick the first ten models returned by ``get_model_ranks`` and untick the rest."""
    best_models = set(get_model_ranks(data, metric).head(10)["model"].tolist())
    for model in models:
        st.session_state[model] = model in best_models


def model_selector(models: list[str], data: pd.DataFrame) -> set[str]:
    """Render the "Models to include" controls and return the ticked models.

    Models are grouped under the text before the first "." of their name and
    shown as one checkbox each. The checkbox state is stored in
    ``st.session_state`` under the full model name, which is the same key the
    bulk-selection buttons write to.

    Args:
        models: Full model names, normally of the form ``"<group>.<name>"``.
            Names without a "." are listed under the group "other".
        data: Benchmark results handed to ``get_model_ranks`` by the
            "Best by ..." buttons.

    Returns:
        The full names of all models whose checkbox is ticked.
    """
    # Group models by their prefix, remembering the full name for each entry
    # so that widget keys and returned values never have to be re-assembled.
    model_groups: dict[str, list[tuple[str, str]]] = {}
    for model in models:
        group, separator, model_name = model.partition(".")
        if not separator:
            # No "<group>." prefix: show the whole name under a fallback group
            # instead of failing on tuple unpacking.
            group, model_name = "other", model
        model_groups.setdefault(group, []).append((model_name, model))

    # Seed the default (ticked) through session state rather than the widget's
    # ``value`` argument: combining ``value=True`` with session-state writes
    # makes Streamlit emit a "created with a default value" warning.
    for model in models:
        if model not in st.session_state:
            st.session_state[model] = True

    st.header("Models to include")

    ranking_buttons = (
        ("Best by MAE", "MAE.mean"),
        ("Best by RMSE", "RMSE.mean"),
        ("Best by rMAE", "rMAE.mean"),
    )
    for column, (label, metric) in zip(st.columns(3), ranking_buttons):
        with column:
            if st.button(label, use_container_width=True):
                _select_best_models(models, data, metric)

    left, right = st.columns(2)
    with left:
        if st.button("Select None", use_container_width=True):
            for model in models:
                st.session_state[model] = False
    with right:
        if st.button("Select All", use_container_width=True):
            for model in models:
                st.session_state[model] = True

    models_to_plot: set[str] = set()
    for model_group, members in model_groups.items():
        st.text(model_group)
        for model_name, model in members:
            if st.checkbox(model_name, key=model):
                models_to_plot.add(model)
    return models_to_plot
def overview_view(data: pd.DataFrame):
    """Render the leaderboard: three bar charts and the full leaderboard table.

    Args:
        data: Benchmark results passed to ``get_leaderboard`` together with
            the columns "MAE.mean", "RMSE.mean" and "rMAE.mean".
    """
    st.markdown("## Leaderboard")
    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

    # (metric shown in the chart title, y-axis title); only the first chart
    # carries a y-axis title.
    chart_specs = (("MAE", "Model"), ("RMSE", ""), ("rMAE", ""))

    for column, (metric_name, y_title) in zip(st.columns(3), chart_specs):
        value_column = f"{metric_name}.mean"
        with column:
            # NOTE(review): this keeps the ten rows with the LARGEST values and
            # then orders them ascending for display. That is a "top 10" only
            # if get_leaderboard returns higher-is-better values - confirm.
            ten_largest = leaderboard.sort_values(value_column, ascending=False).head(10)
            chart_rows = ten_largest.sort_values(value_column)

            fig = px.bar(chart_rows, x=value_column, y=chart_rows.index)
            fig.update_layout(
                title=f"Top 10 models by {metric_name}",
                xaxis_title="",
                yaxis_title=y_title,
                height=600,
            )
            st.plotly_chart(fig, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)
def buildings_view(data: pd.DataFrame):
    """Render the buildings tab: headline counts, pie charts and a table.

    The table holds one row per ``unique_id`` (the first row of each group)
    with the metadata and available-history columns renamed for display.

    Args:
        data: Benchmark results. Must contain "unique_id",
            "metadata.location_id", "metadata.timezone", "metadata.freq",
            "dataset.available_history.days" and
            "dataset.available_history.observations".
            "metadata.cluster_size" and "metadata.building_class" are
            optional and default to 1 and "Unknown". The frame passed in is
            not modified.
    """
    # Fill optional metadata on a local frame so the caller's DataFrame is
    # left untouched (previously the defaults were written into it in place).
    optional_defaults = {
        "metadata.cluster_size": 1,
        "metadata.building_class": "Unknown",
    }
    missing = {
        column: default
        for column, default in optional_defaults.items()
        if column not in data.columns
    }
    if missing:
        data = data.assign(**missing)

    display_names = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
        "dataset.available_history.observations": "Available history (#)",
        "metadata.freq": "Frequency",
    }
    buildings = (
        data[["unique_id", *display_names]]
        .groupby("unique_id")
        .first()
        .rename(columns=display_names)
    )

    def count_buildings(building_class: str) -> int:
        """Count distinct buildings that have rows of the given class."""
        of_class = data[data["metadata.building_class"] == building_class]
        return of_class["unique_id"].nunique()

    left, middle, right = st.columns(3)
    with left:
        st.metric("Number of buildings", data["unique_id"].nunique())
    with middle:
        st.metric("Residential", count_buildings("Residential"))
    with right:
        st.metric("Commercial", count_buildings("Commercial"))

    st.divider()

    # (section heading, column of `buildings` whose value counts are charted)
    pie_specs = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
        ("#### Frequencies", "Frequency"),
    )
    for column, (heading, category) in zip(st.columns(3, gap="large"), pie_specs):
        with column:
            st.markdown(heading)
            fig = px.pie(
                # size() yields an unnamed Series; after reset_index() the
                # counts live in a column labelled 0.
                buildings.groupby(category).size().reset_index(),
                values=0,
                names=category,
            )
            fig.update_layout(
                legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
            )
            st.plotly_chart(fig, use_container_width=True)

    st.divider()

    st.markdown("#### Buildings")
    # Both history columns are shown as progress bars scaled to their maximum.
    progress_columns = {
        name: st.column_config.ProgressColumn(
            name,
            help="Available training data during the first prediction.",
            format="%f",
            min_value=0,
            max_value=float(buildings[name].max()),
        )
        for name in ("Available history (days)", "Available history (#)")
    }
    st.dataframe(
        buildings.sort_values("Available history (days)"),
        use_container_width=True,
        column_config=progress_columns,
    )
def models_view(data: pd.DataFrame):
    """Render the models tab: headline counts, two pie charts and a table.

    The table holds one row per model (the first row of each group) with the
    cross-validation settings and model-info columns renamed for display.

    Args:
        data: Benchmark results. Must contain "model", the "cv_config.*"
            columns (folds, horizon, step, time) and the "model_info.*"
            columns (repository, tag, variate_type).
    """
    display_names = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    models = (
        data[["model", *display_names]]
        .groupby("model")
        .first()
        .rename(columns=display_names)
    )

    def count_models(variate_type: str) -> int:
        """Count distinct models that have rows of the given variate type."""
        of_type = data[data["model_info.variate_type"] == variate_type]
        return of_type["model"].nunique()

    left, middle, right = st.columns(3)
    with left:
        st.metric("Models", len(models))
    with middle:
        st.metric("Univariate", count_models("univariate"))
    with right:
        # This tile counts multivariate models; it used to be mislabelled
        # "Univariate".
        st.metric("Multivariate", count_models("multivariate"))

    st.divider()

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Variate types")
        fig = px.pie(
            # size() yields an unnamed Series; after reset_index() the counts
            # live in a column labelled 0.
            models.groupby("Variate type").size().reset_index(),
            values=0,
            names="Variate type",
        )
        st.plotly_chart(fig, use_container_width=True)

    with right:
        st.markdown("#### Frameworks")
        _df = models.copy()
        # The framework is the part of the model name before the first ".".
        _df["Framework"] = _df.index.str.split(".").str[0]
        fig = px.pie(
            _df.groupby("Framework").size().reset_index(),
            values=0,
            names="Framework",
        )
        st.plotly_chart(fig, use_container_width=True)

    st.divider()
    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)
def accuracy_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the accuracy tab for the selected models.

    Sections, top to bottom:
      1. Box plot of one ``"<metric>.<aggregation>"`` column per model,
         ordered by the rank returned by ``get_model_ranks``.
      2. Scatter plot of one such column against another.
      3. Table of the min/mean/median/max/std columns of one metric,
         aggregated per model.
      4. Table of one ``"<metric>.<aggregation>"`` column per model and
         building.

    Each section shows a "No data to display." warning instead of its
    chart or table when none of the selected models has rows in ``data``.

    Args:
        data: Benchmark results with a "model" column, a "unique_id" column
            and ``"<metric>.<aggregation>"`` columns for the metrics MAE,
            RMSE, MBE, rMAE and the aggregations min, mean, median, max, std.
        models_to_plot: Model names to include.
    """
    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregation_options = ["min", "mean", "median", "max", "std"]
    empty_message = "No data to display."

    def gradient_table(table: pd.DataFrame, axis) -> Styler:
        """Style ``table`` with a seismic colour gradient computed along ``axis``."""
        styler = table.style
        styler.background_gradient(cmap="seismic", axis=axis)
        styler.format(precision=2)
        # center text and increase font size
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    # 1. Distribution of one metric column per model.
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", aggregation_options, index=1)
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

    if data_to_plot.empty:
        st.warning(empty_message)
    else:
        model_ranks = get_model_ranks(data_to_plot, f"{metric}.{aggregation}")
        fig = px.box(
            data_to_plot.merge(model_ranks, on="model").sort_values(by="rank"),
            x=f"{metric}.{aggregation}",
            y="model",
            color="model",
            points="all",
        )
        fig.update_layout(showlegend=False, height=50 * len(models_to_plot))
        st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # 2. One metric column against another.
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metric_options, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="x_aggregation"
        )
    with right:
        # This box selects a metric; its label used to read "Aggregation".
        y_metric = st.selectbox("Metric", metric_options, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="y_aggregation"
        )
    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )

    if data_to_plot.empty:
        st.warning(empty_message)
    else:
        fig = px.scatter(
            data_to_plot,
            x=f"{x_metric}.{x_aggregation}",
            y=f"{y_metric}.{y_aggregation}",
            color="model",
        )
        fig.update_layout(height=600)
        st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # 3 + 4. Tables share one metric/aggregation selection.
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregation_options,
            index=1,
            key="table_aggregation",
        )

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    if data_to_plot.empty:
        st.warning(empty_message)
    else:
        stat_columns = [f"{metric}.{name}" for name in aggregation_options]
        metrics_table = (
            data_to_plot.groupby(["model"])
            .agg(aggregation, numeric_only=True)[stat_columns]
            .sort_values(by=f"{metric}.mean")
        )
        # Gradient per column: each statistic is coloured on its own scale.
        st.dataframe(gradient_table(metrics_table, axis=0), use_container_width=True)

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    if data_to_plot.empty:
        # A whole-table gradient cannot be computed for an empty frame, so
        # the table is skipped instead of being styled.
        st.warning(empty_message)
    else:
        metrics_per_building_table = (
            data_to_plot.groupby(["model", "unique_id"])
            .agg(aggregation, numeric_only=True)
            .reset_index()
            .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
        )
        # Order rows by their mean across buildings without adding a helper
        # column that could clash with a building id.
        row_order = metrics_per_building_table.mean(axis=1).sort_values().index
        metrics_per_building_table = metrics_per_building_table.loc[row_order]
        # Gradient over the whole table so buildings are comparable.
        st.dataframe(
            gradient_table(metrics_per_building_table, axis=None),
            use_container_width=True,
        )
def relative_performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render a box plot of how often each model beats a baseline.

    Baselines are discovered from the columns of ``data`` whose name
    contains "better_than". When more than one is found a select box
    chooses the baseline; otherwise the only one is used.

    Args:
        data: Benchmark results with a "model" column and one
            ``"better_than.<baseline>"`` column per baseline. Each cell is
            expected to be a mapping with a "percentage" entry
            (NOTE(review): presumably a fraction in [0, 1], since it is
            multiplied by 100 and plotted on a 0-100 axis - confirm).
        models_to_plot: Model names to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Relative performance")
    if data_to_plot.empty:
        st.warning("No data to display.")
        return

    baseline_choices = sorted(
        data.filter(like="better_than")
        .columns.str.removeprefix("better_than.")
        .tolist()
    )
    if not baseline_choices:
        # Without any baseline column there is nothing to compare against;
        # indexing the empty list would raise IndexError.
        st.warning("No baseline comparison data available.")
        return

    if len(baseline_choices) > 1:
        better_than_baseline = st.selectbox("Baseline model", options=baseline_choices)
    else:
        better_than_baseline = baseline_choices[0]

    baseline_column = f"better_than.{better_than_baseline}"
    percentage_column = f"{baseline_column}.percentage"

    # Expand the per-row mappings and keep their "percentage" entry, scaled
    # by 100. `.values` drops the index so rows are matched by position.
    percentages = pd.json_normalize(data_to_plot[baseline_column])["percentage"].values * 100
    data_to_plot = data_to_plot.assign(**{percentage_column: percentages})

    model_rank = get_model_ranks(data_to_plot, percentage_column)
    fig = px.box(
        # Join on the model name explicitly, as accuracy_view does, instead
        # of on whatever columns the two frames happen to share.
        data_to_plot.merge(model_rank, on="model").sort_values(by="rank"),
        x=percentage_column,
        y="model",
        points="all",
    )
    fig.update_xaxes(range=[0, 100], title_text="Better than baseline (%)")
    fig.update_layout(
        showlegend=False,
        height=50 * len(models_to_plot),
        title=f"Better than {better_than_baseline} on % of days per building",
    )
    st.plotly_chart(fig, use_container_width=True)
def computation_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the computational-resources tab for the selected models.

    Sections, top to bottom:
      1. Scatter plot of an accuracy metric against CPU usage, one point
         per model.
      2. Scatter plot of CPU usage against the number of available
         historical observations, with an OLS trend line per model.
      3. Table of CPU usage per model and building.

    Each section shows a "No data to display." warning instead of its
    chart or table when none of the selected models has rows in ``data``.

    Args:
        data: Benchmark results with "model", "unique_id",
            "resource_usage.CPU", "dataset.available_history.observations"
            and ``"<metric>.<aggregation>"`` columns.
        models_to_plot: Model names to include.
    """
    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregation_options = ["min", "mean", "median", "max", "std"]
    empty_message = "No data to display."

    def gradient_table(table: pd.DataFrame) -> Styler:
        """Style ``table`` with a seismic gradient computed over all cells."""
        styler = table.style
        styler.background_gradient(cmap="seismic", axis=None)
        styler.format(precision=2)
        # center text and increase font size
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    # The axes below are labelled in hours.
    # NOTE(review): presumably the raw values are seconds - confirm.
    data_to_plot["resource_usage.CPU"] /= 3600

    st.markdown("#### Computational Resources")
    left, center, right = st.columns(3, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0)
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building", aggregation_options, index=1
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model", aggregation_options, index=1
        )

    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )
    if data_to_plot.empty:
        st.warning(empty_message)
    else:
        # The column suffix picks the per-building statistic; grouping by
        # model then combines the buildings with the per-model aggregation.
        # (The two selections used to be applied the other way round, which
        # only went unnoticed because both default to "mean".)
        aggregated_data = (
            data_to_plot.groupby("model")
            .agg(aggregation_per_model, numeric_only=True)
            .reset_index()
        )
        fig = px.scatter(
            aggregated_data,
            x="resource_usage.CPU",
            y=f"{metric}.{aggregation_per_building}",
            color="model",
            log_x=True,
        )
        fig.update_layout(height=600)
        fig.update_xaxes(title_text="CPU usage (hours)")
        fig.update_yaxes(
            title_text=f"{metric} ({aggregation_per_building}, {aggregation_per_model})"
        )
        st.plotly_chart(fig, use_container_width=True)

    st.divider()

    st.markdown("#### Computational time vs historical data")
    if data_to_plot.empty:
        st.warning(empty_message)
    else:
        fig = px.scatter(
            data_to_plot,
            x="dataset.available_history.observations",
            y="resource_usage.CPU",
            color="model",
            trendline="ols",
            hover_data=["model", "unique_id"],
        )
        fig.update_layout(height=600)
        fig.update_xaxes(title_text="Available historical observations (#)")
        fig.update_yaxes(title_text="CPU usage (hours)")
        st.plotly_chart(fig, use_container_width=True)

    st.divider()

    st.markdown("#### Computational time per building")
    if data_to_plot.empty:
        # A whole-table gradient cannot be computed for an empty frame, so
        # the table is skipped instead of being styled.
        st.warning(empty_message)
    else:
        cpu_per_building_table = data_to_plot.pivot(
            index="model", columns="unique_id", values="resource_usage.CPU"
        )
        st.dataframe(gradient_table(cpu_per_building_table), use_container_width=True)