EnFoBench-GasDemand / components.py
attila-balint-kul's picture
Upload 8 files
f7b117b verified
raw
history blame
13.1 kB
import pandas as pd
import streamlit as st
import plotly.express as px
from utils import get_leaderboard
def header() -> None:
    """Render the dashboard title followed by a horizontal divider."""
    page_title = "EnFoBench - Gas Demand"
    st.title(page_title)
    st.divider()
def logos() -> None:
    """Show the KU Leuven and EnergyVille logo images side by side in two columns."""
    logo_paths = (
        "./images/ku_leuven_logo.png",
        "./images/energyville_logo.png",
    )
    # One column per logo, filled from left to right.
    for column, logo_path in zip(st.columns(2), logo_paths):
        with column:
            st.image(logo_path)
def model_selector(models: list[str]) -> set[str]:
    """Render one checkbox per model, grouped by prefix, and return the selection.

    Each identifier is split on its first "." into a group (rendered as a
    text heading) and a display name (the checkbox label). Identifiers that
    contain no "." are listed under an "other" heading instead of raising
    ``ValueError``.

    Args:
        models: Full model identifiers, typically ``"<group>.<name>"``.

    Returns:
        The full identifiers of every model whose checkbox is ticked.
    """
    # Every checkbox is keyed by its model identifier and Streamlit rejects
    # duplicate widget keys, so drop repeats while keeping first-seen order.
    unique_models = list(dict.fromkeys(models))

    # Group models by their prefix, keeping the full identifier next to the label.
    model_groups: dict[str, list[tuple[str, str]]] = {}
    for model in unique_models:
        group, separator, model_name = model.partition(".")
        if not separator:
            group, model_name = "other", model
        model_groups.setdefault(group, []).append((model_name, model))

    st.header("Models to include")
    left, right = st.columns(2)
    with left:
        select_none = st.button("Select None", use_container_width=True)
    with right:
        select_all = st.button("Select All", use_container_width=True)

    # The checkbox state lives in session state under the model identifier.
    # It is seeded / overwritten here, before the checkboxes are created,
    # instead of passing ``value=True`` to ``st.checkbox``: Streamlit warns
    # when a widget gets both a non-default ``value`` and a session-state value.
    for model in unique_models:
        if select_all:
            st.session_state[model] = True
        elif select_none:
            st.session_state[model] = False
        elif model not in st.session_state:
            # Not seen yet in this session: models start out selected.
            st.session_state[model] = True

    models_to_plot: set[str] = set()
    for group, members in model_groups.items():
        st.text(group)
        for model_name, model in members:
            if st.checkbox(model_name, key=model):
                models_to_plot.add(model)
    return models_to_plot
def overview_view(data):
    """Render the overview: intro text, three "Top 10" bar charts and the leaderboard table.

    Args:
        data: Benchmark results; passed unchanged to ``get_leaderboard``
            together with the columns "MAE.mean", "RMSE.mean" and "rMAE.mean".
    """
    st.markdown(
        """
[EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)
is a community driven benchmarking framework for energy forecasting models.
This dashboard presents the results of the gas demand forecasting usecase. All models were cross-validated
on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.
"""
    )
    st.divider()
    st.markdown("## Leaderboard")

    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

    # (metric named in the chart title, y-axis title); only the first chart
    # labels its y-axis.
    chart_specs = [("MAE", "Model"), ("RMSE", ""), ("rMAE", "")]
    for column, (metric_name, y_axis_title) in zip(st.columns(3), chart_specs):
        score_column = f"{metric_name}.mean"
        with column:
            # Keep the ten rows with the LARGEST values, then order them
            # ascending for display.
            # NOTE(review): whether "largest" means "best" depends on what
            # get_leaderboard returns (raw errors vs. scores) - confirm.
            largest_ten = leaderboard.sort_values(score_column, ascending=False).head(10)
            chart_rows = largest_ten.sort_values(score_column)
            fig = px.bar(chart_rows, x=score_column, y=chart_rows.index)
            fig.update_layout(
                title=f"Top 10 models by {metric_name}",
                xaxis_title="",
                yaxis_title=y_axis_title,
            )
            st.plotly_chart(fig, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)
def buildings_view(data):
    """Render the buildings tab: building count, metadata table and two pie charts.

    Args:
        data: Results frame containing "unique_id" plus the metadata / dataset
            columns listed in ``column_labels`` below.
    """
    # Source column -> label shown in the table (dict order == column order).
    column_labels = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    history_label = column_labels["dataset.available_history.days"]

    # Collapse to one row per building using groupby(...).first().
    per_building = data[["unique_id", *column_labels]].groupby("unique_id").first()
    buildings = per_building.rename(columns=column_labels)

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    # Progress bar scaled from 0 up to the largest value in the column.
    history_column = st.column_config.ProgressColumn(
        history_label,
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=float(buildings[history_label].max()),
    )
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={history_label: history_column},
    )

    # (section heading, column whose value counts are charted)
    pie_specs = [
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    ]
    for column, (heading, category) in zip(st.columns(2, gap="large"), pie_specs):
        with column:
            st.markdown(heading)
            # size() gives an unnamed Series, so after reset_index() the
            # counts sit in the column labelled 0.
            counts = buildings.groupby(category).size().reset_index()
            fig = px.pie(counts, values=0, names=category)
            st.plotly_chart(fig, use_container_width=True)
def models_view(data):
    """Render the models tab: model count, configuration table and two pie charts.

    Args:
        data: Results frame containing "model" plus the cross-validation and
            model-info columns listed in ``column_labels`` below.
    """
    # Source column -> label shown in the table (dict order == column order).
    column_labels = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }

    # Collapse to one row per model using groupby(...).first().
    per_model = data[["model", *column_labels]].groupby("model").first()
    models = per_model.rename(columns=column_labels)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    variate_column, framework_column = st.columns(2, gap="large")

    with variate_column:
        st.markdown("#### Variate types")
        # size() gives an unnamed Series, so the counts end up in column 0.
        variate_counts = models.groupby("Variate type").size().reset_index()
        fig = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(fig, use_container_width=True)

    with framework_column:
        st.markdown("#### Frameworks")
        # The framework is the part of the model name (the index) before the
        # first "."; assign() works on a copy, leaving ``models`` untouched.
        framework_names = models.index.str.split(".").str[0]
        with_framework = models.assign(Framework=framework_names)
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        fig = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(fig, use_container_width=True)
def _style_metrics_table(styler, gradient_axis):
    """Apply the shared look of the metric tables to a pandas ``Styler``.

    Args:
        styler: The ``Styler`` to decorate.
        gradient_axis: Forwarded to ``background_gradient`` as ``axis``
            (``0`` scales the colours per column, ``None`` over the whole table).

    Returns:
        The same styler, so the function can be used with ``Styler.pipe``.
    """
    styler.background_gradient(cmap="seismic", axis=gradient_axis)
    styler.format(precision=2)
    # center text and increase font size
    styler.map(lambda _: "text-align: center; font-size: 14px;")
    return styler


def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the accuracy tab for the selected models.

    Draws, in order: a box plot of one metric column per model (models ordered
    by their median value), a scatter plot of two metric columns against each
    other, a per-model statistics table and a model x building table.

    Args:
        data: Results with a "model" column, a "unique_id" column and numeric
            "<metric>.<aggregation>" columns (e.g. "MAE.mean").
        models_to_plot: Model names to include; all other rows are ignored.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # Nothing selected: the box-plot height below would be 0, which Plotly
        # rejects, and every chart and table would be empty anyway.
        st.warning("No results to show for the selected models.")
        return

    # --- Box plot of one metric column per model ---------------------------
    left, right = st.columns(2, gap="small")
    with left:
        # Explicit keys keep these select boxes from clashing with identically
        # configured ones rendered elsewhere in the same run.
        metric = st.selectbox("Metric", metrics, index=0, key="box_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="box_aggregation"
        )
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

    # Rank the models by the median of the chosen column: after sorting, the
    # fresh 0..n-1 index is named "rank" and turned into a column.
    rank_df = (
        data_to_plot.groupby(["model"])
        .agg("median", numeric_only=True)
        .sort_values(by=f"{metric}.{aggregation}")
        .reset_index()
        .rename_axis("rank")
        .reset_index()[["rank", "model"]]
    )
    fig = px.box(
        data_to_plot.merge(rank_df, on="model").sort_values(by="rank"),
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    # 40 px per selected model.
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot of two metric columns --------------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This box picks a metric, so it is labelled "Metric"
        # (it was previously mislabelled "Aggregation").
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )
    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables --------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    # One row per model: the chosen aggregation applied to each of the
    # metric's five statistic columns.
    metrics_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[[f"{metric}.{statistic}" for statistic in aggregations]]

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    styled_table = metrics_table.style.pipe(_style_metrics_table, gradient_axis=0)
    st.dataframe(styled_table, use_container_width=True)

    # Model x building table of the chosen metric column.
    metrics_per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )
    # Compute both row summaries BEFORE inserting either of them; otherwise the
    # row mean would also average in the freshly inserted "median" column.
    row_median = metrics_per_building_table.median(axis=1)
    row_mean = metrics_per_building_table.mean(axis=1)
    metrics_per_building_table.insert(0, "median", row_median)
    metrics_per_building_table.insert(0, "mean", row_mean)
    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean")

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    styled_table = metrics_per_building_table.style.pipe(
        _style_metrics_table, gradient_axis=None
    )
    st.dataframe(styled_table, use_container_width=True)
def computation_view(data, models_to_plot: set[str]):
    """Render the computational-resources tab for the selected models.

    Draws a parallel-coordinates plot of the per-model means of resource usage
    and accuracy columns, followed by a scatter plot of one accuracy column
    against CPU usage (logarithmic x-axis).

    Args:
        data: Results with a "model" column, "resource_usage.CPU",
            "resource_usage.memory" and numeric "<metric>.<aggregation>"
            columns (e.g. "MAE.mean").
        models_to_plot: Model names to include; all other rows are ignored.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Computational Resources")
    # NOTE(review): "model" is a text column while parallel coordinates plot
    # numeric axes, so it presumably has no effect here - confirm.
    fig = px.parallel_coordinates(
        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    left, center, right = st.columns(3, gap="small")
    with left:
        # Explicit keys keep these select boxes from clashing with identically
        # configured ones rendered elsewhere in the same run.
        metric = st.selectbox("Metric", metrics, index=0, key="computation_metric")
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building",
            aggregations,
            index=1,
            key="computation_aggregation_per_building",
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model",
            aggregations,
            index=1,
            key="computation_aggregation_per_model",
        )
    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )

    # The per-building statistic is the column suffix ("<metric>.<aggregation>"),
    # while the per-model aggregation combines a model's rows across buildings.
    # The two selections were previously applied the other way round, which
    # contradicted the labels and the heading above.
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)