Spaces:

EDS-lab
/

EnFoBench-GasDemand

Sleeping

App Files Files Community

attila-balint-kul committed on Jun 12, 2024

Commit

f7b117b

verified ·

1 Parent(s): fa64f07

Upload 8 files

Browse files

Files changed (8) hide show

.gitignore +1 -0
.streamlit/secrets.toml +2 -0
app.py +84 -0
components.py +415 -0
images/energyville_logo.png +0 -0
images/ku_leuven_logo.png +0 -0
requirements.txt +2 -0
utils.py +44 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .streamlit/

.streamlit/secrets.toml ADDED Viewed

	@@ -0,0 +1,2 @@


# SECURITY: never commit real credentials to the repository. Supply the API
# key through the deployment's secret store instead, and rotate any key that
# has ever been published in version control.
wandb_entity = "attila-balint-kul"
wandb_api_key = "<your-wandb-api-key>"

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import streamlit as st
+from components import (
+    buildings_view,
+    models_view,
+    performance_view,
+    computation_view,
+    logos,
+    model_selector,
+    header,
+    overview_view,
+)
+import utils
# Names of the dashboard views offered in the sidebar "View" selector,
# in display order.
PAGES = ["Overview", "Buildings", "Models", "Performance", "Computational Resources"]

# Browser tab title and wide layout for the whole app.
st.set_page_config(layout="wide", page_title="Gas Demand Dashboard")
@st.cache_data(ttl=24 * 60 * 60)
def fetch_data():
    """Download the benchmark metric runs from Weights & Biases.

    The W&B entity and API key are read from ``st.secrets``; the project name
    and job type are fixed. The decorator caches the result with a TTL of
    86400 seconds (one day).
    """
    wandb_query = {
        "entity": st.secrets["wandb_entity"],
        "project": "enfobench-gas-demand",
        "api_key": st.secrets["wandb_api_key"],
        "job_type": "metrics",
    }
    return utils.get_wandb_data(**wandb_query)
# Load the run data and collect the distinct model names in sorted order.
data = fetch_data()
models = sorted(data["model"].unique().tolist())

# (label, url) pairs rendered as full-width link buttons on the Overview page.
# NOTE(review): the dataset link points at the electricity-demand dataset
# although this is the gas demand dashboard — confirm the intended URL.
SOURCE_LINKS = (
    ("GitHub Repository", "https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit"),
    ("Documentation", "https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/"),
    ("Electricity Demand Dataset", "https://huggingface.co/datasets/EDS-lab/electricity-demand"),
    ("HuggingFace Organization", "https://huggingface.co/EDS-lab"),
)
OTHER_DASHBOARD_LINKS = (
    ("Electricity Demand", "https://huggingface.co/spaces/EDS-lab/EnFoBench-ElectricityDemand"),
    ("PV Generation", "https://huggingface.co/spaces/EDS-lab/EnFoBench-PVGeneration"),
)

with st.sidebar:
    logos()
    view = st.selectbox("View", PAGES, index=0)

    # Only these two views take a model selection.
    if view in ("Performance", "Computational Resources"):
        models_to_plot = model_selector(models)

    if view == "Overview":
        st.header("Sources")
        for label, url in SOURCE_LINKS:
            st.link_button(label, url=url, use_container_width=True)
        st.header("Other Dashboards")
        for label, url in OTHER_DASHBOARD_LINKS:
            st.link_button(label, url=url, use_container_width=True)

    st.header("Refresh data")
    if st.button(
        "Refresh", use_container_width=True, help="Fetch the latest data from W&B"
    ):
        # Drop the cached W&B result and rerun the script.
        fetch_data.clear()
        st.rerun()

header()

# Map each view name to the call that renders it. The lambdas are evaluated
# lazily, so `models_to_plot` is only looked up for the views that define it.
renderers = {
    "Overview": lambda: overview_view(data),
    "Buildings": lambda: buildings_view(data),
    "Models": lambda: models_view(data),
    "Performance": lambda: performance_view(data, models_to_plot),
    "Computational Resources": lambda: computation_view(data, models_to_plot),
}
render = renderers.get(view)
if render is None:
    st.write("Not implemented yet")
else:
    render()

components.py ADDED Viewed

	@@ -0,0 +1,415 @@

+import pandas as pd
+import streamlit as st
+import plotly.express as px
+from utils import get_leaderboard
def header() -> None:
    """Render the dashboard title followed by a horizontal divider."""
    st.title("EnFoBench - Gas Demand")
    st.divider()
def logos() -> None:
    """Show the KU Leuven and EnergyVille logo images side by side in two columns."""
    logo_paths = ("./images/ku_leuven_logo.png", "./images/energyville_logo.png")
    for column, path in zip(st.columns(2), logo_paths):
        with column:
            st.image(path)
def model_selector(models: list[str]) -> set[str]:
    """Render grouped model checkboxes and return the selected model names.

    Model names are grouped by the part before their first ``"."``; each
    checkbox is labelled with the remainder of the name. A name without a dot
    forms its own group and is labelled with the full name. Every checkbox is
    keyed by the full model name in ``st.session_state``, which is what the
    "Select None" / "Select All" buttons write to.

    Args:
        models: Full model names to offer for selection.

    Returns:
        The full names of all models whose checkbox is ticked.
    """
    # Collect (checkbox label, full model name) pairs per group prefix.
    model_groups: dict[str, list[tuple[str, str]]] = {}
    for model in models:
        group, _, short_name = model.partition(".")
        model_groups.setdefault(group, []).append((short_name or model, model))

    st.header("Models to include")
    left, right = st.columns(2)
    with left:
        if st.button("Select None", use_container_width=True):
            for model in models:
                st.session_state[model] = False
    with right:
        if st.button("Select All", use_container_width=True):
            for model in models:
                st.session_state[model] = True

    models_to_plot: set[str] = set()
    for group, members in model_groups.items():
        st.text(group)
        for label, model in members:
            # Seed the default through session state instead of `value=True`:
            # passing `value=` for a key that was also set through the
            # Session State API makes Streamlit emit a warning.
            if model not in st.session_state:
                st.session_state[model] = True
            if st.checkbox(label, key=model):
                models_to_plot.add(model)
    return models_to_plot
def overview_view(data):
    """Render the Overview page: intro text, three top-10 charts and the leaderboard.

    The leaderboard is built by ``get_leaderboard`` for the ``MAE.mean``,
    ``RMSE.mean`` and ``rMAE.mean`` columns; each chart shows the ten rows
    with the highest leaderboard value for one of those columns.
    """
    st.markdown(
        """
    [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)
    is a community driven benchmarking framework for energy forecasting models.
    This dashboard presents the results of the gas demand forecasting usecase. All models were cross-validated
    on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.
    """
    )
    st.divider()
    st.markdown("## Leaderboard")

    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

    # (leaderboard column, chart title, y-axis title) for the three charts.
    chart_specs = [
        ("MAE.mean", "Top 10 models by MAE", "Model"),
        ("RMSE.mean", "Top 10 models by RMSE", ""),
        ("rMAE.mean", "Top 10 models by rMAE", ""),
    ]
    for column, (score_column, title, y_title) in zip(st.columns(3), chart_specs):
        with column:
            # Keep the ten highest values, then order them ascending for plotting.
            top_ten = leaderboard.sort_values(score_column, ascending=False).head(10)
            top_ten = top_ten.sort_values(score_column)
            fig = px.bar(top_ten, x=score_column, y=top_ten.index)
            fig.update_layout(title=title, xaxis_title="", yaxis_title=y_title)
            st.plotly_chart(fig, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)
def buildings_view(data):
    """Render the Buildings page: building count, metadata table and two pie charts.

    One row per ``unique_id`` is kept (the first non-null value of each
    metadata column within the group), and the columns are renamed to their
    display names.
    """
    # Source column -> display name; the key order fixes the table column order.
    display_names = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    per_building = data[["unique_id", *display_names]].groupby("unique_id").first()
    buildings = per_building.rename(columns=display_names)

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    history_column = "Available history (days)"
    # The progress bar is scaled to the longest history among the buildings.
    longest_history = float(buildings[history_column].max())
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={
            history_column: st.column_config.ProgressColumn(
                history_column,
                help="Available training data during the first prediction.",
                format="%f",
                min_value=0,
                max_value=longest_history,
            ),
        },
    )

    # (section heading, column to count) for the two pie charts.
    pie_specs = [
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    ]
    for column, (heading, category) in zip(st.columns(2, gap="large"), pie_specs):
        with column:
            st.markdown(heading)
            # After reset_index() the group sizes are in a column named 0.
            counts = buildings.groupby(category).size().reset_index()
            fig = px.pie(counts, values=0, names=category)
            st.plotly_chart(fig, use_container_width=True)
def models_view(data):
    """Render the Models page: model count, configuration table and two pie charts.

    One row per ``model`` is kept (the first non-null value of each
    configuration column within the group), and the columns are renamed to
    their display names.
    """
    # Source column -> display name; the key order fixes the table column order.
    display_names = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    per_model = data[["model", *display_names]].groupby("model").first()
    models = per_model.rename(columns=display_names)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Variate types")
        # After reset_index() the group sizes are in a column named 0.
        variate_counts = models.groupby("Variate type").size().reset_index()
        fig = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(fig, use_container_width=True)
    with right:
        st.markdown("#### Frameworks")
        # The framework is the part of the model name before the first dot.
        with_framework = models.assign(Framework=models.index.str.split(".").str[0])
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        fig = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(fig, use_container_width=True)
def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the Performance page for the selected models.

    From top to bottom the page shows: a box plot of one metric column per
    model (models ordered by the per-model median of that column), a scatter
    plot of two freely chosen metric columns, a per-model statistics table
    and a model-by-building table.

    Args:
        data: Run results containing ``model`` and ``unique_id`` columns and
            metric columns named ``"<metric>.<aggregation>"`` (for example
            ``"MAE.mean"``).
        models_to_plot: Full model names to include.
    """
    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregation_options = ["min", "mean", "median", "max", "std"]

    def style_table(table: pd.DataFrame, gradient_axis):
        """Return a Styler with the colour gradient, 2-digit precision and centred text."""
        styler = table.style
        styler.background_gradient(cmap="seismic", axis=gradient_axis)
        styler.format(precision=2)
        # center text and increase font size
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # Without any selected model the box plot height below would be 0,
        # which Plotly rejects; show a hint instead of failing.
        st.warning("No data to display for the current model selection.")
        return

    # --- Box plot of one metric column per model -------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", aggregation_options, index=1)
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

    # Rank the models by the median of the chosen column across their rows.
    rank_df = (
        data_to_plot.groupby(["model"])
        .agg("median", numeric_only=True)
        .sort_values(by=f"{metric}.{aggregation}")
        .reset_index()
        .rename_axis("rank")
        .reset_index()[["rank", "model"]]
    )
    fig = px.box(
        data_to_plot.merge(rank_df, on="model").sort_values(by="rank"),
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot of two metric columns ------------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metric_options, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="x_aggregation"
        )
    with right:
        # This selector picks a metric, so it is labelled "Metric".
        y_metric = st.selectbox("Metric", metric_options, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="y_aggregation"
        )
    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables -----------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregation_options,
            index=1,
            key="table_aggregation",
        )

    stat_columns = [f"{metric}.{name}" for name in aggregation_options]
    metrics_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[stat_columns]
    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    # Colour each column on its own scale (axis=0).
    st.dataframe(style_table(metrics_table, 0), use_container_width=True)

    metrics_per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )
    # The median is inserted first, so the mean inserted afterwards is taken
    # over the building columns plus the median column (as in the original).
    metrics_per_building_table.insert(
        0, "median", metrics_per_building_table.median(axis=1)
    )
    metrics_per_building_table.insert(
        0, "mean", metrics_per_building_table.mean(axis=1)
    )
    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean")
    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    # Colour the whole table on one common scale (axis=None).
    st.dataframe(
        style_table(metrics_per_building_table, None), use_container_width=True
    )
def computation_view(data, models_to_plot: set[str]):
    """Render the Computational Resources page for the selected models.

    Shows a parallel-coordinates plot of per-model mean resource usage and
    error metrics, followed by a scatter plot of a chosen error metric
    against CPU usage (logarithmic x axis).

    Args:
        data: Run results containing a ``model`` column, the
            ``resource_usage.*`` columns and metric columns named
            ``"<metric>.<aggregation>"``.
        models_to_plot: Full model names to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Computational Resources")
    per_model_means = data_to_plot.groupby("model").mean(numeric_only=True).reset_index()
    fig = px.parallel_coordinates(
        per_model_means,
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    aggregation_options = ["min", "mean", "median", "max", "std"]
    left, center, right = st.columns(3, gap="small")
    with left:
        metric = st.selectbox("Metric", ["MAE", "RMSE", "MBE", "rMAE"], index=0)
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building", aggregation_options, index=1
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model", aggregation_options, index=1
        )
    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )

    # The "<metric>.<aggregation>" column holds the per-building statistic,
    # so the per-building choice selects the column, while the per-model
    # choice combines the rows of each model. The original had the two swapped.
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

images/energyville_logo.png ADDED Viewed

images/ku_leuven_logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ wandb==0.17.0
2	+ plotly==5.20.0

utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import pandas as pd
+import wandb
def get_wandb_data(entity: str, project: str, api_key: str, job_type: str) -> pd.DataFrame:
    """Fetch all runs of one job type from a W&B project as a flat DataFrame.

    Args:
        entity: W&B entity (user or team) that owns the project.
        project: W&B project name.
        api_key: API key used to authenticate against W&B.
        job_type: Only runs whose ``jobType`` equals this value are returned.

    Returns:
        One row per run, indexed by the run name. The columns are the
        flattened summary values (nested up to one level) followed by the
        flattened config values (nested up to two levels).
    """
    api = wandb.Api(api_key=api_key)

    # Project is specified by <entity/project-name>
    runs = api.runs(f"{entity}/{project}", filters={"jobType": job_type})

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    summary_df = pd.json_normalize(summary_list, max_level=1)
    config_df = pd.json_normalize(config_list, max_level=2)

    # Both frames have one row per run in the same order, so they can be
    # joined side by side before the run names become the index.
    runs_df = pd.concat([summary_df, config_df], axis=1)
    runs_df.index = name_list
    return runs_df
def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Count, per model and metric, on how many buildings the model is best.

    For every ``unique_id`` group and every metric column, the model with the
    lowest value gets one point. Rows whose metric value is missing are
    ignored; a building with no value at all for a metric awards no point for
    that metric. On ties the first row with the minimum wins.

    Args:
        runs_df: Run results with ``model`` and ``unique_id`` columns plus one
            column per entry in ``metrics``. The index does not need to be
            unique.
        metrics: Names of the metric columns to score (lower is better).

    Returns:
        An integer DataFrame indexed by model name with one column per
        metric, sorted in descending order by the metric columns in the
        order given.
    """
    # Start from integer zeros directly; creating an empty object frame and
    # calling fillna(0) depends on deprecated dtype downcasting.
    leaderboard = pd.DataFrame(0, index=runs_df["model"].unique(), columns=metrics)

    for _, building_df in runs_df.groupby("unique_id"):
        for column in leaderboard.columns:
            # Use a fresh positional index so duplicate run names in the
            # original index cannot make the lookup return several rows.
            valid = (
                building_df[["model", column]]
                .dropna(subset=[column])
                .reset_index(drop=True)
            )
            if valid.empty:
                continue
            best_model = valid.loc[valid[column].idxmin(), "model"]
            leaderboard.loc[best_model, column] += 1

    return leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)