Spaces:

EDS-lab
/

EnFoBench-GasDemand

Sleeping

App Files Files Community

EnFoBench-GasDemand / components.py

attila-balint-kul

Upload components.py

1c6134e verified about 1 month ago

raw

history blame contribute delete

No virus

20.8 kB

	import pandas as pd
	import plotly.express as px
	import streamlit as st
	from pandas.io.formats.style import Styler

	from utils import get_leaderboard, get_model_ranks


	def header(title: str) -> None:
	    """Render the page title, a short EnFoBench introduction and a divider.

	    Args:
	        title: Text passed to ``st.title``.
	    """
	    introduction = """
	        [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)
	        is a community driven benchmarking framework for energy forecasting models.
	        """
	    st.title(title)
	    st.markdown(introduction)
	    st.divider()


	def logos() -> None:
	    """Show the two logo images side by side, one per column."""
	    logo_paths = (
	        "./images/ku_leuven_logo.png",
	        "./images/energyville_logo.png",
	    )
	    # st.columns(2) yields the left and right containers in display order.
	    for column, path in zip(st.columns(2), logo_paths):
	        with column:
	            st.image(path)


	def links(current: str) -> None:
	    """Render the "Sources" and "Other Dashboards" link buttons.

	    Args:
	        current: Identifier of the dashboard being shown ("ElectricityDemand",
	            "GasDemand" or "PVGeneration"). The button that points to this
	            dashboard is omitted from the "Other Dashboards" list.
	    """
	    # (button label, target URL)
	    sources = (
	        (
	            "GitHub Repository",
	            "https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit",
	        ),
	        (
	            "Documentation",
	            "https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/",
	        ),
	        (
	            "Electricity Demand Dataset",
	            "https://huggingface.co/datasets/EDS-lab/electricity-demand",
	        ),
	        (
	            "HuggingFace Organization",
	            "https://huggingface.co/EDS-lab",
	        ),
	    )
	    # (dashboard identifier, button label, target URL)
	    dashboards = (
	        (
	            "ElectricityDemand",
	            "Electricity Demand",
	            "https://huggingface.co/spaces/EDS-lab/EnFoBench-ElectricityDemand",
	        ),
	        (
	            "GasDemand",
	            "Gas Demand",
	            "https://huggingface.co/spaces/EDS-lab/EnFoBench-GasDemand",
	        ),
	        (
	            "PVGeneration",
	            "PVGeneration",
	            "https://huggingface.co/spaces/EDS-lab/EnFoBench-PVGeneration",
	        ),
	    )

	    st.header("Sources")
	    for label, url in sources:
	        st.link_button(label, url=url, use_container_width=True)

	    st.header("Other Dashboards")
	    for dashboard_id, label, url in dashboards:
	        if dashboard_id == current:
	            # Do not link a dashboard to itself.
	            continue
	        st.link_button(label, url=url, use_container_width=True)


	def model_selector(models: list[str], data: pd.DataFrame) -> set[str]:
	    """Render one checkbox per model plus bulk-selection buttons.

	    Models are grouped under the part of their identifier before the first
	    ".". Every checkbox uses the full model identifier as its widget key, so
	    the buttons can switch checkboxes on or off by writing to
	    ``st.session_state`` before the checkboxes are created.

	    Args:
	        models: Model identifiers, normally of the form "<group>.<name>".
	            Identifiers without a "." are listed under an "Other" heading
	            instead of raising ``ValueError``.
	        data: Benchmark results handed to ``get_model_ranks`` when one of
	            the "Best by ..." buttons is pressed.

	    Returns:
	        The identifiers of all models whose checkbox is currently ticked.
	    """
	    # Group models by their prefix, keeping (label, full identifier) pairs.
	    # dict.fromkeys drops duplicate ids, which would otherwise produce
	    # duplicate widget keys.
	    model_groups: dict[str, list[tuple[str, str]]] = {}
	    for model in dict.fromkeys(models):
	        group, separator, model_name = model.partition(".")
	        if not separator:
	            group, model_name = "Other", model
	        model_groups.setdefault(group, []).append((model_name, model))

	    def set_selection(selected: set[str]) -> None:
	        """Tick exactly the models in ``selected`` and untick the rest."""
	        for model in models:
	            st.session_state[model] = model in selected

	    st.header("Models to include")

	    # (button label, metric column used for ranking)
	    ranking_buttons = (
	        ("Best by MAE", "MAE.mean"),
	        ("Best by RMSE", "RMSE.mean"),
	        ("Best by rMAE", "rMAE.mean"),
	    )
	    for column, (label, metric) in zip(st.columns(3), ranking_buttons):
	        with column:
	            if st.button(label, use_container_width=True):
	                best_models = get_model_ranks(data, metric).head(10).model.tolist()
	                set_selection(set(best_models))

	    # (button label, models to select)
	    bulk_buttons = (
	        ("Select None", set()),
	        ("Select All", set(models)),
	    )
	    for column, (label, selection) in zip(st.columns(2), bulk_buttons):
	        with column:
	            if st.button(label, use_container_width=True):
	                set_selection(selection)

	    models_to_plot: set[str] = set()
	    for group, members in model_groups.items():
	        st.text(group)
	        for model_name, model in members:
	            # Seed the default through Session State rather than `value=`:
	            # Streamlit warns when a widget has both a non-default `value`
	            # and a value written via the Session State API for its key.
	            st.session_state.setdefault(model, True)
	            if st.checkbox(model_name, key=model):
	                models_to_plot.add(model)
	    return models_to_plot


	def overview_view(data: pd.DataFrame):
	    """Render the leaderboard page: three bar charts and the full table.

	    Args:
	        data: Benchmark results handed to ``get_leaderboard`` together with
	            the "MAE.mean", "RMSE.mean" and "rMAE.mean" metric names.
	    """
	    st.markdown("## Leaderboard")

	    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

	    # (metric name, y-axis title); only the first chart labels its y axis.
	    chart_specs = (
	        ("MAE", "Model"),
	        ("RMSE", ""),
	        ("rMAE", ""),
	    )
	    for column, (metric, y_title) in zip(st.columns(3), chart_specs):
	        score_column = f"{metric}.mean"
	        with column:
	            # NOTE(review): this keeps the 10 rows with the LARGEST values in
	            # the column; whether those are the "top" models depends on what
	            # get_leaderboard stores there - confirm against utils.
	            largest_first = leaderboard.sort_values(score_column, ascending=False)
	            shown = largest_first.head(10).sort_values(score_column)

	            fig = px.bar(shown, x=score_column, y=shown.index)
	            fig.update_layout(
	                title=f"Top 10 models by {metric}",
	                xaxis_title="",
	                yaxis_title=y_title,
	                height=600,
	            )
	            st.plotly_chart(fig, use_container_width=True)

	    st.dataframe(leaderboard, use_container_width=True)


	def buildings_view(data: pd.DataFrame):
	    """Render the buildings page: counts, distribution pies and a table.

	    The optional columns "metadata.cluster_size" and
	    "metadata.building_class" are filled with 1 and "Unknown" when absent.
	    The fill-in happens on a new frame, so the caller's DataFrame is not
	    modified.

	    Args:
	        data: Benchmark results with one or more rows per "unique_id" and
	            the metadata / dataset columns selected below.
	    """
	    fallbacks = {
	        "metadata.cluster_size": 1,
	        "metadata.building_class": "Unknown",
	    }
	    missing = {
	        name: value for name, value in fallbacks.items() if name not in data.columns
	    }
	    if missing:
	        # assign() returns a new frame instead of adding columns in place.
	        data = data.assign(**missing)

	    building_columns = [
	        "unique_id",
	        "metadata.cluster_size",
	        "metadata.building_class",
	        "metadata.location_id",
	        "metadata.timezone",
	        "dataset.available_history.days",
	        "dataset.available_history.observations",
	        "metadata.freq",
	    ]
	    display_names = {
	        "metadata.cluster_size": "Cluster size",
	        "metadata.building_class": "Building class",
	        "metadata.location_id": "Location ID",
	        "metadata.timezone": "Timezone",
	        "dataset.available_history.days": "Available history (days)",
	        "dataset.available_history.observations": "Available history (#)",
	        "metadata.freq": "Frequency",
	    }
	    # One row per building: the first value seen for each attribute.
	    buildings = (
	        data[building_columns]
	        .groupby("unique_id")
	        .first()
	        .rename(columns=display_names)
	    )

	    def count_buildings(building_class: str) -> int:
	        """Return the number of distinct buildings of the given class."""
	        of_class = data[data["metadata.building_class"] == building_class]
	        return of_class["unique_id"].nunique()

	    left, middle, right = st.columns(3)
	    with left:
	        st.metric("Number of buildings", data["unique_id"].nunique())
	    with middle:
	        st.metric("Residential", count_buildings("Residential"))
	    with right:
	        st.metric("Commercial", count_buildings("Commercial"))
	    st.divider()

	    # (section title, column of `buildings` whose distribution is shown)
	    pie_specs = (
	        ("Building classes", "Building class"),
	        ("Timezones", "Timezone"),
	        ("Frequencies", "Frequency"),
	    )
	    for column, (title, field) in zip(st.columns(3, gap="large"), pie_specs):
	        with column:
	            st.markdown(f"#### {title}")
	            # size().reset_index() names the count column 0, hence values=0.
	            fig = px.pie(
	                buildings.groupby(field).size().reset_index(),
	                values=0,
	                names=field,
	            )
	            fig.update_layout(
	                legend=dict(
	                    orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
	                )
	            )
	            st.plotly_chart(fig, use_container_width=True)

	    st.divider()

	    st.markdown("#### Buildings")
	    # Both history columns are drawn as progress bars scaled to their maximum.
	    history_columns = {
	        name: st.column_config.ProgressColumn(
	            name,
	            help="Available training data during the first prediction.",
	            format="%f",
	            min_value=0,
	            max_value=float(buildings[name].max()),
	        )
	        for name in ("Available history (days)", "Available history (#)")
	    }
	    st.dataframe(
	        buildings.sort_values("Available history (days)"),
	        use_container_width=True,
	        column_config=history_columns,
	    )


	def models_view(data: pd.DataFrame):
	    """Render the models page: counts, distribution pies and a table.

	    Args:
	        data: Benchmark results with a "model" column plus the "cv_config.*"
	            and "model_info.*" columns selected below.
	    """
	    model_columns = [
	        "model",
	        "cv_config.folds",
	        "cv_config.horizon",
	        "cv_config.step",
	        "cv_config.time",
	        "model_info.repository",
	        "model_info.tag",
	        "model_info.variate_type",
	    ]
	    display_names = {
	        "cv_config.folds": "CV Folds",
	        "cv_config.horizon": "CV Horizon",
	        "cv_config.step": "CV Step",
	        "cv_config.time": "CV Time",
	        "model_info.repository": "Image Repository",
	        "model_info.tag": "Image Tag",
	        "model_info.variate_type": "Variate type",
	    }
	    # One row per model: the first value seen for each attribute.
	    models = (
	        data[model_columns]
	        .groupby("model")
	        .first()
	        .rename(columns=display_names)
	    )

	    def count_models(variate_type: str) -> int:
	        """Return the number of distinct models of the given variate type."""
	        of_type = data[data["model_info.variate_type"] == variate_type]
	        return of_type["model"].nunique()

	    left, middle, right = st.columns(3)
	    with left:
	        st.metric("Models", len(models))
	    with middle:
	        st.metric("Univariate", count_models("univariate"))
	    with right:
	        # This metric counts multivariate models, so label it accordingly
	        # (it used to be labelled "Univariate" as well).
	        st.metric("Multivariate", count_models("multivariate"))
	    st.divider()

	    left, right = st.columns(2, gap="large")
	    with left:
	        st.markdown("#### Variate types")
	        # size().reset_index() names the count column 0, hence values=0.
	        fig = px.pie(
	            models.groupby("Variate type").size().reset_index(),
	            values=0,
	            names="Variate type",
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    with right:
	        st.markdown("#### Frameworks")
	        # The framework is the part of the model id before the first ".".
	        with_framework = models.copy()
	        with_framework["Framework"] = with_framework.index.str.split(".").str[0]
	        fig = px.pie(
	            with_framework.groupby("Framework").size().reset_index(),
	            values=0,
	            names="Framework",
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    st.divider()
	    st.markdown("### Models")
	    st.dataframe(models, use_container_width=True)


	def accuracy_view(data: pd.DataFrame, models_to_plot: set[str]):
	    """Render the accuracy page for the selected models.

	    The page has three sections, each with its own selectors: a box plot of
	    one "<metric>.<aggregation>" column per model, a scatter plot of two such
	    columns against each other, and two colour-graded tables (statistics per
	    model and values per building).

	    Args:
	        data: Benchmark results with "model", "unique_id" and
	            "<metric>.<aggregation>" columns.
	        models_to_plot: Model identifiers to include; all other rows are
	            dropped. When nothing is left, each section shows a warning
	            instead of its content.
	    """
	    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
	    aggregation_options = ["min", "mean", "median", "max", "std"]

	    def gradient_table(styler: Styler, axis) -> Styler:
	        """Apply the colour gradient along ``axis`` plus shared formatting."""
	        styler.background_gradient(cmap="seismic", axis=axis)
	        styler.format(precision=2)

	        # center text and increase font size
	        styler.map(lambda _: "text-align: center; font-size: 14px;")
	        return styler

	    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
	        by="model", ascending=True
	    )

	    # --- Section 1: distribution of one metric per model -----------------
	    left, right = st.columns(2, gap="small")
	    with left:
	        metric = st.selectbox("Metric", metric_options, index=0)
	    with right:
	        aggregation = st.selectbox("Aggregation", aggregation_options, index=1)
	    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

	    if data_to_plot.empty:
	        st.warning("No data to display.")
	    else:
	        model_ranks = get_model_ranks(data_to_plot, f"{metric}.{aggregation}")

	        fig = px.box(
	            data_to_plot.merge(model_ranks, on="model").sort_values(by="rank"),
	            x=f"{metric}.{aggregation}",
	            y="model",
	            color="model",
	            points="all",
	        )
	        fig.update_layout(showlegend=False, height=50 * len(models_to_plot))
	        st.plotly_chart(fig, use_container_width=True)

	    st.divider()

	    # --- Section 2: one metric against another ---------------------------
	    left, right = st.columns(2, gap="large")
	    with left:
	        x_metric = st.selectbox("Metric", metric_options, index=0, key="x_metric")
	        x_aggregation = st.selectbox(
	            "Aggregation", aggregation_options, index=1, key="x_aggregation"
	        )
	    with right:
	        # This box selects the y-axis metric, so it is labelled "Metric"
	        # (it used to be labelled "Aggregation" like the box below it).
	        y_metric = st.selectbox("Metric", metric_options, index=1, key="y_metric")
	        y_aggregation = st.selectbox(
	            "Aggregation", aggregation_options, index=1, key="y_aggregation"
	        )

	    st.markdown(
	        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
	    )
	    if data_to_plot.empty:
	        st.warning("No data to display.")
	    else:
	        fig = px.scatter(
	            data_to_plot,
	            x=f"{x_metric}.{x_aggregation}",
	            y=f"{y_metric}.{y_aggregation}",
	            color="model",
	        )
	        fig.update_layout(height=600)
	        st.plotly_chart(fig, use_container_width=True)

	    st.divider()

	    # --- Section 3: tables -----------------------------------------------
	    left, right = st.columns(2, gap="small")
	    with left:
	        metric = st.selectbox("Metric", metric_options, index=0, key="table_metric")
	    with right:
	        aggregation = st.selectbox(
	            "Aggregation across folds",
	            aggregation_options,
	            index=1,
	            key="table_aggregation",
	        )

	    if data_to_plot.empty:
	        # Same guard as the two sections above: with no rows there is
	        # nothing to aggregate, pivot or colour.
	        st.warning("No data to display.")
	        return

	    stat_columns = [f"{metric}.{stat}" for stat in aggregation_options]
	    metrics_table = (
	        data_to_plot.groupby(["model"])
	        .agg(aggregation, numeric_only=True)[stat_columns]
	        .sort_values(by=f"{metric}.mean")
	    )

	    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
	    # axis=0: each statistic column gets its own colour scale.
	    st.dataframe(
	        metrics_table.style.pipe(gradient_table, axis=0),
	        use_container_width=True,
	    )

	    metrics_per_building_table = (
	        data_to_plot.groupby(["model", "unique_id"])
	        .agg(aggregation, numeric_only=True)
	        .reset_index()
	        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
	    )
	    # Order the models by their mean over all buildings; the helper column
	    # is only needed for sorting and is dropped again.
	    metrics_per_building_table.insert(
	        0, "mean", metrics_per_building_table.mean(axis=1)
	    )
	    metrics_per_building_table = metrics_per_building_table.sort_values(
	        by="mean"
	    ).drop(columns="mean")

	    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
	    # axis=None: one colour scale shared by the whole table.
	    st.dataframe(
	        metrics_per_building_table.style.pipe(gradient_table, axis=None),
	        use_container_width=True,
	    )


	def relative_performance_view(data: pd.DataFrame, models_to_plot: set[str]):
	    """Render a box plot of how often each model beats a baseline.

	    Baselines are discovered from the columns of ``data`` whose name contains
	    "better_than"; the cells of such a column are expanded with
	    ``pd.json_normalize`` and their "percentage" field is scaled by 100.

	    Args:
	        data: Benchmark results with a "model" column and one
	            "better_than.<baseline>" column per baseline.
	        models_to_plot: Model identifiers to include; all other rows are
	            dropped. A warning is shown when nothing is left or when no
	            baseline column exists.
	    """
	    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
	        by="model", ascending=True
	    )

	    st.markdown("#### Relative performance")
	    if data_to_plot.empty:
	        st.warning("No data to display.")
	        return

	    baseline_choices = sorted(
	        data.filter(like="better_than")
	        .columns.str.removeprefix("better_than.")
	        .tolist()
	    )
	    if not baseline_choices:
	        # Without this guard the single-baseline shortcut below would raise
	        # IndexError on data that carries no baseline comparison at all.
	        st.warning("No baseline comparisons available.")
	        return

	    if len(baseline_choices) > 1:
	        better_than_baseline = st.selectbox("Baseline model", options=baseline_choices)
	    else:
	        # Only one baseline: no need to ask.
	        better_than_baseline = baseline_choices[0]

	    baseline_column = f"better_than.{better_than_baseline}"
	    percentage_column = f"{baseline_column}.percentage"
	    # .values drops the fresh 0..n-1 index of the normalized frame so the
	    # numbers are assigned by position to the sorted rows.
	    data_to_plot[percentage_column] = (
	        pd.json_normalize(data_to_plot[baseline_column])["percentage"].values * 100
	    )
	    model_rank = get_model_ranks(data_to_plot, percentage_column)

	    fig = px.box(
	        # Join on the model id explicitly, as accuracy_view does.
	        data_to_plot.merge(model_rank, on="model").sort_values(by="rank"),
	        x=percentage_column,
	        y="model",
	        points="all",
	    )
	    fig.update_xaxes(range=[0, 100], title_text="Better than baseline (%)")
	    fig.update_layout(
	        showlegend=False,
	        height=50 * len(models_to_plot),
	        title=f"Better than {better_than_baseline} on % of days per building",
	    )
	    st.plotly_chart(fig, use_container_width=True)


	def computation_view(data: pd.DataFrame, models_to_plot: set[str]):
	    """Render the computational-resources page for the selected models.

	    The page shows accuracy against CPU usage per model, CPU usage against
	    the amount of available history, and a colour-graded table of CPU usage
	    per model and building. "resource_usage.CPU" is divided by 3600 and
	    labelled as hours.

	    Args:
	        data: Benchmark results with "model", "unique_id",
	            "resource_usage.CPU", "dataset.available_history.observations"
	            and "<metric>.<aggregation>" columns.
	        models_to_plot: Model identifiers to include; all other rows are
	            dropped. When nothing is left, each section shows a warning
	            instead of its content.
	    """
	    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
	    aggregation_options = ["min", "mean", "median", "max", "std"]

	    # sort_values returns a new frame, so rescaling does not touch `data`.
	    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
	        by="model", ascending=True
	    )
	    data_to_plot["resource_usage.CPU"] = data_to_plot["resource_usage.CPU"] / 3600

	    st.markdown("#### Computational Resources")

	    left, center, right = st.columns(3, gap="small")
	    with left:
	        metric = st.selectbox("Metric", metric_options, index=0)
	    with center:
	        aggregation_per_building = st.selectbox(
	            "Aggregation per building", aggregation_options, index=1
	        )
	    with right:
	        aggregation_per_model = st.selectbox(
	            "Aggregation per model", aggregation_options, index=1
	        )

	    st.markdown(
	        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
	    )
	    if data_to_plot.empty:
	        st.warning("No data to display.")
	    else:
	        # The per-building choice selects the "<metric>.<aggregation>" column
	        # (presumably the statistic already computed for each building) and
	        # the per-model choice collapses the buildings of each model. The
	        # two were previously swapped, which only went unnoticed because
	        # both default to "mean".
	        aggregated_data = (
	            data_to_plot.groupby("model")
	            .agg(aggregation_per_model, numeric_only=True)
	            .reset_index()
	        )
	        fig = px.scatter(
	            aggregated_data,
	            x="resource_usage.CPU",
	            y=f"{metric}.{aggregation_per_building}",
	            color="model",
	            log_x=True,
	        )
	        fig.update_layout(height=600)
	        fig.update_xaxes(title_text="CPU usage (hours)")
	        fig.update_yaxes(
	            title_text=f"{metric} ({aggregation_per_building}, {aggregation_per_model})"
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    st.divider()

	    st.markdown("#### Computational time vs historical data")
	    if data_to_plot.empty:
	        st.warning("No data to display.")
	    else:
	        fig = px.scatter(
	            data_to_plot,
	            x="dataset.available_history.observations",
	            y="resource_usage.CPU",
	            color="model",
	            trendline="ols",
	            hover_data=["model", "unique_id"],
	        )
	        fig.update_layout(height=600)
	        fig.update_xaxes(title_text="Available historical observations (#)")
	        fig.update_yaxes(title_text="CPU usage (hours)")
	        st.plotly_chart(fig, use_container_width=True)

	    st.divider()

	    st.markdown("#### Computational time per building")
	    if data_to_plot.empty:
	        # Same guard as the sections above: nothing to pivot or colour.
	        st.warning("No data to display.")
	        return

	    # NOTE(review): pivot requires a single row per (model, unique_id) pair.
	    cpu_per_building_table = data_to_plot.pivot(
	        index="model", columns="unique_id", values="resource_usage.CPU"
	    )

	    def custom_table(styler: Styler) -> Styler:
	        """Colour the whole table on one scale and apply shared formatting."""
	        styler.background_gradient(cmap="seismic", axis=None)
	        styler.format(precision=2)

	        # center text and increase font size
	        styler.map(lambda _: "text-align: center; font-size: 14px;")
	        return styler

	    styled_table = cpu_per_building_table.style.pipe(custom_table)
	    st.dataframe(styled_table, use_container_width=True)