"""Streamlit views summarising buildings, models and per-model performance."""

import pandas as pd
import streamlit as st
import plotly.express as px

# Options offered by every metric / aggregation selector in the performance
# view. Result columns are addressed as "<metric>.<aggregation>".
_METRICS = ["MAE", "RMSE", "MBE", "rMAE"]
_AGGREGATIONS = ["min", "mean", "median", "max", "std"]


def _count_pie(frame: pd.DataFrame, column: str) -> None:
    """Render a pie chart of the number of rows of *frame* per value of *column*."""
    # ``size().reset_index()`` leaves the counts in a column labelled ``0``.
    counts = frame.groupby(column).size().reset_index()
    fig = px.pie(counts, values=0, names=column)
    st.plotly_chart(fig, use_container_width=True)


def _styled_metrics(table: pd.DataFrame, gradient_axis):
    """Return *table* as a Styler with a colour gradient and centred 2-digit values.

    ``gradient_axis`` is forwarded to ``Styler.background_gradient``: ``0``
    scales colours per column, ``None`` scales them over the whole table.
    """

    def _decorate(styler):
        styler.background_gradient(cmap="seismic", axis=gradient_axis)
        styler.format(precision=2)
        # Center text and increase font size.
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    return table.style.pipe(_decorate)


def buildings_view(data: pd.DataFrame) -> None:
    """Render the buildings overview: count metric, table and two pie charts.

    One row per ``unique_id`` is kept (the first one encountered) and the
    metadata columns are renamed to human-readable headers.

    Args:
        data: Frame containing ``unique_id``, ``metadata.cluster_size``,
            ``metadata.building_class``, ``metadata.location_id``,
            ``metadata.timezone`` and ``dataset.available_history.days``.
    """
    buildings = (
        data[
            [
                "unique_id",
                "metadata.cluster_size",
                "metadata.building_class",
                "metadata.location_id",
                "metadata.timezone",
                "dataset.available_history.days",
            ]
        ]
        .groupby("unique_id")
        .first()
        .rename(
            columns={
                "metadata.cluster_size": "Cluster size",
                "metadata.building_class": "Building class",
                "metadata.location_id": "Location ID",
                "metadata.timezone": "Timezone",
                "dataset.available_history.days": "Available history (days)",
            }
        )
    )

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={
            "Available history (days)": st.column_config.ProgressColumn(
                "Available history (days)",
                help="Available training data during the first prediction.",
                format="%f",
                min_value=0,
                max_value=1000,
            ),
        },
    )

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Building classes")
        _count_pie(buildings, "Building class")
    with right:
        st.markdown("#### Timezones")
        _count_pie(buildings, "Timezone")


def models_view(data: pd.DataFrame) -> None:
    """Render the models overview: count metric, table and two pie charts.

    One row per ``model`` is kept (the first one encountered). The
    "Frameworks" chart groups models by the part of the model name that
    precedes the first ``.``.

    Args:
        data: Frame containing ``model``, the ``cv_config.*`` columns
            (``folds``, ``horizon``, ``step``, ``time``) and the
            ``model_info.*`` columns (``repository``, ``tag``,
            ``variate_type``).
    """
    models = (
        data[
            [
                "model",
                "cv_config.folds",
                "cv_config.horizon",
                "cv_config.step",
                "cv_config.time",
                "model_info.repository",
                "model_info.tag",
                "model_info.variate_type",
            ]
        ]
        .groupby("model")
        .first()
        .rename(
            columns={
                "cv_config.folds": "CV Folds",
                "cv_config.horizon": "CV Horizon",
                "cv_config.step": "CV Step",
                "cv_config.time": "CV Time",
                "model_info.repository": "Image Repository",
                "model_info.tag": "Image Tag",
                "model_info.variate_type": "Variate type",
            }
        )
    )

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Variate types")
        _count_pie(models, "Variate type")
    with right:
        st.markdown("#### Frameworks")
        # Work on a copy so the table shown above keeps its original columns.
        with_framework = models.copy()
        with_framework["Framework"] = with_framework.index.str.split(".").str[0]
        _count_pie(with_framework, "Framework")


def performance_view(data: pd.DataFrame, models_to_plot: set[str]) -> None:
    """Render performance charts and tables for the selected models.

    Shows, in order: a box plot of one ``<metric>.<aggregation>`` column per
    model, a scatter plot of two such columns against each other, a per-model
    statistics table and a model-by-building table.

    Args:
        data: Frame containing ``model``, ``unique_id`` and one numeric
            column per ``<metric>.<aggregation>`` combination.
        models_to_plot: Names of the models to include; rows of other models
            are dropped.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    # With nothing selected the box-plot height below would be 0, which
    # Plotly rejects; bail out with a message instead of raising.
    if data_to_plot.empty:
        st.info("No data available for the selected models.")
        return

    # --- Box plot: distribution of one metric column per model -------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", _METRICS, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", _AGGREGATIONS, index=1)

    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    # Height grows with the number of selected models (40 px each).
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)
    st.divider()

    # --- Scatter plot: one metric column against another -------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", _METRICS, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", _AGGREGATIONS, index=1, key="x_aggregation"
        )
    with right:
        # This selector picks a metric, so it is labelled "Metric"
        # (it was previously mislabelled "Aggregation").
        y_metric = st.selectbox("Metric", _METRICS, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", _AGGREGATIONS, index=1, key="y_aggregation"
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)
    st.divider()

    # --- Tables -------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        table_metric = st.selectbox("Metric", _METRICS, index=0, key="table_metric")
    with right:
        table_aggregation = st.selectbox(
            "Aggregation across folds",
            _AGGREGATIONS,
            index=1,
            key="table_aggregation",
        )

    # Per model: aggregate every numeric column, keep the chosen metric's stats.
    per_model = data_to_plot.groupby(["model"]).agg(
        table_aggregation, numeric_only=True
    )[[f"{table_metric}.{agg_name}" for agg_name in _AGGREGATIONS]]

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per model"
    )
    # axis=0: colour scale is computed separately for each column.
    st.dataframe(_styled_metrics(per_model, 0), use_container_width=True)

    # Per building: one row per model, one column per building.
    per_building = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(table_aggregation, numeric_only=True)
        .reset_index()
        .pivot(
            index="model",
            columns="unique_id",
            values=f"{table_metric}.{table_aggregation}",
        )
    )

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per building"
    )
    # axis=None: a single colour scale across the whole table.
    st.dataframe(_styled_metrics(per_building, None), use_container_width=True)