Spaces:
Sleeping
Sleeping
Evgueni Poloukarov
committed on
Commit
·
2a32f6f
1
Parent(s):
6331963
refactor: improve Marimo notebook readability with proper number formatting
Browse files
Fixed excessive decimal places throughout the notebook:
- Round all MAE/RMSE values to 1 decimal place (MW precision)
- Round percentage increases to 1 decimal place
- Format chart tooltips with .1f precision
- Clean up table displays for better readability
Changes:
- Load: Round all mae_d1-d14, mae_overall, rmse_overall at load time
- Daily MAE: Round mean/median values to 1 decimal
- Degradation table: Round pct_increase to 1 decimal
- Best/worst performers: Round all MAE/RMSE columns
- Outliers table: Round all MAE/RMSE columns
- Heatmap: Ensure MAE values rounded to 1 decimal
- All charts: Format tooltips with .1f for consistency
Result: All numbers now display with sensible precision (e.g., 15.9 MW instead of 15.923705433358656 MW)
notebooks/october_2024_evaluation.py
CHANGED
|
@@ -46,11 +46,19 @@ def _(mo):
|
|
| 46 |
def _(Path, pl):
|
| 47 |
# Load evaluation results
|
| 48 |
results_path = Path(__file__).parent.parent / 'results' / 'october_2024_multivariate.csv'
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
print(f"Loaded {len(eval_df)} border evaluations")
|
| 52 |
print(f"Columns: {eval_df.columns}")
|
| 53 |
-
eval_df.head()
|
| 54 |
return (eval_df,)
|
| 55 |
|
| 56 |
|
|
@@ -99,7 +107,9 @@ def _(alt, eval_df):
|
|
| 99 |
hist_chart = alt.Chart(eval_df.to_pandas()).mark_bar().encode(
|
| 100 |
x=alt.X('mae_d1:Q', bin=alt.Bin(maxbins=20), title='D+1 MAE (MW)'),
|
| 101 |
y=alt.Y('count()', title='Number of Borders'),
|
| 102 |
-
tooltip=[
|
|
|
|
|
|
|
| 103 |
).properties(
|
| 104 |
width=600,
|
| 105 |
height=300,
|
|
@@ -123,9 +133,13 @@ def _(mo):
|
|
| 123 |
|
| 124 |
|
| 125 |
@app.cell
|
| 126 |
-
def _(eval_df):
|
| 127 |
-
# Top 10 best performers
|
| 128 |
-
best_performers = eval_df.sort('mae_d1').head(10)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
best_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
|
| 130 |
return
|
| 131 |
|
|
@@ -143,9 +157,13 @@ def _(mo):
|
|
| 143 |
|
| 144 |
|
| 145 |
@app.cell
|
| 146 |
-
def _(eval_df):
|
| 147 |
-
# Top 10 worst performers
|
| 148 |
-
worst_performers = eval_df.sort('mae_d1', descending=True).head(10)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
worst_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
|
| 150 |
return
|
| 151 |
|
|
@@ -166,12 +184,12 @@ def _(mo):
|
|
| 166 |
|
| 167 |
@app.cell
|
| 168 |
def _(eval_df, pl):
|
| 169 |
-
# Calculate mean MAE for each day
|
| 170 |
daily_mae_data = []
|
| 171 |
for day in range(1, 15):
|
| 172 |
col_name = f'mae_d{day}'
|
| 173 |
-
mean_mae = eval_df[col_name].mean()
|
| 174 |
-
median_mae = eval_df[col_name].median()
|
| 175 |
daily_mae_data.append({
|
| 176 |
'day': day,
|
| 177 |
'mean_mae': mean_mae,
|
|
@@ -189,7 +207,11 @@ def _(alt, daily_mae_df):
|
|
| 189 |
degradation_chart = alt.Chart(daily_mae_df.to_pandas()).mark_line(point=True).encode(
|
| 190 |
x=alt.X('day:Q', title='Forecast Day', scale=alt.Scale(domain=[1, 14])),
|
| 191 |
y=alt.Y('mean_mae:Q', title='Mean MAE (MW)', scale=alt.Scale(zero=True)),
|
| 192 |
-
tooltip=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
).properties(
|
| 194 |
width=700,
|
| 195 |
height=400,
|
|
@@ -202,12 +224,12 @@ def _(alt, daily_mae_df):
|
|
| 202 |
|
| 203 |
@app.cell
|
| 204 |
def _(daily_mae_df, mo, pl):
|
| 205 |
-
# MAE degradation table with explicit baseline
|
| 206 |
mae_list = daily_mae_df['mean_mae'].to_list()
|
| 207 |
baseline_mae = mae_list[0]
|
| 208 |
|
| 209 |
degradation_table = daily_mae_df.with_columns([
|
| 210 |
-
((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).alias('pct_increase')
|
| 211 |
])
|
| 212 |
|
| 213 |
# Extract specific days for readability
|
|
@@ -222,10 +244,10 @@ def _(daily_mae_df, mo, pl):
|
|
| 222 |
{mo.as_html(degradation_table.to_pandas())}
|
| 223 |
|
| 224 |
**Key Observations**:
|
| 225 |
-
- D+1 baseline: {degradation_d1_mae:.
|
| 226 |
- D+2 degradation: {((degradation_d2_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%
|
| 227 |
-
- D+14 final: {degradation_d14_mae:.
|
| 228 |
-
- Largest jump: D+8 at {degradation_d8_mae:.
|
| 229 |
""")
|
| 230 |
return
|
| 231 |
|
|
@@ -249,14 +271,15 @@ def _(eval_df, pl):
|
|
| 249 |
# Reshape data for heatmap (unpivot daily MAE columns)
|
| 250 |
heatmap_data = eval_df.select(['border'] + [f'mae_d{i}' for i in range(1, 15)])
|
| 251 |
|
| 252 |
-
# Unpivot to long format
|
| 253 |
heatmap_long = heatmap_data.unpivot(
|
| 254 |
index='border',
|
| 255 |
on=[f'mae_d{i}' for i in range(1, 15)],
|
| 256 |
variable_name='day',
|
| 257 |
value_name='mae'
|
| 258 |
).with_columns([
|
| 259 |
-
pl.col('day').str.replace('mae_d', '').cast(pl.Int32)
|
|
|
|
| 260 |
])
|
| 261 |
|
| 262 |
heatmap_long.head()
|
|
@@ -299,8 +322,15 @@ def _(mo):
|
|
| 299 |
|
| 300 |
@app.cell
|
| 301 |
def _(eval_df, pl):
|
| 302 |
-
# Identify outliers
|
| 303 |
-
outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
outliers.select(['border', 'mae_d1', 'mae_d2', 'mae_d7', 'mae_d14', 'mae_overall', 'rmse_overall'])
|
| 306 |
return (outliers,)
|
|
@@ -409,7 +439,11 @@ def _(alt, eval_df):
|
|
| 409 |
alt.value('#e74c3c'),
|
| 410 |
alt.value('#3498db')
|
| 411 |
),
|
| 412 |
-
tooltip=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
).properties(
|
| 414 |
width=600,
|
| 415 |
height=400,
|
|
|
|
| 46 |
def _(Path, pl):
|
| 47 |
# Load evaluation results
|
| 48 |
results_path = Path(__file__).parent.parent / 'results' / 'october_2024_multivariate.csv'
|
| 49 |
+
eval_df_raw = pl.read_csv(results_path)
|
| 50 |
+
|
| 51 |
+
# Round all MAE and RMSE columns for readability
|
| 52 |
+
mae_cols = [f'mae_d{i}' for i in range(1, 15)] + ['mae_overall']
|
| 53 |
+
rmse_cols = ['rmse_overall']
|
| 54 |
+
|
| 55 |
+
eval_df = eval_df_raw.with_columns([
|
| 56 |
+
pl.col(col).round(1) for col in mae_cols + rmse_cols
|
| 57 |
+
])
|
| 58 |
|
| 59 |
print(f"Loaded {len(eval_df)} border evaluations")
|
| 60 |
print(f"Columns: {eval_df.columns}")
|
| 61 |
+
eval_df.head(38)
|
| 62 |
return (eval_df,)
|
| 63 |
|
| 64 |
|
|
|
|
| 107 |
hist_chart = alt.Chart(eval_df.to_pandas()).mark_bar().encode(
|
| 108 |
x=alt.X('mae_d1:Q', bin=alt.Bin(maxbins=20), title='D+1 MAE (MW)'),
|
| 109 |
y=alt.Y('count()', title='Number of Borders'),
|
| 110 |
+
tooltip=[
|
| 111 |
+
alt.Tooltip('count()', title='Number of Borders')
|
| 112 |
+
]
|
| 113 |
).properties(
|
| 114 |
width=600,
|
| 115 |
height=300,
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
@app.cell
|
| 136 |
+
def _(eval_df, pl):
|
| 137 |
+
# Top 10 best performers (rounded for readability)
|
| 138 |
+
best_performers = eval_df.sort('mae_d1').head(10).with_columns([
|
| 139 |
+
pl.col('mae_d1').round(1),
|
| 140 |
+
pl.col('mae_overall').round(1),
|
| 141 |
+
pl.col('rmse_overall').round(1)
|
| 142 |
+
])
|
| 143 |
best_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
|
| 144 |
return
|
| 145 |
|
|
|
|
| 157 |
|
| 158 |
|
| 159 |
@app.cell
|
| 160 |
+
def _(eval_df, pl):
|
| 161 |
+
# Top 10 worst performers (rounded for readability)
|
| 162 |
+
worst_performers = eval_df.sort('mae_d1', descending=True).head(10).with_columns([
|
| 163 |
+
pl.col('mae_d1').round(1),
|
| 164 |
+
pl.col('mae_overall').round(1),
|
| 165 |
+
pl.col('rmse_overall').round(1)
|
| 166 |
+
])
|
| 167 |
worst_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
|
| 168 |
return
|
| 169 |
|
|
|
|
| 184 |
|
| 185 |
@app.cell
|
| 186 |
def _(eval_df, pl):
|
| 187 |
+
# Calculate mean MAE for each day (rounded for readability)
|
| 188 |
daily_mae_data = []
|
| 189 |
for day in range(1, 15):
|
| 190 |
col_name = f'mae_d{day}'
|
| 191 |
+
mean_mae = round(eval_df[col_name].mean(), 1)
|
| 192 |
+
median_mae = round(eval_df[col_name].median(), 1)
|
| 193 |
daily_mae_data.append({
|
| 194 |
'day': day,
|
| 195 |
'mean_mae': mean_mae,
|
|
|
|
| 207 |
degradation_chart = alt.Chart(daily_mae_df.to_pandas()).mark_line(point=True).encode(
|
| 208 |
x=alt.X('day:Q', title='Forecast Day', scale=alt.Scale(domain=[1, 14])),
|
| 209 |
y=alt.Y('mean_mae:Q', title='Mean MAE (MW)', scale=alt.Scale(zero=True)),
|
| 210 |
+
tooltip=[
|
| 211 |
+
alt.Tooltip('day:Q', title='Day'),
|
| 212 |
+
alt.Tooltip('mean_mae:Q', title='Mean MAE (MW)', format='.1f'),
|
| 213 |
+
alt.Tooltip('median_mae:Q', title='Median MAE (MW)', format='.1f')
|
| 214 |
+
]
|
| 215 |
).properties(
|
| 216 |
width=700,
|
| 217 |
height=400,
|
|
|
|
| 224 |
|
| 225 |
@app.cell
|
| 226 |
def _(daily_mae_df, mo, pl):
|
| 227 |
+
# MAE degradation table with explicit baseline (rounded for readability)
|
| 228 |
mae_list = daily_mae_df['mean_mae'].to_list()
|
| 229 |
baseline_mae = mae_list[0]
|
| 230 |
|
| 231 |
degradation_table = daily_mae_df.with_columns([
|
| 232 |
+
(((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).round(1)).alias('pct_increase')
|
| 233 |
])
|
| 234 |
|
| 235 |
# Extract specific days for readability
|
|
|
|
| 244 |
{mo.as_html(degradation_table.to_pandas())}
|
| 245 |
|
| 246 |
**Key Observations**:
|
| 247 |
+
- D+1 baseline: {degradation_d1_mae:.1f} MW
|
| 248 |
- D+2 degradation: {((degradation_d2_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%
|
| 249 |
+
- D+14 final: {degradation_d14_mae:.1f} MW (+{((degradation_d14_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%)
|
| 250 |
+
- Largest jump: D+8 at {degradation_d8_mae:.1f} MW (investigate cause)
|
| 251 |
""")
|
| 252 |
return
|
| 253 |
|
|
|
|
| 271 |
# Reshape data for heatmap (unpivot daily MAE columns)
|
| 272 |
heatmap_data = eval_df.select(['border'] + [f'mae_d{i}' for i in range(1, 15)])
|
| 273 |
|
| 274 |
+
# Unpivot to long format (already rounded in eval_df)
|
| 275 |
heatmap_long = heatmap_data.unpivot(
|
| 276 |
index='border',
|
| 277 |
on=[f'mae_d{i}' for i in range(1, 15)],
|
| 278 |
variable_name='day',
|
| 279 |
value_name='mae'
|
| 280 |
).with_columns([
|
| 281 |
+
pl.col('day').str.replace('mae_d', '').cast(pl.Int32),
|
| 282 |
+
pl.col('mae').round(1) # Ensure rounding for display
|
| 283 |
])
|
| 284 |
|
| 285 |
heatmap_long.head()
|
|
|
|
| 322 |
|
| 323 |
@app.cell
|
| 324 |
def _(eval_df, pl):
|
| 325 |
+
# Identify outliers (rounded for readability)
|
| 326 |
+
outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True).with_columns([
|
| 327 |
+
pl.col('mae_d1').round(1),
|
| 328 |
+
pl.col('mae_d2').round(1),
|
| 329 |
+
pl.col('mae_d7').round(1),
|
| 330 |
+
pl.col('mae_d14').round(1),
|
| 331 |
+
pl.col('mae_overall').round(1),
|
| 332 |
+
pl.col('rmse_overall').round(1)
|
| 333 |
+
])
|
| 334 |
|
| 335 |
outliers.select(['border', 'mae_d1', 'mae_d2', 'mae_d7', 'mae_d14', 'mae_overall', 'rmse_overall'])
|
| 336 |
return (outliers,)
|
|
|
|
| 439 |
alt.value('#e74c3c'),
|
| 440 |
alt.value('#3498db')
|
| 441 |
),
|
| 442 |
+
tooltip=[
|
| 443 |
+
alt.Tooltip('border:N', title='Border'),
|
| 444 |
+
alt.Tooltip('mae_d1:Q', title='D+1 MAE (MW)', format='.1f'),
|
| 445 |
+
alt.Tooltip('mae_overall:Q', title='Overall MAE (MW)', format='.1f')
|
| 446 |
+
]
|
| 447 |
).properties(
|
| 448 |
width=600,
|
| 449 |
height=400,
|