Spaces:
Sleeping
fix: CRITICAL timestamp bug causing 313 vs 336 hour mismatch + validation
Browse files
**ROOT CAUSE**: Forecast timestamps started at run_date + 1 hour instead of run_date + 1 day
- Forecasted: Sept 30 01:00 to Oct 14 00:00 (336 hours)
- Actuals: Oct 1 00:00 to Oct 14 23:00 (336 hours)
- Overlap: Oct 1 00:00 to Oct 14 00:00 (313 hours only)
- Missing: 23 hours of Oct 14 (01:00 to 23:00)
**FIX 1**: src/forecasting/chronos_inference.py:279
- Changed: timedelta(hours=1) → timedelta(days=1)
- Result: Forecast now covers Oct 1 00:00 to Oct 14 23:00 (correct 336 hours)
**FIX 2**: scripts/evaluate_october_2024.py - Add validation checks:
- Warn if n_hours != 336 (missing data)
- Flag MAE < 0.001 MW as suspicious (possible data leakage)
- Detect constant forecasts (MAE std < 0.01 across days)
This explains why 84% of borders showed invalid results:
- 13 borders: Zero MAE (need to re-evaluate with correct timestamps)
- 13 borders: Constant forecasts (model behavior issue)
- 6 Poland borders: Identical near-zero MAE (feature issue)
Previous evaluation INVALID - need to re-run with corrected timestamps
|
@@ -131,10 +131,18 @@ def main():
|
|
| 131 |
print(f" [{i:2d}/{len(borders)}] {border:15s} - SKIPPED (no valid data)")
|
| 132 |
continue
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# Calculate overall metrics
|
| 135 |
mae = (valid_data[forecast_col] - valid_data[target_col]).abs().mean()
|
| 136 |
rmse = ((valid_data[forecast_col] - valid_data[target_col])**2).mean()**0.5
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Calculate per-day MAE (D+1 through D+14)
|
| 139 |
per_day_mae = []
|
| 140 |
for day in range(1, 15):
|
|
@@ -165,6 +173,14 @@ def main():
|
|
| 165 |
day_num = day_idx + 1
|
| 166 |
result_dict[f'mae_d{day_num}'] = per_day_mae[day_idx] if len(per_day_mae) > day_idx else np.nan
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
results.append(result_dict)
|
| 169 |
|
| 170 |
# Status indicator
|
|
|
|
| 131 |
print(f" [{i:2d}/{len(borders)}] {border:15s} - SKIPPED (no valid data)")
|
| 132 |
continue
|
| 133 |
|
| 134 |
+
# Validation: Check for expected 336 hours (14 days × 24 hours)
|
| 135 |
+
if len(valid_data) != 336:
|
| 136 |
+
print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Only {len(valid_data)}/336 hours (missing {336-len(valid_data)} hours)")
|
| 137 |
+
|
| 138 |
# Calculate overall metrics
|
| 139 |
mae = (valid_data[forecast_col] - valid_data[target_col]).abs().mean()
|
| 140 |
rmse = ((valid_data[forecast_col] - valid_data[target_col])**2).mean()**0.5
|
| 141 |
|
| 142 |
+
# Validation: Flag suspicious MAE values
|
| 143 |
+
if mae < 0.001:
|
| 144 |
+
print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Suspiciously low MAE = {mae:.2e} MW (possible data leakage)")
|
| 145 |
+
|
| 146 |
# Calculate per-day MAE (D+1 through D+14)
|
| 147 |
per_day_mae = []
|
| 148 |
for day in range(1, 15):
|
|
|
|
| 173 |
day_num = day_idx + 1
|
| 174 |
result_dict[f'mae_d{day_num}'] = per_day_mae[day_idx] if len(per_day_mae) > day_idx else np.nan
|
| 175 |
|
| 176 |
+
# Validation: Check for constant forecasts (same MAE across all days)
|
| 177 |
+
valid_mae_values = [m for m in per_day_mae if not np.isnan(m)]
|
| 178 |
+
if len(valid_mae_values) >= 5: # Need at least 5 days to check
|
| 179 |
+
mae_std = np.std(valid_mae_values)
|
| 180 |
+
mae_mean = np.mean(valid_mae_values)
|
| 181 |
+
if mae_std < 0.01 and mae_mean > 0: # Essentially zero variation but non-zero MAE
|
| 182 |
+
print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Constant forecast detected (MAE std={mae_std:.2e})")
|
| 183 |
+
|
| 184 |
results.append(result_dict)
|
| 185 |
|
| 186 |
# Status indicator
|
|
@@ -276,7 +276,7 @@ class ChronosInferencePipeline:
|
|
| 276 |
"""
|
| 277 |
# Create forecast timestamps
|
| 278 |
run_datetime = datetime.strptime(results['run_date'], "%Y-%m-%d")
|
| 279 |
-
forecast_start = run_datetime + timedelta(hours=1)
|
| 280 |
forecast_hours = results['forecast_days'] * 24
|
| 281 |
|
| 282 |
timestamps = [
|
|
|
|
| 276 |
"""
|
| 277 |
# Create forecast timestamps
|
| 278 |
run_datetime = datetime.strptime(results['run_date'], "%Y-%m-%d")
|
| 279 |
+
forecast_start = run_datetime + timedelta(days=1) # Next day at midnight, not +1 hour
|
| 280 |
forecast_hours = results['forecast_days'] * 24
|
| 281 |
|
| 282 |
timestamps = [
|