Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -129,6 +129,7 @@ upper_space.markdown("""
|
|
| 129 |
|
| 130 |
|
| 131 |
countries = {
|
|
|
|
| 132 |
'Netherlands': 'NL',
|
| 133 |
'Germany': 'DE',
|
| 134 |
'France': 'FR',
|
|
@@ -143,54 +144,55 @@ st.sidebar.caption("Choose the country for which you want to display data or for
|
|
| 143 |
|
| 144 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
| 145 |
|
|
|
|
| 146 |
|
| 147 |
-
st.sidebar.subheader("Select Date Range ")
|
| 148 |
-
st.sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
-
# Ensure the date range provides two dates
|
| 155 |
-
if len(date_range) == 2:
|
| 156 |
-
start_date = pd.Timestamp(date_range[0])
|
| 157 |
-
end_date = pd.Timestamp(date_range[1])
|
| 158 |
else:
|
| 159 |
-
|
| 160 |
-
st.stop()
|
| 161 |
-
|
| 162 |
-
st.sidebar.subheader("Section")
|
| 163 |
-
st.sidebar.caption("Select the type of information you want to explore.")
|
| 164 |
|
| 165 |
-
|
| 166 |
-
#
|
| 167 |
-
section =
|
| 168 |
-
|
| 169 |
-
country_code = countries[selected_country]
|
| 170 |
-
if country_code == 'BE':
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
elif country_code == 'DE':
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
elif country_code == 'NL':
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
elif country_code == 'FR':
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
|
| 195 |
def add_feature(df2, df_main):
|
| 196 |
#df_main.index = pd.to_datetime(df_main.index)
|
|
@@ -208,12 +210,7 @@ def add_feature(df2, df_main):
|
|
| 208 |
forecast_columns = [
|
| 209 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 210 |
|
| 211 |
-
if section == 'Data':
|
| 212 |
-
st.header("Data")
|
| 213 |
-
st.write("""
|
| 214 |
-
This section allows you to explore and upload your datasets.
|
| 215 |
-
You can visualize raw data, clean it, and prepare it for analysis.
|
| 216 |
-
""")
|
| 217 |
|
| 218 |
st.header('Data Quality')
|
| 219 |
|
|
@@ -282,17 +279,14 @@ if section == 'Data':
|
|
| 282 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
| 283 |
|
| 284 |
# Section 2: Forecasts
|
| 285 |
-
elif section == 'Forecasts':
|
| 286 |
|
| 287 |
st.header('Forecast Quality')
|
| 288 |
|
| 289 |
# Time series for last 1 week
|
| 290 |
-
st.subheader('Time Series: Last 1 Week')
|
| 291 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 292 |
-
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform
|
| 293 |
|
| 294 |
-
forecast_columns = [
|
| 295 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 296 |
num_per_var=2
|
| 297 |
|
| 298 |
forecast_columns_line=forecast_columns
|
|
@@ -308,259 +302,6 @@ elif section == 'Forecasts':
|
|
| 308 |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
|
| 309 |
|
| 310 |
st.plotly_chart(fig)
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
def plot_category(df_dict, category_prefix, title):
    """Overlay every model forecast for one category on a single figure.

    Parameters
    ----------
    df_dict : dict
        Maps file names shaped like ``Predictions_<H>h.csv`` to DataFrames of
        forecasts. ``<H>`` is parsed as an integer hour and used to shade the
        corresponding lines.
    category_prefix : str
        Only columns whose name starts with this prefix are drawn. The
        ``<category_prefix>_entsoe`` column of the module-level ``Data_BE``
        provides the observed series.
    title : str
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        One trace per matching forecast column, plus the observed series when
        it contains at least one non-null value.
    """
    fig = go.Figure()

    # Base colour of each model family.
    model_colors = {
        'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
        'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
        'Naive': '#ff7f0e',  # Orange
    }
    # Each model family gets exactly one legend entry.
    legend_added = dict.fromkeys(model_colors, False)

    last_df = None
    for file_name, df in df_dict.items():
        last_df = df
        # File names are expected to look like "Predictions_Xh.csv".
        hour = int(file_name.split('_')[1].replace('h.csv', ''))

        for column in df.columns:
            if not column.startswith(category_prefix):
                continue

            # Map the column onto a model family; skip anything unrecognised
            # so a stale key from a previous column is never reused.
            if 'LightGBMModel' in column:
                if 'Forecast_elia' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
                elif 'TimeCov' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp'
                else:
                    continue
            elif 'Naive' in column:
                model_key = 'Naive'
            else:
                continue

            # Everything after the first dot-separated part is the model name.
            model_name = '.'.join(column.split('.')[1:])
            base_color = model_colors[model_key]

            # Shade the base colour by hour so later horizons look lighter;
            # clamp so an unusually large hour cannot produce channels > 255.
            scale_factor = 0.3 + (hour / 40)
            red, green, blue = (
                min(255, int(channel * scale_factor))
                for channel in pc.hex_to_rgb(base_color)
            )
            line_color = f'rgba({red}, {green}, {blue}, 0.1)'

            show_legend = not legend_added[model_key]
            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[column],
                mode='lines',
                name=model_name if show_legend else None,
                # The legend-bearing trace is opaque, the others transparent.
                line=dict(color=base_color if show_legend else line_color),
                showlegend=show_legend,
                legendgroup=model_key,
            ))
            legend_added[model_key] = True

    # Observed values over the index of the last prediction frame.
    # NOTE(review): placement after the loop is inferred (the source lost its
    # indentation) -- confirm it was not meant to run once per file.
    if last_df is not None:
        observed = Data_BE.loc[last_df.index][f'{category_prefix}_entsoe']
        if observed.notna().any():
            fig.add_trace(go.Scatter(
                x=observed.index,
                y=observed,
                mode='lines',
                name=f'Actual {category_prefix}',
                line=dict(color='black', width=2),
                showlegend=True,
            ))

    fig.update_layout(
        title=dict(
            text=title,
            x=0,  # Left-aligned title (xanchor='left')
            y=1.00,
            xanchor='left',
            yanchor='top',
        ),
        xaxis_title='Date',
        yaxis_title='Value',
        legend=dict(
            orientation="h",  # Horizontal legend
            yanchor="bottom",
            y=1,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Both arguments must support element-wise subtraction (NumPy arrays,
    pandas Series, or scalars). For pandas Series the subtraction aligns on
    the index before the mean is taken.
    """
    return np.mean(np.abs(y_true - y_pred))
|
| 412 |
-
def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
    """Plot, per forecast hour, each model's MAE relative to the ENTSO-E MAE.

    Parameters
    ----------
    df_dict : dict
        Maps ``Predictions_<hour>h.csv`` (hour 0-23) to prediction DataFrames.
        Missing hours are skipped.
    category_prefix : str
        Variable name; ``'Load'`` selects the ``7D`` model names, anything
        else the ``1D`` ones. Prediction columns must start with
        ``<category_prefix>_entsoe``.
    title : str
        Accepted for interface compatibility; the figure title is built from
        ``category_prefix`` and this argument is not used.
    real_values_df : pandas.DataFrame
        Must provide ``<category_prefix>_entsoe`` (observations) and
        ``<category_prefix>_forecast_entsoe`` (ENTSO-E forecast).

    Returns
    -------
    plotly.graph_objects.Figure
        One ``markers+lines`` trace per model that has data for any hour.
    """
    horizon = '7D' if category_prefix == 'Load' else '1D'
    model_colors = {
        f'LightGBMModel.{horizon}.TimeCov.Temp.Forecast_elia': '#1F77B4',  # Blue
        f'LightGBMModel.{horizon}.TimeCov.Temp': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }

    fig = go.Figure()
    for model_key, base_color in model_colors.items():
        # A shorter key is a substring of a longer one, so a plain
        # "model_key in col" test would also match the longer model's column.
        longer_keys = [
            key for key in model_colors
            if key != model_key and model_key in key
        ]
        hours_with_data = []
        mae_ratios = []

        for hour in range(24):
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue

            # Drop the first and last calendar day present in the frame
            # (presumably because those days are only partially covered).
            if isinstance(df.index, pd.DatetimeIndex):
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                df = df[df.index.normalize() != first_day]
                df = df[df.index.normalize() != last_day]

            candidates = [
                col for col in df.columns
                if col.startswith(f"{category_prefix}_entsoe")
                and model_key in col
                and not any(key in col for key in longer_keys)
            ]
            if not candidates:
                continue

            # If several columns match, the first one is used.
            model_predictions = df[candidates[0]]
            actual_values = real_values_df[f'{category_prefix}_entsoe'].dropna()

            # Compare only timestamps present in both series.
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                continue
            aligned_actual = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual, model_predictions.loc[common_indices])
            entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
            entsoe_mae = calculate_mae(aligned_actual, entsoe_forecast)

            # A zero or undefined baseline would give an inf/NaN ratio.
            if not np.isfinite(entsoe_mae) or entsoe_mae == 0:
                continue

            mae_ratios.append(model_mae / entsoe_mae)
            hours_with_data.append(hour)

        if mae_ratios:
            fig.add_trace(go.Scatter(
                x=hours_with_data,
                y=mae_ratios,
                mode='markers+lines',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))

    fig.update_layout(
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
        xaxis_title='Hour of Forecast',
        yaxis_title='MAE Ratio (Model / entsoe)',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
    """Draw the per-hour MAE ratio (model / ENTSO-E) on a 24-hour polar "clock".

    Parameters
    ----------
    df_dict : dict
        Maps ``Predictions_<hour>h.csv`` (hour 0-23) to prediction DataFrames.
        Missing hours are skipped.
    category_prefix : str
        Variable name; prediction columns must start with
        ``<category_prefix>_entsoe``.
    title : str
        Accepted for interface compatibility; the figure title is built from
        ``category_prefix`` and this argument is not used.
    real_values_df : pandas.DataFrame
        Must provide ``<category_prefix>_entsoe`` (observations) and
        ``<category_prefix>_forecast_entsoe`` (ENTSO-E forecast).

    Returns
    -------
    plotly.graph_objects.Figure
        One closed ``Scatterpolar`` trace per model with data; each hour
        occupies 15 degrees.
    """
    hours = list(range(24))
    # The same palette applies to every category.
    model_colors = {
        'LightGBM_with_Forecast_elia': '#1F77B4',  # Blue
        'LightGBM': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }

    fig = go.Figure()
    all_ratios = []  # ratios of every model, used to size the radial axis

    for model_key, base_color in model_colors.items():
        # 'LightGBM' is a substring of 'LightGBM_with_Forecast_elia'; exclude
        # columns that belong to the longer-named model.
        longer_keys = [
            key for key in model_colors
            if key != model_key and model_key in key
        ]
        hours_with_data = []
        mae_ratios = []

        for hour in hours:
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue

            # Drop the first and last calendar day present in the frame
            # (presumably because those days are only partially covered).
            if isinstance(df.index, pd.DatetimeIndex):
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                df = df[df.index.normalize() != first_day]
                df = df[df.index.normalize() != last_day]

            candidates = [
                col for col in df.columns
                if col.startswith(f"{category_prefix}_entsoe")
                and model_key in col
                and not any(key in col for key in longer_keys)
            ]
            if not candidates:
                print(f"No matching columns for {model_key} at hour {hour}. Skipping...")
                continue

            model_predictions = df[candidates[0]]
            actual_values = real_values_df[f'{category_prefix}_entsoe'].dropna()

            # Compare only timestamps present in both series.
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                continue
            aligned_actual = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual, model_predictions.loc[common_indices])
            entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
            entsoe_mae = calculate_mae(aligned_actual, entsoe_forecast)

            # A zero or undefined baseline would give an inf/NaN ratio.
            if not np.isfinite(entsoe_mae) or entsoe_mae == 0:
                continue

            mae_ratios.append(model_mae / entsoe_mae)
            hours_with_data.append(hour)

        if mae_ratios:
            all_ratios.extend(mae_ratios)
            # Close the curve by repeating the first point: same radius AND
            # same angle, even when the first hour with data is not 0.
            fig.add_trace(go.Scatterpolar(
                r=mae_ratios + [mae_ratios[0]],
                theta=[h * 15 for h in hours_with_data] + [hours_with_data[0] * 15],
                mode='lines+markers',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))
        else:
            print(f"No data to plot for {model_key}.")  # Debugging print

    # Size the radial axis from every plotted model, never below 1.0.
    radial_range = [0, max(max(all_ratios), 1.0) * 1.1] if all_ratios else [0, 1.0]
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=radial_range),
            angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours),
        ),
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
        showlegend=True,
    )

    return fig
|
| 564 |
|
| 565 |
|
| 566 |
# Scatter plots for error distribution
|
|
@@ -637,10 +378,8 @@ elif section == 'Forecasts':
|
|
| 637 |
|
| 638 |
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
|
| 639 |
|
| 640 |
-
|
| 641 |
""")
|
| 642 |
|
| 643 |
-
|
| 644 |
|
| 645 |
st.subheader('ACF plots of Errors')
|
| 646 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
@@ -669,44 +408,151 @@ elif section == 'Insights':
|
|
| 669 |
""")
|
| 670 |
|
| 671 |
# Scatter plots for correlation between wind, solar, and load
|
| 672 |
-
st.subheader('Correlation between Wind, Solar, and
|
| 673 |
-
st.write('The below scatter plots are made for checking whether there exists a correlation between
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
-
|
| 698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
if weather_col in data.columns and actual_col in data.columns:
|
| 703 |
-
clean_label = actual_col.replace('_entsoe', '')
|
| 704 |
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
fig.update_layout(title=f'{weather_col} vs {actual_col}')
|
| 710 |
-
st.plotly_chart(fig)
|
| 711 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
countries = {
|
| 132 |
+
'Overall': 'Overall',
|
| 133 |
'Netherlands': 'NL',
|
| 134 |
'Germany': 'DE',
|
| 135 |
'France': 'FR',
|
|
|
|
| 144 |
|
| 145 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
| 146 |
|
| 147 |
+
# Ensure the date range provides two dates
|
| 148 |
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
# Sidebar with radio buttons for different sections.
# The section picker and the date range are only offered for a single country;
# the "Overall" view has no sections.
if selected_country != 'Overall':
    st.sidebar.subheader("Section")
    st.sidebar.caption("Select the type of information you want to explore.")
    section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
    date_range = st.sidebar.date_input(
        "Select Date Range for Metrics Calculation:",
        value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))),
    )
    # Continue only once both ends of the range have been provided.
    if len(date_range) == 2:
        start_date = pd.Timestamp(date_range[0])
        end_date = pd.Timestamp(date_range[1])
    else:
        st.error("Please select a valid date range.")
        st.stop()
else:
    section = None  # No section is shown when "Overall" is selected

# Pick the dataset for the selected country and expose its weather columns
# under uniform display names.
if selected_country == 'Overall':
    data = None  # You can set data to None or a specific dataset based on your logic
    section = None  # No section selected when "Overall" is chosen
else:
    country_code = countries[selected_country]
    if country_code == 'BE':
        # Belgium uses separate weather columns for onshore and offshore wind.
        data = Data_BE
        weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
        data['Temperature'] = data['temperature_2m_8']
        data['Wind Speed Offshore'] = data['wind_speed_100m_4']
        data['Wind Speed Onshore'] = data['wind_speed_100m_8']
    elif country_code in ('DE', 'NL', 'FR'):
        # These three countries share the same weather-column layout.
        data = {'DE': Data_DE, 'NL': Data_NL, 'FR': Data_FR}[country_code]
        weather_columns = ['Temperature', 'Wind Speed']
        data['Temperature'] = data['temperature_2m']
        data['Wind Speed'] = data['wind_speed_100m']
    else:
        # Fail visibly instead of leaving `data` undefined for later sections.
        st.error(f"No dataset is configured for {selected_country}.")
        st.stop()
|
| 196 |
|
| 197 |
def add_feature(df2, df_main):
|
| 198 |
#df_main.index = pd.to_datetime(df_main.index)
|
|
|
|
| 210 |
forecast_columns = [
|
| 211 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 212 |
|
| 213 |
+
if section == 'Data Quality':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
st.header('Data Quality')
|
| 216 |
|
|
|
|
| 279 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
| 280 |
|
| 281 |
# Section 2: Forecasts
|
| 282 |
+
elif section == 'Forecasts Quality':
|
| 283 |
|
| 284 |
st.header('Forecast Quality')
|
| 285 |
|
| 286 |
# Time series for last 1 week
|
|
|
|
| 287 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 288 |
+
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
| 289 |
|
|
|
|
|
|
|
| 290 |
num_per_var=2
|
| 291 |
|
| 292 |
forecast_columns_line=forecast_columns
|
|
|
|
| 302 |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
|
| 303 |
|
| 304 |
st.plotly_chart(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
|
| 307 |
# Scatter plots for error distribution
|
|
|
|
| 378 |
|
| 379 |
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
|
| 380 |
|
|
|
|
| 381 |
""")
|
| 382 |
|
|
|
|
| 383 |
|
| 384 |
st.subheader('ACF plots of Errors')
|
| 385 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
|
|
| 408 |
""")
|
| 409 |
|
| 410 |
# Scatter plots for correlation between wind, solar, and load
|
| 411 |
+
st.subheader('Correlation between Wind, Solar, Load and Weather Features')
|
| 412 |
+
st.write('The below scatter plots are made for checking whether there exists a correlation between the data fields obtained: Solar, Wind, Load and Weather Features.')
|
| 413 |
+
|
| 414 |
+
selected_columns=['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
| 415 |
+
selected_df=data[selected_columns]
|
| 416 |
+
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
| 417 |
+
selected_df = selected_df.dropna()
|
| 418 |
+
print(selected_df)
|
| 419 |
+
sns.set_theme(style="ticks")
|
| 420 |
+
pairplot_fig = sns.pairplot(selected_df)
|
| 421 |
+
|
| 422 |
+
# Display the pairplot in Streamlit
|
| 423 |
+
st.pyplot(pairplot_fig)
|
| 424 |
+
|
| 425 |
+
elif selected_country == 'Overall':
|
| 426 |
+
st.subheader("Net Load Error Map")
|
| 427 |
+
st.write("""
|
| 428 |
+
The net load error map highlights the error in the forecasted versus actual net load for each country.
|
| 429 |
+
Hover over each country to see details on the latest net load error and the timestamp of the last recorded data.
|
| 430 |
+
""")
|
| 431 |
+
|
| 432 |
+
def plot_net_load_error_map(data_dict):
    """Render a Folium choropleth of the latest net-load forecast error per country.

    Net load is ``Load - Wind_onshore - Wind_offshore - Solar``. The error is
    observed net load minus forecast net load, taken at the most recent row
    in which every column of the module-level ``forecast_columns`` is
    non-null.

    Parameters
    ----------
    data_dict : dict
        Maps full country names (as spelled in the world-countries GeoJSON)
        to DataFrames containing the ``forecast_columns``.

    Raises
    ------
    requests.RequestException
        If the GeoJSON download fails, times out, or returns an HTTP error.
    """

    def calculate_net_load_error(df):
        """Return ``(error, timestamp_string)`` for the latest complete row, or None."""
        filter_df = df[forecast_columns].dropna()
        if filter_df.empty:
            return None
        latest = filter_df.iloc[-1]
        net_load = (
            latest['Load_entsoe'] - latest['Wind_onshore_entsoe']
            - latest['Wind_offshore_entsoe'] - latest['Solar_entsoe']
        )
        net_load_forecast = (
            latest['Load_forecast_entsoe'] - latest['Wind_onshore_forecast_entsoe']
            - latest['Wind_offshore_forecast_entsoe'] - latest['Solar_forecast_entsoe']
        )
        date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")
        return net_load - net_load_forecast, date

    # Latest error and timestamp per country; countries without a single
    # complete row are left out rather than crashing on .iloc[-1].
    net_load_errors = {}
    for country_name, country_df in data_dict.items():
        result = calculate_net_load_error(country_df)
        if result is not None:
            net_load_errors[country_name] = result

    if not net_load_errors:
        st.warning("No complete net load data is available to display on the map.")
        return

    df_net_load_error = pd.DataFrame({
        'country': list(net_load_errors.keys()),
        'net_load_error': [value[0] for value in net_load_errors.values()],
        'date': [value[1] for value in net_load_errors.values()],
    })

    # Load the country outlines. A timeout keeps an unresponsive host from
    # hanging the app, and HTTP errors surface instead of failing in .json().
    geojson_url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json"
    response = requests.get(geojson_url, timeout=30)
    response.raise_for_status()
    geo_data = response.json()

    # Keep only countries that have data, and attach error and date to each
    # feature so every tooltip field exists on every feature.
    filtered_geojson = {
        "type": "FeatureCollection",
        "features": [
            feature for feature in geo_data["features"]
            if feature["properties"]["name"] in net_load_errors
        ],
    }
    for feature in filtered_geojson["features"]:
        error, date = net_load_errors[feature["properties"]["name"]]
        feature["properties"]["net_load_error"] = float(error)
        feature["properties"]["date"] = date

    # Map centred on Central Europe.
    m = folium.Map(location=[51, 10], zoom_start=5, tiles="cartodb positron")

    # Colour each country by its net load error.
    folium.Choropleth(
        geo_data=filtered_geojson,
        name="choropleth",
        data=df_net_load_error,
        columns=["country", "net_load_error"],
        key_on="feature.properties.name",
        fill_color="RdYlBu",
        fill_opacity=0.7,
        line_opacity=0.5,
        line_color="black",
        legend_name="Net Load Error",
    ).add_to(m)

    # Invisible overlay whose only job is the hover tooltip.
    folium.GeoJson(
        filtered_geojson,
        style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
        tooltip=folium.GeoJsonTooltip(
            fields=["name", "net_load_error", "date"],
            aliases=["Country:", "Net Load Error:", "Date:"],
            localize=True,
        ),
    ).add_to(m)

    # Display Folium map in Streamlit
    st_folium(m, width=700, height=600)
|
| 502 |
|
| 503 |
+
# Data dictionary with full country names
|
| 504 |
+
data_dict = {
|
| 505 |
+
'Belgium': Data_BE,
|
| 506 |
+
'France': Data_FR,
|
| 507 |
+
'Germany': Data_DE,
|
| 508 |
+
'Netherlands': Data_NL
|
| 509 |
+
}
|
| 510 |
|
| 511 |
+
# Call the function to plot the map
|
| 512 |
+
plot_net_load_error_map(data_dict)
|
|
|
|
|
|
|
| 513 |
|
| 514 |
+
st.subheader("rMAE of Forecasts published on ENTSO-E TP")
|
| 515 |
+
st.write("""
|
| 516 |
+
The radar chart below compares the forecast accuracy across Load, Onshore Wind, Offshore Wind, and Solar for each country.
|
| 517 |
+
""")
|
|
|
|
|
|
|
| 518 |
|
| 519 |
+
def calculate_mae(actual, forecast):
    """Return the mean absolute error between ``actual`` and ``forecast``.

    Both arguments must support element-wise subtraction (NumPy arrays,
    pandas Series, or scalars). For pandas Series the subtraction aligns on
    the index before the mean is taken.
    """
    return np.mean(np.abs(actual - forecast))
|
| 521 |
+
|
| 522 |
+
# Function to calculate persistence MAE
|
| 523 |
+
def calculate_persistence_mae(data, shift_hours):
    """Return the MAE of a persistence forecast built by shifting ``data``.

    The forecast for each row is the value ``shift_hours`` rows earlier
    (``data.shift``). Rows without a shifted counterpart yield NaN
    differences.

    NOTE(review): the shift counts rows, not clock time, so it equals
    ``shift_hours`` hours only if ``data`` is hourly with no missing rows --
    confirm against the callers, which drop NaN rows first.
    """
    return np.mean(np.abs(data - data.shift(shift_hours)))
|
| 525 |
+
|
| 526 |
+
# Function to calculate rMAE for each country
|
| 527 |
+
def calculate_rmae_for_country(df):
    """Return the relative MAE of the ENTSO-E forecasts for one country.

    For each variable the forecast MAE is divided by the MAE of a persistence
    baseline: a 168-row shift for Load and a 24-row shift for the wind and
    solar series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``<var>_entsoe`` and ``<var>_forecast_entsoe`` for
        ``Load``, ``Wind_onshore``, ``Wind_offshore`` and ``Solar``.

    Returns
    -------
    dict
        ``{variable: rMAE}``; NaN where the baseline MAE is zero or undefined
        (for example a series that never changes).
    """
    # Persistence shift (in rows) used as the baseline for each variable.
    persistence_shift = {
        'Load': 168,
        'Wind_onshore': 24,
        'Wind_offshore': 24,
        'Solar': 24,
    }

    rmae = {}
    for variable, shift in persistence_shift.items():
        actual = df[f'{variable}_entsoe']
        forecast_mae = calculate_mae(actual, df[f'{variable}_forecast_entsoe'])
        baseline_mae = calculate_persistence_mae(actual, shift)
        # Avoid inf (and divide-by-zero warnings) on a degenerate baseline.
        if np.isfinite(baseline_mae) and baseline_mae != 0:
            rmae[variable] = forecast_mae / baseline_mae
        else:
            rmae[variable] = np.nan
    return rmae
|
| 534 |
+
|
| 535 |
+
# Function to create rMAE DataFrame
|
| 536 |
+
def create_rmae_dataframe(data_dict):
    """Build one row of rMAE values per country.

    Parameters
    ----------
    data_dict : dict
        Maps country names to DataFrames containing the module-level
        ``forecast_columns``. Rows with a NaN in any of those columns are
        dropped before the metrics are computed.

    Returns
    -------
    pandas.DataFrame
        Columns ``Country``, ``Load``, ``Wind_onshore``, ``Wind_offshore``
        and ``Solar``, in that order; empty (with those columns) when
        ``data_dict`` is empty.
    """
    columns = ['Country', 'Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
    records = []
    for country_name, df in data_dict.items():
        complete_rows = df[forecast_columns].dropna()
        records.append({'Country': country_name, **calculate_rmae_for_country(complete_rows)})
    # Explicit columns keep the layout stable even with no records.
    return pd.DataFrame(records, columns=columns)
|
| 545 |
+
|
| 546 |
+
# Function to plot radar chart
|
| 547 |
+
def plot_rmae_radar_chart(rmae_df):
    """Render a radar chart with one filled polygon per country.

    Parameters
    ----------
    rmae_df : pandas.DataFrame
        Needs a ``Country`` column plus ``Load``, ``Wind_onshore``,
        ``Wind_offshore`` and ``Solar`` rMAE columns.

    The figure is displayed with ``st.plotly_chart``; nothing is returned.
    """
    fig = go.Figure()
    angles = ['Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
    # Repeat the first axis so each outline is drawn closed, not just filled.
    closed_angles = angles + angles[:1]

    for _, row in rmae_df.iterrows():
        fig.add_trace(go.Scatterpolar(
            r=[row[angle] for angle in closed_angles],
            theta=closed_angles,
            fill='toself',
            name=row['Country'],
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 2])),
        showlegend=True,
        title="rMAE Radar Chart by Country",
    )
    st.plotly_chart(fig)
|
| 554 |
+
|
| 555 |
+
# Main execution to create and display radar plot
|
| 556 |
+
rmae_df = create_rmae_dataframe(data_dict)
|
| 557 |
+
plot_rmae_radar_chart(rmae_df)
|
| 558 |
|