Spaces:
Running
Running
File size: 9,297 Bytes
e67fcfa c221e61 e67fcfa deb692e 9e2e619 deec0a3 3a10adc b632f83 9e2e619 c58f85c 9e2e619 e67fcfa 7097d3e e67fcfa 7097d3e 2c9db99 7097d3e 2c9db99 e67fcfa 3b1c0d9 e67fcfa 2c9db99 e67fcfa 9efed08 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 e67fcfa 2c9db99 7097d3e e67fcfa 2c9db99 7097d3e 2c9db99 e67fcfa 2c9db99 3b1c0d9 7097d3e 3b1c0d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from io import BytesIO
from datasets import load_dataset
@st.cache_data
def load_data_predictions():
    """Load the forecast dataset and prepare it for display.

    Fetches the ``mmmapms/Forecasts`` dataset through ``load_dataset``,
    converts it to a pandas DataFrame, renames the raw model columns to
    human-readable names and parses the ``Date`` column (day-first).

    Returns:
        A ``(df, df_filtered)`` tuple. ``df`` holds every row, while
        ``df_filtered`` keeps only the rows that have a ``Real Price``.
    """
    dataset = load_dataset("mmmapms/Forecasts")
    # Without an explicit ``split`` argument, load_dataset() returns a
    # DatasetDict (a dict of split name -> Dataset), which has no pandas-style
    # ``rename``. Pick a single split and convert it to a DataFrame first.
    if isinstance(dataset, dict):
        # NOTE(review): assumes the data lives in the default "train" split;
        # falls back to the first available split otherwise.
        dataset = dataset["train"] if "train" in dataset else next(iter(dataset.values()))
    df = dataset.to_pandas()

    df = df.rename(columns={
        'Price': 'Real Price',
        'DNN1': 'Neural Network 1',
        'DNN2': 'Neural Network 2',
        'DNN3': 'Neural Network 3',
        'DNN4': 'Neural Network 4',
        'DNN_Ensemble': 'Neural Network Ensemble',
        'LEAR56': 'Regularized Linear Model 1',
        'LEAR84': 'Regularized Linear Model 2',
        'LEAR112': 'Regularized Linear Model 3',
        'LEAR730': 'Regularized Linear Model 4',
        'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
        'Persis': 'Persistence Model',
        'Hybrid_Ensemble': 'Hybrid Ensemble',
    })
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

    # Rows without a real price cannot be used for accuracy metrics, so a
    # second frame without them is returned alongside the full one.
    df_filtered = df.dropna(subset=['Real Price'])
    return df, df_filtered
# Load the forecasts once (the loader is cached by Streamlit) and derive the
# date bounds used by the rest of the page.
df, df_filtered = load_data_predictions()

# Dates for which a real price exists bound the metrics date picker.
_observed_dates = df_filtered['Date']
min_date_allowed_pred = _observed_dates.min().date()
max_date_allowed_pred = _observed_dates.max().date()

# The overview chart starts seven days before the last date in the full data.
end_date = df['Date'].max().date()
start_date = end_date - pd.Timedelta(days=7)

# Individual (non-ensemble) models shown in the correlation matrix:
# "Neural Network 1..4" followed by "Regularized Linear Model 1..4".
models_corr_matrix = [
    f'{family} {number}'
    for family in ('Neural Network', 'Regularized Linear Model')
    for number in range(1, 5)
]
st.title("Belgium: Electricity Price Forecasting")

# Single source of truth for the model names offered by the sidebar widgets,
# so the multiselect and the selectbox cannot drift apart.
model_options = [
    'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
    'Neural Network Ensemble',
    'Regularized Linear Model 1', 'Regularized Linear Model 2',
    'Regularized Linear Model 3', 'Regularized Linear Model 4',
    'Regularized Linear Model Ensemble',
    'Hybrid Ensemble', 'Persistence Model',
]

# Sidebar for inputs
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect(
        "Select variables to display:",
        options=['Real Price'] + model_options,
        default=['Real Price', 'Neural Network Ensemble', 'Regularized Linear Model Ensemble', 'Persistence Model'],
    )

    st.write("### Model Selection for Scatter Plot")
    model_selection = st.selectbox(
        "Select which model's predictions to display:",
        options=model_options,
        # Default to the hybrid ensemble, looked up by name instead of a
        # positional magic number so reordering the list cannot change it.
        index=model_options.index('Hybrid Ensemble'),
    )

    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    date_selection = st.date_input(
        "Select Date Range for Metrics Calculation:",
        [min_date_allowed_pred, max_date_allowed_pred],
    )
    # While the user is part-way through picking a range, date_input returns
    # fewer than two dates; unpacking that directly raises ValueError. Fall
    # back to None so the metrics section is skipped until the range is
    # complete.
    if isinstance(date_selection, (tuple, list)) and len(date_selection) == 2:
        start_date_pred, end_date_pred = date_selection
    else:
        start_date_pred = end_date_pred = None
        st.info("Select both a start and an end date to compute the accuracy metrics.")
# Main content: line chart of the selected series from start_date onwards.
if selected_variables:
    st.write("## Belgian Day-Ahead Electricity Prices")

    # Only a lower bound is applied; every row on or after start_date is kept.
    recent_df = df.loc[df['Date'] >= pd.Timestamp(start_date)]

    # One line trace per selected column, in the order the user selected them.
    line_traces = [
        go.Scatter(x=recent_df['Date'], y=recent_df[column], mode='lines', name=column)
        for column in selected_variables
    ]
    fig = go.Figure(data=line_traces)
    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)

    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. It incorporates predictions from three distinct models: a Neural Network, a Regularized Linear Model, and Persistence, alongside the actual electricity prices up until today.")
else:
    st.warning("Please select at least one variable to display.")
# Scatter plot of the real price against the selected model's predictions,
# with a least-squares line of best fit.
# The "select at least one variable" warning is already emitted by the chart
# section above, so it is not repeated here; the section is simply skipped.
if selected_variables:
    st.write("## Scatter Plot: Real Price vs Model Predictions")
    model_column = model_selection

    # Cover the whole period for which real prices exist. The upper bound is
    # exclusive at the midnight *after* the last day so that, if 'Date'
    # carries a time of day, the final day is not cut down to its first
    # timestamp.
    range_start = pd.Timestamp(min_date_allowed_pred)
    range_end = pd.Timestamp(max_date_allowed_pred) + pd.Timedelta(days=1)
    in_range = (df['Date'] >= range_start) & (df['Date'] < range_end)

    # np.polyfit cannot cope with missing values, so drop rows lacking either
    # the real price or the model's prediction.
    plot_df = df.loc[in_range].dropna(subset=['Real Price', model_column])

    # Create the scatter plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))

    # A straight-line fit needs at least two distinct x values; otherwise the
    # scatter is shown without a regression line.
    if plot_df['Real Price'].nunique() >= 2:
        # Calculate the line of best fit (slope m, intercept b)
        m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
        regression_line = m * plot_df['Real Price'] + b
        # The fitted equation doubles as the legend entry for the line
        equation = f"y = {m:.2f}x + {b:.2f}"
        fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))

    # Update layout with appropriate titles
    fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}")
    st.plotly_chart(fig, use_container_width=True)
# Calculating and displaying metrics (skipped until a complete date range is
# available).
if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar. Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the selected models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")

    # The selected end date is inclusive. Comparing against the midnight
    # *after* it keeps every timestamp of that day if 'Date' carries a time
    # of day (a plain `<= Timestamp(end_date)` would keep only 00:00).
    metrics_start = pd.Timestamp(start_date_pred)
    metrics_end = pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)
    filtered_df = df_filtered[(df_filtered['Date'] >= metrics_start) & (df_filtered['Date'] < metrics_end)]

    # List of models for convenience; names match the DataFrame columns.
    models = [
        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble',
        'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3', 'Regularized Linear Model 4', 'Regularized Linear Model Ensemble',
        'Persistence Model', 'Hybrid Ensemble'
    ]

    if filtered_df.empty:
        # Averaging over zero rows would only produce a table of NaNs.
        st.warning("No data available for the selected date range.")
    else:
        # One column per model, one row per metric (transposed for display).
        results = {'Metric': ['MAE', 'sMAPE', 'RMSE', 'rMAE']}
        p_real = filtered_df['Real Price']

        # rMAE is every model's MAE relative to the persistence model's MAE;
        # the denominator is the same for all models, so compute it once.
        persistence_mae = np.mean(np.abs(p_real - filtered_df['Persistence Model']))

        for model in models:
            p_pred = filtered_df[model]
            abs_error = np.abs(p_real - p_pred)
            mae = np.mean(abs_error)
            smape = 100 * np.mean(abs_error / ((np.abs(p_real) + np.abs(p_pred)) / 2))
            rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
            rmae = mae / persistence_mae
            # Store the results as display-ready strings
            results[model] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}", f"{rmae:.2f}"]

        # Convert the results to a DataFrame with models as rows
        metrics_df = pd.DataFrame(results)
        transposed_metrics_df = metrics_df.set_index('Metric').T

        col1, col2 = st.columns([3, 2])

        # Left column: the metrics table
        with col1:
            st.dataframe(transposed_metrics_df, hide_index=False)

        # Right column: the metric definitions.
        # The string bodies below are deliberately flush-left: they are
        # runtime Markdown/HTML, and leading indentation would change how
        # they are rendered.
        with col2:
            st.markdown("""
<style>
.big-font {
font-size: 20px;
font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
            # Rendering LaTeX equations
            st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{sMAPE} =100\frac{1}{n} \sum_{i=1}^{n} \frac{|y_i - \hat{y}_i|}{\left(|y_i| + |\hat{y}_i|\right)/2}$
$\text{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")
# Heatmap of the pairwise correlations between the individual models'
# forecasts, computed on the rows that have a real price.
st.write("## Correlation Matrix")

models_df = df_filtered.loc[:, models_corr_matrix]
corr_matrix = models_df.corr()

heatmap_trace = go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.index,
)
fig = go.Figure(data=heatmap_trace)
# Reverse the y-axis so the first model appears at the top of the heatmap.
fig.update_layout(yaxis_autorange='reversed')
st.plotly_chart(fig, use_container_width=True)
# Closing section: who to contact for access to the underlying predictions.
st.write("## Access Predictions")
st.write(
    "If you are interested in accessing the predictions made by the models, "
    "please contact Margarida Mascarenhas (KU Leuven PhD Student) at "
    "margarida.mascarenhas@kuleuven.be"
)
|