mmmapms committed on
Commit
2c9db99
1 Parent(s): 3b1c0d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -62
app.py CHANGED
@@ -4,33 +4,30 @@ import numpy as np
4
  import plotly.graph_objs as go
5
  from io import BytesIO
6
 
7
- # Function to convert df to csv for download
8
- def convert_df_to_csv(df):
9
- return df.to_csv(index=False).encode('utf-8')
10
 
11
- # Load your data
12
- @st.cache_data
13
- def load_data_elia():
14
- df = pd.read_csv('DATA_ELIA.csv')
15
- df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
16
- return df
17
-
18
- # Caching data loading for Predictions.csv
19
  @st.cache_data
20
  def load_data_predictions():
21
  df = pd.read_csv('Predictions.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
23
- df_filtered = df.dropna(subset=['Price'])
24
  return df, df_filtered
25
 
26
- # Load your data
27
- df_input = load_data_elia()
28
  df, df_filtered = load_data_predictions()
29
 
30
- # Determine the first and last date
31
- min_date_allowed = df_input['Date'].min().date()
32
- max_date_allowed = df_input['Date'].max().date()
33
-
34
  min_date_allowed_pred = df_filtered['Date'].min().date()
35
  max_date_allowed_pred = df_filtered['Date'].max().date()
36
 
@@ -43,16 +40,14 @@ st.title("Belgium: Electricity Price Forecasting")
43
  with st.sidebar:
44
  st.write("### Variables Selection for Graph")
45
  st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
46
- selected_variables = st.multiselect("Select variables to display:", options=['Price', 'DNN', 'LEAR', 'Persis'], default=['Price', 'DNN', 'LEAR', 'Persis'])
47
-
 
 
48
  st.write("### Date Range for Metrics Calculation")
49
  st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
50
  start_date_pred, end_date_pred = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
51
 
52
- st.write("### Model Selection for Scatter Plot")
53
- model_selection = st.selectbox("Select which model's predictions to display:", options=['DNN', 'LEAR', 'Persistence'], index=0) # Default to 'DNN'
54
-
55
-
56
  # Main content
57
  if not selected_variables:
58
  st.warning("Please select at least one variable to display.")
@@ -71,7 +66,7 @@ else:
71
  }
72
 
73
  for variable in selected_variables:
74
- fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable_labels[variable]))
75
 
76
  fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
77
  st.plotly_chart(fig, use_container_width=True)
@@ -87,65 +82,97 @@ else:
87
  plot_df = df[(df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))]
88
 
89
  model_column = model_selection
90
- if model_selection == 'Persistence':
91
- model_column = 'Persis' # Assuming the DataFrame uses 'Persis' as the column name
92
 
93
  # Create the scatter plot
94
  fig = go.Figure()
95
- fig.add_trace(go.Scatter(x=plot_df['Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
96
 
97
  # Calculate the line of best fit
98
- m, b = np.polyfit(plot_df['Price'], plot_df[model_column], 1)
99
  # Calculate the y-values based on the line of best fit
100
- regression_line = m * plot_df['Price'] + b
101
 
102
  # Format the equation to display as the legend name
103
  equation = f"y = {m:.2f}x + {b:.2f}"
104
 
105
  # Add the line of best fit to the figure with the equation as the legend name
106
- fig.add_trace(go.Scatter(x=plot_df['Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))
107
 
108
  # Update layout with appropriate titles
109
- fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions")
110
  st.plotly_chart(fig, use_container_width=True)
111
 
112
 
113
  # Calculating and displaying metrics
114
  if start_date_pred and end_date_pred:
115
  st.header("Accuracy Metrics")
116
- st.write("Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the Persistence, DNN and LEAR models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")
 
117
  filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] <= pd.Timestamp(end_date_pred))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- # Here you would calculate your metrics based on filtered_df
120
- # For demonstration, let's assume these are your metrics
121
- p_real = filtered_df['Price']
122
- p_pred_dnn = filtered_df['DNN']
123
- p_pred_lear = filtered_df['LEAR']
124
- p_pred_persis = filtered_df['Persis']
125
-
126
- # Recalculate the metrics
127
- mae_dnn = np.mean(np.abs(p_real - p_pred_dnn))
128
- smape_dnn = 100 * np.mean(np.abs(p_real - p_pred_dnn) / ((np.abs(p_real) + np.abs(p_pred_dnn)) / 2))
129
- rmse_dnn = np.sqrt(np.mean((p_real - p_pred_dnn) ** 2))
130
-
131
-
132
- mae_lear = np.mean(np.abs(p_real - p_pred_lear))
133
- smape_lear = 100 * np.mean(np.abs(p_real - p_pred_lear) / ((np.abs(p_real) + np.abs(p_pred_lear)) / 2))
134
- rmse_lear = np.sqrt(np.mean((p_real - p_pred_lear) ** 2))
135
-
136
-
137
- mae_persis = np.mean(np.abs(p_real - p_pred_persis))
138
- smape_persis = 100 * np.mean(np.abs(p_real - p_pred_persis) / ((np.abs(p_real) + np.abs(p_pred_persis)) / 2))
139
- rmse_persis = np.sqrt(np.mean((p_real - p_pred_persis) ** 2))
140
-
141
 
142
- new_metrics_df = pd.DataFrame({
143
- 'Metric': ['MAE', 'SMAPE', 'RMSE'],
144
- 'Persistence': [f"{mae_persis:.2f}", f"{smape_persis:.2f}%", f"{rmse_persis:.2f}"],
145
- 'DNN': [f"{mae_dnn:.2f}", f"{smape_dnn:.2f}%", f"{rmse_dnn:.2f}"],
146
- 'LEAR': [f"{mae_lear:.2f}", f"{smape_lear:.2f}%", f"{rmse_lear:.2f}"]
147
- })
148
- st.dataframe(new_metrics_df, hide_index=True)
149
 
150
  # Download Predictions Button
151
  st.write("## Access Predictions")
 
4
  import plotly.graph_objs as go
5
  from io import BytesIO
6
 
 
 
 
7
 
 
 
 
 
 
 
 
 
8
  @st.cache_data
9
  def load_data_predictions():
10
  df = pd.read_csv('Predictions.csv')
11
+ df = df.rename(columns={
12
+ 'Price': 'Real Price',
13
+ 'DNN1': 'Neural Network 1',
14
+ 'DNN2': 'Neural Network 2',
15
+ 'DNN3': 'Neural Network 3',
16
+ 'DNN4': 'Neural Network 4',
17
+ 'DNN_Ensemble': 'Neural Network Ensemble',
18
+ 'LEAR56': 'Regularized Linear Model CW56',
19
+ 'LEAR84': 'Regularized Linear Model CW84',
20
+ 'LEAR112': 'Regularized Linear Model CW112',
21
+ 'LEAR730': 'Regularized Linear Model CW730',
22
+ 'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
23
+ 'Persis': 'Persistence Model'
24
+ })
25
  df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
26
+ df_filtered = df.dropna(subset=['Real Price'])
27
  return df, df_filtered
28
 
 
 
29
  df, df_filtered = load_data_predictions()
30
 
 
 
 
 
31
  min_date_allowed_pred = df_filtered['Date'].min().date()
32
  max_date_allowed_pred = df_filtered['Date'].max().date()
33
 
 
40
  with st.sidebar:
41
  st.write("### Variables Selection for Graph")
42
  st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
43
+ selected_variables = st.multiselect("Select variables to display:", options=['Real Price', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble', 'Regularized Linear Model CW56', 'Regularized Linear Model CW84','Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble', 'Persistence Model'], default=['Real Price', 'Neural Network Ensemble', 'Regularized Linear Model Ensemble', 'Persistence Model'])
44
+ st.write("### Model Selection for Scatter Plot")
45
+ model_selection = st.selectbox("Select which model's predictions to display:", options=['Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble', 'Regularized Linear Model CW56', 'Regularized Linear Model CW84','Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble', 'Persistence Model'], index=0) # Default to 'Neural Network 1' (first option)
46
+
47
  st.write("### Date Range for Metrics Calculation")
48
  st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
49
  start_date_pred, end_date_pred = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
50
 
 
 
 
 
51
  # Main content
52
  if not selected_variables:
53
  st.warning("Please select at least one variable to display.")
 
66
  }
67
 
68
  for variable in selected_variables:
69
+ fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))
70
 
71
  fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
72
  st.plotly_chart(fig, use_container_width=True)
 
82
  plot_df = df[(df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))]
83
 
84
  model_column = model_selection
 
 
85
 
86
  # Create the scatter plot
87
  fig = go.Figure()
88
+ fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
89
 
90
  # Calculate the line of best fit
91
+ m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
92
  # Calculate the y-values based on the line of best fit
93
+ regression_line = m * plot_df['Real Price'] + b
94
 
95
  # Format the equation to display as the legend name
96
  equation = f"y = {m:.2f}x + {b:.2f}"
97
 
98
  # Add the line of best fit to the figure with the equation as the legend name
99
+ fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))
100
 
101
  # Update layout with appropriate titles
102
+ fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}")
103
  st.plotly_chart(fig, use_container_width=True)
104
 
105
 
106
  # Calculating and displaying metrics
107
  if start_date_pred and end_date_pred:
108
  st.header("Accuracy Metrics")
109
+ #st.write(f"The accuracy metrics are calculated from {start_date_pred} to {end_date_pred}; this interval can be changed in the sidebar.")
110
+ st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.. Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the selected models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")
111
  filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] <= pd.Timestamp(end_date_pred))]
112
+
113
+ # List of models for convenience
114
+ models = [
115
+ 'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble',
116
+ 'Regularized Linear Model CW56', 'Regularized Linear Model CW84', 'Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble',
117
+ 'Persistence Model'
118
+ ]
119
+
120
+ # Placeholder for results
121
+ results = {'Metric': ['MAE', 'sMAPE', 'RMSE', 'rMAE']}
122
+
123
+ p_real = filtered_df['Real Price']
124
+
125
+ # Iterate through each model to calculate and store metrics
126
+ for model in models:
127
+ # Assuming column names in filtered_df match the model names directly for simplicity
128
+ p_pred = filtered_df[model]
129
+
130
+ mae = np.mean(np.abs(p_real - p_pred))
131
+ smape = 100 * np.mean(np.abs(p_real - p_pred) / ((np.abs(p_real) + np.abs(p_pred)) / 2))
132
+ rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
133
+ rmae = mae/np.mean(np.abs(p_real - filtered_df['Persistence Model']))
134
+
135
+ # Store the results
136
+ results[model] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}", f"{rmae:.2f}"]
137
+
138
+ # Convert the results to a DataFrame for display
139
+ metrics_df = pd.DataFrame(results)
140
+
141
+ transposed_metrics_df = metrics_df.set_index('Metric').T
142
+ col1, col2 = st.columns([3, 2])
143
+
144
+ # Display the transposed DataFrame
145
+ with col1:
146
+ # Assuming 'transposed_metrics_df' is your final DataFrame with metrics
147
+ st.dataframe(transposed_metrics_df, hide_index=False)
148
+
149
+ with col2:
150
+ st.markdown("""
151
+ <style>
152
+ .big-font {
153
+ font-size: 20px;
154
+ font-weight: 500;
155
+ }
156
+ </style>
157
+ <div class="big-font">
158
+ Equations
159
+ </div>
160
+ """, unsafe_allow_html=True)
161
+
162
+ # Rendering LaTeX equations
163
+ st.markdown(r"""
164
+ $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
165
+
166
 
167
+ $\text{sMAPE} =100\frac{1}{n} \sum_{i=1}^{n} \frac{|y_i - \hat{y}_i|}{\left(|y_i| + |\hat{y}_i|\right)/2}$
168
+
169
+
170
+ $\text{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$
171
+
172
+
173
+ $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
174
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
 
 
 
 
 
 
 
176
 
177
  # Download Predictions Button
178
  st.write("## Access Predictions")