Spaces:

EDS-lab
/

DAM-price-forecast

Running

App Files Files Community

mmmapms committed on Apr 15

Commit

2c9db99

•

1 Parent(s): 3b1c0d9

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -62

app.py CHANGED Viewed

@@ -4,33 +4,30 @@ import numpy as np
 import plotly.graph_objs as go
 from io import BytesIO
-# Function to convert df to csv for download
-def convert_df_to_csv(df):
-    return df.to_csv(index=False).encode('utf-8')
-# Load your data
-@st.cache_data
-def load_data_elia():
-    df = pd.read_csv('DATA_ELIA.csv')
-    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-    return df
-# Caching data loading for Predictions.csv
 @st.cache_data
 def load_data_predictions():
     df = pd.read_csv('Predictions.csv')
     df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-    df_filtered = df.dropna(subset=['Price'])
     return df, df_filtered
-# Load your data
-df_input = load_data_elia()
 df, df_filtered = load_data_predictions()
-# Determine the first and last date
-min_date_allowed = df_input['Date'].min().date()
-max_date_allowed = df_input['Date'].max().date()
 min_date_allowed_pred = df_filtered['Date'].min().date()
 max_date_allowed_pred = df_filtered['Date'].max().date()
@@ -43,16 +40,14 @@ st.title("Belgium: Electricity Price Forecasting")
 with st.sidebar:
     st.write("### Variables Selection for Graph")
     st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
-    selected_variables = st.multiselect("Select variables to display:", options=['Price', 'DNN', 'LEAR', 'Persis'], default=['Price', 'DNN', 'LEAR', 'Persis'])
     st.write("### Date Range for Metrics Calculation")
     st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
     start_date_pred, end_date_pred = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
-    st.write("### Model Selection for Scatter Plot")
-    model_selection = st.selectbox("Select which model's predictions to display:", options=['DNN', 'LEAR', 'Persistence'], index=0)  # Default to 'DNN'
 # Main content
 if not selected_variables:
     st.warning("Please select at least one variable to display.")
@@ -71,7 +66,7 @@ else:
     }
     for variable in selected_variables:
-        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable_labels[variable]))
     fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
     st.plotly_chart(fig, use_container_width=True)
@@ -87,65 +82,97 @@ else:
     plot_df = df[(df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))]
     model_column = model_selection
-    if model_selection == 'Persistence':
-        model_column = 'Persis'  # Assuming the DataFrame uses 'Persis' as the column name
     # Create the scatter plot
     fig = go.Figure()
-    fig.add_trace(go.Scatter(x=plot_df['Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
     # Calculate the line of best fit
-    m, b = np.polyfit(plot_df['Price'], plot_df[model_column], 1)
     # Calculate the y-values based on the line of best fit
-    regression_line = m * plot_df['Price'] + b
     # Format the equation to display as the legend name
     equation = f"y = {m:.2f}x + {b:.2f}"
     # Add the line of best fit to the figure with the equation as the legend name
-    fig.add_trace(go.Scatter(x=plot_df['Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))
     # Update layout with appropriate titles
-    fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions")
     st.plotly_chart(fig, use_container_width=True)
 # Calculating and displaying metrics
 if start_date_pred and end_date_pred:
     st.header("Accuracy Metrics")
-    st.write("Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the Persistence, DNN and LEAR models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")
     filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] <= pd.Timestamp(end_date_pred))]
-    # Here you would calculate your metrics based on filtered_df
-    # For demonstration, let's assume these are your metrics
-    p_real = filtered_df['Price']
-    p_pred_dnn = filtered_df['DNN']
-    p_pred_lear = filtered_df['LEAR']
-    p_pred_persis = filtered_df['Persis']
-    # Recalculate the metrics
-    mae_dnn = np.mean(np.abs(p_real - p_pred_dnn))
-    smape_dnn = 100 * np.mean(np.abs(p_real - p_pred_dnn) / ((np.abs(p_real) + np.abs(p_pred_dnn)) / 2))
-    rmse_dnn = np.sqrt(np.mean((p_real - p_pred_dnn) ** 2))
-    mae_lear = np.mean(np.abs(p_real - p_pred_lear))
-    smape_lear = 100 * np.mean(np.abs(p_real - p_pred_lear) / ((np.abs(p_real) + np.abs(p_pred_lear)) / 2))
-    rmse_lear = np.sqrt(np.mean((p_real - p_pred_lear) ** 2))
-    mae_persis = np.mean(np.abs(p_real - p_pred_persis))
-    smape_persis = 100 * np.mean(np.abs(p_real - p_pred_persis) / ((np.abs(p_real) + np.abs(p_pred_persis)) / 2))
-    rmse_persis = np.sqrt(np.mean((p_real - p_pred_persis) ** 2))
-    new_metrics_df = pd.DataFrame({
-        'Metric': ['MAE', 'SMAPE', 'RMSE'],
-        'Persistence': [f"{mae_persis:.2f}", f"{smape_persis:.2f}%", f"{rmse_persis:.2f}"],
-        'DNN': [f"{mae_dnn:.2f}", f"{smape_dnn:.2f}%", f"{rmse_dnn:.2f}"],
-        'LEAR': [f"{mae_lear:.2f}", f"{smape_lear:.2f}%", f"{rmse_lear:.2f}"]
-    })
-    st.dataframe(new_metrics_df, hide_index=True)
 # Download Predictions Button
 st.write("## Access Predictions")

 import plotly.graph_objs as go
 from io import BytesIO
 @st.cache_data
 def load_data_predictions():
     df = pd.read_csv('Predictions.csv')
+    df = df.rename(columns={
+    'Price': 'Real Price',
+    'DNN1': 'Neural Network 1',
+    'DNN2': 'Neural Network 2',
+    'DNN3': 'Neural Network 3',
+    'DNN4': 'Neural Network 4',
+    'DNN_Ensemble': 'Neural Network Ensemble',
+    'LEAR56': 'Regularized Linear Model CW56',
+    'LEAR84': 'Regularized Linear Model CW84',
+    'LEAR112': 'Regularized Linear Model CW112',
+    'LEAR730': 'Regularized Linear Model CW730',
+    'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
+    'Persis': 'Persistence Model'
+})
     df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
+    df_filtered = df.dropna(subset=['Real Price'])
     return df, df_filtered
 df, df_filtered = load_data_predictions()
 min_date_allowed_pred = df_filtered['Date'].min().date()
 max_date_allowed_pred = df_filtered['Date'].max().date()
 with st.sidebar:
     st.write("### Variables Selection for Graph")
     st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
+    selected_variables = st.multiselect("Select variables to display:", options=['Real Price', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble', 'Regularized Linear Model CW56', 'Regularized Linear Model CW84','Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble', 'Persistence Model'], default=['Real Price', 'Neural Network Ensemble', 'Regularized Linear Model Ensemble', 'Persistence Model'])
+    st.write("### Model Selection for Scatter Plot")
+    model_selection = st.selectbox("Select which model's predictions to display:", options=['Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble', 'Regularized Linear Model CW56', 'Regularized Linear Model CW84','Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble', 'Persistence Model'], index=0)  # Default to 'DNN'
     st.write("### Date Range for Metrics Calculation")
     st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
     start_date_pred, end_date_pred = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
 # Main content
 if not selected_variables:
     st.warning("Please select at least one variable to display.")
     }
     for variable in selected_variables:
+        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))
     fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
     st.plotly_chart(fig, use_container_width=True)
     plot_df = df[(df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))]
     model_column = model_selection
     # Create the scatter plot
     fig = go.Figure()
+    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
     # Calculate the line of best fit
+    m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
     # Calculate the y-values based on the line of best fit
+    regression_line = m * plot_df['Real Price'] + b
     # Format the equation to display as the legend name
     equation = f"y = {m:.2f}x + {b:.2f}"
     # Add the line of best fit to the figure with the equation as the legend name
+    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))
     # Update layout with appropriate titles
+    fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}")
     st.plotly_chart(fig, use_container_width=True)
 # Calculating and displaying metrics
 if start_date_pred and end_date_pred:
     st.header("Accuracy Metrics")
+    #st.write(f"The accuracy metrics are calculated from {start_date_pred} to {end_date_pred}, this interval can be changed in the sidebar.")
+    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.. Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the selected models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")
     filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] <= pd.Timestamp(end_date_pred))]
+    # List of models for convenience
+    models = [
+        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble',
+        'Regularized Linear Model CW56', 'Regularized Linear Model CW84', 'Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble',
+        'Persistence Model'
+    ]
+    # Placeholder for results
+    results = {'Metric': ['MAE', 'sMAPE', 'RMSE', 'rMAE']}
+    p_real = filtered_df['Real Price']
+    # Iterate through each model to calculate and store metrics
+    for model in models:
+        # Assuming column names in filtered_df match the model names directly for simplicity
+        p_pred = filtered_df[model]
+        mae = np.mean(np.abs(p_real - p_pred))
+        smape = 100 * np.mean(np.abs(p_real - p_pred) / ((np.abs(p_real) + np.abs(p_pred)) / 2))
+        rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
+        rmae = mae/np.mean(np.abs(p_real - filtered_df['Persistence Model']))
+        # Store the results
+        results[model] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}", f"{rmae:.2f}"]
+    # Convert the results to a DataFrame for display
+    metrics_df = pd.DataFrame(results)
+    transposed_metrics_df = metrics_df.set_index('Metric').T
+    col1, col2 = st.columns([3, 2])
+    # Display the transposed DataFrame
+    with col1:
+        # Assuming 'transposed_metrics_df' is your final DataFrame with metrics
+        st.dataframe(transposed_metrics_df,  hide_index=False)
+with col2:
+    st.markdown("""
+        <style>
+        .big-font {
+            font-size: 20px;
+            font-weight: 500;
+        }
+        </style>
+        <div class="big-font">
+        Equations
+        </div>
+        """, unsafe_allow_html=True)
+    # Rendering LaTeX equations
+    st.markdown(r"""
+    $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
+    $\text{sMAPE} =100\frac{1}{n} \sum_{i=1}^{n} \frac{|y_i - \hat{y}_i|}{\left(|y_i| + |\hat{y}_i|\right)/2}$
+    $\text{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$
+    $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
+    """)
 # Download Predictions Button
 st.write("## Access Predictions")