André Catarino committed on
Commit 90ac5c0 · 1 Parent(s): 5401cb8
Files changed (1)
  1. app/app.py +214 -98
app/app.py CHANGED
@@ -1,125 +1,241 @@
  import pandas as pd

- data = pd.DataFrame(columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"])
- data = pd.concat(
-     [
-         data,
-         pd.DataFrame(
-             [["ml-100k", 0.1, 20, 0.2, 0.2]],
-             columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"],
-         ),
-     ]
- )
-
- import os
- import plotly.express as px
- import pandas as pd
- from dash import Dash, html, dcc, Input, Output, callback
- import plotly.express as px
- from dataclasses import dataclass
- import json
-
- data = pd.DataFrame(columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"])
- data = pd.concat(
-     [
-         data,
-         pd.DataFrame(
-             [["ml-100k", 0.1, 20, 0.2, 0.2]],
-             columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"],
-         ),
-     ]
- )
- debug = False
-

- external_stylesheets = ["https://codepen.io/chriddyp/pen/bWLwgP.css"]

- app = Dash(__name__, external_stylesheets=external_stylesheets)

- server = app.server


- dataset_options = [
-     {"label": entry, "value": entry} for entry in data["Dataset"].unique()
- ]
- dataset_options_default_value = data["Dataset"].unique()[0]

- alpha_options = [{"label": entry, "value": entry} for entry in data["Alpha"].unique()]
- alpha_options_default_value = data["Alpha"].unique()[0]

- top_k_options = [{"label": entry, "value": entry} for entry in data["Top K"].unique()]
- top_k_options_default_value = data["Top K"].unique()[0]

  app.layout = html.Div(
-     [
-         html.H1("System Evaluation"),
          html.Div(
              [
-                 html.Div(
-                     [
-                         html.H3("Dataset"),
-                         dcc.Dropdown(
-                             id="dataset-dropdown",
-                             options=dataset_options,
-                             value=dataset_options_default_value,
-                         ),
                      ],
-                     className="three columns",
                  ),
                  html.Div(
-                     [
-                         html.H3("Alpha"),
-                         dcc.Dropdown(
-                             id="alpha-dropdown",
-                             options=alpha_options,
-                             value=alpha_options_default_value,
-                         ),
-                     ],
-                     className="three columns",
                  ),
-                 html.Div(
-                     [
-                         html.H3("Top K"),
-                         dcc.Dropdown(
-                             id="top_k-dropdown",
-                             options=top_k_options,
-                             value=top_k_options_default_value,
-                         ),
-                     ],
-                     className="three columns",
                  ),
              ],
-             className="row",
-         ),
-         html.Div(
-             [
-                 html.Div([dcc.Graph(id="recall-graph")], className="six columns"),
-                 html.Div([dcc.Graph(id="precision-graph")], className="six columns"),
-             ],
-             className="row",
-         ),
      ]
  )

-
  @app.callback(
-     Output("recall-graph", "figure"),
-     Output("precision-graph", "figure"),
-     Input("alpha-dropdown", "value"),
-     Input("dataset-dropdown", "value"),
-     Input("top_k-dropdown", "value"),
  )
- def update_graph(alpha, dataset, top_k):
-     filtered_data = data[
-         (data["Alpha"] == alpha)
-         & (data["Dataset"] == dataset)
-         & (data["Top K"] == top_k)
-     ]
-     recall_fig = px.bar(filtered_data, x="Dataset", y="Recall")
-     precision_fig = px.bar(filtered_data, x="Dataset", y="Precision")
-     return recall_fig, precision_fig


- # Run app and display result inline in the notebook
  if __name__ == "__main__":
-     app.run_server(host = '0.0.0.0', port='8050', debug=True)

+ # dependencies
+ import sys
+ sys.path.append('../src')
+ import utils
+ import data_processor
+ import deeplearning_build
+ import dash
+ import plotly.graph_objs as go
+ from dash import html, dcc
+ from dash.dependencies import Input, Output, State
+ import dash_bootstrap_components as dbc
  import pandas as pd
+ import numpy as np
+ import joblib
+ from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
+ import matplotlib.pyplot as plt
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ import base64
+ import io

+ # Prepare data and deep learning models

+ # Load processed data
+ processed_data: pd.DataFrame = utils.load_preprocessed_data()

+ # Data preprocessing object
+ processor = data_processor.DataProcessor(processed_data)
+ X, y = processor.create_feature_matrix_and_target_vector(target_column="price actual")

+ # Target variable should be the last column (for compatibility with deeplearning_build module)
+ processed_data = processed_data.drop(columns=["price actual"])
+ processed_data["price actual"] = y

+ # Split data into train, validation and test sets (80%, 20%)
+ _, df_test = train_test_split(processed_data, test_size=0.2, random_state=0, shuffle=False)

+ # instantiate deepL class
+ deepL = deeplearning_build.deepL()

+ external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

+ app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

  app.layout = html.Div(
+     children=[
          html.Div(
              [
+                 html.H1("Energy Price Forecasting Dashboard", style={'text-align': 'center'}),
+                 html.Hr(),
+                 html.H4(id='output_container', children=[], style={'text-align': 'center'}),
+                 html.Br(),
+                 html.Div("Select a model to evaluate its performance on the test set:", style={'text-align': 'center'}),
+                 dcc.Dropdown(
+                     id="select_model",
+                     options=[
+                         {"label": "BiLSTM", "value": 1},
+                         {"label": "CNN-BiLSTM", "value": 2},
+                         {"label": "CNN-BiLSTM-Attention", "value": 3}
                      ],
+                     multi=False,
+                     value=1,
+                     style={"text-align": 'center'}
                  ),
+                 html.Br(),
                  html.Div(
+                     id='performance_table_container',
+                     children=[],
+                     style={'display': 'flex', 'align-items': 'center', 'justify-content': 'center'}
                  ),
+                 html.Br(),
+                 dcc.Loading(
+                     id="loading-graph",
+                     children=dcc.Graph(id="model_accuracy", figure={}),
+                     type="default"
                  ),
+                 html.Br(),
+                 html.Div([
+                     dcc.Graph(id='error_distribution', figure={}),
+                 ]),
+                 html.Div(id='residuals-container', children=[]),
+                 html.Div(id='output_plots', children=[]),
              ],
+             style={'backgroundColor': '#f2f2f2', 'padding': '30px'} # Light gray background color with padding
+         )
      ]
  )

+ def create_performance_table(model_name, mae, mse, rmse, r2):
+     performance_metrics = pd.DataFrame({
+         'Metric': [
+             #"Model chosen",
+             "Mean Absolute Error",
+             "Mean Squared Error",
+             "Root Mean Squared Error",
+             "R2 Score"
+         ],
+         'Value': [
+             #model_name,
+             round(mae, 2),
+             round(mse, 2),
+             round(rmse, 2),
+             round(r2, 2)
+         ]
+     })
+
+     performance_table = dbc.Table.from_dataframe(
+         performance_metrics,
+         striped=True,
+         bordered=True,
+         hover=True,
+         responsive=True
+     )
+
+     return performance_table
+
+
+ def model_processing(df_test, model_name):
+     deepL.load_model(model_name)
+
+     test_set = deepL.prepare_sequential_window(df_test, window_size=15, classification=False)
+     y_pred = deepL.predict(model_name, df_test)
+     y_pred = y_pred.reshape(-1, 1)
+
+     y_batch_list = []
+     for _, y_batch in test_set:
+         y_batch_list.append(y_batch.numpy())
+
+     y_batch_list = np.array(y_batch_list)
+     y_batch_list = y_batch_list.reshape(-1, 1)
+
+     # load scaler
+     scaler = joblib.load("../artifacts/scaler.pkl")
+     pred = scaler.inverse_transform(y_pred)
+     original_target = scaler.inverse_transform(y_batch_list)
+
+     # Calculate the errors (residuals)
+     errors = original_target - pred
+
+     mae = mean_absolute_error(original_target, pred)
+     mse = mean_squared_error(original_target, pred)
+     rmse = np.sqrt(mse)
+     r2 = r2_score(original_target, pred)
+
+     # Plot
+     fig = go.Figure()
+     fig.add_trace(go.Scatter(x=np.arange(0, len(original_target)), y=original_target.flatten(), mode='lines',
+                              name='Actual Price'))
+     fig.add_trace(go.Scatter(x=np.arange(0, len(original_target)), y=pred.flatten(), mode='lines',
+                              name='Predicted Price'))
+     fig.update_layout(title=f"Actual vs Predicted Price ({model_name})",
+                       xaxis_title="Time",
+                       yaxis_title="Price ($)")
+     fig.update_xaxes(showgrid=False)
+     fig.update_yaxes(showgrid=False)
+     fig.update_layout(legend=dict(
+         yanchor="top",
+         y=0.99,
+         xanchor="left",
+         x=0.01
+     ))
+     fig.update_layout(legend_title_text='Legend')
+     fig.update_layout(legend=dict(
+         orientation="h",
+         yanchor="bottom",
+         y=1.02,
+         xanchor="right",
+         x=1
+     ))
+
+     container = f"Multivariate Sequence to Vector Modeling: Next hour price prediction using {model_name} model"
+
+     return container, fig, errors, mae, mse, rmse, r2
+
+ # Connect the plotly graphs with Dash Components
  @app.callback(
+     [Output(component_id='output_container', component_property='children'),
+      Output(component_id='model_accuracy', component_property='figure')],
+     Output(component_id='error_distribution', component_property='figure'),
+     Output(component_id='residuals-container', component_property='children'),
+     Output(component_id='output_plots', component_property='children'),
+     Output(component_id='performance_table_container', component_property='children'),
+     [Input(component_id='select_model', component_property='value')]
  )


+ def update_graph(option_selected):
+     models = {
+         1: "BiLSTM",
+         2: "CNN-BiLSTM",
+         3: "CNN-BiLSTM-Attention"
+     }
+
+     if option_selected in models:
+         model_name = models[option_selected]
+         loading = True
+         container, fig, errors, mae, mse, rmse, r2 = model_processing(df_test, model_name)
+         loading = False
+
+         # Plot error distribution
+         error_fig = go.Figure()
+         error_fig.add_trace(go.Histogram(x=errors.flatten(), histnorm='probability density'))
+         error_fig.update_layout(title="Error Distribution",
+                                 xaxis_title="Error",
+                                 yaxis_title="Density",
+                                 showlegend=False)
+
+         # Perform autocorrelation test
+
+         # Reshape errors array if needed
+         errors = errors.flatten()
+
+         # Perform autocorrelation test
+         fig_acf, ax_acf = plt.subplots()
+         fig_pacf, ax_pacf = plt.subplots()
+
+         plot_acf(errors, ax=ax_acf)
+         plot_pacf(errors, ax=ax_pacf)
+
+         # Save the plots as base64 encoded strings in assets folder
+         buffer_acf = io.BytesIO()
+         fig_acf.savefig(buffer_acf, format='png')
+         buffer_acf.seek(0)
+         buffer_pacf = io.BytesIO()
+         fig_pacf.savefig(buffer_pacf, format='png')
+         buffer_pacf.seek(0)
+
+         # Encode images to base64 strings
+         encoded_acf = base64.b64encode(buffer_acf.read()).decode('utf-8')
+         encoded_pacf = base64.b64encode(buffer_pacf.read()).decode('utf-8')
+
+         # Create HTML images
+         acf_plot = html.Img(src=f"data:image/png;base64,{encoded_acf}")
+         pacf_plot = html.Img(src=f"data:image/png;base64,{encoded_pacf}")
+
+         performance_table = create_performance_table(model_name, mae, mse, rmse, r2)
+
+         return container, fig, error_fig, acf_plot, pacf_plot, performance_table
+
  if __name__ == "__main__":
+     app.run_server(host = '0.0.0.0', port='8050', debug=False)
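
The rewritten callback relies on Dash's multi-output pattern: the decorator declares six Output targets and update_graph returns six values that Dash matches to them in declaration order. Below is a minimal, self-contained sketch of that wiring, using hypothetical component ids and dummy data rather than the project's models; it illustrates the pattern only and is not the app's code.

# Minimal multi-output Dash callback sketch (illustrative only).
# The ids "select_model", "output_container" and "model_accuracy" mirror the
# app above, but the figure is dummy data and no project modules are needed.
import plotly.graph_objs as go
from dash import Dash, Input, Output, dcc, html

app = Dash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id="select_model",
        options=[{"label": "Model A", "value": 1}, {"label": "Model B", "value": 2}],
        value=1,
    ),
    html.H4(id="output_container"),
    dcc.Graph(id="model_accuracy"),
])


@app.callback(
    Output("output_container", "children"),  # matched to the first returned value
    Output("model_accuracy", "figure"),      # matched to the second returned value
    Input("select_model", "value"),
)
def update(option_selected):
    # Return values must follow the order of the declared Outputs.
    fig = go.Figure(go.Scatter(y=[2, 1, 3], mode="lines"))
    return f"Selected model {option_selected}", fig


if __name__ == "__main__":
    app.run_server(debug=True)  # Dash 2.x; newer releases prefer app.run()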