Spaces:
Runtime error
Runtime error
André Catarino
committed on
Commit
·
90ac5c0
1
Parent(s):
5401cb8
wip
Browse files- app/app.py +214 -98
app/app.py
CHANGED
@@ -1,125 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
data = pd.concat(
|
5 |
-
[
|
6 |
-
data,
|
7 |
-
pd.DataFrame(
|
8 |
-
[["ml-100k", 0.1, 20, 0.2, 0.2]],
|
9 |
-
columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"],
|
10 |
-
),
|
11 |
-
]
|
12 |
-
)
|
13 |
-
|
14 |
-
import os
|
15 |
-
import plotly.express as px
|
16 |
-
import pandas as pd
|
17 |
-
from dash import Dash, html, dcc, Input, Output, callback
|
18 |
-
import plotly.express as px
|
19 |
-
from dataclasses import dataclass
|
20 |
-
import json
|
21 |
-
|
22 |
-
data = pd.DataFrame(columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"])
|
23 |
-
data = pd.concat(
|
24 |
-
[
|
25 |
-
data,
|
26 |
-
pd.DataFrame(
|
27 |
-
[["ml-100k", 0.1, 20, 0.2, 0.2]],
|
28 |
-
columns=["Dataset", "Alpha", "Top K", "Recall", "Precision"],
|
29 |
-
),
|
30 |
-
]
|
31 |
-
)
|
32 |
-
debug = False
|
33 |
-
|
34 |
|
35 |
-
|
|
|
36 |
|
37 |
-
|
|
|
|
|
38 |
|
39 |
-
|
|
|
|
|
40 |
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
]
|
45 |
-
dataset_options_default_value = data["Dataset"].unique()[0]
|
46 |
|
47 |
-
|
48 |
-
alpha_options_default_value = data["Alpha"].unique()[0]
|
49 |
|
50 |
-
|
51 |
-
top_k_options_default_value = data["Top K"].unique()[0]
|
52 |
|
53 |
app.layout = html.Div(
|
54 |
-
[
|
55 |
-
html.H1("System Evaluation"),
|
56 |
html.Div(
|
57 |
[
|
58 |
-
html.
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
],
|
67 |
-
|
|
|
|
|
68 |
),
|
|
|
69 |
html.Div(
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
id="alpha-dropdown",
|
74 |
-
options=alpha_options,
|
75 |
-
value=alpha_options_default_value,
|
76 |
-
),
|
77 |
-
],
|
78 |
-
className="three columns",
|
79 |
),
|
80 |
-
html.
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
options=top_k_options,
|
86 |
-
value=top_k_options_default_value,
|
87 |
-
),
|
88 |
-
],
|
89 |
-
className="three columns",
|
90 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
],
|
92 |
-
|
93 |
-
)
|
94 |
-
html.Div(
|
95 |
-
[
|
96 |
-
html.Div([dcc.Graph(id="recall-graph")], className="six columns"),
|
97 |
-
html.Div([dcc.Graph(id="precision-graph")], className="six columns"),
|
98 |
-
],
|
99 |
-
className="row",
|
100 |
-
),
|
101 |
]
|
102 |
)
|
103 |
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
@app.callback(
|
106 |
-
Output(
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
)
|
112 |
-
def update_graph(alpha, dataset, top_k):
|
113 |
-
filtered_data = data[
|
114 |
-
(data["Alpha"] == alpha)
|
115 |
-
& (data["Dataset"] == dataset)
|
116 |
-
& (data["Top K"] == top_k)
|
117 |
-
]
|
118 |
-
recall_fig = px.bar(filtered_data, x="Dataset", y="Recall")
|
119 |
-
precision_fig = px.bar(filtered_data, x="Dataset", y="Precision")
|
120 |
-
return recall_fig, precision_fig
|
121 |
|
122 |
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
if __name__ == "__main__":
|
125 |
-
app.run_server(host = '0.0.0.0', port='8050', debug=
|
|
|
1 |
+
# dependencies
|
2 |
+
import sys
|
3 |
+
sys.path.append('../src')
|
4 |
+
import utils
|
5 |
+
import data_processor
|
6 |
+
import deeplearning_build
|
7 |
+
import dash
|
8 |
+
import plotly.graph_objs as go
|
9 |
+
from dash import html, dcc
|
10 |
+
from dash.dependencies import Input, Output, State
|
11 |
+
import dash_bootstrap_components as dbc
|
12 |
import pandas as pd
|
13 |
+
import numpy as np
|
14 |
+
import joblib
|
15 |
+
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
|
16 |
+
import matplotlib.pyplot as plt
|
17 |
+
from sklearn.model_selection import train_test_split
|
18 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
19 |
+
import base64
|
20 |
+
import io
|
21 |
|
22 |
+
# Prepare data and deep learning models
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
# Load processed data
|
25 |
+
processed_data: pd.DataFrame = utils.load_preprocessed_data()
|
26 |
|
27 |
+
# Data preprocessing object
|
28 |
+
processor = data_processor.DataProcessor(processed_data)
|
29 |
+
X, y = processor.create_feature_matrix_and_target_vector(target_column="price actual")
|
30 |
|
31 |
+
# Target variable should be the last column (for compatibility with deeplearning_build module)
|
32 |
+
processed_data = processed_data.drop(columns=["price actual"])
|
33 |
+
processed_data["price actual"] = y
|
34 |
|
35 |
+
# Chronological 80/20 train/test split (shuffle=False); only the held-out test set is kept for the dashboard
|
36 |
+
_, df_test = train_test_split(processed_data, test_size=0.2, random_state=0, shuffle=False)
|
37 |
|
38 |
+
# instantiate deepL class
|
39 |
+
deepL = deeplearning_build.deepL()
|
|
|
|
|
40 |
|
41 |
+
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
|
|
|
42 |
|
43 |
+
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
|
|
|
44 |
|
45 |
def _build_layout():
    """Assemble the static component tree shown by the dashboard.

    The page is a single padded, light-gray panel containing (top to bottom):
    title, status line, model selector, metrics table slot, the
    actual-vs-predicted graph (wrapped in a loading spinner), the error
    histogram and two empty containers that the callback fills with the
    residual correlation plots.
    """
    heading = html.H1("Energy Price Forecasting Dashboard", style={'text-align': 'center'})
    # Filled by the callback with a sentence describing the selected model.
    status_line = html.H4(id='output_container', children=[], style={'text-align': 'center'})
    prompt = html.Div(
        "Select a model to evaluate its performance on the test set:",
        style={'text-align': 'center'},
    )
    model_selector = dcc.Dropdown(
        id="select_model",
        options=[
            {"label": "BiLSTM", "value": 1},
            {"label": "CNN-BiLSTM", "value": 2},
            {"label": "CNN-BiLSTM-Attention", "value": 3},
        ],
        multi=False,
        value=1,
        style={"text-align": 'center'},
    )
    # Centered slot for the metrics table.
    metrics_slot = html.Div(
        id='performance_table_container',
        children=[],
        style={'display': 'flex', 'align-items': 'center', 'justify-content': 'center'},
    )
    forecast_graph = dcc.Loading(
        id="loading-graph",
        children=dcc.Graph(id="model_accuracy", figure={}),
        type="default",
    )
    error_histogram = html.Div([
        dcc.Graph(id='error_distribution', figure={}),
    ])
    # Empty containers populated by the callback with the ACF / PACF images.
    acf_slot = html.Div(id='residuals-container', children=[])
    pacf_slot = html.Div(id='output_plots', children=[])

    panel = html.Div(
        [
            heading,
            html.Hr(),
            status_line,
            html.Br(),
            prompt,
            model_selector,
            html.Br(),
            metrics_slot,
            html.Br(),
            forecast_graph,
            html.Br(),
            error_histogram,
            acf_slot,
            pacf_slot,
        ],
        # Light gray background color with padding
        style={'backgroundColor': '#f2f2f2', 'padding': '30px'},
    )
    return html.Div(children=[panel])


app.layout = _build_layout()
|
88 |
|
89 |
+
def create_performance_table(model_name, mae, mse, rmse, r2):
    """Render the regression metrics as a Bootstrap table component.

    Args:
        model_name: Name of the evaluated model. Accepted for interface
            compatibility; it is not shown in the table.
        mae: Mean absolute error.
        mse: Mean squared error.
        rmse: Root mean squared error.
        r2: R2 score.

    Returns:
        A ``dbc.Table`` with a ``Metric`` and a ``Value`` column, one row per
        metric, each value rounded to two decimals.
    """
    # Keep each label next to its value so the two columns cannot drift apart.
    labelled_scores = [
        ("Mean Absolute Error", mae),
        ("Mean Squared Error", mse),
        ("Root Mean Squared Error", rmse),
        ("R2 Score", r2),
    ]
    metrics_frame = pd.DataFrame({
        'Metric': [label for label, _ in labelled_scores],
        'Value': [round(score, 2) for _, score in labelled_scores],
    })

    return dbc.Table.from_dataframe(
        metrics_frame,
        striped=True,
        bordered=True,
        hover=True,
        responsive=True,
    )
|
116 |
+
|
117 |
+
|
118 |
+
def model_processing(df_test, model_name):
    """Evaluate a stored model on the test set and build the forecast figure.

    Loads ``model_name`` through the module-level ``deepL`` helper, predicts
    on ``df_test``, maps predictions and targets back through the persisted
    scaler, and computes error metrics on the rescaled values.

    Args:
        df_test: Test-set DataFrame passed to the ``deepL`` helper.
        model_name: Name of the model to load and evaluate; also used in the
            figure title and the status sentence.

    Returns:
        A 7-tuple ``(container, fig, errors, mae, mse, rmse, r2)`` where
        ``container`` is the status sentence, ``fig`` is the actual-vs-
        predicted Plotly figure, ``errors`` is the ``(n, 1)`` array of
        residuals (actual - predicted) and the rest are scalar metrics.
    """
    deepL.load_model(model_name)

    # The windowed dataset is only used to recover the ground-truth targets;
    # prediction is delegated to ``deepL.predict`` on the raw frame.
    test_set = deepL.prepare_sequential_window(df_test, window_size=15, classification=False)
    y_pred = deepL.predict(model_name, df_test).reshape(-1, 1)

    # Flatten each batch before joining: stacking the batches with
    # ``np.array`` fails (ragged input) when the last batch is smaller than
    # the others, whereas concatenation yields the same ordering for equal
    # batches and also works for a short final batch.
    target_batches = [y_batch.numpy().reshape(-1) for _, y_batch in test_set]
    if not target_batches:
        raise ValueError("The test set produced no target batches to evaluate.")
    y_true = np.concatenate(target_batches).reshape(-1, 1)

    # load scaler
    # NOTE(review): the path is relative to the working directory, and the
    # scaler is presumably fitted on the single target column - confirm.
    scaler = joblib.load("../artifacts/scaler.pkl")
    pred = scaler.inverse_transform(y_pred)
    original_target = scaler.inverse_transform(y_true)

    # Calculate the errors (residuals)
    errors = original_target - pred

    mae = mean_absolute_error(original_target, pred)
    mse = mean_squared_error(original_target, pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(original_target, pred)

    # Plot
    time_axis = np.arange(0, len(original_target))
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=time_axis, y=original_target.flatten(), mode='lines',
                             name='Actual Price'))
    fig.add_trace(go.Scatter(x=time_axis, y=pred.flatten(), mode='lines',
                             name='Predicted Price'))
    fig.update_layout(title=f"Actual vs Predicted Price ({model_name})",
                      xaxis_title="Time",
                      yaxis_title="Price ($)")
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    # Single legend configuration: horizontal, anchored above the plot area
    # on the right. (An earlier top-left setting was fully overridden.)
    fig.update_layout(
        legend_title_text='Legend',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
    )

    container = f"Multivariate Sequence to Vector Modeling: Next hour price prediction using {model_name} model"

    return container, fig, errors, mae, mse, rmse, r2
|
174 |
+
|
175 |
+
def _correlogram_img(plot_func, residuals):
    """Draw a correlation plot of ``residuals`` and return it as an inline PNG.

    ``plot_func`` is called as ``plot_func(residuals, ax=axis)`` on a fresh
    Matplotlib figure. The figure is always closed afterwards so repeated
    callbacks do not accumulate open pyplot figures.
    """
    figure, axis = plt.subplots()
    png_buffer = io.BytesIO()
    try:
        plot_func(residuals, ax=axis)
        figure.savefig(png_buffer, format='png')
    finally:
        plt.close(figure)
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode('utf-8')
    return html.Img(src=f"data:image/png;base64,{encoded_png}")


# Connect the plotly graphs with Dash Components
@app.callback(
    Output(component_id='output_container', component_property='children'),
    Output(component_id='model_accuracy', component_property='figure'),
    Output(component_id='error_distribution', component_property='figure'),
    Output(component_id='residuals-container', component_property='children'),
    Output(component_id='output_plots', component_property='children'),
    Output(component_id='performance_table_container', component_property='children'),
    Input(component_id='select_model', component_property='value'),
)
def update_graph(option_selected):
    """Refresh every dashboard output for the model chosen in the dropdown.

    Args:
        option_selected: Value of the ``select_model`` dropdown (1, 2 or 3).

    Returns:
        A 6-tuple matching the callback outputs in order: status sentence,
        actual-vs-predicted figure, error histogram figure, ACF image,
        PACF image and the metrics table.

    Raises:
        dash.exceptions.PreventUpdate: If the value is not a known model id
            (for example when the dropdown is cleared), leaving the current
            outputs untouched instead of returning ``None`` for six outputs.
    """
    models = {
        1: "BiLSTM",
        2: "CNN-BiLSTM",
        3: "CNN-BiLSTM-Attention"
    }

    if option_selected not in models:
        raise dash.exceptions.PreventUpdate

    model_name = models[option_selected]
    container, fig, errors, mae, mse, rmse, r2 = model_processing(df_test, model_name)

    # The histogram and the correlation plots all need 1-D residuals.
    residuals = errors.flatten()

    # Plot error distribution
    error_fig = go.Figure()
    error_fig.add_trace(go.Histogram(x=residuals, histnorm='probability density'))
    error_fig.update_layout(title="Error Distribution",
                            xaxis_title="Error",
                            yaxis_title="Density",
                            showlegend=False)

    # Autocorrelation / partial autocorrelation of the residuals, embedded
    # in the page as base64 data URIs (nothing is written to disk).
    acf_plot = _correlogram_img(plot_acf, residuals)
    pacf_plot = _correlogram_img(plot_pacf, residuals)

    performance_table = create_performance_table(model_name, mae, mse, rmse, r2)

    return container, fig, error_fig, acf_plot, pacf_plot, performance_table
|
239 |
+
|
240 |
if __name__ == "__main__":
    # ``run_server`` is the legacy entry point and is not available in newer
    # Dash releases; prefer ``run`` when the installed version provides it
    # and fall back to ``run_server`` on older installs.
    launch = app.run if hasattr(app, "run") else app.run_server
    launch(host='0.0.0.0', port='8050', debug=False)
|