jerin committed
Commit 985bb5d
2 parents: 66977cd 816790f

Merge branch 'lstm_pipeline' of hf.co:spaces/smartbuildings/smart-buildings into lstm_pipeline

mqttpublisher.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
physLSTM/lstm_vav_rtu1.ipynb CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,12 +51,6 @@
    "    ):\n",
    "        cols.append(column)\n",
    "\n",
-   "\n",
-   "# for rtu in rtus:\n",
-   "#     for column in merged.columns:\n",
-   "#         if f\"rtu_00{rtu}_fltrd_sa\" or f\"rtu_00{rtu}_sa_temp\" in column:\n",
-   "#             cols.append(column)\n",
-   "\n",
    "cols = (\n",
    "    [\"date\"]\n",
    "    + cols\n",
@@ -82,14 +76,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "C:\\Users\\arbal\\AppData\\Local\\Temp\\ipykernel_29192\\4293840618.py:1: SettingWithCopyWarning: \n",
+     "C:\\Users\\arbal\\AppData\\Local\\Temp\\ipykernel_368\\4293840618.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
@@ -115,7 +109,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
    {
@@ -124,7 +118,7 @@
      "[]"
     ]
    },
-   "execution_count": 36,
+   "execution_count": 13,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -144,7 +138,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['date', 'zone_069_temp', 'zone_069_fan_spd', 'zone_068_temp',\n",
+       "       'zone_068_fan_spd', 'zone_067_temp', 'zone_067_fan_spd',\n",
+       "       'zone_066_temp', 'zone_066_fan_spd', 'zone_065_temp',\n",
+       "       'zone_065_fan_spd', 'zone_064_temp', 'zone_064_fan_spd',\n",
+       "       'zone_042_temp', 'zone_042_fan_spd', 'zone_041_temp',\n",
+       "       'zone_041_fan_spd', 'zone_040_temp', 'zone_040_fan_spd',\n",
+       "       'zone_039_temp', 'zone_039_fan_spd', 'zone_038_temp',\n",
+       "       'zone_038_fan_spd', 'zone_037_temp', 'zone_037_fan_spd',\n",
+       "       'zone_036_temp', 'zone_036_fan_spd', 'rtu_001_fltrd_sa_flow_tn',\n",
+       "       'rtu_001_sa_temp', 'air_temp_set_1', 'air_temp_set_2',\n",
+       "       'dew_point_temperature_set_1d', 'relative_humidity_set_1',\n",
+       "       'solar_radiation_set_1', 'zone_069_cooling_sp', 'zone_069_heating_sp',\n",
+       "       'zone_067_cooling_sp', 'zone_067_heating_sp', 'zone_066_cooling_sp',\n",
+       "       'zone_066_heating_sp', 'zone_065_cooling_sp', 'zone_065_heating_sp',\n",
+       "       'zone_064_cooling_sp', 'zone_064_heating_sp', 'zone_042_cooling_sp',\n",
+       "       'zone_042_heating_sp', 'zone_041_cooling_sp', 'zone_041_heating_sp',\n",
+       "       'zone_039_cooling_sp', 'zone_039_heating_sp', 'zone_038_cooling_sp',\n",
+       "       'zone_038_heating_sp', 'zone_037_cooling_sp', 'zone_037_heating_sp',\n",
+       "       'zone_036_cooling_sp', 'zone_036_heating_sp'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "traindataset_df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
    {
@@ -161,7 +194,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
    {
@@ -170,7 +203,7 @@
      "(1073512, 391818)"
     ]
    },
-   "execution_count": 38,
+   "execution_count": 16,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -181,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
    {
@@ -190,7 +223,7 @@
      "['scaler_vav_1.pkl']"
     ]
    },
-   "execution_count": 39,
+   "execution_count": 18,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -208,7 +241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -231,7 +264,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
    {
@@ -251,7 +284,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
    {
@@ -304,6 +337,18 @@
    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=128, verbose=1, callbacks=[checkpoint_callback])"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 13,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "import keras\n",
+   "checkpoint_path = \"lstm_vav_01.keras\"\n",
+   "\n",
+   "model = keras.models.load_model(checkpoint_path)"
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": 55,
src/main.py CHANGED
@@ -30,25 +30,39 @@ def main():
         num_outputs=rtu_data_pipeline.num_outputs,
     )

-    # vav_pipeline = VAVPipeline(rtu_id=1, scaler_path="src/vav/models/scaler_vav_1.pkl")
+    vav_pipeline = VAVPipeline(rtu_id=1, scaler_path="src/vav/models/scaler_vav_1.pkl")

-    # vav_anomalizer = VAVAnomalizer(prediction_model_path="src/vav/models/lstm__vav_01")
+    vav_anomalizer = VAVAnomalizer(
+        rtu_id=1,
+        prediction_model_path="src/vav/models/lstm_vav_01.keras",
+        clustering_model_path="src/vav/models/kmeans_vav_1.pkl",
+        num_inputs=vav_pipeline.num_inputs,
+        num_outputs=vav_pipeline.num_outputs,
+    )
     # print(vav_pipeline.input_col_names)

     # print(len(vav_pipeline.output_col_names))

     def on_message(client, userdata, message):
-        # print(json.loads(message.payload.decode()))
+        df_new_vav, df_trans_vav = vav_pipeline.fit(message)
+        vav_anomalizer.num_inputs = vav_pipeline.num_inputs
+        vav_anomalizer.num_outputs = vav_pipeline.num_outputs
+        if not df_new_vav is None and not df_trans_vav is None:
+            out_vav = vav_anomalizer.pipeline(
+                df_new_vav, df_trans_vav, vav_pipeline.scaler
+            )
+
         df_new1, df_trans1, df_new2, df_trans2 = rtu_data_pipeline.fit(message)
         if not df_new1 is None and not df_trans1 is None and not df_new2 is None and not df_trans2 is None:
             out1,out2,out3,out4 = rtu_anomalizer1.pipeline(df_new1, df_trans1, rtu_data_pipeline.scaler1)
             out5,out6,out7,out8 = rtu_anomalizer2.pipeline(df_new2, df_trans2, rtu_data_pipeline.scaler2)
-            print(out2)
+            #print(out2)

     broker_address = "localhost"
     broker_port = 1883
     topic = "sensor_data"
     client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)
+    print("Connecting to broker")
     client.on_message = on_message
     client.connect(broker_address, broker_port)
     client.subscribe(topic)
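For readers unfamiliar with the paho-mqtt 2.x API that `mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)` belongs to, here is a minimal, self-contained sketch of the subscriber wiring this file sets up. The broker address, port, and topic mirror the diff; the blocking network loop is an assumption, since the diff is truncated before any loop call:

```python
import paho.mqtt.client as mqtt

def on_message(client, userdata, message):
    # In main.py this callback feeds the payload into the VAV and RTU pipelines.
    print(f"{message.topic}: {message.payload.decode()}")

# VERSION1 selects the legacy callback signatures under paho-mqtt >= 2.0.
client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)
client.on_message = on_message
client.connect("localhost", 1883)
client.subscribe("sensor_data")
client.loop_forever()  # blocks and dispatches incoming messages to on_message
```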
src/vav/VAVPipeline.py CHANGED
@@ -1,15 +1,41 @@
 import json
+import joblib
+import pandas as pd
 from sklearn.preprocessing import StandardScaler
-from pickle import load
 import numpy as np


 class VAVPipeline:
+    """
+    A class representing a Variable Air Volume (VAV) pipeline.
+
+    Attributes:
+        rtu_id (int): The ID of the RTU (Roof Top Unit).
+        scaler_path (str): The path to the scaler file.
+        window_size (int): The size of the sliding window.
+
+    Methods:
+        get_scaler(scaler_path): Loads the scaler from the given path.
+        get_window(df): Returns the sliding window of the given dataframe.
+        transform_window(df_window): Transforms the values of the dataframe using the scaler.
+        prepare_input(df_trans): Prepares the input for the model.
+        get_input_output(df): Extracts the input and output column names from the dataframe.
+        extract_data_from_message(message): Extracts data from the message payload and returns a dataframe.
+        fit(message): Fits the model with the extracted data and returns the prepared input and transformed data.
+    """

     def __init__(self, rtu_id, scaler_path=None, window_size=30):
-
+        """
+        Initializes a VAVPipeline object.
+
+        Args:
+            rtu_id (int): The ID of the RTU (Roof Top Unit).
+            scaler_path (str, optional): The path to the scaler file. Defaults to None.
+            window_size (int, optional): The size of the sliding window. Defaults to 30.
+        """
+        self.get_cols = True
         self.window_size = window_size
-
+        self.rtu_id = rtu_id
         if rtu_id == 1:
             self.zones = [69, 68, 67, 66, 65, 64, 42, 41, 40, 39, 38, 37, 36]
         if rtu_id == 2:
@@ -36,8 +62,6 @@ class VAVPipeline:
             28,
         ]

-        outputs = ["temp", "fan_speed"]
-        inputs = ["cooling_sp", "heating_sp"]
         self.output_col_names = []
         self.input_col_names = [
             f"rtu_00{rtu_id}_fltrd_sa_flow_tn",
@@ -48,21 +72,37 @@ class VAVPipeline:
             "relative_humidity_set_1",
             "solar_radiation_set_1",
         ]
-        for zone in self.zones:
-            for output in outputs:
-                self.output_col_names.append(f"zone_0{zone}_{output}")
-            for input in inputs:
-                self.input_col_names.append(f"zone_0{zone}_{input}")

         self.column_names = self.output_col_names + self.input_col_names

+        self.num_inputs = len(self.input_col_names)
+        self.num_outputs = len(self.output_col_names)
+
         if scaler_path:
             self.scaler = self.get_scaler(scaler_path)

     def get_scaler(self, scaler_path):
-        return load(scaler_path)
+        """
+        Loads the scaler from the given path.
+
+        Args:
+            scaler_path (str): The path to the scaler file.
+
+        Returns:
+            StandardScaler: The loaded scaler object.
+        """
+        return joblib.load(scaler_path)

     def get_window(self, df):
+        """
+        Returns the sliding window of the given dataframe.
+
+        Args:
+            df (pd.DataFrame): The dataframe.
+
+        Returns:
+            pd.DataFrame: The sliding window dataframe.
+        """
         len_df = len(df)
         if len_df > self.window_size:
             return df[len_df - (self.window_size + 1) : len_df].astype("float32")
@@ -70,26 +110,99 @@ class VAVPipeline:
         return None

     def transform_window(self, df_window):
-        return self.scaler.transform(df_window)
+        """
+        Transforms the values of the dataframe using the scaler.
+
+        Args:
+            df_window (pd.DataFrame): The dataframe.
+
+        Returns:
+            np.ndarray: The transformed values.
+        """
+        return self.scaler.transform(df_window.values)

     def prepare_input(self, df_trans):
+        """
+        Prepares the input for the model.
+
+        Args:
+            df_trans (np.ndarray): The transformed values.
+
+        Returns:
+            np.ndarray: The prepared input.
+        """
         return df_trans[: self.window_size, :].reshape(
             (1, self.window_size, len(self.column_names))
         )

+    def get_input_output(self, df: pd.DataFrame):
+        """
+        Extracts the input and output column names from the dataframe.
+
+        Args:
+            df (pd.DataFrame): The dataframe.
+        """
+        for zone in self.zones:
+            for column in df.columns:
+                if (
+                    f"zone_0{zone}" in column
+                    and "co2" not in column
+                    and "hw_valve" not in column
+                    and "cooling_sp" not in column
+                    and "heating_sp" not in column
+                ):
+                    self.output_col_names.append(column)
+        self.input_col_names = [
+            f"rtu_00{self.rtu_id}_fltrd_sa_flow_tn",
+            f"rtu_00{self.rtu_id}_sa_temp",
+            "air_temp_set_1",
+            "air_temp_set_2",
+            "dew_point_temperature_set_1d",
+            "relative_humidity_set_1",
+            "solar_radiation_set_1",
+        ]
+        for zone in self.zones:
+            for column in df.columns:
+                if f"zone_0{zone}" in column:
+                    if "cooling_sp" in column or "heating_sp" in column:
+                        self.input_col_names.append(column)
+        self.column_names = self.output_col_names + self.input_col_names
+        self.num_inputs = len(self.input_col_names)
+        self.num_outputs = len(self.output_col_names)
+        self.df = pd.DataFrame(columns=self.column_names)
+
     def extract_data_from_message(self, message):
-        payload = json.loads(message.payload.decode())
-
-        len_df = len(self.df)
-
-        k = {}
-        for col in self.column_names:
-            k[col] = payload[col]
-        self.df.loc[len_df] = k
+        """
+        Extracts data from the message payload and returns a dataframe.
+
+        Args:
+            message: The message containing the payload.
+
+        Returns:
+            pd.DataFrame: The extracted data as a dataframe.
+        """
+        payload = json.loads(message.payload.decode())
+        df = pd.DataFrame.from_dict(payload, orient="index").T
+        if self.get_cols == True:
+            self.get_input_output(df)
+            self.get_cols = False
+        df = df[self.column_names]
+        self.df.loc[len(self.df)] = df.values[0]
         return self.df

     def fit(self, message):
+        """
+        Fits the model with the extracted data and returns the prepared input and transformed data.
+
+        Args:
+            message: The message containing the data.
+
+        Returns:
+            tuple: A tuple containing the prepared input and transformed data.
+        """
         df = self.extract_data_from_message(message)
+
         df_window = self.get_window(df)
         if df_window is not None:
             df_trans = self.transform_window(df_window)
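Taken together, `fit()` chains `extract_data_from_message` → `get_window` → `transform_window` → `prepare_input`. Here is a standalone sketch of the shape contract those methods implement; the feature count is illustrative (in the class it comes from `get_input_output`), and a freshly fitted `StandardScaler` stands in for the pickled one:

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

window_size = 30   # VAVPipeline's default
n_features = 56    # illustrative; the real count comes from get_input_output

df = pd.DataFrame(np.random.rand(100, n_features))

# get_window: keep the last window_size + 1 rows once enough data has arrived
window = df[len(df) - (window_size + 1):].astype("float32")

# transform_window: scale the window with the (here freshly fitted) scaler
scaler = StandardScaler().fit(df.values)
trans = scaler.transform(window.values)

# prepare_input: drop the extra row and add a batch dimension for the LSTM
x = trans[:window_size, :].reshape(1, window_size, n_features)
assert x.shape == (1, window_size, n_features)
```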