GhoulMac committed on
Commit
d47fa16
1 Parent(s): 3ea2851

Upload 8 files

Browse files
Files changed (8) hide show
  1. Generate.py +119 -0
  2. app.py +66 -0
  3. config.py +57 -0
  4. forecast.ipynb +0 -0
  5. forecast.py +228 -0
  6. test.ipynb +0 -0
  7. test.py +16 -0
  8. test2.ipynb +0 -0
Generate.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sklearn
2
+ import pandas as pd
3
+ from tsai.basics import *
4
+ import config
5
+ from tsai.inference import load_learner
6
+
7
+ import pandas as pd
8
+
9
+
10
+
def get_inputs_from_user():
    """Placeholder for collecting forecast parameters from the user.

    Not yet implemented; always returns 0.
    """
    return 0
14
+
def preprocess_data(DataFrame: pd.DataFrame) -> pd.DataFrame:
    """Apply the saved air-compressor preprocessing pipeline to *DataFrame*.

    Bug fix: the original called ``load_object()`` with no path argument
    (a TypeError), discarded the loaded object, and returned the input
    frame unprocessed. It now loads the air-compressor pipeline from
    ``config.AIR_PREPROCESSOR_PATH`` and returns the transformed frame.
    """
    preproc = load_object(config.AIR_PREPROCESSOR_PATH)
    # NOTE(review): fit_transform mirrors the other functions in this module;
    # confirm refitting the pipeline on incoming data is intended.
    return preproc.fit_transform(DataFrame)
18
+
def preprocess_data_transform_generate_splits_Train(DataFrame: pd.DataFrame):
    """Preprocess the raw air-compressor frame, build forecasting splits,
    train an InceptionTimePlus forecaster and export it to models/model_in.pt.

    Bug fix: the original referenced ``config.DROP_COLOUMNS``,
    ``config.PREPROCESSOR_PATH``, ``config.SCALING_DATA``,
    ``config.DATETIME_COL`` and ``config.COLOUMNS`` — none of which exist in
    config.py (the air-compressor constants are ``AIR_``-prefixed), so the
    function raised AttributeError on first use.

    Returns 0 on success (kept for backward compatibility).
    """
    DataFrame = DataFrame.drop(config.AIR_DROP_COLOUMNS, axis=1)
    preproc_pipe = load_object(config.AIR_PREPROCESSOR_PATH)
    exp_pipe = load_object(config.AIR_SCALING_DATA)
    DataFrame = preproc_pipe.fit_transform(DataFrame)

    print("dataframe processed and ready for splitting")

    splits = get_forecasting_splits(DataFrame,
                                    fcst_history=config.FCST_HISTORY,
                                    fcst_horizon=config.FCST_HORIZON,
                                    datetime_col=config.AIR_DATETIME_COL,
                                    valid_size=config.VALID_SIZE,
                                    test_size=config.TEST_SIZE)

    X, y = prepare_forecasting_data(DataFrame,
                                    fcst_history=config.FCST_HISTORY,
                                    fcst_horizon=config.FCST_HORIZON,
                                    x_vars=config.AIR_COLOUMNS,
                                    y_vars=config.AIR_COLOUMNS)

    learn = TSForecaster(X, y, splits=splits,
                         batch_size=16, path='models',
                         arch='InceptionTimePlus',  # use "PatchTST" for the transformer variant
                         pipelines=[preproc_pipe, exp_pipe],
                         # arch_config=config.ARCH_CONFIG,  # uncomment only if PatchTST is used
                         metrics=[mse, mape],
                         cbs=ShowGraph())

    # pick the learning rate at the "valley" of the LR-finder curve
    lr_max = learn.lr_find().valley

    learn.fit_one_cycle(n_epoch=config.N_EPOCH, lr_max=lr_max)
    learn.export("model_in.pt")
    return 0
46
+
def inference_Aircomp(fcst_date: str, DataFrame: pd.DataFrame) -> pd.DataFrame:
    """Forecast the next ``config.FCST_HORIZON`` hours of air-compressor data.

    Uses the last ``config.FCST_HISTORY`` hourly rows ending at *fcst_date*
    as model input and returns a frame of inverse-transformed predictions
    indexed by timestamp.

    Bug fix: the annotation ``fcst_date:string`` used the undefined name
    ``string`` (NameError as soon as the module was imported); it is ``str``.
    The hardcoded ``'1H'`` now uses ``config.FREQUENCY`` (same value) for
    consistency.
    """
    pre = load_object(config.AIR_PREPROCESSOR_PATH)
    # NOTE(review): fit_transform refits the preprocessing pipeline on the
    # incoming frame at inference time — confirm this is intended.
    DataFrame = pre.fit_transform(DataFrame)

    # the FCST_HISTORY hourly timestamps ending at the forecast anchor date
    dates = pd.date_range(start=None, end=fcst_date,
                          periods=config.FCST_HISTORY, freq=config.FREQUENCY)
    new_df = DataFrame[DataFrame[config.AIR_DATETIME_COL].isin(dates)].reset_index(drop=True)

    predict = load_learner(config.MODEL_PATH_ITP_AIR)
    new_df = predict.transform(new_df)

    # fcst_horizon=0: build only the X window, no targets
    new_x, __ = prepare_forecasting_data(new_df, fcst_history=config.FCST_HISTORY,
                                         fcst_horizon=0,
                                         x_vars=config.AIR_COLOUMNS,
                                         y_vars=config.AIR_COLOUMNS)

    new_scaled_preds, *_ = predict.get_X_preds(new_x)

    # reshape model output to (horizon, n_variables)
    new_scaled_preds = to_np(new_scaled_preds).swapaxes(1, 2).reshape(-1, len(config.AIR_COLOUMNS))

    # forecast timestamps: drop the anchor date itself with [1:]
    dates = pd.date_range(start=fcst_date, periods=config.FCST_HORIZON + 1,
                          freq=config.FREQUENCY)[1:]
    preds_df = pd.DataFrame(dates, columns=[config.AIR_DATETIME_COL])
    preds_df.loc[:, config.AIR_COLOUMNS] = new_scaled_preds
    preds_df = predict.inverse_transform(preds_df)

    return preds_df
71
+
def inference_Energy(fcst_date: str, DataFrame: pd.DataFrame) -> pd.DataFrame:
    """Forecast the next ``config.FCST_HORIZON`` hours of energy-meter data.

    Same flow as :func:`inference_Aircomp`, but parses the energy meter's
    timestamp column first and uses the energy model/constants.

    Bug fix: the annotation ``fcst_date:string`` used the undefined name
    ``string`` (NameError at import time); it is ``str``. The hardcoded
    ``'1H'`` now uses ``config.FREQUENCY`` (same value).
    """
    pre = load_object(config.ENER_PREPROCESSOR_PATH)
    # NOTE(review): format='mixed' requires pandas >= 2.0 — confirm the
    # deployed pandas version supports it.
    DataFrame[config.ENERGY_DATETIME] = pd.to_datetime(DataFrame[config.ENERGY_DATETIME], format='mixed')
    DataFrame = pre.fit_transform(DataFrame)

    # the FCST_HISTORY hourly timestamps ending at the forecast anchor date
    dates = pd.date_range(start=None, end=fcst_date,
                          periods=config.FCST_HISTORY, freq=config.FREQUENCY)
    new_df = DataFrame[DataFrame[config.ENERGY_DATETIME].isin(dates)].reset_index(drop=True)

    predict = load_learner(config.MODEL_PATH_ITP_ENER)
    new_df = predict.transform(new_df)

    # fcst_horizon=0: build only the X window, no targets
    new_x, __ = prepare_forecasting_data(new_df, fcst_history=config.FCST_HISTORY,
                                         fcst_horizon=0,
                                         x_vars=config.ENERGY_COLOUMNS,
                                         y_vars=config.ENERGY_COLOUMNS)

    new_scaled_preds, *_ = predict.get_X_preds(new_x)

    # reshape model output to (horizon, n_variables)
    new_scaled_preds = to_np(new_scaled_preds).swapaxes(1, 2).reshape(-1, len(config.ENERGY_COLOUMNS))

    # forecast timestamps: drop the anchor date itself with [1:]
    dates = pd.date_range(start=fcst_date, periods=config.FCST_HORIZON + 1,
                          freq=config.FREQUENCY)[1:]
    preds_df = pd.DataFrame(dates, columns=[config.ENERGY_DATETIME])
    preds_df.loc[:, config.ENERGY_COLOUMNS] = new_scaled_preds
    preds_df = predict.inverse_transform(preds_df)

    return preds_df
96
+
def inference_boiler(fcst_date: str, DataFrame: pd.DataFrame) -> pd.DataFrame:
    """Forecast the next ``config.FCST_HORIZON`` hours of boiler data.

    Same flow as :func:`inference_Aircomp`, using the boiler
    model/constants.

    Bug fix: the annotation ``fcst_date:string`` used the undefined name
    ``string`` (NameError at import time); it is ``str``. The hardcoded
    ``'1H'`` now uses ``config.FREQUENCY`` (same value).
    """
    pre = load_object(config.BOILER_PREPROCESSOR_PATH)
    # NOTE(review): fit_transform refits the preprocessing pipeline on the
    # incoming frame at inference time — confirm this is intended.
    DataFrame = pre.fit_transform(DataFrame)

    # the FCST_HISTORY hourly timestamps ending at the forecast anchor date
    dates = pd.date_range(start=None, end=fcst_date,
                          periods=config.FCST_HISTORY, freq=config.FREQUENCY)
    new_df = DataFrame[DataFrame[config.BOILER_DATETIME].isin(dates)].reset_index(drop=True)

    predict = load_learner(config.MODEL_PATH_ITP_BOIL)
    new_df = predict.transform(new_df)

    # fcst_horizon=0: build only the X window, no targets
    new_x, __ = prepare_forecasting_data(new_df, fcst_history=config.FCST_HISTORY,
                                         fcst_horizon=0,
                                         x_vars=config.BOILER_COLOUMNS,
                                         y_vars=config.BOILER_COLOUMNS)

    new_scaled_preds, *_ = predict.get_X_preds(new_x)

    # reshape model output to (horizon, n_variables)
    new_scaled_preds = to_np(new_scaled_preds).swapaxes(1, 2).reshape(-1, len(config.BOILER_COLOUMNS))

    # forecast timestamps: drop the anchor date itself with [1:]
    dates = pd.date_range(start=fcst_date, periods=config.FCST_HORIZON + 1,
                          freq=config.FREQUENCY)[1:]
    preds_df = pd.DataFrame(dates, columns=[config.BOILER_DATETIME])
    preds_df.loc[:, config.BOILER_COLOUMNS] = new_scaled_preds
    preds_df = predict.inverse_transform(preds_df)

    return preds_df
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pandas as pd
import tsai
import Generate

# Example datasets shown as defaults in each tab of the UI.
# NOTE(review): absolute Windows paths ('D:/project/...') — the app only
# starts on a machine with these files; confirm before deploying.
example_df_aircomp=pd.read_csv('D:/project/aircompressordata.csv')
example_df_ener=pd.read_csv('D:/project/energymeter.csv')
example_df_boiler=pd.read_csv('D:/project/Boiler1.csv')

# Top-level Gradio container; tabs and event wiring are added below.
demo=gr.Blocks(title="EcoForecast")
11
+
def pred_air(date):
    """Forecast air-compressor readings for *date* from the example data."""
    return Generate.inference_Aircomp(date, example_df_aircomp)
15
+
def pred_ener(date):
    """Forecast energy-meter readings for *date* from the example data."""
    return Generate.inference_Energy(date, example_df_ener)
19
+
def pred_boiler(date):
    """Forecast boiler readings for *date* from the example data."""
    return Generate.inference_boiler(date, example_df_boiler)
23
+
def plotgraphs(dataframe):
    """Placeholder for plotting forecast dataframes.

    Not yet implemented; ignores *dataframe* and returns 0.
    """
    return 0
27
+
# Build the three-tab UI and wire the Forecast buttons.
# Fixes: the boiler tab previously rebound `ener_dataframe_input` (name
# collision with the energy tab); `Air_plot_forecast.click()` was called with
# no callback (a no-op) and is now left unwired until plotgraphs() is
# implemented; the air-tab placeholder text was garbled.
with demo:
    gr.Markdown("Tool for predicting the next seven days of data in the future using the last 200 points of data incoming")
    with gr.Tabs():
        with gr.TabItem("Air compressor data"):
            with gr.Row():
                Air_input=gr.Text(placeholder="Enter the date and time in example format only",show_label=False)
                air_dataframe_input=gr.Dataframe(example_df_aircomp.head(100))
                Air_dataframe_output=gr.Dataframe()
                Air_plots=gr.Plot()
            with gr.Column():
                Aircomp_output_btn=gr.Button("Forecast")
                Air_plot_forecast=gr.Button("Plot")

        with gr.TabItem("Energymeter data"):
            with gr.Row():
                ener_input=gr.Text(placeholder="Enter the date and time in example format only",show_label=False)
                ener_dataframe_input=gr.Dataframe(example_df_ener.head(100))
                Ener_dataframe_output=gr.Dataframe()
                Ener_plots=gr.Plot()
            with gr.Column():
                Energy_output_btn=gr.Button("Forecast")
                Ener_plot_forecast=gr.Button("Plot")

        with gr.TabItem("Boiler data"):
            with gr.Row():
                boiler_input=gr.Text(placeholder="Enter the date and time in example format only",show_label=False)
                # fix: previously rebound ener_dataframe_input from the energy tab
                boiler_dataframe_input=gr.Dataframe(example_df_boiler.head(100))
                boiler_dataframe_output=gr.Dataframe()
                boil_plots=gr.Plot()
            with gr.Column():
                Boiler_output_btn=gr.Button("Forecast")
                boiler_plot_forecast=gr.Button("Plot")

    Aircomp_output_btn.click(pred_air,inputs=Air_input,outputs=Air_dataframe_output)
    Energy_output_btn.click(pred_ener,inputs=ener_input,outputs=Ener_dataframe_output)
    Boiler_output_btn.click(pred_boiler,inputs=boiler_input,outputs=boiler_dataframe_output)
    # TODO: wire the Plot buttons to plotgraphs() once it is implemented.

demo.launch(share=True)
config.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Central configuration for the EcoForecast pipeline.

Holds model/artefact paths, per-dataset column definitions, and
training/forecasting hyperparameters shared by Generate.py and app.py.

Fix: adds un-prefixed backward-compatible aliases (DROP_COLOUMNS,
PREPROCESSOR_PATH, SCALING_DATA, COLOUMNS, DATETIME_COL) that
Generate.preprocess_data_transform_generate_splits_Train references but
which were previously missing (AttributeError at runtime).
"""

# --- Exported InceptionTimePlus learners, one per dataset ---
MODEL_PATH_ITP_AIR='/project/Pipline/models/AirInceptionTime.pt'
MODEL_PATH_ITP_ENER='/project/Pipline/models/EnerInceptionTime.pt'
MODEL_PATH_ITP_BOIL='/project/Pipline/models/BoilerInceptionTime.pt'

# --- Preprocessing / scaling pipeline artefacts ---
DATA_PATH='/project/Pipline/data'
AIR_PREPROCESSOR_PATH='/project/Pipline/data/preproc_pipe.pkl'
AIR_SCALING_DATA='/project/Pipline/data/exp_pipe.pkl'

ENER_PREPROCESSOR_PATH='/project/Pipline/data/Enerpreproc_pipe.pkl'
ENER_SCALING_DATA='/project/Pipline/data/Ener_exp_pipe.pkl'

BOILER_PREPROCESSOR_PATH='/project/Pipline/data/boiler_preproc_pipe.pkl'

# --- Air-compressor dataset schema ---
AIR_DATETIME_COL='timestamp'
# the current model is only for 1 device and of a specific appliance only
AIR_COLOUMNS=['air_inlet_temp_deg_f', 'average_cfm', 'average_kw',
              'compressor_room_air_generated_cfm',
              'compressor_room_energy_cosumed_kwh', 'specific_power_kw_100cfm',
              'percentage_loading_based_on_air_supplied_design_600cfm']
# the below columns provide no relevant information to the model
AIR_DROP_COLOUMNS=['data_id','ideal_specific_power_kw_100cfm','device_id']

# --- Energy-meter dataset schema ---
ENERGY_DATETIME='parameter_timestamp'
ENERGY_COLOUMNS=['current_ir', 'electrical_energy', 'frequency', 'power', 'powerfactor',
                 'pressure', 'temperature', 'voltage_vb',
                 'voltage_vr', 'voltage_vy']
ENERGY_DROP_COLOUMNS=["location","current_ib","current_iy","device_1_state","device_2_state","device_id","device_name",'id',"device_type"]

# --- Boiler dataset schema ---
BOILER_DATETIME='DateString'
BOILER_COLOUMNS=['Boiler2_Feed Water Temp (T-4) (°F)','Boiler2_Gas Flow (G-2) (MMBtu)', 'Boiler2_Make Up Flow (W-17) (kGal)','Boiler2_Steam Flow (S-1) (lbs)']

# --- Resampling / gap-filling settings ---
# Frequency of the incoming data
FREQUENCY='1H'
METHOD='ffill'
VALUE=0

# --- Forecast window ---
# No. of values needed to be looked back
FCST_HISTORY=200
# No. of Timestamps predicted in the future
FCST_HORIZON=168 #1 Week
VALID_SIZE=0.1
TEST_SIZE=0.2

# --- Training Parameters for a new Model (only needed when using PatchTST) ---
ARCH_CONFIG=dict(
    n_layers=3,
    n_heads=4,
    d_model=16,
    d_ff=128,
    attn_dropout=0.0,
    dropout=0.3,
    patch_len=24,
    stride=2,
    padding_patch=True,
    )

N_EPOCH=100

# --- Backward-compatible aliases (un-prefixed names used by Generate.py) ---
DROP_COLOUMNS=AIR_DROP_COLOUMNS
PREPROCESSOR_PATH=AIR_PREPROCESSOR_PATH
SCALING_DATA=AIR_SCALING_DATA
COLOUMNS=AIR_COLOUMNS
DATETIME_COL=AIR_DATETIME_COL
forecast.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
forecast.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# coding: utf-8

# Notebook-exported training script: builds preprocessing/scaling pipelines
# for the air-compressor dataset, trains a PatchTST forecaster with tsai,
# evaluates it on the test split, and runs a sample inference.
# NOTE(review): order-dependent global state (df, splits, learn) — run top
# to bottom only.

import sklearn
from tsai.basics import *
#my_setup(sklearn)

import pandas as pd
# Load the raw data and drop ID columns that carry no signal.
df=pd.read_csv('D:/project/aircompressordata.csv')
df=df.drop(['data_id','ideal_specific_power_kw_100cfm','device_id'],axis=1)
print(df.columns)
df.head(5)


# In[3]:


# Preprocessing settings: hourly data, forward-fill gaps in the first
# seven columns.
datetime_col="timestamp"
freq='1H'
coloumns=df.columns[:7]
method='ffill'
value=0

# Pipeline: shrink dtypes, drop duplicate timestamps, insert missing
# hourly rows, then fill the resulting gaps.
preproc_pipe=sklearn.pipeline.Pipeline([
    ('shrinker',TSShrinkDataFrame()),
    ('drop_duplicates',TSDropDuplicates(datetime_col=datetime_col)),
    ('add_mts',TSAddMissingTimestamps(datetime_col=datetime_col,freq=freq)),
    ('fill_missing',TSFillMissing(columns=coloumns,method=method,value=value)),
    ],
    verbose=True)
# Persist the pipeline so inference code can reload the identical object.
mkdir('data', exist_ok=True,parents=True)
save_object(preproc_pipe,'data/preproc_pipe.pkl')
preproc_pipe=load_object('data/preproc_pipe.pkl')

df=preproc_pipe.fit_transform(df)


# In[4]:


df.head()


# In[5]:


# Windowing: look back 200 steps, predict 72 ahead; 10% validation,
# 20% test.
fcst_history=200
fcst_horizon=72
valid_size=0.1
test_size=0.2

splits=get_forecasting_splits(df,fcst_history=fcst_history,fcst_horizon=fcst_horizon,datetime_col=datetime_col,
                              valid_size=valid_size,test_size=test_size)

splits


# In[6]:


# Standard-scale every column except the datetime; fit on the training
# split only (scaler__idxs) to avoid leakage.
coloumns=df.columns[1:]
train_split=splits[0]

exp_pipe=sklearn.pipeline.Pipeline([
    ('scaler',TSStandardScaler(columns=coloumns)),
    ],
    verbose=True)

save_object(exp_pipe,'data/exp_pipe.pkl')
exp_pipe=load_object('data/exp_pipe.pkl')

df_scaled=exp_pipe.fit_transform(df,scaler__idxs=train_split)

df_scaled


# In[7]:


# Multivariate forecasting: every non-datetime column is both input and
# target.
x_vars=df.columns[1:]
y_vars=df.columns[1:]


# In[8]:


X,y=prepare_forecasting_data(df,fcst_history=fcst_history,fcst_horizon=fcst_horizon,x_vars=x_vars,y_vars=y_vars)
X.shape , y.shape


# In[9]:


# PatchTST architecture hyperparameters.
arch_config=dict(
    n_layers=3,
    n_heads=4,
    d_model=16,
    d_ff=128,
    attn_dropout=0.0,
    dropout=0.3,
    patch_len=24,
    stride=2,
    padding_patch=True,
    )


# In[10]:


# Both pipelines are attached to the learner so they are exported with
# the model and reapplied automatically at inference.
learn=TSForecaster(X,y,splits=splits,
                   batch_size=16,path="models",
                   pipelines=[preproc_pipe,exp_pipe],
                   arch="PatchTST",
                   arch_config=arch_config,
                   metrics=[mse,mae],
                   cbs=ShowGraph())


# In[11]:


learn.summary()


# In[12]:


# Pick the learning rate at the "valley" of the LR-finder curve.
lr_max=learn.lr_find().valley


# In[ ]:


n_epochs=100

learn.fit_one_cycle(n_epoch=n_epochs,lr_max=lr_max)
learn.export('PatchTST.pt')


# In[14]:


# Evaluate on the held-out test split (splits[2]).
from tsai.inference import load_learner
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

learn=load_learner('models/PatchTST.pt')
y_test_preds, *_=learn.get_X_preds(X[splits[2]])
y_test_preds=to_np(y_test_preds)
print(y_test_preds.shape)

y_test=y[splits[2]]

print(mean_squared_error(y_test.flatten(),y_test_preds.flatten()))
print(mean_absolute_error(y_test.flatten(),y_test_preds.flatten()))
print(mean_absolute_percentage_error(y_test.flatten(),y_test_preds.flatten()))


# In[15]:


X_test=X[splits[2]]
plot_forecast(X_test,y_test,y_test_preds,sel_vars=True)


# In[16]:


# Sample inference: take the fcst_history hours ending at fcst_date.
fcst_date="2023-07-31 23:00:00"
dates=pd.date_range(start=None,end=fcst_date,periods=fcst_history,freq=freq)
dates


# In[17]:


#df=pd.read_csv('D:/project/aircompressordata.csv')
#df=preproc_pipe.fit_transform(df)

new_df=df[df[datetime_col].isin(dates)].reset_index(drop=True)
new_df


# In[18]:


from tsai.inference import load_learner

predict=load_learner('models/PatchTST.pt')
new_df=predict.transform(new_df)

new_df


# In[19]:


# fcst_horizon=0: build only the X window, no targets.
x_feat=new_df.columns[1:]
new_x,__=prepare_forecasting_data(new_df,fcst_history=fcst_history,fcst_horizon=0,x_vars=x_vars,y_vars=y_vars)
new_x.shape


# In[20]:


new_scaled_preds, *_ = learn.get_X_preds(new_x)

# Reshape model output to (horizon, n_variables); [1:] drops the anchor
# date itself from the forecast index.
new_scaled_preds=to_np(new_scaled_preds).swapaxes(1,2).reshape(-1,len(y_vars))
dates=pd.date_range(start=fcst_date, periods=fcst_horizon+1,freq='1H')[1:]
preds_df=pd.DataFrame(dates,columns=[datetime_col])
preds_df.loc[:, y_vars]=new_scaled_preds
preds_df=learn.inverse_transform(preds_df)

preds_df


# In[1]:


# Export this notebook to .py scripts via tsai helpers.
from tsai.export import get_nb_name; nb_name=get_nb_name(locals())
from tsai.imports import create_scripts; create_scripts(nb_name)


# In[ ]:
test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
test.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Ad-hoc smoke test: load the three example datasets and run one
# inference per model. NOTE(review): depends on local absolute paths and
# the exported models listed in config.py.
import pandas as pd
import Generate
import config

Aircomp_df=pd.read_csv('D:/project/aircompressordata.csv')
#df=Aircomp_df.drop(config.DROP_COLOUMNS,axis=1)
Energy_df=pd.read_csv('D:/project/energymeter.csv')
boiler_df=pd.read_csv('D:/project/Boiler1.csv')
'''
preds=Generate.inference_Aircomp("2023-07-31 23:00:00",Aircomp_df)
print(preds)
preds=Generate.inference_Energy("2023-07-13 12:00:50",Energy_df)
print(preds)
'''
# Only the boiler inference is currently exercised.
preds=Generate.inference_boiler("2023-04-30 01:59:00",boiler_df)
print(preds)
test2.ipynb ADDED
The diff for this file is too large to render. See raw diff