# -*- coding: utf-8 -*-
"""ModelTraining.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1g8XfXJQFrvtAxDsWj9EQ5ZT90A-C-T-7
"""
import csv

import keras
import matplotlib.pyplot as plt  # used only by the commented-out plotting helpers below
import pandas as pd


path = ''  # prefix for the data/ and model/ directories; empty means the working directory

def preprocesshyper():
    """Read the current preprocessing version from disk, increment it, and write it back."""
    with open(path + 'data/' + 'preprocessing_data.csv', newline='') as f:
        reader = csv.reader(f)
        data = list(reader)

    # The first row holds a single field: the last version number.
    version = int(data[0][0])
    version += 1

    # Rewrite the file with the new version; normalize() appends the scaling stats below it.
    with open(path + 'data/' + "preprocessing_data.csv", "w") as f:
        f.write("{}\n".format(version))
    return version
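
# preprocesshyper() assumes data/preprocessing_data.csv already exists with a
# version number on its first line; a hypothetical freshly seeded file would
# contain just:
#
#   0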

def normalize(data):
    """Z-score each column and persist the statistics for later denormalization."""
    data_mean = data.mean(axis=0)
    data_std = data.std(axis=0)
    # Append the per-column means and standard deviations (Temp, Humidity,
    # Wind Speed, Wind Gust) below the version number written by preprocesshyper().
    with open(path + 'data/' + "preprocessing_data.csv", "a") as f:
        f.write("Mean, Standard Deviation\n")
        f.write("{}, {}, {}, {}\n".format(data_mean.iloc[0], data_mean.iloc[1], data_mean.iloc[2], data_mean.iloc[3]))
        f.write("{}, {}, {}, {}".format(data_std.iloc[0], data_std.iloc[1], data_std.iloc[2], data_std.iloc[3]))
    return (data - data_mean) / data_std
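

# A minimal sketch (hypothetical helper, not part of the original pipeline):
# read back the statistics persisted by normalize() to map a 4-value model
# prediction to real units. Row 0 of the file holds the version, row 1 the
# header, and rows 2 and 3 the means and standard deviations.
def denormalize(pred):
    with open(path + 'data/' + 'preprocessing_data.csv', newline='') as f:
        rows = list(csv.reader(f))
    means = [float(v) for v in rows[2]]
    stds = [float(v) for v in rows[3]]
    # invert (x - mean) / std
    return [p * s + m for p, m, s in zip(pred, means, stds)]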


def preprocessdata():
    """Load the recorded weather data, coerce dtypes, and return a normalized frame."""
    pd_data = pd.read_csv(path + 'data/' + 'weather_data.csv')
    pd_data['Time PST'] = pd.to_datetime(pd_data['Time PST'])
    pd_data['Temp (F)'] = pd_data['Temp (F)'].astype(int)
    pd_data['Humidity'] = pd_data['Humidity'].astype(int)
    pd_data['Wind Speed (in HG)'] = pd_data['Wind Speed (in HG)'].astype(float)
    pd_data['Wind Gust (MPH)'] = pd_data['Wind Gust (MPH)'].astype(float)

    # Timestamps are parsed only to validate the column; the model trains on
    # the four numeric features, so the time column is dropped before normalizing.
    pd_data = pd_data.drop(['Time PST'], axis=1)
    df = normalize(pd_data)

    return df


def model_train(df):
    """Split the data into training and validation sets, then train an LSTM."""

    split_fraction = 0.715
    train_split = int(split_fraction * df.shape[0])
    step = 1  # sampling rate within each window (see note below)

    past = 60  # input sequence length
    future = 0  # offset, in steps, between the end of a window and its target
    learning_rate = 0.001
    batch_size = 1  # windows per training batch
    epochs = 20

    train_data = df.loc[0: train_split - 1]
    val_data = df.loc[train_split:]

    """
    The starting point for y_train must be at start as we take that (past) input to predict another output
    
    for example using three sequence (past= 3):
      data = [0,1,2,3,4,5,6,7,8,9,10]
      split
      x_train = [0,1,2,3,4]
      y_train = [3,4,5]
    
      [0,1,2] -> [3]
      [1,2,3] -> [4]
      [2,3,4] -> [5]
    
    The step is to sample at every integer steps. (1,2,3,4), (1,3,5,7), ...
    """

    start = past + future
    end = start + train_split

    x_train = train_data.values
    y_train = df.iloc[start:end]

    sequence_length = int(past / step)

    dataset_train = keras.preprocessing.timeseries_dataset_from_array(
        x_train,
        y_train,
        sequence_length=sequence_length,
        sampling_rate=step,
        batch_size=batch_size,
    )

    """
    
    The x_end must be subtracted by 1
    
    for example using three sequence (past = 3):
      data = [0,1,2,3,4,5,6,7,8,9,10]
      split
      x_val = [5,6,7,8,9,10]
      y_val = [8,9,10]
    
      [5,6,7] -> [8]
      [6,7,8] -> [9]
      [7,8,9] -> [10]
    
      [8,9,10] -> [?]  # is unknown
    
    """

    x_end = len(val_data) - 1

    label_start = train_split + past + future

    x_val = val_data.iloc[:x_end].values
    y_val = df.iloc[label_start:]
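
    # Cross-checking with the toy example: train_split = 5, past = 3, future = 0
    # gives label_start = 8, so y_val = [8, 9, 10], matching the docstring above.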

    dataset_val = keras.preprocessing.timeseries_dataset_from_array(
        x_val,
        y_val,
        sequence_length=sequence_length,
        sampling_rate=step,
        batch_size=batch_size,
    )

    # Pull one batch to read the input shape for the model definition.
    for batch in dataset_train.take(1):
        inputs, targets = batch

    """
    inputs has shape (1, 60, 4):
      1  is the batch size
      60 is the sequence length
      4  is the number of features (Temp, Humidity, Wind Speed, Wind Gust)
    """

    model_inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
    lstm_out = keras.layers.LSTM(32)(model_inputs)
    outputs = keras.layers.Dense(4)(lstm_out)  # one output per feature

    model = keras.Model(inputs=model_inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
    model.summary()


    path_checkpoint = "model_checkpoint.weights.h5"  # target for the optional checkpoint callback below
    es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)

    # modelckpt_callback = keras.callbacks.ModelCheckpoint(
    #     monitor="val_loss",
    #     filepath=path_checkpoint,
    #     verbose=1,
    #     save_weights_only=True,
    #     save_best_only=True,
    # )
    history = model.fit(
        dataset_train,
        epochs=epochs,
        validation_data=dataset_val,
        callbacks=[es_callback],
    )
    # `history` is consumed only by the commented-out visualize_loss helper below.
    return model

    
    # def visualize_loss(history, title):
    #     loss = history.history["loss"]
    #     val_loss = history.history["val_loss"]
    #     epochs = range(len(loss))
    #     plt.figure()
    #     plt.plot(epochs, loss, "b", label="Training loss")
    #     plt.plot(epochs, val_loss, "r", label="Validation loss")
    #     plt.title(title)
    #     plt.xlabel("Epochs")
    #     plt.ylabel("Loss")
    #     plt.legend()
    #     plt.show()
    #
    #
    # visualize_loss(history, "Training and Validation Loss")
    #
    # def show_plot(plot_data, delta, title):
    #     labels = ["History", "True Future", "Model Prediction"]
    #     marker = [".-", "rx", "go"]
    #     time_steps = list(range(-(plot_data[0].shape[0]), 0))
    #     if delta:
    #         future = delta
    #     else:
    #         future = 0
    #
    #     plt.title(title)
    #     for i, val in enumerate(plot_data):
    #         if i:  # true future and model prediction are single points
    #             plt.plot(future, plot_data[i][0], marker[i], markersize=10, label=labels[i])
    #         else:  # history is a full sequence
    #             plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
    #     plt.legend()
    #     plt.xlim([time_steps[0], (future + 5) * 2])
    #     plt.xlabel("Time-Step")
    #     plt.show()
    #     return
    #
    #
    # for x, y in dataset_val.take(5):
    #     print(x.shape)
    #     print(model.predict(x))
    #     show_plot(
    #         [x[0][:, 1].numpy(), y[0].numpy(), model.predict(x)[0]],
    #         12,
    #         "Single Step Prediction",
    #     )


def main():
    version = preprocesshyper()
    df = preprocessdata()
    model = model_train(df)
    model.save(path + 'model/' + 'LSTM{}.h5'.format(version))

if __name__ == "__main__":
    main()
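
# A minimal inference sketch (hypothetical; the version number and window
# source are assumptions): load a saved model and predict the next reading
# from the most recent `past` normalized rows.
#
#   model = keras.models.load_model(path + 'model/' + 'LSTM1.h5')
#   window = preprocessdata().values[-60:]      # last 60 normalized rows
#   pred = model.predict(window[None, ...])[0]  # shape (4,)
#   print(denormalize(pred))                    # back to real units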