# Colab UI residue from the notebook export ("Spaces: Running Running") --
# kept as a comment so the module remains importable.
# -*- coding: utf-8 -*-
"""ModelTraining.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1g8XfXJQFrvtAxDsWj9EQ5ZT90A-C-T-7
"""
import pandas as pd | |
import matplotlib.pyplot as plt | |
import keras | |
import requests | |
import pandas as pd | |
import numpy as np | |
from datetime import date | |
from datetime import timedelta | |
from bs4 import BeautifulSoup | |
import csv | |
path = '' | |
def preprocesshyper(): | |
with open(path + 'data/' + 'preprocessing_data.csv', newline='') as f: | |
reader = csv.reader(f) | |
data = list(reader) | |
version = int(*data[0]) | |
version += 1 | |
with open(path + 'data/' + "preprocessing_data.csv", "w") as f: | |
f.write("{}\n".format(version)) | |
return version | |
def normalize(data): | |
data_mean = data.mean(axis=0) | |
data_std = data.std(axis=0) | |
with open(path + 'data/' + "preprocessing_data.csv", "a") as f: | |
f.write("Mean, Standard Deviation\n") | |
f.write("{}, {}, {}, {}\n".format(data_mean[0], data_mean[1], data_mean[2], data_mean[3])) | |
f.write("{}, {}, {}, {}".format(data_std[0], data_std[1], data_std[2], data_std[3])) | |
return (data - data_mean) / data_std | |
def preprocessdata(): | |
pd_data = pd.read_csv(path + 'data/' + 'weather_data.csv') | |
pd_data['Time PST'] = pd.to_datetime(pd_data['Time PST']) | |
pd_data['Temp (F)'] = pd_data['Temp (F)'].astype(int) | |
pd_data['Humidity'] = pd_data['Humidity'].astype(int) | |
pd_data['Wind Speed (in HG)'] = pd_data['Wind Speed (in HG)'].astype(float) | |
pd_data['Wind Gust (MPH)'] = pd_data['Wind Gust (MPH)'].astype(float) | |
pd_data = pd_data.drop(['Time PST'], axis=1) | |
df = normalize(pd_data) | |
return df | |
def model_train(df): | |
"""This is to split the data set into training and validation set""" | |
split_fraction = 0.715 | |
train_split = int(split_fraction * int(df.shape[0])) | |
step = 1 | |
past = 60 # Sequence length | |
future = 0 # Amount of sequence in the future to predict | |
learning_rate = 0.001 | |
batch_size = 1 # how many predictions per sample | |
epochs = 20 | |
train_data = df.loc[0: train_split - 1] | |
val_data = df.loc[train_split:] | |
""" | |
The starting point for y_train must be at start as we take that (past) input to predict another output | |
for example using three sequence (past= 3): | |
data = [0,1,2,3,4,5,6,7,8,9,10] | |
split | |
x_train = [0,1,2,3,4] | |
y_train = [3,4,5] | |
[0,1,2] -> [3] | |
[1,2,3] -> [4] | |
[2,3,4] -> [5] | |
The step is to sample at every integer steps. (1,2,3,4), (1,3,5,7), ... | |
""" | |
start = past + future | |
end = start + train_split | |
x_train = train_data.values | |
y_train = df.iloc[start:end] | |
sequence_length = int(past / step) | |
dataset_train = keras.preprocessing.timeseries_dataset_from_array( | |
x_train, | |
y_train, | |
sequence_length=sequence_length, | |
sampling_rate=step, | |
batch_size=batch_size, | |
) | |
""" | |
The x_end must be subtracted by 1 | |
for example using three sequence (past = 3): | |
data = [0,1,2,3,4,5,6,7,8,9,10] | |
split | |
x_val = [5,6,7,8,9,10] | |
y_val = [8,9,10] | |
[5,6,7] -> [8] | |
[6,7,8] -> [9] | |
[7,8,9] -> [10] | |
[8,9,10] -> [?] # is unknown | |
""" | |
x_end = len(val_data) - 1 | |
label_start = train_split + past + future | |
x_val = val_data.iloc[:x_end].values | |
y_val = df.iloc[label_start:] | |
dataset_val = keras.preprocessing.timeseries_dataset_from_array( | |
x_val, | |
y_val, | |
sequence_length=sequence_length, | |
sampling_rate=step, | |
batch_size=batch_size, | |
) | |
for batch in dataset_train.take(1): | |
inputs, targets = batch | |
""" | |
(1, 60, 5) | |
1 is batch size | |
60 is sequence length | |
5 is features | |
""" | |
inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2])) | |
lstm_out = keras.layers.LSTM(32)(inputs) | |
outputs = keras.layers.Dense(4)(lstm_out) | |
model = keras.Model(inputs=inputs, outputs=outputs) | |
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse") | |
model.summary() | |
path_checkpoint = "model_checkpoint.weights.h5" | |
es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5) | |
# modelckpt_callback = keras.callbacks.ModelCheckpoint( | |
# monitor="val_loss", | |
# filepath=path_checkpoint, | |
# verbose=1, | |
# save_weights_only=True, | |
# save_best_only=True, | |
# ) | |
history = model.fit( | |
dataset_train, | |
epochs=epochs, | |
validation_data=dataset_val, | |
callbacks=[es_callback], | |
) | |
return model | |
# def visualize_loss(history, title): | |
# loss = history.history["loss"] | |
# val_loss = history.history["val_loss"] | |
# epochs = range(len(loss)) | |
# plt.figure() | |
# plt.plot(epochs, loss, "b", label="Training loss") | |
# plt.plot(epochs, val_loss, "r", label="Validation loss") | |
# plt.title(title) | |
# plt.xlabel("Epochs") | |
# plt.ylabel("Loss") | |
# plt.legend() | |
# plt.show() | |
# | |
# | |
# visualize_loss(history, "Training and Validation Loss") | |
# | |
# def show_plot(plot_data, delta, title): | |
# labels = ["History", "True Future", "Model Prediction"] | |
# marker = [".-", "rx", "go"] | |
# time_steps = list(range(-(plot_data[0].shape[0]), 0)) | |
# if delta: | |
# future = delta | |
# else: | |
# future = 0 | |
# | |
# plt.title(title) | |
# for i, val in enumerate(plot_data): | |
# if i: | |
# if i == 2: | |
# plt.plot(future, plot_data[i][0], marker[i], markersize=10, label=labels[i]) | |
# else: | |
# plt.plot(future, plot_data[i][0], marker[i], markersize=10, label=labels[i]) | |
# else: | |
# plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i]) | |
# plt.legend() | |
# plt.xlim([time_steps[0], (future + 5) * 2]) | |
# plt.xlabel("Time-Step") | |
# plt.show() | |
# return | |
# | |
# | |
# for x, y in dataset_val.take(5): | |
# print(x.shape) | |
# print(model.predict(x)) | |
# show_plot( | |
# [x[0][:, 1].numpy(), y[0].numpy(), model.predict(x)[0]], | |
# 12, | |
# "Single Step Prediction", | |
# ) | |
def main(): | |
version = preprocesshyper() | |
df = preprocessdata() | |
model = model_train(df) | |
model.save(path + 'model/' + 'LTSM{}.h5'.format(version)) | |
if __name__ == "__main__": | |
main() | |