Spaces:

Juneyy
/

PredictTemp24Hours

Running

PredictTemp24Hours / modeltraining.py

Juneyy

Update modeltraining.py

b3b8b29 unverified 5 months ago

No virus

6.62 kB

	# -*- coding: utf-8 -*-
	"""ModelTraining.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1g8XfXJQFrvtAxDsWj9EQ5ZT90A-C-T-7
	"""
	import pandas as pd
	import matplotlib.pyplot as plt
	import keras
	import requests
	import pandas as pd
	import numpy as np
	from datetime import date
	from datetime import timedelta
	from bs4 import BeautifulSoup
	import csv


	path = ''

	def preprocesshyper():
	    """Bump the run version stored in ``data/preprocessing_data.csv``.

	    The first field of the first line of that file is read as an integer
	    version counter. The counter is incremented and the file is rewritten
	    so that it contains only the new version; anything else that was in
	    the file is discarded by the rewrite.

	    A missing file, an empty file, or a blank first line is treated as
	    version 0, so the first run yields version 1.

	    Returns:
	        int: The incremented version number.

	    Raises:
	        ValueError: If the first field of the first line is not an integer.
	    """
	    stats_file = path + 'data/' + 'preprocessing_data.csv'

	    try:
	        with open(stats_file, newline='') as f:
	            # Only the first row matters; do not load the whole file.
	            first_row = next(csv.reader(f), None)
	    except FileNotFoundError:
	        first_row = None

	    # Parse the first field explicitly. Unpacking the whole row into
	    # ``int(...)`` would hand any second field to ``int`` as the base.
	    version = int(first_row[0]) if first_row else 0
	    version += 1

	    with open(stats_file, "w") as f:
	        f.write("{}\n".format(version))
	    return version

	def normalize(data):
	    """Standardize ``data`` column-wise and record the statistics used.

	    The per-column mean and standard deviation are appended to
	    ``data/preprocessing_data.csv`` as three lines: the header
	    ``Mean, Standard Deviation``, one comma-separated line of means, and
	    one comma-separated line of standard deviations (written without a
	    trailing newline). One value is written per column of ``data``.

	    Args:
	        data: Object providing ``mean(axis=0)`` and ``std(axis=0)``; in this
	            file it is the pandas DataFrame built by ``preprocessdata``.

	    Returns:
	        ``(data - mean) / std`` with the same layout as ``data``.
	    """
	    data_mean = data.mean(axis=0)
	    data_std = data.std(axis=0)

	    # Iterate over the values positionally instead of indexing the result
	    # with integer keys: on a pandas Series with string labels that relies
	    # on a deprecated positional fallback, and it hard-codes the column
	    # count to four.
	    mean_line = ", ".join("{}".format(v) for v in np.atleast_1d(data_mean))
	    std_line = ", ".join("{}".format(v) for v in np.atleast_1d(data_std))

	    with open(path + 'data/' + "preprocessing_data.csv", "a") as f:
	        f.write("Mean, Standard Deviation\n")
	        f.write(mean_line + "\n")
	        f.write(std_line)

	    # NOTE(review): a column with zero standard deviation divides by zero
	    # here and yields non-finite values; confirm the input never contains
	    # a constant column.
	    return (data - data_mean) / data_std


	def preprocessdata():
	    """Load ``data/weather_data.csv`` and return its normalized measurements.

	    The ``Time PST`` column is parsed as datetimes and then dropped; the
	    four measurement columns are cast to numeric dtypes, and the resulting
	    frame is passed through ``normalize``.

	    Returns:
	        Whatever ``normalize`` returns for the frame without ``Time PST``.
	    """
	    frame = pd.read_csv(path + 'data/' + 'weather_data.csv')
	    frame['Time PST'] = pd.to_datetime(frame['Time PST'])

	    # (column, dtype) pairs, cast one at a time in this order.
	    column_types = (
	        ('Temp (F)', int),
	        ('Humidity', int),
	        ('Wind Speed (in HG)', float),
	        ('Wind Gust (MPH)', float),
	    )
	    for column, dtype in column_types:
	        frame[column] = frame[column].astype(dtype)

	    return normalize(frame.drop(columns=['Time PST']))


	def model_train(df):
	    """Train a single-layer LSTM on sliding windows of ``df``.

	    The first 71.5% of the rows form the training split and the rest the
	    validation split. Each sample is a window of ``past`` consecutive rows;
	    its target is the row ``past + future`` positions after the window
	    start (with ``future = 0``, the row immediately after the window).
	    Training uses Adam with an MSE loss and stops early when ``val_loss``
	    has not improved for 5 epochs.

	    Args:
	        df: pandas DataFrame of numeric feature columns, such as the one
	            returned by ``preprocessdata``. Rows are assumed to be in
	            chronological order — TODO confirm against the data file.

	    Returns:
	        The fitted ``keras.Model``.

	    Raises:
	        ValueError: If the training split is too small to build a single
	            window/target pair.
	    """
	    split_fraction = 0.715
	    train_split = int(split_fraction * int(df.shape[0]))
	    step = 1  # sampling rate inside a window: (1,2,3,4), (1,3,5,7), ...

	    past = 60  # sequence length: rows per input window
	    future = 0  # extra offset, in rows, between window end and target
	    learning_rate = 0.001
	    batch_size = 1  # windows per batch
	    epochs = 20

	    sequence_length = int(past / step)

	    # Split by position so the inputs line up with the positional target
	    # slices below regardless of the frame's index labels.
	    train_data = df.iloc[:train_split]
	    val_data = df.iloc[train_split:]

	    # Training targets start ``past + future`` rows after the inputs, so
	    # the i-th window is paired with the row that follows it.
	    # Example with past = 3 on data = [0, 1, ..., 10]:
	    #   x_train = [0, 1, 2, 3, 4]
	    #   y_train = [3, 4, 5]
	    #   [0, 1, 2] -> [3]
	    #   [1, 2, 3] -> [4]
	    #   [2, 3, 4] -> [5]
	    start = past + future
	    end = start + train_split

	    x_train = train_data.values
	    y_train = df.iloc[start:end].values

	    # Fail early with a clear message instead of building an empty dataset.
	    window_span = (sequence_length - 1) * step + 1
	    if len(x_train) < window_span or len(y_train) == 0:
	        raise ValueError(
	            "Not enough rows to build a training window: need at least {} "
	            "training rows and one target row, got {} and {}".format(
	                window_span, len(x_train), len(y_train)
	            )
	        )

	    dataset_train = keras.preprocessing.timeseries_dataset_from_array(
	        x_train,
	        y_train,
	        sequence_length=sequence_length,
	        sampling_rate=step,
	        batch_size=batch_size,
	    )

	    # Validation inputs drop the last row: a window ending on it would have
	    # no following row to serve as its target.
	    # Example with past = 3 on data = [0, 1, ..., 10]:
	    #   x_val = [5, 6, 7, 8, 9]   (row 10 dropped)
	    #   y_val = [8, 9, 10]
	    #   [5, 6, 7] -> [8]
	    #   [6, 7, 8] -> [9]
	    #   [7, 8, 9] -> [10]
	    x_end = len(val_data) - 1
	    label_start = train_split + past + future

	    x_val = val_data.iloc[:x_end].values
	    y_val = df.iloc[label_start:].values

	    dataset_val = keras.preprocessing.timeseries_dataset_from_array(
	        x_val,
	        y_val,
	        sequence_length=sequence_length,
	        sampling_rate=step,
	        batch_size=batch_size,
	    )

	    # Each batch has shape (batch_size, sequence_length, n_features); the
	    # model input omits the batch axis. The shape is taken from the arrays
	    # directly rather than by pulling a batch from the dataset.
	    n_features = x_train.shape[1]
	    n_targets = y_train.shape[1]

	    model_inputs = keras.layers.Input(shape=(sequence_length, n_features))
	    lstm_out = keras.layers.LSTM(32)(model_inputs)
	    # One output unit per target column.
	    outputs = keras.layers.Dense(n_targets)(lstm_out)

	    model = keras.Model(inputs=model_inputs, outputs=outputs)
	    model.compile(
	        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
	        loss="mse",
	    )
	    model.summary()

	    # NOTE(review): no checkpointing is done and EarlyStopping is created
	    # without restore_best_weights, so the returned model presumably keeps
	    # the weights of the last epoch run — confirm that is intended.
	    es_callback = keras.callbacks.EarlyStopping(
	        monitor="val_loss", min_delta=0, patience=5
	    )

	    model.fit(
	        dataset_train,
	        epochs=epochs,
	        validation_data=dataset_val,
	        callbacks=[es_callback],
	    )
	    return model


	# def visualize_loss(history, title):
	# loss = history.history["loss"]
	# val_loss = history.history["val_loss"]
	# epochs = range(len(loss))
	# plt.figure()
	# plt.plot(epochs, loss, "b", label="Training loss")
	# plt.plot(epochs, val_loss, "r", label="Validation loss")
	# plt.title(title)
	# plt.xlabel("Epochs")
	# plt.ylabel("Loss")
	# plt.legend()
	# plt.show()
	#
	#
	# visualize_loss(history, "Training and Validation Loss")
	#
	# def show_plot(plot_data, delta, title):
	# labels = ["History", "True Future", "Model Prediction"]
	# marker = [".-", "rx", "go"]
	# time_steps = list(range(-(plot_data[0].shape[0]), 0))
	# if delta:
	# future = delta
	# else:
	# future = 0
	#
	# plt.title(title)
	# for i, val in enumerate(plot_data):
	# if i:
	# if i == 2:
	# plt.plot(future, plot_data[i][0], marker[i], markersize=10, label=labels[i])
	# else:
	# plt.plot(future, plot_data[i][0], marker[i], markersize=10, label=labels[i])
	# else:
	# plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
	# plt.legend()
	# plt.xlim([time_steps[0], (future + 5) * 2])
	# plt.xlabel("Time-Step")
	# plt.show()
	# return
	#
	#
	# for x, y in dataset_val.take(5):
	# print(x.shape)
	# print(model.predict(x))
	# show_plot(
	# [x[0][:, 1].numpy(), y[0].numpy(), model.predict(x)[0]],
	# 12,
	# "Single Step Prediction",
	# )
	def main():
	    """Run one training cycle and save the model under a new version.

	    Calls ``preprocesshyper`` for the next version number, builds the
	    dataset with ``preprocessdata``, trains with ``model_train``, and saves
	    the result as ``LTSM<version>.h5`` in the ``model/`` directory under
	    ``path``.
	    """
	    run_version = preprocesshyper()
	    trained = model_train(preprocessdata())
	    target_file = path + 'model/' + 'LTSM{}.h5'.format(run_version)
	    trained.save(target_file)


	if __name__ == "__main__":
	    main()