Spaces:

janrswong
/

predictaoil

Runtime error

App Files Files Community

predictaoil / bak.py

janrswong

init

8624212 about 2 years ago

raw

history blame contribute delete

No virus

8.99 kB

	from statsmodels.tsa.arima_model import ARIMAResults
	import streamlit as st
	import pandas as pd
	import yfinance as yf
	import matplotlib.pyplot as plt
	import numpy as np
	import plotly.express as px
	import joblib
	import math
	from statsmodels.tsa.arima_model import ARIMA
	from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
	import tensorflow as tf
	from tensorflow import keras
	from keras.models import Sequential
	from keras.layers import Dense, LSTM
	from tensorflow.keras import layers
	from tensorflow.keras.optimizers import Adam
	from tensorflow.keras.callbacks import ModelCheckpoint

	# Use the full browser width for the page layout.
	st.set_page_config(page_title="LSTM vs ARIMA", layout='wide')

	# PAGE LAYOUT
	# Title and author credit shown at the top of the page.
	st.title("Crude Oil Benchmark Stock Price Prediction LSTM and ARIMA Models")
	st.subheader("""© Castillon, Ignas, Wong""")

	# Default ARIMA order (p, d, q); each value seeds the matching sidebar
	# input below and is then overwritten by whatever the user enters.
	pValue, dValue, qValue = 4, 1, 0

	# Sidebar - data split and accuracy-metric settings.
	with st.sidebar.header('Set Data Split'):
	    # Percentage of rows used for training: min 10, max 90, default 80,
	    # in steps of 5.
	    trainData = st.sidebar.slider(
	        'Data split ratio (% for Training Set)',
	        min_value=10, max_value=90, value=80, step=5)
	    accuracy = st.sidebar.select_slider(
	        'Performance measure (accuracy Metrics)',
	        options=['both', 'mse', 'mape'])

	# Sidebar - ARIMA order, each term limited to the range 0..100.
	pValue = st.sidebar.number_input(
	    'P-value:', min_value=0, max_value=100, value=pValue)
	st.sidebar.write('The current p-Value is ', pValue)
	dValue = st.sidebar.number_input(
	    'D-value:', min_value=0, max_value=100, value=dValue)
	st.sidebar.write('The current d-Value is ', dValue)
	qValue = st.sidebar.number_input(
	    'Q-value:', min_value=0, max_value=100, value=qValue)
	st.sidebar.write('The current q-Value is ', qValue)

	# Main area - which model(s) the user wants predictions from.
	modSelect = st.selectbox(
	    "Select Model for Prediction:",
	    options=("ARIMA & LSTM", "LSTM", "ARIMA"))

	# Main area - sampling interval of the price series.
	interv = st.select_slider(
	    'Select Time Series Data Interval for Prediction',
	    options=['Weekly', 'Monthly', 'Quarterly', 'Yearly'])

	# Function to convert time series to interval


	def getInterval(argument):
	    """Translate an interval initial into a download interval code.

	    Args:
	        argument: One of "W", "M", "Q" or "Y".

	    Returns:
	        "1wk", "1mo", "3mo" or "1d" respectively; any other value also
	        yields "1d".
	    """
	    # NOTE(review): "Y" maps to the same code as the fallback ("1d"), so a
	    # yearly selection presumably returns daily data - confirm intent.
	    codes_by_initial = {"W": "1wk", "M": "1mo", "Q": "3mo", "Y": "1d"}
	    if argument in codes_by_initial:
	        return codes_by_initial[argument]
	    return "1d"


	# Raw data section.
	st.header("Raw Data")
	# The first letter of the selected interval label ("W", "M", "Q", "Y")
	# picks the download interval for the 'BZ=F' ticker.
	selectedInterval = getInterval(interv[0])
	df = yf.download(tickers='BZ=F', interval=selectedInterval)
	# Bare expression: presumably rendered on the page by Streamlit's "magic"
	# handling of stand-alone expressions - confirm magic is enabled.
	df

	# Charts for the model outputs follow this heading.
	st.header("Visualizations")
	# LSTM


	def df_to_X_y(df, window_size=5):
	    """Slice a 1-D series into sliding windows and next-step labels.

	    Args:
	        df: Object exposing ``to_numpy()`` (e.g. a pandas Series).
	        window_size: Number of consecutive values in each input window.

	    Returns:
	        Tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds values
	        ``i .. i + window_size - 1``, each wrapped in its own one-element
	        list; ``y[i]`` is the value at position ``i + window_size``. Both
	        are empty when the input has no more than ``window_size`` values.
	    """
	    values = df.to_numpy()
	    starts = range(len(values) - window_size)
	    windows = [
	        [[value] for value in values[start:start + window_size]]
	        for start in starts
	    ]
	    labels = [values[start + window_size] for start in starts]
	    return np.array(windows), np.array(labels)


	def mse_eval(test, predictions):
	    """Return the mean squared error of ``predictions`` against ``test``.

	    Delegates to ``mean_squared_error`` with ``test`` as the true values.
	    """
	    score = mean_squared_error(y_true=test, y_pred=predictions)
	    return score


	def mape_eval(test, predictions):
	    """Return the mean absolute percentage error of ``predictions``.

	    Delegates to ``mean_absolute_percentage_error`` with ``test`` as the
	    true values.
	    """
	    score = mean_absolute_percentage_error(y_true=test, y_pred=predictions)
	    return score


	def evaluate_lstm_model(split):
	    """Train an LSTM on the module-level ``df['Close']`` and plot its test fit.

	    The close prices are cut into windows of ``WINDOW_SIZE`` values with the
	    following value as the label. The first ``int(len(df) * split)`` windows
	    train the network; the remaining windows are the test set. Test-set MSE
	    and MAPE are printed to stdout and a Plotly line chart of actual versus
	    predicted closes is added to the Streamlit page.

	    Args:
	        split: Fraction of ``df`` rows used for training (e.g. ``0.8``).

	    Returns:
	        DataFrame with columns ``'Date'``, ``'Close Prices'`` and
	        ``'LSTM Predictions'``, one row per test window.
	    """
	    WINDOW_SIZE = 3
	    X1, y1 = df_to_X_y(df['Close'], WINDOW_SIZE)

	    # Number of leading windows assigned to the training set.
	    n_train = int(df.shape[0] * split)

	    # Label y1[i] is the close at row i + WINDOW_SIZE, so the dates of both
	    # partitions are offset by the window length to stay aligned with the
	    # labels (and to always match the partitions' lengths).
	    date_train = df.index[WINDOW_SIZE:WINDOW_SIZE + n_train]
	    date_test = df.index[n_train + WINDOW_SIZE:]
	    X_train1, y_train1 = X1[:n_train], y1[:n_train]
	    X_test1, y_test1 = X1[n_train:], y1[n_train:]

	    # LSTM model: one feature per time step, WINDOW_SIZE time steps.
	    model = Sequential([
	        layers.Input((WINDOW_SIZE, 1)),
	        layers.LSTM(64),
	        layers.Dense(32, activation='relu'),
	        layers.Dense(32, activation='relu'),
	        layers.Dense(1),
	    ])
	    # NOTE(review): save_best_only relies on the callback's monitored
	    # metric, but fit() below receives no validation data - confirm the
	    # checkpoint is actually written with the installed Keras version.
	    cp1 = ModelCheckpoint('model1/', save_best_only=True)
	    model.compile(loss='mse', optimizer=Adam(learning_rate=0.001),
	                  metrics=['mean_absolute_percentage_error'])
	    model.fit(X_train1, y_train1, epochs=100, callbacks=[cp1])
	    model.summary()

	    # Train predictions (built for inspection; not returned).
	    train_predictions = model.predict(X_train1).flatten()
	    train_results = pd.DataFrame(
	        data={'Date': date_train, 'Close Prices': y_train1,
	              'Train Predictions': train_predictions})

	    # Test predictions.
	    test_predictions = model.predict(X_test1).flatten()
	    test_results = pd.DataFrame(
	        data={'Date': date_test, 'Close Prices': y_test1,
	              'LSTM Predictions': test_predictions})

	    # Evaluate on the held-out windows.
	    mse = mse_eval(test_results['Close Prices'],
	                   test_results['LSTM Predictions'])
	    mape = mape_eval(test_results['Close Prices'],
	                     test_results['LSTM Predictions'])
	    print(mse)
	    print(mape)

	    # Plot original price and predicted price.
	    fig = px.line(test_results, x=test_results['Date'],
	                  y=["Close Prices", "LSTM Predictions"],
	                  title="LSTM PREDICTED BRENT CRUDE OIL PRICES", width=1000)
	    st.plotly_chart(fig, use_container_width=True)
	    return test_results


	# Train/evaluate the LSTM on the chosen split and show its test table.
	results = evaluate_lstm_model(trainData * .01)
	results

	# ARIMA MODEL
	# TRAIN,TEST,&SPLIT DATA

	# The first `row` rows (trainData percent of the data) train ARIMA; the
	# remaining rows are held out for testing.
	row = int(len(df) * (trainData * .01))

	trainingData = list(df[0:row]['Close'])
	testingData = list(df[row:]['Close'])

	# Walk-forward validation: forecast one step ahead, then add the actual
	# observation to the history before forecasting the next step.
	predictions = []
	nObservations = len(testingData)

	# NOTE(review): ARIMA here comes from statsmodels.tsa.arima_model, and the
	# output[0] indexing below assumes that module's forecast() return shape -
	# confirm against the installed statsmodels version.
	for actualTestValue in testingData:
	    model = ARIMA(trainingData, order=(pValue, dValue, qValue))  # p,d,q
	    model_fit = model.fit()
	    output = model_fit.forecast()
	    yhat = list(output[0])[0]
	    predictions.append(yhat)
	    # update training set
	    trainingData.append(actualTestValue)

	# Optional ARIMA fit summary (last model fitted in the walk-forward loop).
	details = st.checkbox('Details')

	arimamodsum = model_fit.summary()
	if details:
	    st.write(arimamodsum)

	predictionss = pd.DataFrame(predictions)

	# Table of actual test closes next to the ARIMA forecasts.
	testingSet = pd.DataFrame(testingData)
	testingSet['ARIMApredictions'] = predictions
	testingSet.columns = ['Close Prices', 'ARIMA Predictions']
	testingSet

	# Attach the ARIMA forecasts to the LSTM results BY DATE. The ARIMA test
	# set starts at df.index[row], whereas the rows in `results` carry their
	# own (later-starting) 'Date' column, so a positional/index-based
	# assignment would pair each forecast with the wrong date. Dates missing
	# from the ARIMA test range become NaN.
	# Assumes df.index holds unique dates - TODO confirm.
	arimaByDate = pd.Series(predictions, index=df.index[row:])
	results["ARIMA Predictions"] = arimaByDate.reindex(
	    results["Date"]).to_numpy()
	results

	# Plot original price against both models' predictions.
	fig = px.line(results, x=results["Date"],
	              y=["Close Prices", "ARIMA Predictions", "LSTM Predictions"],
	              title="BOTH PREDICTED BRENT CRUDE OIL PRICES", width=1000)
	st.plotly_chart(fig, use_container_width=True)

	# #VISUALIZE DATA
	# plt.figure(figsize=(24,24))
	# plt.grid(True)

	# dateRange = df[row:].index

	# plt.plot(dateRange, predictions, color='blue', marker = 'o', linestyle ='dashed', label='Predicted Brent Price')
	# plt.plot(dateRange, testingData, color='red', label='Original Brent Price')

	# plt.title(" ARIMA BRENT PRICE PREDICTION")
	# plt.xlabel('Date')
	# plt.ylabel('Price')
	# plt.legend()
	# plt.show()

	# ARIMA error metrics. Lower-case names are computed by hand with NumPy;
	# upper-case names come from the scikit-learn helpers.
	absoluteErrors = np.abs(np.array(predictions) - np.array(testingData))
	mape = np.mean(absoluteErrors / np.abs(testingData))
	mse = np.mean(np.square(np.subtract(testingData, predictions)))
	MSE = mean_squared_error(testingData, predictions)
	MAPE = mean_absolute_percentage_error(testingData, predictions)
	MAE = mean_absolute_error(testingData, predictions)

	# Show each metric twice: hand-computed value first, scikit-learn second.
	for metricLabel, metricValue in (
	        ("MAPE: ", mape), ("MAPE: ", MAPE), ("MSE: ", mse), ("MSE: ", MSE)):
	    st.write(metricLabel + str(metricValue))

	# One-row summary table.
	# NOTE(review): 'Improved' is a hard-coded 2200 - looks like a
	# placeholder; confirm.
	accTable = pd.DataFrame(
	    {'MAPE': [mape], 'MSE': [mse], 'Improved': [2200]})

	# accuracy metrics
	st.header("Accuracy Metrics")

	st.table(accTable)

	# ______________________________________________________
	# sample read from local file!!!
	# Sample of previously exported weekly ARIMA output, shown on the page.
	weeklyCsvPath = 'ARIMA/Sheets/ARIMA-WEEKLY.csv'
	readfile = pd.read_csv(weeklyCsvPath)
	readfile

	# Saved predictions, shown on the page.
	# Paths are relative to the working directory the app is started from.
	predictionsCsvPath = './PREDICTIONS_ARIMA_80.0_(4,2,2).csv'
	file = pd.read_csv(predictionsCsvPath)
	file

	# Saved fitted model, loaded so its summary can be displayed.
	# NOTE(review): presumably a pickle-based load - only load model files
	# from a trusted source.
	savedModelPath = 'ARIMA_80.0_(4, 2, 2).pkl'
	loaded = ARIMAResults.load(savedModelPath)
	st.write(loaded.summary())

	# file['ARIMA Predictions']
	# file['Close Prices']

	# # evaluate model
	# mse = float(mse_eval(file['Close Prices'],file['ARIMA Predictions']))
	# mape = mape_eval(file['Close Prices'],file['ARIMA Predictions'])
	# print("MSE: "+ str(mse))
	# print("MAPE: "+ str(mape))