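# NOTE: non-code text that was captured together with this file
# (it is not part of the program):
#   Spaces: Runtime error / Runtime error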
from statsmodels.tsa.arima_model import ARIMAResults | |
import streamlit as st | |
import pandas as pd | |
import yfinance as yf | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import plotly.express as px | |
import joblib | |
import math | |
from statsmodels.tsa.arima_model import ARIMA | |
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error | |
import tensorflow as tf | |
from tensorflow import keras | |
from keras.models import Sequential | |
from keras.layers import Dense, LSTM | |
from tensorflow.keras import layers | |
from tensorflow.keras.optimizers import Adam | |
from tensorflow.keras.callbacks import ModelCheckpoint | |
# ---- Page configuration: let the app span the full browser width ----
st.set_page_config(page_title="LSTM vs ARIMA", layout='wide')

# ---- Page heading ----
st.title("Crude Oil Benchmark Stock Price Prediction LSTM and ARIMA Models")
st.subheader("""© Castillon, Ignas, Wong""")

# ---- Default ARIMA order; each value seeds the matching sidebar input ----
pValue, dValue, qValue = 4, 1, 0

# ---- Sidebar: data split, metric choice and ARIMA order ----
with st.sidebar.header('Set Data Split'):
    # Percentage of rows used for training: 10 to 90 in steps of 5,
    # starting at 80.
    trainData = st.sidebar.slider(
        'Data split ratio (% for Training Set)',
        min_value=10, max_value=90, value=80, step=5)
    # st.write(trainData*.01)

    accuracy = st.sidebar.select_slider(
        'Performance measure (accuracy Metrics)',
        options=['both', 'mse', 'mape'])

    # ARIMA order inputs: each accepts 0 to 100 and is echoed back right
    # below its input box.
    pValue = st.sidebar.number_input(
        'P-value:', min_value=0, max_value=100, value=pValue)
    st.sidebar.write('The current p-Value is ', pValue)

    dValue = st.sidebar.number_input(
        'D-value:', min_value=0, max_value=100, value=dValue)
    st.sidebar.write('The current d-Value is ', dValue)

    qValue = st.sidebar.number_input(
        'Q-value:', min_value=0, max_value=100, value=qValue)
    st.sidebar.write('The current q-Value is ', qValue)

# ---- Main page: model selection ----
modSelect = st.selectbox(
    "Select Model for Prediction:",
    options=("ARIMA & LSTM", "LSTM", "ARIMA"))
# st.write(modSelect)

# ---- Main page: sampling interval of the price series ----
interv = st.select_slider(
    'Select Time Series Data Interval for Prediction',
    options=['Weekly', 'Monthly', 'Quarterly', 'Yearly'])
# st.write(interv[0])
# Function to convert time series to interval | |
def getInterval(argument):
    """Translate a one-letter interval code into a download interval string.

    Args:
        argument: Interval code; the recognised codes are "W", "M", "Q"
            and "Y".

    Returns:
        "1wk" for "W", "1mo" for "M", "3mo" for "Q", and "1d" for "Y" as
        well as for any code that is not recognised.
    """
    interval_by_code = dict(W="1wk", M="1mo", Q="3mo", Y="1d")
    try:
        return interval_by_code[argument]
    except KeyError:
        # Unknown codes fall back to the daily interval.
        return "1d"
# ---- Raw data ----
st.header("Raw Data")
# using button
# if st.button('Press to see Brent Crude Oil Raw Data'):

# Download the 'BZ=F' price history at the interval chosen above;
# `interv[0]` is the first letter of the selected label ('W', 'M', 'Q', 'Y').
df = yf.download('BZ=F', interval=getInterval(interv[0]))

# Every later step slices, trains on and plots `df`. With no rows those
# steps fail with errors that do not point at the real cause, so report the
# problem here and end this script run.
if df.empty:
    st.error("No price data was returned for 'BZ=F'; cannot run the models.")
    st.stop()

# Bare expression kept as in the original script (presumably rendered on the
# page through Streamlit's "magic" display of standalone expressions).
df

# ---- Charts ----
st.header("Visualizations")
# LSTM | |
def df_to_X_y(df, window_size=5):
    """Turn a series into sliding input windows and next-step targets.

    Args:
        df: Object exposing ``to_numpy()`` (for example a pandas Series of
            prices), ordered oldest first.
        window_size: Number of consecutive observations in each input
            window. Must not be negative.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds observations
        ``i`` to ``i + window_size - 1``, each wrapped in a length-1 axis,
        so a 1-D input gives ``X`` the shape ``(n, window_size, 1)``.
        ``y[i]`` is the observation that follows window ``i``. When the
        input has no more than ``window_size`` observations both arrays are
        empty but keep those dimensions (``(0, window_size, 1)`` and
        ``(0,)`` for a 1-D input).

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    values = df.to_numpy()
    sample_count = max(len(values) - window_size, 0)

    # Row i lists the positions covered by window i: i, i + 1, ...
    positions = np.arange(sample_count)[:, np.newaxis] + np.arange(window_size)

    # Fancy indexing gathers all windows at once; the extra axis reproduces
    # the one-value-per-time-step layout of the inputs.
    X = values[positions][:, :, np.newaxis]
    y = np.array(values[window_size:])
    return X, y
def mse_eval(test, predictions):
    """Return the mean squared error between actual and predicted values.

    Args:
        test: Actual (ground-truth) values.
        predictions: Predicted values, in the same order as ``test``.

    Returns:
        The result of ``mean_squared_error`` for the two sequences.
    """
    return mean_squared_error(y_true=test, y_pred=predictions)
def mape_eval(test, predictions):
    """Return the mean absolute percentage error of the predictions.

    Args:
        test: Actual (ground-truth) values.
        predictions: Predicted values, in the same order as ``test``.

    Returns:
        The result of ``mean_absolute_percentage_error`` for the two
        sequences.
    """
    return mean_absolute_percentage_error(y_true=test, y_pred=predictions)
def evaluate_lstm_model(split, window_size=3, epochs=100):
    """Train an LSTM on the closing prices in ``df`` and chart its test forecasts.

    Reads the module-level ``df`` and uses ``df_to_X_y``, ``mse_eval`` and
    ``mape_eval`` from this file. Besides returning the test results it
    prints the model summary, the test MSE and the test MAPE to standard
    output and renders a Plotly line chart on the Streamlit page.

    Args:
        split: Fraction of ``df``'s row count used as the training cut-off
            (the caller passes the sidebar percentage times 0.01).
        window_size: Number of past closes fed to the network per sample.
        epochs: Number of training epochs.

    Returns:
        DataFrame with the columns 'Date', 'Close Prices' and
        'LSTM Predictions' for the test portion of the data.
    """
    X1, y1 = df_to_X_y(df['Close'], window_size)

    # ---- chronological train/test split ----
    split_at = int(df.shape[0] * split)

    # Target y1[i] is the close at df.index[i + window_size], so the dates
    # that belong to the targets start `window_size` rows into the index.
    # Slicing the dates exactly like the targets keeps both in step.
    target_dates = df.index[window_size:]
    date_train, date_test = target_dates[:split_at], target_dates[split_at:]
    X_train1, y_train1 = X1[:split_at], y1[:split_at]
    X_test1, y_test1 = X1[split_at:], y1[split_at:]

    # ---- network definition ----
    # `keras` is `tensorflow.keras` (see the imports at the top of the file),
    # the same namespace `layers` comes from, so the model container and its
    # layers are built from one Keras implementation.
    model = keras.Sequential([
        layers.Input((window_size, 1)),
        layers.LSTM(64),
        layers.Dense(32, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])

    # NOTE(review): fit() receives no validation data, so the checkpoint's
    # default monitored quantity is presumably unavailable -- confirm that
    # anything is actually written to 'model1/'.
    cp1 = ModelCheckpoint('model1/', save_best_only=True)
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.001),
                  metrics=['mean_absolute_percentage_error'])
    model.fit(X_train1, y_train1, epochs=epochs, callbacks=[cp1])
    model.summary()

    # ---- predictions on the training portion ----
    train_predictions = model.predict(X_train1).flatten()
    train_results = pd.DataFrame(
        data={'Date': date_train,
              'Close Prices': y_train1,
              'Train Predictions': train_predictions})
    # train_results

    # ---- predictions on the test portion ----
    test_predictions = model.predict(X_test1).flatten()
    test_results = pd.DataFrame(
        data={'Date': date_test,
              'Close Prices': y_test1,
              'LSTM Predictions': test_predictions})
    # test_results

    # ---- accuracy on the test portion (printed, not shown on the page) ----
    mse = mse_eval(test_results['Close Prices'],
                   test_results['LSTM Predictions'])
    mape = mape_eval(test_results['Close Prices'],
                     test_results['LSTM Predictions'])
    print(mse)
    print(mape)

    # ---- chart: actual vs. predicted closes ----
    fig = px.line(test_results, x=test_results['Date'],
                  y=["Close Prices", "LSTM Predictions"],
                  title="LSTM PREDICTED BRENT CRUDE OIL PRICES", width=1000)
    st.plotly_chart(fig, use_container_width=True)

    return test_results
# ---- LSTM: train, chart and display the test-set forecasts ----
results = evaluate_lstm_model(trainData * .01)
results

# ---- ARIMA: chronological train/test split ----
# The first `trainData` percent of the rows is the training history; the
# remaining rows are the test set.
row = int(len(df) * (trainData * .01))
trainingData = list(df.iloc[:row]['Close'])
# len(trainingData)
testingData = list(df.iloc[row:]['Close'])
# len(testingData)

# ---- ARIMA: rolling forecast over the test set ----
# For each test observation: refit on the history so far, keep the first
# value returned by forecast(), then add the true observation to the history.
predictions = []
nObservations = len(testingData)
for actualTestValue in testingData:
    model = ARIMA(trainingData, order=(pValue, dValue, qValue))  # (p, d, q)
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = list(output[0])[0]
    predictions.append(yhat)
    # update training set
    trainingData.append(actualTestValue)
# ---- ARIMA model summary (shown when the box is ticked) ----
details = st.checkbox('Details')
arimamodsum = model_fit.summary()
if details:
    st.write(arimamodsum)

# st.write(predictions)
predictionss = pd.DataFrame(predictions)

# ---- ARIMA test-set table: actual closes next to the forecasts ----
testingSet = pd.DataFrame(testingData)
testingSet['ARIMApredictions'] = predictions
testingSet.columns = ['Close Prices', 'ARIMA Predictions']
testingSet

# ---- Add the ARIMA forecasts to the LSTM results ----
# The ARIMA test set starts at df.index[row], while the dates in `results`
# start a window of rows later (see evaluate_lstm_model). Joining by row
# position would therefore pair each date with a forecast made for an
# earlier date, so the forecasts are looked up by date instead.
arima_by_date = pd.Series(predictions, index=df.index[row:])
results["ARIMA Predictions"] = arima_by_date.reindex(results["Date"]).to_numpy()
results
# # ARIMA-only chart (disabled)
# fig = px.line(testingSet, x=testingSet.index, y=["Close Prices","ARIMA Predictions"],
#               title="ARIMA PREDICTED BRENT CRUDE OIL PRICES", width=1000)
# st.plotly_chart(fig, use_container_width=True)

# ---- Combined chart: actual closes against both models' forecasts ----
fig = px.line(
    results,
    x=results["Date"],
    y=["Close Prices", "ARIMA Predictions", "LSTM Predictions"],
    title="BOTH PREDICTED BRENT CRUDE OIL PRICES",
    width=1000,
)
st.plotly_chart(fig, use_container_width=True)

# # Matplotlib version of the ARIMA chart (disabled)
# plt.figure(figsize=(24,24))
# plt.grid(True)
# dateRange = df[row:].index
# plt.plot(dateRange, predictions, color='blue', marker = 'o', linestyle ='dashed', label='Predicted Brent Price')
# plt.plot(dateRange, testingData, color='red', label='Original Brent Price')
# plt.title(" ARIMA BRENT PRICE PREDICTION")
# plt.xlabel('Date')
# plt.ylabel('Price')
# plt.legend()
# plt.show()
# ---- ARIMA accuracy, computed by hand ----
forecast_values = np.array(predictions)
actual_values = np.array(testingData)
mape = np.mean(np.abs(forecast_values - actual_values) / np.abs(actual_values))
mse = np.square(np.subtract(testingData, predictions)).mean()

# ---- ARIMA accuracy, computed with scikit-learn ----
MSE = mean_squared_error(y_true=testingData, y_pred=predictions)
MAPE = mean_absolute_percentage_error(y_true=testingData, y_pred=predictions)
MAE = mean_absolute_error(y_true=testingData, y_pred=predictions)

# Each metric is written twice under the same label: first the hand-computed
# value, then the scikit-learn value. MAE is computed but not displayed.
for metric_label, metric_value in (
        ("MAPE", mape), ("MAPE", MAPE), ("MSE", mse), ("MSE", MSE)):
    st.write(metric_label + ": " + str(metric_value))

# One-row summary table built from the hand-computed values.
# NOTE(review): 'Improved' is a hard-coded constant, not derived from the
# metrics above -- confirm what it is meant to show.
accTable = pd.DataFrame({'MAPE': [mape], 'MSE': [mse], 'Improved': [2200]})

# ---- Accuracy table ----
st.header("Accuracy Metrics")
st.table(accTable)
# ______________________________________________________
# Samples read from local files. All three paths are relative, so they are
# resolved against the directory the app is started from.

# ---- Weekly ARIMA sheet ----
readfile = pd.read_csv(filepath_or_buffer='ARIMA/Sheets/ARIMA-WEEKLY.csv')
readfile

# ---- Saved ARIMA predictions ----
# file = pd.read_csv('./PREDICTIONS_ARIMA_80.0.csv')
file = pd.read_csv(filepath_or_buffer='./PREDICTIONS_ARIMA_80.0_(4,2,2).csv')
file

# ---- Saved ARIMA model ----
# NOTE(review): `load` presumably unpickles the file -- only point it at
# files from a trusted source.
# loaded = ARIMAResults.load('ARIMA_80.0.pkl')
loaded = ARIMAResults.load('ARIMA_80.0_(4, 2, 2).pkl')
st.write(loaded.summary())

# file['ARIMA Predictions']
# file['Close Prices']
# # evaluate model
# mse = float(mse_eval(file['Close Prices'],file['ARIMA Predictions']))
# mape = mape_eval(file['Close Prices'],file['ARIMA Predictions'])
# print("MSE: "+ str(mse))
# print("MAPE: "+ str(mape))