import math

# NOTE(review): statsmodels deprecated `tsa.arima_model` in 0.12 and removed
# its ARIMA implementation in 0.13 in favour of `tsa.arima.model`; the
# forecast handling further down is written for the legacy API, so confirm
# the pinned statsmodels version before changing this import.
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults
import streamlit as st
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import joblib
from sklearn.metrics import (
    mean_absolute_percentage_error,
    mean_squared_error,
    mean_absolute_error,
)
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Let the page use the full browser width.
st.set_page_config(page_title="LSTM vs ARIMA", layout='wide')

# ---------------------------------------------------------------------------
# Page layout
# ---------------------------------------------------------------------------
st.title("Crude Oil Benchmark Stock Price Prediction LSTM and ARIMA Models")
st.subheader("© Castillon, Ignas, Wong")

# Default ARIMA order (p, d, q); the sidebar inputs below overwrite these.
pValue, dValue, qValue = 4, 1, 0

# ---------------------------------------------------------------------------
# Sidebar: parameter settings
# ---------------------------------------------------------------------------
with st.sidebar.header('Set Data Split'):
    # Percentage of the rows used for training.
    trainData = st.sidebar.slider(
        'Data split ratio (% for Training Set)',
        min_value=10,
        max_value=90,
        value=80,
        step=5,
    )

accuracy = st.sidebar.select_slider(
    'Performance measure (accuracy Metrics)',
    options=['both', 'mse', 'mape'],
)

# ARIMA order inputs; each one echoes the value currently in effect.
pValue = st.sidebar.number_input(
    'P-value:', min_value=0, max_value=100, value=pValue)
st.sidebar.write('The current p-Value is ', pValue)

dValue = st.sidebar.number_input(
    'D-value:', min_value=0, max_value=100, value=dValue)
st.sidebar.write('The current d-Value is ', dValue)

qValue = st.sidebar.number_input(
    'Q-value:', min_value=0, max_value=100, value=qValue)
st.sidebar.write('The current q-Value is ', qValue)

# ---------------------------------------------------------------------------
# Main page selectors
# ---------------------------------------------------------------------------
# Which model(s) to use for the prediction.
modSelect = st.selectbox(
    "Select Model for Prediction:",
    options=("ARIMA & LSTM", "LSTM", "ARIMA"),
)

# Sampling interval of the time series; only the first letter of the chosen
# label is used when the data is downloaded.
interv = st.select_slider(
    'Select Time Series Data Interval for Prediction',
    options=['Weekly', 'Monthly', 'Quarterly', 'Yearly'],
)

# Function to convert time
# series to interval
def getInterval(argument):
    """Translate an interval initial into a yfinance interval code.

    Args:
        argument: First letter of the selected interval label
            ("W", "M", "Q" or "Y").

    Returns:
        "1wk", "1mo" or "3mo" for W/M/Q. "Y" and any unrecognised value
        map to "1d" (there is no dedicated yearly code in this table).
    """
    switcher = {
        "W": "1wk",
        "M": "1mo",
        "Q": "3mo",
        "Y": "1d",
    }
    return switcher.get(argument, "1d")


# show raw data
st.header("Raw Data")
df = yf.download('BZ=F', interval=getInterval(interv[0]))
if df.empty:
    # Without rows neither model can be fitted; stop with a readable message
    # instead of failing later inside Keras / statsmodels.
    st.error("No price data was returned for BZ=F; cannot build the models.")
    st.stop()
# Bare expression: rendered by Streamlit's "magic" display.
df

# graph visualization
st.header("Visualizations")


# LSTM
def df_to_X_y(df, window_size=5):
    """Build sliding-window samples from a one-dimensional series.

    Args:
        df: Object exposing ``to_numpy()`` (here the 'Close' column).
        window_size: Number of consecutive values in each input sample.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``X[i]`` holds the values at
        positions ``i .. i + window_size - 1`` (each wrapped in its own
        one-element list) and ``y[i]`` is the value at position
        ``i + window_size``.
    """
    df_as_np = df.to_numpy()
    X = []
    y = []
    for i in range(len(df_as_np) - window_size):
        X.append([[a] for a in df_as_np[i:i + window_size]])
        y.append(df_as_np[i + window_size])
    return np.array(X), np.array(y)


def mse_eval(test, predictions):
    """Return the mean squared error between ``test`` and ``predictions``."""
    return mean_squared_error(test, predictions)


def mape_eval(test, predictions):
    """Return the mean absolute percentage error of ``predictions``."""
    return mean_absolute_percentage_error(test, predictions)


def evaluate_lstm_model(split):
    """Train an LSTM on the module-level ``df`` and chart its test forecasts.

    Args:
        split: Fraction of the rows of ``df`` used as the training cut-off.

    Returns:
        DataFrame with columns 'Date', 'Close Prices' and 'LSTM Predictions'
        covering the test portion of the data.

    Side effects:
        Fits a Keras model, prints the test MSE and MAPE to stdout and
        renders a Plotly line chart on the Streamlit page.
    """
    WINDOW_SIZE = 3
    X1, y1 = df_to_X_y(df['Close'], WINDOW_SIZE)

    # preprocessing
    split_at = int(df.shape[0] * split)
    X_train1, y_train1 = X1[:split_at], y1[:split_at]
    X_test1, y_test1 = X1[split_at:], y1[split_at:]
    # y1[i] is the close at df.index[i + WINDOW_SIZE], so both date ranges
    # must be shifted by the window. Sizing the training dates from
    # y_train1 also keeps the lengths equal when the split point falls
    # inside the last window.
    date_train = df.index[WINDOW_SIZE:WINDOW_SIZE + len(y_train1)]
    date_test = df.index[split_at + WINDOW_SIZE:]

    # lstm model; the input length follows WINDOW_SIZE instead of a literal.
    model = Sequential([
        layers.Input((WINDOW_SIZE, 1)),
        layers.LSTM(64),
        layers.Dense(32, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])

    # NOTE(review): fit() receives no validation data while ModelCheckpoint
    # monitors 'val_loss' by default -- confirm this checkpoint is ever
    # written.
    cp1 = ModelCheckpoint('model1/', save_best_only=True)
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.001),
                  metrics=['mean_absolute_percentage_error'])

    model.fit(X_train1, y_train1, epochs=100, callbacks=[cp1])
    model.summary()

    # train predictions (kept for inspection; not returned)
    train_predictions = model.predict(X_train1).flatten()
    train_results = pd.DataFrame(
        data={'Date': date_train, 'Close Prices': y_train1,
              'Train Predictions': train_predictions})

    # test predictions
    test_predictions = model.predict(X_test1).flatten()
    test_results = pd.DataFrame(
        data={'Date': date_test, 'Close Prices': y_test1,
              'LSTM Predictions': test_predictions})

    # evaluate model
    mse = mse_eval(test_results['Close Prices'],
                   test_results['LSTM Predictions'])
    mape = mape_eval(test_results['Close Prices'],
                     test_results['LSTM Predictions'])
    print(mse)
    print(mape)

    # plot orig price and predicted price
    fig = px.line(test_results, x=test_results['Date'],
                  y=["Close Prices", "LSTM Predictions"],
                  title="LSTM PREDICTED BRENT CRUDE OIL PRICES", width=1000)
    st.plotly_chart(fig, use_container_width=True)

    # The former plt.figure(figsize=(24, 24)) / plt.grid(True) pair was
    # dropped: it opened a matplotlib figure that was never drawn on or
    # closed, so one more figure stayed alive after every call.
    return test_results


results = evaluate_lstm_model(trainData*.01)
results

# ARIMA MODEL
# TRAIN, TEST & SPLIT DATA
# `row` is the training cut-off: rows before it train, the rest are tested.
row = int(len(df)*(trainData*.01))
trainingData = list(df[0:row]['Close'])
testingData = list(df[row:]['Close'])

# Walk-forward validation: refit on all data seen so far, forecast one step,
# then reveal the actual value before the next step.
predictions = []
nObservations = len(testingData)
for actualTestValue in testingData:
    model = ARIMA(trainingData, order=(pValue, dValue, qValue))  # p,d,q
    model_fit = model.fit()
    output = model_fit.forecast()
    # The legacy results class returns a tuple whose first item is a
    # length-1 forecast array; np.ravel reads that, and also copes with
    # output[0] already being a scalar (presumably the case for the newer
    # statsmodels ARIMA API -- verify before switching imports).
    yhat = np.ravel(output[0])[0]
    predictions.append(yhat)
    # update training set
    trainingData.append(actualTestValue)

# print summary of the last fitted model on request
details = st.checkbox('Details')
arimamodsum = model_fit.summary()
if details:
    st.write(arimamodsum)

predictionss = pd.DataFrame(predictions)

# Actual test closes next to their one-step ARIMA forecasts.
testingSet = pd.DataFrame(testingData)
testingSet['ARIMApredictions'] = predictions
testingSet.columns = ['Close Prices', 'ARIMA Predictions']
testingSet
# Attach the ARIMA forecasts to the LSTM results table.
# `testingSet` starts at the split row, while `results` starts WINDOW_SIZE
# rows later (its dates come from df.index[split + WINDOW_SIZE:]). Both run
# to the last row of the data, so the two are matched on their common tail.
# Assigning the Series directly would pair them by positional index and
# shift every ARIMA forecast onto the wrong date.
arima_forecasts = testingSet["ARIMA Predictions"].to_numpy()
results["ARIMA Predictions"] = arima_forecasts[
    len(arima_forecasts) - len(results):]
results

# plot orig price and both models' predicted prices
fig = px.line(results, x=results["Date"],
              y=["Close Prices", "ARIMA Predictions", "LSTM Predictions"],
              title="BOTH PREDICTED BRENT CRUDE OIL PRICES", width=1000)
st.plotly_chart(fig, use_container_width=True)

# ARIMA error metrics over the full test set, computed by hand (lower-case
# names) and with scikit-learn (upper-case names) as a cross-check.
mape = np.mean(np.abs(np.array(predictions) - np.array(testingData))
               / np.abs(testingData))
mse = np.square(np.subtract(testingData, predictions)).mean()
MSE = mean_squared_error(testingData, predictions)
MAPE = mean_absolute_percentage_error(testingData, predictions)
MAE = mean_absolute_error(testingData, predictions)

st.write("MAPE: " + str(mape))  # mean absolute percentage error (manual)
st.write("MAPE: " + str(MAPE))  # mean absolute percentage error (sklearn)
st.write("MSE: " + str(mse))  # mean squared error (manual)
st.write("MSE: " + str(MSE))  # mean squared error (sklearn)

# NOTE(review): 'Improved' is a hard-coded 2200 -- looks like a placeholder;
# confirm what this column is meant to report.
accTable = pd.DataFrame({
    'MAPE': [mape],
    'MSE': [mse],
    'Improved': [2200],
})

# accuracy metrics
st.header("Accuracy Metrics")
st.table(accTable)

# ______________________________________________________
# sample read from local file!!!
# Display a previously exported weekly ARIMA sheet (bare expressions are
# rendered by Streamlit's "magic" display).
readfile = pd.read_csv('ARIMA/Sheets/ARIMA-WEEKLY.csv')
readfile

# Load the saved predictions for the 80% split with order (4,2,2).
# (An alternative export, './PREDICTIONS_ARIMA_80.0.csv', was used earlier.)
file = pd.read_csv('./PREDICTIONS_ARIMA_80.0_(4,2,2).csv')
file

# Load the persisted ARIMA results that belong to that run and show their
# summary. (An alternative pickle, 'ARIMA_80.0.pkl', was used earlier.)
# NOTE(review): statsmodels' load() unpickles the file -- only point this at
# .pkl files from a trusted source.
loaded = ARIMAResults.load('ARIMA_80.0_(4, 2, 2).pkl')
loaded_summary = loaded.summary()
st.write(loaded_summary)

# The loaded predictions can be re-scored with mse_eval / mape_eval on
# file['Close Prices'] and file['ARIMA Predictions'] if needed.