predictaoil / pages / 3_📊_Make_a_Model.py
janrswong's picture
changed default values
52c834f
raw
history blame contribute delete
No virus
7.88 kB
from statsmodels.tsa.arima_model import ARIMAResults
import streamlit as st
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import time
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import layers
from keras import wrappers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from st_aggrid import GridOptionsBuilder, AgGrid
from style import add_logo
# CSS injected into the page to hide Streamlit's main menu and footer.
hide_menu_style = """
<style>
#MainMenu{visibility: hidden;}
footer{visibility:hidden;}
</style>
"""

# page expands to full width
st.set_page_config(
    page_title="Predicta.oil | Make a Model",
    layout='wide',
    # Fuel-pump emoji; the previous value was a mis-encoded three-character
    # string rather than a single emoji.
    page_icon="⛽",
)
st.markdown(hide_menu_style, unsafe_allow_html=True)
add_logo()


# ag grid pagination
def pagination(df):
    """Return AgGrid grid options for ``df`` with auto-sized pagination on."""
    builder = GridOptionsBuilder.from_dataframe(df)
    builder.configure_pagination(paginationAutoPageSize=True)
    grid_options = builder.build()
    return grid_options
# PAGE LAYOUT
# heading
st.title("Make a Model")

# Default ARIMA order (p, d, q); the sidebar inputs below overwrite these
# names with whatever the user enters.
pValue = 1
dValue = 0
qValue = 0

# show raw data
st.header("Raw Data")

# Sidebar - Specify parameter settings.
# The header is emitted as a plain element: it is not a container, so it is
# not used as a ``with`` target, and every widget below addresses
# ``st.sidebar`` explicitly.
st.sidebar.header('Set Data Split')

# Percentage of rows used for training: min 10, max 90, default 50, step 5.
trainData = st.sidebar.slider(
    'Data split ratio (% for Training Set)',
    min_value=10, max_value=90, value=50, step=5)

# ARIMA PARAMETERS: each input is limited to 0..100 and echoed underneath.
pValue = st.sidebar.number_input(
    'P-value:', min_value=0, max_value=100, value=pValue)
st.sidebar.write('The current p-Value is ', pValue)
dValue = st.sidebar.number_input(
    'D-value:', min_value=0, max_value=100, value=dValue)
st.sidebar.write('The current d-Value is ', dValue)
qValue = st.sidebar.number_input(
    'Q-value:', min_value=0, max_value=100, value=qValue)
st.sidebar.write('The current q-Value is ', qValue)

details = st.sidebar.checkbox('Show Details')
runModels = st.sidebar.button('Test Models')

# select time interval (rendered in the main page area, not the sidebar)
interv = st.select_slider(
    'Select Time Series Data Interval for Prediction',
    options=['Daily', 'Weekly', 'Monthly', 'Quarterly'],
    value='Weekly')
# First letter of an interval label -> value passed as ``interval`` to
# ``yf.download``.
_INTERVAL_CODES = {
    "D": "1d",
    "W": "1wk",
    "M": "1mo",
    "Q": "3mo",
}


def getInterval(argument):
    """Return the download interval string for an interval label.

    Only the first character of ``argument`` is inspected, case-insensitively,
    so both the one-letter codes (``"W"``) and the full slider labels
    (``"Weekly"``) are accepted.

    Args:
        argument: Interval label such as ``"D"``, ``"W"``, ``"M"``, ``"Q"``
            or a word starting with one of those letters.

    Returns:
        ``"1d"``, ``"1wk"``, ``"1mo"`` or ``"3mo"``; ``"1d"`` for anything
        unrecognised, including empty or non-string input.
    """
    # No caching decorator: a constant dict lookup is cheaper than hashing
    # the argument for a cache.
    if not isinstance(argument, str) or not argument:
        return "1d"
    return _INTERVAL_CODES.get(argument[0].upper(), "1d")
# Price history for ticker 'BZ=F' at the interval picked on the slider; only
# the first letter of the slider label is passed to getInterval().
df = yf.download('BZ=F', interval=getInterval(interv[0]))
if df.empty:
    # NOTE(review): yfinance appears to report failed downloads by returning
    # an empty frame rather than raising - confirm for the pinned version.
    # Stopping here avoids unrelated errors further down the page.
    st.error('No price data could be downloaded for BZ=F. Please try again later.')
    st.stop()
# Preview of the first rows.
st.table(df.head())
# download full data
@st.cache
def convert_df(df):
    """Serialise ``df`` to CSV text and return it as UTF-8 encoded bytes.

    The result is cached so the conversion is not repeated on every rerun.
    """
    csv_text = df.to_csv()
    encoded = csv_text.encode('utf-8')
    return encoded
# CSV bytes of the full downloaded data set
csv = convert_df(df)

# download full data
download_options = {
    'label': "Download data as CSV",
    'data': csv,
    'file_name': 'Brent Oil Prices.csv',
    'mime': 'text/csv',
}
st.download_button(**download_options)

# graph visualization
st.header("Visualizations")
# LSTM
def df_to_X_y(df, window_size=5):
    """Turn a series into sliding-window samples and next-step targets.

    Sample ``i`` holds the ``window_size`` consecutive values starting at
    position ``i``; its target is the value immediately after that window.

    Args:
        df: Object exposing ``to_numpy()`` (e.g. a pandas Series).
        window_size: Number of consecutive values per sample; must be >= 1.

    Returns:
        ``(X, y)`` where, for 1-D input of length ``n``, ``X`` has shape
        ``(max(n - window_size, 0), window_size, 1)`` and ``y`` has shape
        ``(max(n - window_size, 0),)``. Inputs too short to form a window
        yield correctly shaped empty arrays.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    values = df.to_numpy()
    n_samples = max(len(values) - window_size, 0)

    # Row i of window_index lists the positions i .. i + window_size - 1.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)
    # Extra axis after the window axis gives one feature per time step.
    X = np.expand_dims(values[window_index], axis=2)
    # Copy so the targets do not alias the caller's underlying buffer.
    y = values[window_size:].copy()
    return X, y
def mse_eval(test, predictions):
    """Return the mean squared error of ``predictions`` against ``test``."""
    squared_error = mean_squared_error(test, predictions)
    return squared_error
def mape_eval(test, predictions):
    """Return the mean absolute percentage error of ``predictions`` against ``test``."""
    percentage_error = mean_absolute_percentage_error(test, predictions)
    return percentage_error
def evaluate_lstm_model(split):
    """Train an LSTM on the module-level ``df['Close']`` series and score it.

    The series is cut into sliding windows of ``WINDOW_SIZE`` values; the
    first ``int(len(df) * split)`` windows train the network and the rest
    are used for evaluation.

    Side effects:
        Shows a Streamlit spinner while training and stores the textual
        model summary in the module-level ``lstmModel``.

    Args:
        split: Fraction of rows used for training, e.g. ``0.5``.

    Returns:
        ``(test_results, mse, mape)`` where ``test_results`` is a DataFrame
        with columns ``'Date'``, ``'Close Prices'`` and ``'LSTM Predictions'``
        and the other two are the test-set error metrics.
    """
    global lstmModel
    WINDOW_SIZE = 3
    X1, y1 = df_to_X_y(df['Close'], WINDOW_SIZE)

    # preprocessing
    n_train = int(df.shape[0] * split)
    X_train1, y_train1 = X1[:n_train], y1[:n_train]
    X_test1, y_test1 = X1[n_train:], y1[n_train:]
    # Target i is the close WINDOW_SIZE rows after window i starts, so the
    # test targets begin WINDOW_SIZE rows past the split point.
    date_test = df.index[n_train + WINDOW_SIZE:]

    # lstm model
    with st.spinner('LSTM Model...'):
        model = Sequential([
            # Input length follows WINDOW_SIZE so the two cannot drift apart.
            layers.Input((WINDOW_SIZE, 1)),
            layers.LSTM(64),
            layers.Dense(32, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(1),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001),
                      metrics=['mean_absolute_percentage_error'])
        model.fit(X_train1, y_train1, epochs=100)

        # summary() prints and returns None, so collect its text through
        # print_fn; extra keyword arguments some Keras versions pass along
        # are accepted and ignored.
        summary_lines = []
        model.summary(print_fn=lambda line, **_: summary_lines.append(line))
        lstmModel = "\n".join(summary_lines)

        # test predictions
        test_predictions = model.predict(X_test1).flatten()

    test_results = pd.DataFrame(
        data={'Date': date_test, 'Close Prices': y_test1,
              'LSTM Predictions': test_predictions})

    # evaluate model
    mse = mse_eval(test_results['Close Prices'],
                   test_results['LSTM Predictions'])
    mape = mape_eval(test_results['Close Prices'],
                     test_results['LSTM Predictions'])
    return test_results, mse, mape
# NOTE(review): a ``global`` statement at module level does not create or
# change any binding. ``results`` is first assigned inside the
# ``if runModels:`` branch further down, and evaluate_arima_model() adds a
# column to it.
global results
# ARIMA MODEL
def evaluate_arima_model(df, trainData):
    """Walk-forward evaluate an ARIMA model on ``df['Close']``.

    The first ``trainData`` percent of rows seed the history. For every
    remaining row a model of order ``(pValue, dValue, qValue)`` (module-level
    values) is fitted on the history, a one-step forecast is recorded, and
    the actual value is appended to the history.

    Side effects:
        * Stores the last fit's summary in the module-level ``arimamodsum``.
        * Adds an ``'ARIMA Predictions'`` column to the module-level
          ``results`` frame, which must already exist (in this file it is
          the LSTM test frame assigned just before this call).
        * On any ordinary error, shows a Streamlit error and stops the script.

    Args:
        df: Frame with a ``'Close'`` column.
        trainData: Training share as a percentage (e.g. ``50``).

    Returns:
        ``(MSE, MAPE)`` computed over the whole ARIMA test segment.
    """
    global arimamodsum
    try:
        with st.spinner('ARIMA Model...'):
            # trainData is a percentage: the first trainData% of rows train.
            row = int(len(df) * (trainData * .01))
            trainingData = list(df[0:row]['Close'])
            testingData = list(df[row:]['Close'])

            predictions = []
            model_fit = None
            for actualTestValue in testingData:
                model = ARIMA(trainingData, order=(
                    pValue, dValue, qValue))  # p,d,q
                model_fit = model.fit()
                output = model_fit.forecast()
                predictions.append(list(output[0])[0])
                # Extend the history with the observed value before the
                # next refit.
                trainingData.append(actualTestValue)

            # Only the final fit's summary is kept, so build it once.
            if model_fit is not None:
                arimamodsum = model_fit.summary()

            # ``results`` may cover fewer rows than the ARIMA test segment
            # (the LSTM frame starts one look-back window later). Both end
            # on the last observation, so align on the tail instead of on
            # positional index.
            offset = len(predictions) - len(results)
            if offset >= 0:
                results["ARIMA Predictions"] = predictions[offset:]
            else:
                # Fewer predictions than rows: fall back to index alignment,
                # leaving the unmatched rows empty.
                results["ARIMA Predictions"] = pd.Series(predictions)

            MSE = mean_squared_error(testingData, predictions)
            MAPE = mean_absolute_percentage_error(testingData, predictions)
            return MSE, MAPE
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt,
        # SystemExit and other BaseException-based control flow propagate.
        st.error('Please select other ARIMA values as this is not possible.')
        st.stop()
# run models
# plot all results
if runModels:
    # Fit both models; the ARIMA call adds its column to ``results``.
    results, lstmMse, lstmMape = evaluate_lstm_model(trainData*.01)
    arimaMSE, arimaMAPE = evaluate_arima_model(df, trainData)

    # plot orig price and predicted price
    plotted_columns = ["Close Prices", "ARIMA Predictions", "LSTM Predictions"]
    fig = px.line(
        results,
        x=results["Date"],
        y=plotted_columns,
        title="BOTH PREDICTED BRENT CRUDE OIL PRICES",
        width=1000,
    )
    st.plotly_chart(fig, use_container_width=True)

    # initialize session state
    if 'details_state' not in st.session_state:
        st.session_state.details_state = False

    # Once details have been shown the flag stays set, so later runs keep
    # showing them even when the checkbox is cleared.
    show_details = details or st.session_state.details_state
    if show_details:
        st.session_state.details_state = True
        page = pagination(results)
        AgGrid(
            results,
            key='dailyCombined',
            fit_columns_on_grid_load=True,
            enable_enterprise_modules=True,
            theme='streamlit',
            gridOptions=page,
        )
        st.write(arimamodsum)

    # ACCURACY METRICS: a single-row table with one column per metric.
    accTable = pd.DataFrame({
        'ARIMA-MAPE': [arimaMAPE],
        'LSTM-MAPE': [lstmMape],
        'ARIMA-MSE': [arimaMSE],
        'LSTM-MSE': [lstmMse],
    })

    # accuracy metrics
    st.header("Accuracy Metrics")
    st.table(accTable)