# cse6242-dataminers / arima.py
# Lirsen Myrtaj
# Upload arima.py
# 6979b79
# raw
# history blame
# 3.23 kB
import requests
import pandas as pd
from bs4 import BeautifulSoup
import io
import yfinance as yf
from datetime import datetime
import numpy as np
import statsmodels.api as sm
# import plotly.express as px
import matplotlib.pyplot as plt
# from statistics import covariance
import warnings
warnings.filterwarnings("ignore")
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import pandas as pd
# Module-level load of the semicolon-delimited daily share-price CSV; this runs
# at import time. None of the functions in the reviewed section read this
# global: `main`, `stock_covariance` and `dji_covariance` each bind their own
# local `df`, and `get_model_accuracy` receives its frame as a parameter.
# NOTE(review): presumably meant to be passed as `data` to get_model_accuracy
# -- confirm against callers.
df = pd.read_csv('us-shareprices-daily.csv', sep=';')
def get_model_accuracy(data, ticker_symbol, *, order=(1, 1, 1), train_fraction=0.85):
    """Walk-forward evaluate a SARIMAX model on one ticker's closing prices.

    The rows of ``data`` whose ``Ticker`` equals ``ticker_symbol`` are split
    positionally into a training head and a testing tail. For every test
    observation a fresh SARIMAX model is fitted on all values seen so far, a
    one-step-ahead forecast is recorded, and the true value is then appended
    to the history before the next step.

    Args:
        data: DataFrame with at least ``Ticker`` and ``Close`` columns.
        ticker_symbol: Value of the ``Ticker`` column to evaluate.
        order: ``order`` argument forwarded to SARIMAX (default ``(1, 1, 1)``).
        train_fraction: Fraction of the rows used for the initial training
            window (default ``0.85``, i.e. an 85/15 split).

    Returns:
        The string ``'Testing Mean Squared Error is <mse>'``.

    Raises:
        ValueError: If the ticker has too few rows to produce a non-empty
            training set and a non-empty test set.
    """
    closes = data.loc[data['Ticker'] == ticker_symbol, 'Close'].values

    # Compute the split point once so both halves are guaranteed to agree.
    split_at = int(len(closes) * train_fraction)
    training_values, test_values = closes[:split_at], closes[split_at:]

    # Fail early with a clear message instead of an obscure error from the
    # model fit or the metric when the ticker is unknown or too short.
    if len(training_values) == 0 or len(test_values) == 0:
        raise ValueError(
            "Not enough 'Close' observations for ticker {!r} to build a "
            "train/test split".format(ticker_symbol)
        )

    history = list(training_values)
    model_predictions = []
    for true_test_value in test_values:
        # The model is refitted at every step on the growing history, so each
        # prediction only uses information available before that time point.
        model_fit = sm.tsa.statespace.SARIMAX(history, order=order).fit(disp=0)
        model_predictions.append(model_fit.forecast()[0])
        history.append(true_test_value)

    mse_error = mean_squared_error(test_values, model_predictions)
    return 'Testing Mean Squared Error is {}'.format(mse_error)
def main(tickers, earliest_date):
    """Fit a SARIMAX(21, 1, 7) model per ticker and plot a 7-step forecast.

    Reads ``data_and_sp500.csv``, which must contain a ``Date`` column and one
    price column per requested ticker. For each ticker the full price column
    is used to fit the model, and the historical series plus the forecast are
    drawn with matplotlib and shown.

    Args:
        tickers: Iterable of column names in ``data_and_sp500.csv`` to model.
        earliest_date: Currently unused; kept so existing callers keep working.

    Returns:
        None. The function's output is the displayed figures.
    """
    df = pd.read_csv('data_and_sp500.csv')

    # The date axis and its tick labels do not depend on the ticker, so they
    # are computed once instead of on every loop iteration.
    dates = np.array(df['Date'])
    n_rows = df.shape[0]
    step = 10
    tick_labels = list(dates[::step])  # every `step`-th date, starting at 0

    for ticker in tickers:
        prices = np.array(df[ticker])

        model = sm.tsa.statespace.SARIMAX(df[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        # NOTE(review): `alpha` is forwarded as an extra keyword, but
        # forecast() returns point forecasts only -- confirm whether
        # get_forecast(7).conf_int(alpha=0.05) was intended.
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(dates, prices, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))
        plt.legend(loc="upper left")
        plt.xticks(tick_labels, rotation=90)
        # Use the loop variable here; the literal string 'ticker' made every
        # figure carry the same meaningless title.
        plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
        # Zoom in on the last 100 observations plus room for the forecast.
        plt.xlim(n_rows - 100, n_rows + 21)
        # NOTE(review): one figure is shown per ticker; indentation was not
        # recoverable from the reviewed copy -- confirm this was the intent.
        plt.show()
def stock_covariance(stocks):
    """Return the sample covariance of two stocks' closing prices.

    Reads ``djia_2017-2022.csv`` and, for each of the first two names in
    ``stocks``, takes the ``Close`` column of the rows whose ``Name`` matches
    (rows containing any missing value are dropped first). Observations are
    paired by position, not aligned on any date column.

    Args:
        stocks: Sequence whose first two items are ``Name`` values to compare.

    Returns:
        The sample covariance (n - 1 denominator) as a float.

    Raises:
        IndexError: If ``stocks`` has fewer than two items.
        ValueError: If the two series differ in length or contain fewer than
            two observations.
    """
    df = pd.read_csv('djia_2017-2022.csv')
    first = df[df['Name'] == stocks[0]].dropna(how='any')['Close'].to_numpy()
    # The second series must come from the second name; selecting stocks[0]
    # twice would yield the variance of one stock instead of a covariance.
    second = df[df['Name'] == stocks[1]].dropna(how='any')['Close'].to_numpy()

    if len(first) != len(second) or len(first) < 2:
        raise ValueError(
            'Covariance needs two equally long series with at least two '
            'observations; got {} and {}'.format(len(first), len(second))
        )

    # np.cov returns the 2x2 covariance matrix; the off-diagonal entry is the
    # covariance between the two series.
    return float(np.cov(first, second)[0, 1])
def dji_covariance(stock):
    """Return the sample covariance between the ``^DJI`` index and a stock.

    Reads ``djia_2017-2022.csv`` and compares the ``Close`` column of the rows
    named ``^DJI`` with the ``Close`` column of the rows named ``stock`` (rows
    containing any missing value are dropped first). Observations are paired
    by position, not aligned on any date column.

    Args:
        stock: ``Name`` value of the stock to compare against ``^DJI``.

    Returns:
        The sample covariance (n - 1 denominator) as a float.

    Raises:
        ValueError: If the two series differ in length or contain fewer than
            two observations.
    """
    df = pd.read_csv('djia_2017-2022.csv')
    index_close = df[df['Name'] == '^DJI'].dropna(how='any')['Close'].to_numpy()
    stock_close = df[df['Name'] == stock].dropna(how='any')['Close'].to_numpy()

    if len(index_close) != len(stock_close) or len(index_close) < 2:
        raise ValueError(
            'Covariance needs two equally long series with at least two '
            'observations; got {} and {}'.format(len(index_close), len(stock_close))
        )

    # np.cov returns the 2x2 covariance matrix; the off-diagonal entry is the
    # covariance between the two series.
    return float(np.cov(index_close, stock_close)[0, 1])
if __name__ == "__main__":
    # Script entry point: model and plot a fixed set of example tickers.
    main(
        tickers=['AA', 'IBM', 'AAPL', 'AMD'],
        earliest_date='2021-01-01',
    )