File size: 3,225 Bytes
0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 0c6b0a3 6979b79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import requests
import pandas as pd
from bs4 import BeautifulSoup
import io
import yfinance as yf
from datetime import datetime
import numpy as np
import statsmodels.api as sm
# import plotly.express as px
import matplotlib.pyplot as plt
# from statistics import covariance
import warnings
warnings.filterwarnings("ignore")
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import pandas as pd
# Load the semicolon-delimited daily share-price table once, at import time.
# NOTE(review): none of the functions visible in this file read this
# module-level `df` (each takes or loads its own data); presumably it is what
# callers pass as `data` to get_model_accuracy -- confirm.
df = pd.read_csv('us-shareprices-daily.csv', sep=';')
def get_model_accuracy(data, ticker_symbol):
    """Walk-forward evaluate a SARIMAX(1,1,1) model on one ticker's closes.

    Rows of ``data`` whose ``Ticker`` column equals ``ticker_symbol`` are split
    85/15 in row order. For every held-out closing price a model is refit on
    all values seen so far, its one-step forecast is recorded, and the true
    value is then added to the history before the next step.

    Args:
        data: DataFrame with at least ``Ticker`` and ``Close`` columns.
        ticker_symbol: Value matched against the ``Ticker`` column.

    Returns:
        The string ``'Testing Mean Squared Error is <mse>'``, where ``<mse>``
        is the mean squared error between the held-out closes and the
        one-step forecasts.
    """
    ticker_rows = data[data['Ticker'] == ticker_symbol]

    # Single cut point for the 85/15 train/test split.
    split_at = int(len(ticker_rows) * 0.85)
    train_closes = ticker_rows[:split_at]['Close'].values
    test_closes = ticker_rows[split_at:]['Close'].values

    observed = list(train_closes)
    one_step_forecasts = []
    for actual_close in test_closes:
        # Refit on everything observed so far, then predict one step ahead.
        fitted = sm.tsa.statespace.SARIMAX(observed, order=(1, 1, 1)).fit(disp=0)
        one_step_forecasts.append(fitted.forecast()[0])
        # Reveal the true value only after the forecast has been made.
        observed.append(actual_close)

    mse = mean_squared_error(test_closes, one_step_forecasts)
    return 'Testing Mean Squared Error is {}'.format(mse)
def main(tickers, earliest_date):
    """Fit a SARIMAX(21,1,7) model per ticker and plot a 7-step forecast.

    Reads ``data_and_sp500.csv`` from the working directory. For each ticker,
    the model is fitted to that ticker's column, then the historical series
    and the 7-step forecast are plotted and the figure is shown.

    Args:
        tickers: Iterable of column names present in ``data_and_sp500.csv``.
        earliest_date: Currently unused; kept so existing callers keep working.

    Returns:
        None. The function's effect is the displayed matplotlib figures.
    """
    df = pd.read_csv('data_and_sp500.csv')

    # The date axis and its tick labels are the same for every ticker, so
    # they are computed once instead of on every loop iteration.
    dates = np.array(df['Date'])
    n_rows = df.shape[0]
    tick_step = 10
    tick_labels = list(dates[::tick_step])

    for ticker in tickers:
        prices = np.array(df[ticker])

        model = sm.tsa.statespace.SARIMAX(df[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        # NOTE(review): `alpha` is forwarded unchanged from the original call;
        # confirm that the installed statsmodels `forecast` accepts it.
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(dates, prices, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))
        plt.legend(loc="upper left")
        plt.xticks(tick_labels, rotation=90)
        # Use the loop variable here: the original formatted the literal
        # string 'ticker', so every plot carried the same wrong title.
        plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
        # Zoom to the last 100 observations plus room for the forecast.
        plt.xlim(n_rows - 100, n_rows + 21)
        # NOTE(review): one figure per ticker is assumed from the per-ticker
        # title; confirm this matches the intended layout.
        plt.show()
def stock_covariance(stocks):
    """Return the sample covariance of two stocks' closing prices.

    Reads ``djia_2017-2022.csv`` from the working directory, selects the rows
    whose ``Name`` equals ``stocks[0]`` and ``stocks[1]`` respectively, drops
    rows containing any missing value, and compares the ``Close`` columns.

    Args:
        stocks: Sequence whose first two items are values of the ``Name``
            column.

    Returns:
        The sample covariance (normalised by ``n - 1``) as a float.

    Raises:
        ValueError: If fewer than two names are given, or if the two
            selected series differ in length (raised by ``np.cov``).
    """
    if len(stocks) < 2:
        raise ValueError('stock_covariance needs two stock names')

    df = pd.read_csv('djia_2017-2022.csv')
    first = df[df['Name'] == stocks[0]].dropna(how='any')['Close'].to_numpy()
    # The original selected stocks[0] here as well, which yields the variance
    # of the first stock instead of the covariance of the pair.
    second = df[df['Name'] == stocks[1]].dropna(how='any')['Close'].to_numpy()

    # NOTE(review): values are paired by position, not by date; confirm both
    # stocks cover the same trading days in the CSV.
    # np.cov returns the 2x2 covariance matrix; [0, 1] is the cross term.
    return float(np.cov(first, second)[0, 1])
def dji_covariance(stock):
    """Return the sample covariance of a stock's closes with the ``^DJI`` rows.

    Reads ``djia_2017-2022.csv`` from the working directory, selects the rows
    whose ``Name`` is ``'^DJI'`` and the rows whose ``Name`` equals ``stock``,
    drops rows containing any missing value, and compares the ``Close``
    columns.

    Args:
        stock: Value of the ``Name`` column to compare against ``'^DJI'``.

    Returns:
        The sample covariance (normalised by ``n - 1``) as a float.

    Raises:
        ValueError: If the two selected series differ in length (raised by
            ``np.cov``).
    """
    df = pd.read_csv('djia_2017-2022.csv')
    index_closes = df[df['Name'] == '^DJI'].dropna(how='any')['Close'].to_numpy()
    stock_closes = df[df['Name'] == stock].dropna(how='any')['Close'].to_numpy()

    # The original called an un-imported `covariance`, which raised NameError.
    # np.cov (numpy is already imported by this file) gives the same
    # n-1-normalised result; [0, 1] is the cross term of the 2x2 matrix.
    # NOTE(review): values are paired by position, not by date; confirm both
    # series cover the same trading days in the CSV.
    return float(np.cov(index_closes, stock_closes)[0, 1])
if __name__ == "__main__":
    # Script entry point: forecast and plot a fixed set of tickers.
    main(
        tickers=['AA', 'IBM', 'AAPL', 'AMD'],
        earliest_date='2021-01-01',
    )
|