"""ARIMA-style forecasting and covariance helpers for share-price CSV files."""

import io
import warnings
from datetime import datetime

# Silence all warnings globally, as the original script did.  Installed before
# the third-party imports so their import-time warnings are suppressed too.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import statsmodels.api as sm
import yfinance as yf
from bs4 import BeautifulSoup
from sklearn.metrics import mean_squared_error

# Loaded at import time; none of the functions below read this global (each
# takes its data as an argument or loads its own CSV).  Kept so that the
# module-level name `df` remains available to anything that imports it.
df = pd.read_csv('us-shareprices-daily.csv', sep=';')


def get_model_accuracy(data, ticker_symbol):
    """Walk-forward evaluate a SARIMAX(1,1,1) model on one ticker's closes.

    The rows of ``data`` whose ``'Ticker'`` column equals ``ticker_symbol``
    are split 85/15 in row order.  For every test observation the model is
    refitted on all values seen so far, a one-step forecast is recorded, and
    the true value is then appended to the history.

    Args:
        data: DataFrame with at least ``'Ticker'`` and ``'Close'`` columns.
        ticker_symbol: Value to match in the ``'Ticker'`` column.

    Returns:
        The string ``'Testing Mean Squared Error is <mse>'``.

    Raises:
        ValueError: If the ticker has too few rows to leave any training data.
    """
    stock_data = data[data['Ticker'] == ticker_symbol]
    split_at = int(len(stock_data) * 0.85)
    if split_at == 0:
        raise ValueError(
            'Not enough rows for ticker {!r} to build a training set '
            '(found {})'.format(ticker_symbol, len(stock_data))
        )

    closes = stock_data['Close'].values
    test_data = closes[split_at:]
    history = list(closes[:split_at])

    model_predictions = []
    for true_test_value in test_data:
        # Refit on everything observed so far, then predict one step ahead.
        model = sm.tsa.statespace.SARIMAX(history, order=(1, 1, 1))
        model_fit = model.fit(disp=0)
        model_predictions.append(model_fit.forecast()[0])
        history.append(true_test_value)

    mse_error = mean_squared_error(test_data, model_predictions)
    return 'Testing Mean Squared Error is {}'.format(mse_error)


def main(tickers, earliest_date):
    """Fit a SARIMAX(21,1,7) model per ticker and plot a 7-step forecast.

    Reads ``data_and_sp500.csv``, which must contain a ``'Date'`` column and
    one column per requested ticker.  One figure is shown per ticker, zoomed
    to the last 100 rows plus room for the forecast.

    Args:
        tickers: Iterable of column names in ``data_and_sp500.csv``.
        earliest_date: Currently unused; kept for interface compatibility.
            NOTE(review): presumably meant to filter rows by date -- confirm.
    """
    prices = pd.read_csv('data_and_sp500.csv')
    x = np.array(prices['Date'])
    n_rows = prices.shape[0]
    step = 10  # label every 10th date on the x axis

    for ticker in tickers:
        y = np.array(prices[ticker])

        model = sm.tsa.statespace.SARIMAX(prices[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(x, y, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))
        plt.legend(loc="upper left")
        plt.xticks(list(x[::step]), rotation=90)
        # Fixed: the title previously formatted the literal string 'ticker'.
        plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
        plt.xlim(n_rows - 100, n_rows + 21)
        # NOTE(review): shown once per ticker; the collapsed original does not
        # make clear whether a single combined figure was intended.
        plt.show()


def _close_prices(frame, name):
    """Return the 'Close' column of the fully non-null rows where Name == name."""
    return frame[frame['Name'] == name].dropna(how='any')['Close']


def _sample_covariance(x, y):
    """Return the sample (n-1) covariance of two equal-length 1-D series."""
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.shape != y_values.shape:
        raise ValueError(
            'Series lengths differ ({} vs {}); cannot compute covariance'.format(
                x_values.size, y_values.size
            )
        )
    if x_values.size < 2:
        raise ValueError('At least two observations are required')
    # np.cov returns the 2x2 covariance matrix; the off-diagonal is cov(x, y).
    return float(np.cov(x_values, y_values)[0, 1])


def stock_covariance(stocks):
    """Return the sample covariance of the closes of two stocks.

    Reads ``djia_2017-2022.csv`` and compares the ``'Close'`` values of the
    rows named ``stocks[0]`` and ``stocks[1]``.

    Args:
        stocks: Sequence whose first two items are values of the ``'Name'``
            column.

    Returns:
        The sample covariance as a float.

    Raises:
        ValueError: If fewer than two names are given, or the two series do
            not have the same number of observations.
    """
    if len(stocks) < 2:
        raise ValueError('stock_covariance needs two stock names')
    frame = pd.read_csv('djia_2017-2022.csv')
    x = _close_prices(frame, stocks[0])
    # Fixed: the second series previously reused stocks[0].
    y = _close_prices(frame, stocks[1])
    return _sample_covariance(x, y)


def dji_covariance(stock):
    """Return the sample covariance between '^DJI' closes and ``stock`` closes.

    Reads ``djia_2017-2022.csv`` and compares the ``'Close'`` values of the
    rows named ``'^DJI'`` with those named ``stock``.

    Args:
        stock: Value of the ``'Name'`` column to compare against ``'^DJI'``.

    Returns:
        The sample covariance as a float.

    Raises:
        ValueError: If the two series do not have the same number of
            observations.
    """
    frame = pd.read_csv('djia_2017-2022.csv')
    x = _close_prices(frame, '^DJI')
    y = _close_prices(frame, stock)
    return _sample_covariance(x, y)


if __name__ == "__main__":
    main(['AA', 'IBM', 'AAPL', 'AMD'], '2021-01-01')