File size: 3,225 Bytes
0c6b0a3
 
 
 
 
 
 
6979b79
0c6b0a3
6979b79
 
 
 
0c6b0a3
 
 
 
 
6979b79
 
 
0c6b0a3
6979b79
0c6b0a3
6979b79
 
 
0c6b0a3
 
6979b79
0c6b0a3
6979b79
 
 
 
 
 
 
 
 
 
 
 
 
 
0c6b0a3
6979b79
 
0c6b0a3
 
6979b79
 
 
 
 
 
 
 
 
 
 
 
 
0c6b0a3
6979b79
0c6b0a3
6979b79
 
 
 
 
0c6b0a3
6979b79
 
 
 
 
 
 
 
0c6b0a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6979b79
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import requests
import pandas as pd
from bs4 import BeautifulSoup
import io   
import yfinance as yf
from datetime import datetime
import numpy as np
import statsmodels.api as sm

# import plotly.express as px
import matplotlib.pyplot as plt

# from statistics import covariance


import warnings
warnings.filterwarnings("ignore")

import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import pandas as pd

# Load the daily share-price table (semicolon-delimited CSV) at import time.
# NOTE(review): none of the functions visible in this file read this
# module-level `df` -- each one either takes its data as an argument or reads
# its own CSV into a local `df`.  Presumably it is meant to be passed to
# get_model_accuracy(); confirm before relying on it.
df = pd.read_csv('us-shareprices-daily.csv', sep=';')

def get_model_accuracy(data, ticker_symbol):
    """Walk-forward evaluate a SARIMAX(1,1,1) model on one ticker's closes.

    The rows of ``data`` whose ``'Ticker'`` equals ``ticker_symbol`` are split
    85/15 in their existing order.  For every held-out close the model is
    refitted on all values seen so far, a one-step-ahead forecast is recorded,
    and the true value is then added to the history.

    Args:
        data: DataFrame with at least ``'Ticker'`` and ``'Close'`` columns.
        ticker_symbol: Value to match in the ``'Ticker'`` column.

    Returns:
        A string of the form ``'Testing Mean Squared Error is <mse>'``.
    """
    rows = data[data['Ticker'] == ticker_symbol]

    # First 85% of the rows seed the history; the remainder is the hold-out.
    cutoff = int(len(rows) * 0.85)
    observed = list(rows[0:cutoff]['Close'].values)
    actuals = rows[cutoff:]['Close'].values

    forecasts = []
    for actual in actuals:
        fitted = sm.tsa.statespace.SARIMAX(observed, order=(1,1,1)).fit(disp=0)
        forecasts.append(fitted.forecast()[0])
        # Reveal the true value only after forecasting it.
        observed.append(actual)

    mse = mean_squared_error(actuals, forecasts)
    return 'Testing Mean Squared Error is {}'.format(mse)


def main(tickers, earliest_date):
    """Plot price history and a 7-step SARIMAX forecast for each ticker.

    Reads ``data_and_sp500.csv``, fits a SARIMAX model of order (21, 1, 7) to
    each requested column, and draws every ticker's history and forecast on
    one shared matplotlib figure, which is then shown.

    Args:
        tickers: Iterable of column names in ``data_and_sp500.csv`` to model
            and plot.
        earliest_date: Currently unused; accepted so existing callers keep
            working.  NOTE(review): presumably meant to trim the history --
            confirm the intended behavior before wiring it in.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    tickers = list(tickers)
    df = pd.read_csv('data_and_sp500.csv')

    # The date axis is the same for every ticker, so build it once.
    dates = np.array(df['Date'])

    for ticker in tickers:
        prices = np.array(df[ticker])

        model = sm.tsa.statespace.SARIMAX(df[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(dates, prices, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))

    if tickers:
        # Legend and ticks only need to be applied once, after all lines exist.
        plt.legend(loc="upper left")
        step = 10  # label every 10th date to keep the axis readable
        plt.xticks(list(dates[::step]), rotation=90)

    # All tickers share one figure, so the title names every one of them.
    plt.title('ARIMA forecast model vs. actual for {}'.format(
        ', '.join(str(ticker) for ticker in tickers)))
    # Zoom to the last 100 observations plus room for the forecast.
    plt.xlim(df.shape[0] - 100, df.shape[0] + 21)
    plt.show()


def stock_covariance(stocks):
    """Return the sample covariance of two stocks' closing prices.

    Args:
        stocks: Sequence whose first two items are values of the ``'Name'``
            column in ``djia_2017-2022.csv``.

    Returns:
        The sample covariance (n - 1 denominator) of the two ``'Close'``
        series, as a float.

    Raises:
        ValueError: If fewer than two names are given, if the two series
            differ in length, or if they hold fewer than two observations.
    """
    if len(stocks) < 2:
        raise ValueError('stock_covariance needs two stock names')

    df = pd.read_csv('djia_2017-2022.csv')
    # Rows with any missing field are dropped per stock before pairing.
    x = df[df['Name'] == stocks[0]].dropna(how='any')['Close']
    y = df[df['Name'] == stocks[1]].dropna(how='any')['Close']

    if len(x) != len(y):
        raise ValueError(
            'Close series differ in length: {} has {}, {} has {}'.format(
                stocks[0], len(x), stocks[1], len(y)))
    if len(x) < 2:
        raise ValueError('covariance requires at least two observations')

    # NOTE(review): values are paired by row order; this assumes both stocks'
    # rows cover the same dates in the same order -- confirm against the CSV.
    return float(np.cov(x.to_numpy(), y.to_numpy())[0, 1])

def dji_covariance(stock):
    """Return the sample covariance of a stock's closes with the ``^DJI`` rows.

    Args:
        stock: Value of the ``'Name'`` column in ``djia_2017-2022.csv`` to
            compare against the rows named ``'^DJI'``.

    Returns:
        The sample covariance (n - 1 denominator) of the two ``'Close'``
        series, as a float.

    Raises:
        ValueError: If the two series differ in length or hold fewer than two
            observations.
    """
    df = pd.read_csv('djia_2017-2022.csv')
    # Rows with any missing field are dropped per name before pairing.
    x = df[df['Name'] == '^DJI'].dropna(how='any')['Close']
    y = df[df['Name'] == stock].dropna(how='any')['Close']

    if len(x) != len(y):
        raise ValueError(
            'Close series differ in length: ^DJI has {}, {} has {}'.format(
                len(x), stock, len(y)))
    if len(x) < 2:
        raise ValueError('covariance requires at least two observations')

    # NOTE(review): values are paired by row order; this assumes both names'
    # rows cover the same dates in the same order -- confirm against the CSV.
    return float(np.cov(x.to_numpy(), y.to_numpy())[0, 1])




# Script entry point: fit and plot forecasts for a fixed set of tickers.
# The second argument is passed as main()'s `earliest_date`.
if __name__ == "__main__":
    main(['AA', 'IBM', 'AAPL', 'AMD'], '2021-01-01')