Lirsen Myrtaj
committed on
Commit
·
6979b79
1
Parent(s):
7a8b853
Upload arima.py
Browse files
arima.py
CHANGED
@@ -5,68 +5,79 @@ import io
|
|
5 |
import yfinance as yf
|
6 |
from datetime import datetime
|
7 |
import numpy as np
|
8 |
-
|
9 |
-
from statsmodels.tsa.arima.model import ARIMA
|
10 |
-
from statsmodels.graphics.tsaplots import plot_predict
|
11 |
|
12 |
-
import
|
13 |
-
|
|
|
|
|
14 |
|
15 |
|
16 |
import warnings
|
17 |
warnings.filterwarnings("ignore")
|
18 |
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
soup = BeautifulSoup(req, 'lxml')
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
stock_final = pd.DataFrame()
|
29 |
|
30 |
-
for i in tickers:
|
31 |
-
print(i)
|
32 |
-
try:
|
33 |
-
stock = []
|
34 |
-
stock = yf.download(i, start=datetime(2017, 10, 31), end=datetime(2022, 10, 31), progress=False)
|
35 |
-
|
36 |
-
if len(stock) == 0:
|
37 |
-
None
|
38 |
|
39 |
-
|
40 |
-
stock['Name'] = i
|
41 |
-
stock_final = stock_final.append(stock, sort=False)
|
42 |
-
except:
|
43 |
-
pass
|
44 |
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
|
|
|
|
47 |
|
48 |
-
def main(ticker):
|
49 |
-
df = pd.read_csv('djia_2017-2022.csv')
|
50 |
-
df = df[df['Name']==ticker].dropna(how='any')
|
51 |
-
x = np.array(df.dropna()['Date'])
|
52 |
-
y = np.array(df.dropna()['Close'])
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
fit1 = Holt(data).fit(smoothing_level=0.1, smoothing_slope=0.1, optimized=False)
|
61 |
|
62 |
-
|
|
|
|
|
|
|
|
|
63 |
|
64 |
-
plt.
|
65 |
-
plt.
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
70 |
|
71 |
|
72 |
def stock_covariance(stocks):
|
@@ -89,5 +100,4 @@ def dji_covariance(stock):
|
|
89 |
|
90 |
|
91 |
if __name__ == "__main__":
|
92 |
-
main('IBM')
|
93 |
-
# dowanload_data()
|
|
|
5 |
import yfinance as yf
|
6 |
from datetime import datetime
|
7 |
import numpy as np
|
8 |
+
import statsmodels.api as sm
|
|
|
|
|
9 |
|
10 |
+
# import plotly.express as px
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
|
13 |
+
# from statistics import covariance
|
14 |
|
15 |
|
16 |
import warnings
|
17 |
warnings.filterwarnings("ignore")
|
18 |
|
19 |
+
import statsmodels.api as sm
|
20 |
+
from sklearn.metrics import mean_squared_error
|
21 |
+
import pandas as pd
|
22 |
|
23 |
+
# Module-level load: reads the semicolon-separated daily share-price CSV as
# soon as this file is imported or run.
# NOTE(review): nothing visible in this file passes this frame to
# get_model_accuracy, and main() binds its own local `df` from a different
# CSV — confirm this global is still needed.
df = pd.read_csv('us-shareprices-daily.csv', sep=';')
|
|
|
24 |
|
25 |
+
def get_model_accuracy(data, ticker_symbol):
    """Walk-forward evaluate an ARIMA(1,1,1) model on one ticker's closes.

    The rows of ``data`` whose ``'Ticker'`` equals ``ticker_symbol`` are split
    85/15 in their existing row order. For every held-out observation a fresh
    SARIMAX(1,1,1) model is fitted on everything seen so far, a one-step
    forecast is recorded, and the true value is then appended to the history.

    Args:
        data: DataFrame with at least ``'Ticker'`` and ``'Close'`` columns.
            NOTE(review): rows are assumed to already be in chronological
            order; nothing here sorts them.
        ticker_symbol: Value to match in the ``'Ticker'`` column.

    Returns:
        str: ``'Testing Mean Squared Error is <mse>'``.

    Raises:
        ValueError: If fewer than two rows match ``ticker_symbol``, because
            the 85/15 split would leave the train or test part empty.
    """
    stock_data = data[data['Ticker'] == ticker_symbol]

    # 85/15 train/test split; compute the cut point once.
    split_at = int(len(stock_data) * 0.85)
    if split_at == 0:
        # 0 or 1 matching rows: either the training or the test slice would
        # be empty, which otherwise fails deep inside the model/metric code.
        raise ValueError(
            'Not enough rows for ticker {!r} to build a train/test split '
            '(found {})'.format(ticker_symbol, len(stock_data))
        )

    training_data = stock_data.iloc[:split_at]['Close'].values
    test_data = stock_data.iloc[split_at:]['Close'].values

    history = list(training_data)
    model_predictions = []
    for true_test_value in test_data:
        # Refit on all observations seen so far, then predict one step ahead.
        model = sm.tsa.statespace.SARIMAX(history, order=(1, 1, 1))
        model_fit = model.fit(disp=0)
        model_predictions.append(model_fit.forecast()[0])
        # Reveal the actual value before forecasting the next point.
        history.append(true_test_value)

    MSE_error = mean_squared_error(test_data, model_predictions)
    return f'Testing Mean Squared Error is {MSE_error}'
|
49 |
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
def main(tickers, earliest_date):
    """Fit a SARIMAX(21,1,7) model per ticker and plot a 7-step forecast.

    Reads ``data_and_sp500.csv``, which must contain a ``'Date'`` column and
    one column per requested ticker. For each ticker the full column is used
    to fit the model; the historical series and the 7-step forecast are drawn
    on one figure, and ``plt.show()`` is called once per ticker.

    Args:
        tickers: Iterable of column names in the CSV to model and plot.
        earliest_date: Currently unused by the body; kept so existing callers
            keep working.

    Raises:
        KeyError: If ``'Date'`` or a requested ticker column is missing.
    """
    df = pd.read_csv('data_and_sp500.csv')

    # Loop-invariant values: the date axis and the tick/limit settings are
    # the same for every ticker.
    x = np.array(df['Date'])
    step = 10
    n_rows = df.shape[0]
    tick_labels = list(x[::step])  # every `step`-th date, starting at row 0

    for ticker in tickers:
        y = np.array(df[ticker])

        model = sm.tsa.statespace.SARIMAX(df[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        # NOTE(review): `alpha` is forwarded exactly as before; confirm that
        # forecast() actually honours it for point forecasts.
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(x, y, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))
        plt.legend(loc="upper left")
        plt.xticks(tick_labels, rotation=90)

        # Use the ticker variable (the original formatted the literal string
        # 'ticker', so every chart had the same title).
        plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
        # Presumably zooms to the last 100 rows plus room for the forecast;
        # relies on the date axis being positioned by row number.
        plt.xlim(n_rows - 100, n_rows + 21)
        plt.show()
|
76 |
+
# plot_df = data.to_frame().reset_index().rename(columns={'index': 'date', 0: 'price'})
|
77 |
+
# plot_df.columns = ['date', 'price']
|
78 |
+
# print(plot_df)
|
79 |
+
# fig = px.line(plot_df, x='date', y='price')
|
80 |
+
# fig.show()
|
81 |
|
82 |
|
83 |
def stock_covariance(stocks):
|
|
|
100 |
|
101 |
|
102 |
if __name__ == "__main__":
|
103 |
+
main(['AA', 'IBM', 'AAPL', 'AMD'], '2021-01-01')
|
|