Lirsen Myrtaj committed on
Commit
6979b79
·
1 Parent(s): 7a8b853

Upload arima.py

Browse files
Files changed (1) hide show
  1. arima.py +57 -47
arima.py CHANGED
@@ -5,68 +5,79 @@ import io
5
  import yfinance as yf
6
  from datetime import datetime
7
  import numpy as np
8
- from statsmodels.tsa.api import Holt
9
- from statsmodels.tsa.arima.model import ARIMA
10
- from statsmodels.graphics.tsaplots import plot_predict
11
 
12
- import matplotlib.pyplot as plt, mpld3
13
- from statistics import covariance
 
 
14
 
15
 
16
  import warnings
17
  warnings.filterwarnings("ignore")
18
 
19
def dowanload_data():
    """Download daily prices for the DJIA constituents and write them to CSV.

    The ticker list is scraped from the ``ipos`` table on stockmarketmba.com,
    the ``^DJI`` index symbol is appended, and each symbol is fetched from
    Yahoo Finance for 2017-10-31 through 2022-10-31. All non-empty results
    are tagged with a ``Name`` column and saved to ``djia_2017-2022.csv``.

    Raises:
        ValueError: If the constituents table cannot be found on the page.
    """
    # Local import keeps this block self-contained.
    from io import StringIO

    url = 'https://stockmarketmba.com/stocksinthedjia.php'
    req = requests.get(url).text
    soup = BeautifulSoup(req, 'lxml')

    table = soup.find('table', attrs={'id': 'ipos'})
    if table is None:
        raise ValueError('No table with id "ipos" found at {}'.format(url))

    # Wrap the markup in a file-like object; passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table)))[0]
    tickers = df['Symbol'].tolist() + ['^DJI']

    frames = []
    for ticker in tickers:
        print(ticker)
        try:
            stock = yf.download(
                ticker,
                start=datetime(2017, 10, 31),
                end=datetime(2022, 10, 31),
                progress=False,
            )
        except Exception as exc:
            # Best effort: one failing symbol must not abort the whole run,
            # but the failure is reported instead of silently swallowed.
            print('Skipping {}: {}'.format(ticker, exc))
            continue

        if stock.empty:
            continue

        stock['Name'] = ticker
        frames.append(stock)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    stock_final = pd.concat(frames, sort=False) if frames else pd.DataFrame()
    stock_final.to_csv('djia_2017-2022.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
 
 
47
 
48
def main(ticker):
    """Fit an ARIMA(1, 1, 2) model to one ticker's closing prices and plot it.

    Reads ``djia_2017-2022.csv``, keeps the rows whose ``Name`` equals
    ``ticker``, drops rows with missing values, prints the fitted model
    summary, and shows the in-sample prediction together with the actual
    closing prices through mpld3.

    Args:
        ticker: Symbol to select from the ``Name`` column of the CSV.
    """
    df = pd.read_csv('djia_2017-2022.csv')
    # One dropna is enough; the frame has no missing values after this.
    df = df[df['Name'] == ticker].dropna(how='any')
    x = np.array(df['Date'])
    y = np.array(df['Close'])

    model = ARIMA(y, order=(1, 1, 2))
    model_fit = model.fit()
    print(model_fit.summary())
    plot_predict(model_fit, dynamic=False)

    # The previous Holt smoothing fit was never plotted or returned (its only
    # use was in commented-out code), so it has been dropped.

    plt.plot(x, y, label='actual')
    plt.legend(loc="upper left")
    step = 10
    # Label every ``step``-th date to keep the axis readable.
    plt.xticks(x[::step], rotation=90)
    plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
    mpld3.show()
 
 
70
 
71
 
72
  def stock_covariance(stocks):
@@ -89,5 +100,4 @@ def dji_covariance(stock):
89
 
90
 
91
if __name__ == "__main__":
    # Model IBM from the cached CSV. Call dowanload_data() beforehand when
    # 'djia_2017-2022.csv' needs to be (re)created.
    main(ticker='IBM')
 
5
  import yfinance as yf
6
  from datetime import datetime
7
  import numpy as np
8
+ import statsmodels.api as sm
 
 
9
 
10
+ # import plotly.express as px
11
+ import matplotlib.pyplot as plt
12
+
13
+ # from statistics import covariance
14
 
15
 
16
  import warnings
17
  warnings.filterwarnings("ignore")
18
 
19
+ import statsmodels.api as sm
20
+ from sklearn.metrics import mean_squared_error
21
+ import pandas as pd
22
 
23
+ df = pd.read_csv('us-shareprices-daily.csv', sep=';')
 
24
 
25
def get_model_accuracy(data, ticker_symbol):
    """Report the walk-forward test MSE of an ARIMA(1, 1, 1) model for a ticker.

    The rows of ``data`` whose ``Ticker`` equals ``ticker_symbol`` are split
    85/15 by position into training and test sets of ``Close`` prices. For
    every test observation the model is refitted on all values seen so far,
    a one-step forecast is recorded, and the true value is then added to the
    history.

    Args:
        data: DataFrame with at least ``Ticker`` and ``Close`` columns.
            Rows are used in their existing order (assumed chronological;
            this is not checked here).
        ticker_symbol: Value to match in the ``Ticker`` column.

    Returns:
        A string of the form ``'Testing Mean Squared Error is <mse>'``.

    Raises:
        ValueError: If the ticker has too few rows to produce a non-empty
            training set and a non-empty test set.
    """
    stock_data = data[data['Ticker'] == ticker_symbol]
    closes = stock_data['Close'].values

    # 85/15 positional split for the chosen stock symbol.
    split_at = int(len(closes) * 0.85)
    training_data = closes[:split_at]
    test_data = closes[split_at:]
    if len(training_data) == 0 or len(test_data) == 0:
        raise ValueError(
            'Not enough rows for ticker {!r} to build an 85/15 train/test '
            'split (found {})'.format(ticker_symbol, len(closes))
        )

    history = list(training_data)
    model_predictions = []
    for true_test_value in test_data:
        model = sm.tsa.statespace.SARIMAX(history, order=(1, 1, 1))
        model_fit = model.fit(disp=0)
        # forecast() defaults to a single step ahead.
        model_predictions.append(model_fit.forecast()[0])
        # Feed the observed value back in before the next refit.
        history.append(true_test_value)

    MSE_error = mean_squared_error(test_data, model_predictions)
    return 'Testing Mean Squared Error is {}'.format(MSE_error)
49
 
 
 
 
 
 
50
 
51
def main(tickers, earliest_date):
    """Fit a SARIMAX model per ticker and plot recent history with a forecast.

    Reads ``data_and_sp500.csv`` and, for each entry in ``tickers``, fits a
    SARIMAX model of order (21, 1, 7) to that column, forecasts 7 steps
    ahead, and shows a plot of the historical series and the forecast,
    zoomed to the last 100 rows plus 21 positions past the end.

    Args:
        tickers: Iterable of column names in the CSV to model. The CSV is
            also indexed by a ``Date`` column for the x axis.
        earliest_date: Currently unused by this function; kept so existing
            callers continue to work.
    """
    df = pd.read_csv('data_and_sp500.csv')
    x = np.array(df['Date'])
    n_rows = df.shape[0]
    step = 10

    for ticker in tickers:
        y = np.array(df[ticker])

        model = sm.tsa.statespace.SARIMAX(df[ticker], order=(21, 1, 7))
        model_fit = model.fit(disp=-1)
        forecast = model_fit.forecast(7, alpha=0.05)

        plt.plot(x, y, label='{} historical'.format(ticker))
        plt.plot(forecast, label='{} forecast'.format(ticker))
        plt.legend(loc="upper left")
        # Label every ``step``-th date to keep the axis readable.
        plt.xticks(x[::step], rotation=90)

        # Use the loop variable: the literal string 'ticker' was being
        # formatted into the title before, so every plot had the same title.
        plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
        plt.xlim(n_rows - 100, n_rows + 21)
        plt.show()
81
 
82
 
83
  def stock_covariance(stocks):
 
100
 
101
 
102
if __name__ == "__main__":
    # Run the forecast plots for a fixed basket of tickers.
    main(tickers=['AA', 'IBM', 'AAPL', 'AMD'], earliest_date='2021-01-01')