# cse6242-dataminers / arima.py
# Lirsen Myrtaj
# Rename capm.py to arima.py
# cad4a4b
# raw
# history blame
# 2.52 kB
import requests
import pandas as pd
from bs4 import BeautifulSoup
import io
import yfinance as yf
from datetime import datetime
import numpy as np
from statsmodels.tsa.api import Holt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_predict
import matplotlib.pyplot as plt, mpld3
from statistics import covariance
import warnings
warnings.filterwarnings("ignore")
def dowanload_data():
    """Download daily prices for the DJIA constituents and save them to CSV.

    The ticker list is scraped from the ``ipos`` table on
    stockmarketmba.com, and ``^DJI`` (the index itself) is appended to it.
    Each ticker is downloaded from Yahoo Finance for 2017-10-31 through
    2022-10-31, tagged with a ``Name`` column holding its symbol, and all
    frames are stacked and written to ``djia_2017-2022.csv`` in the current
    working directory.

    Tickers whose download fails or returns no rows are skipped (best
    effort); failures are reported on stdout instead of being silently
    swallowed.

    Raises:
        ValueError: If the constituents table is not present in the page.
    """
    url = 'https://stockmarketmba.com/stocksinthedjia.php'
    # A timeout keeps the script from hanging forever on a stalled server.
    req = requests.get(url, timeout=30).text
    soup = BeautifulSoup(req, 'lxml')
    table = soup.find('table', attrs={'id': 'ipos'})
    if table is None:
        raise ValueError("Could not find table with id 'ipos' at " + url)

    # read_html expects a path or file-like object; passing literal HTML as a
    # plain string is deprecated in recent pandas releases.
    df = pd.read_html(io.StringIO(str(table)))[0]
    tickers = df['Symbol'].tolist() + ['^DJI']

    frames = []
    for ticker in tickers:
        print(ticker)
        try:
            stock = yf.download(
                ticker,
                start=datetime(2017, 10, 31),
                end=datetime(2022, 10, 31),
                progress=False,
            )
        except Exception as exc:
            # Best effort: one failing ticker must not abort the whole run.
            print('Skipping {}: {}'.format(ticker, exc))
            continue
        if len(stock) == 0:
            continue
        stock['Name'] = ticker
        frames.append(stock)

    # DataFrame.append was removed in pandas 2.0; a single concat is the
    # supported (and faster) way to stack the per-ticker frames.
    stock_final = pd.concat(frames, sort=False) if frames else pd.DataFrame()
    stock_final.to_csv('djia_2017-2022.csv')
def main(ticker, *, show_smoothing=False):
    """Fit an ARIMA(1, 1, 2) model to one ticker's closes and plot it.

    Reads ``djia_2017-2022.csv`` from the current working directory, keeps
    the rows whose ``Name`` equals ``ticker`` (dropping rows with any missing
    value), fits the model on the ``Close`` column, prints the fit summary,
    draws the in-sample prediction together with the actual closing prices,
    and shows the figure through ``mpld3``.

    Args:
        ticker: Symbol to model, as stored in the CSV's ``Name`` column.
        show_smoothing: When true, also fit Holt's linear-trend smoothing
            (level 0.1, trend 0.1, not optimized) and overlay its fitted
            values. Off by default, so the default figure contains only the
            ARIMA prediction and the actual series.

    Raises:
        ValueError: If the CSV holds no usable rows for ``ticker``.
    """
    df = pd.read_csv('djia_2017-2022.csv')
    df = df[df['Name'] == ticker].dropna(how='any')
    if df.empty:
        # Fail early with a clear message rather than deep inside ARIMA.
        raise ValueError(
            'No price data for ticker {!r} in djia_2017-2022.csv'.format(ticker)
        )

    x = np.array(df['Date'])
    y = np.array(df['Close'])

    model_fit = ARIMA(y, order=(1, 1, 2)).fit()
    print(model_fit.summary())
    plot_predict(model_fit, dynamic=False)

    if show_smoothing:
        data = pd.Series(y, x)
        # `smoothing_trend` is the current name of the deprecated
        # `smoothing_slope` keyword.
        fit1 = Holt(data).fit(
            smoothing_level=0.1, smoothing_trend=0.1, optimized=False
        )
        plt.plot(fit1.fittedvalues, marker="o", color="blue", label='smoothing')

    plt.plot(x, y, label='actual')
    plt.legend(loc="upper left")

    # Label every 10th date only, so the rotated tick labels stay readable.
    step = 10
    plt.xticks(list(x[::step]), rotation=90)
    plt.title('ARIMA forecast model vs. actual for {}'.format(ticker))
    mpld3.show()
def stock_covariance(stocks):
df = pd.read_csv('djia_2017-2022.csv')
x = df[df['Name']==stocks[0]].dropna(how='any')['Close']
y = df[df['Name']==stocks[0]].dropna(how='any')['Close']
cov = covariance(x, y)
return cov
def dji_covariance(stock):
df = pd.read_csv('djia_2017-2022.csv')
x = df[df['Name']=='^DJI'].dropna(how='any')['Close']
y = df[df['Name']==stock].dropna(how='any')['Close']
cov = covariance(x, y)
return cov
if __name__ == "__main__":
    # Running the file as a script fits and displays the model for IBM.
    main('IBM')
    # Uncomment to re-download the price data into djia_2017-2022.csv.
    # dowanload_data()