Carsten Stahl committed on
Commit
d31af6a
1 Parent(s): 48067f6

Introduced data management classes to separate backend from frontend

Browse files
Files changed (1) hide show
  1. utilities/py/data_management.py +151 -0
utilities/py/data_management.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import yfinance as yf
3
+
4
+ from pypfopt import EfficientFrontier
5
+ from pypfopt import risk_models
6
+ from pypfopt import expected_returns
7
+ from pypfopt import HRPOpt, hierarchical_portfolio
8
+
9
+
10
class CompData:
    """Manage company metadata and download the matching stock prices.

    Attributes:
        df: The company table passed to the constructor.
        company_names: Values of the ``"Name"`` column, in table order.
        company_symbols: Values of the ``"Ticker"`` column with ``".NS"``
            appended (presumably the Yahoo Finance suffix for NSE listings;
            confirm against the data source).
        name_to_id: Mapping from company name to symbol.
        id_to_name: Mapping from symbol to company name.
    """

    def __init__(self, company_data):
        """Store the company table and build the name <-> symbol lookups.

        Args:
            company_data: DataFrame with a ``"Name"`` and a ``"Ticker"``
                column.

        Raises:
            KeyError: If one of the two columns is missing.
        """
        self.df = company_data
        self.company_names = self.df["Name"].to_list()
        self.company_symbols = (self.df["Ticker"] + ".NS").to_list()

        # utilities for translation between display names and symbols;
        # for duplicated keys the last row wins.
        self.name_to_id = dict(zip(self.company_names, self.company_symbols))
        self.id_to_name = dict(zip(self.company_symbols, self.company_names))

    def fetch_stock_data(self, company_ids: list, start_date: str) -> pd.DataFrame:
        """Download adjusted close prices through the yfinance client.

        Args:
            company_ids: Symbols to download. Duplicates are requested once.
            start_date: Value forwarded to ``yf.download(start=...)``.

        Returns:
            DataFrame with one column per symbol that returned data. Rows
            containing any missing price are dropped and all values are
            made absolute. Empty when ``company_ids`` is empty.
        """
        # Compute the end date once so every symbol covers the same window.
        end_date = pd.Timestamp.now().strftime("%Y-%m-%d")

        price_series = []
        # dict.fromkeys keeps the first occurrence of every symbol in order,
        # which avoids duplicated column names in the combined frame.
        for symbol in dict.fromkeys(company_ids):
            # auto_adjust=False keeps the "Adj Close" column available.
            # NOTE(review): newer yfinance releases adjust prices by default
            # and then omit that column; confirm against the pinned version.
            prices = yf.download(
                symbol, start=start_date, end=end_date, auto_adjust=False
            )["Adj Close"]

            # Some yfinance versions return ticker-level columns even for a
            # single symbol, so the selection above can be a one-column frame.
            if isinstance(prices, pd.DataFrame):
                if prices.shape[1] == 0:
                    continue
                prices = prices.iloc[:, 0]

            price_series.append(prices.rename(symbol))

        return self._combine_price_series(price_series)

    @staticmethod
    def _combine_price_series(price_series: list) -> pd.DataFrame:
        """Outer-join the per-symbol series and clean the combined frame."""
        if not price_series:
            return pd.DataFrame()

        # One concat instead of a merge per symbol.
        company_data = pd.concat(price_series, axis=1, join="outer")

        # cleaning the data: first drop symbols without any price, then drop
        # every date on which at least one remaining symbol has no price.
        company_data = company_data.dropna(axis=1, how="all")
        company_data = company_data.dropna()

        return company_data.abs()

    def comp_id_to_name(self, list_of_ids: list):
        """Translate symbols to company names; raises KeyError if unknown."""
        return [self.id_to_name[i] for i in list_of_ids]

    def comp_name_to_id(self, list_of_names: list):
        """Translate company names to symbols; raises KeyError if unknown."""
        return [self.name_to_id[i] for i in list_of_names]
67
+
68
+
69
class PortfolioOptimizer:
    """Run PyPortfolioOpt optimizations on prices fetched via a CompData.

    Attributes:
        comp_data: Provider used to fetch prices and translate symbols.
        stock_data: Price frame returned by ``comp_data.fetch_stock_data``.
        stock_data_returns: Row-to-row percentage changes of ``stock_data``.
        ef: The optimizer object of the last ``optimize`` call, or ``None``
            while no optimization has run.
    """

    def __init__(self, comp_data: "CompData", company_ids: list, start_date: str):
        """Fetch the prices for ``company_ids`` and derive their returns.

        Args:
            comp_data: Object offering ``fetch_stock_data`` and
                ``comp_id_to_name``.
            company_ids: Symbols forwarded to ``fetch_stock_data``.
            start_date: Start date forwarded to ``fetch_stock_data``.
        """
        self.comp_data = comp_data
        self.stock_data = self.comp_data.fetch_stock_data(
            company_ids, start_date)
        self.stock_data_returns = self.stock_data.pct_change().dropna()
        # Defined up front so the `is not None` guard in
        # get_portfolio_performance works before optimize() has been called.
        self.ef = None

    def optimize(
        self,
        method: str,
        ef_parameter=None,
        target_volatility: float = 0.5,
        target_return: float = 0.05,
    ):
        """Optimize the portfolio and return the allocation per company.

        Args:
            method: ``"Efficient Frontier"`` or ``"Hierarchical Risk Parity"``.
            ef_parameter: Objective for the efficient frontier. Recognised
                values are ``"Maximum Sharpe Ratio"`` (the historical
                misspelling ``"Maximum Sharpe Raio"`` is still accepted),
                ``"Minimum Volatility"`` and ``"Efficient Risk"``. Any other
                value selects the efficient-return objective.
            target_volatility: Target passed to ``efficient_risk``.
            target_return: Target passed to ``efficient_return``.

        Returns:
            DataFrame with the columns ``"Name"``, ``"Ticker"`` and
            ``"Allocation"``.

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        # Do the portfolio optimization
        if method == "Efficient Frontier":
            mu = expected_returns.mean_historical_return(self.stock_data)
            S = risk_models.sample_cov(self.stock_data)

            self.ef = EfficientFrontier(mu, S)

            # Accept the correct spelling as well as the original typo so
            # callers using either string get the max-Sharpe objective.
            if ef_parameter in ("Maximum Sharpe Raio", "Maximum Sharpe Ratio"):
                self.ef.max_sharpe()
            elif ef_parameter == "Minimum Volatility":
                self.ef.min_volatility()
            elif ef_parameter == "Efficient Risk":
                self.ef.efficient_risk(target_volatility)
            else:
                self.ef.efficient_return(target_return)

            company_asset_weights = pd.DataFrame.from_dict(
                self.ef.clean_weights(), orient="index"
            ).reset_index()

        elif method == "Hierarchical Risk Parity":
            returns = expected_returns.returns_from_prices(self.stock_data)
            S = risk_models.sample_cov(self.stock_data)

            self.ef = HRPOpt(returns, S)

            raw_weights = self.ef.optimize()
            company_asset_weights = pd.DataFrame.from_dict(
                raw_weights, orient="index", columns=["Weight"]
            ).reset_index()

        else:
            raise ValueError(
                f"Unknown optimization method: {method!r}. Expected "
                "'Efficient Frontier' or 'Hierarchical Risk Parity'."
            )

        # cleaning the returned data from the optimization
        company_asset_weights.columns = ["Ticker", "Allocation"]

        company_asset_weights["Name"] = self.comp_data.comp_id_to_name(
            company_asset_weights["Ticker"])

        return company_asset_weights[["Name", "Ticker", "Allocation"]]

    def get_portfolio_performance(self):
        """Summarise the performance of the last optimization.

        Returns:
            DataFrame with the columns ``"Metrics"`` and ``"Summary"`` holding
            the expected annual return and annual volatility (both times 100)
            and the Sharpe ratio, each rounded to two decimals; ``None`` when
            no optimization has run yet.
        """
        if self.ef is None:
            return None

        (
            expected_annual_return,
            annual_volatility,
            sharpe_ratio,
        ) = self.ef.portfolio_performance()

        # The builtin round() works for plain Python floats as well as for
        # NumPy scalars, whereas a `.round` method only exists on the latter.
        return pd.DataFrame(
            {
                "Metrics": [
                    "Expected annual return",
                    "Annual volatility",
                    "Sharpe ratio",
                ],
                "Summary": [
                    round(expected_annual_return * 100, 2),
                    round(annual_volatility * 100, 2),
                    round(sharpe_ratio, 2),
                ],
            }
        )

    def get_portfolio_returns(self):
        """Return the weighted sum of the asset returns for every date.

        Raises:
            AttributeError: If ``optimize`` has not been called yet.
        """
        # Align the weights with the return columns by ticker label instead
        # of relying on both sides sharing the same positional order.
        weights = pd.Series(self.ef.clean_weights())
        return self.stock_data_returns.mul(weights, axis="columns").sum(axis=1)

    def get_annual_portfolio_returns(self):
        """Compound the portfolio returns into one value per calendar year."""
        # An offset object instead of the "Y" string alias, which newer
        # pandas releases deprecate; the year-end bins are the same.
        return (
            self.get_portfolio_returns()
            .resample(pd.offsets.YearEnd())
            .apply(lambda x: (x + 1).prod() - 1)
        )