Add Barra-style multi-factor risk model with factor covariance and specific risk
Browse files- factor_risk_model.py +132 -0
factor_risk_model.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""factor_risk_model.py — Barra-Style Multi-Factor Risk Model
|
| 2 |
+
|
| 3 |
+
Decomposes portfolio risk into factor (systematic) and specific (idiosyncratic)
|
| 4 |
+
components. Models factor covariance using PCA + exponential weighting, and
|
| 5 |
+
estimates specific risk from residuals. Essential for risk budgeting and
|
| 6 |
+
attribution.
|
| 7 |
+
|
| 8 |
+
References:
|
| 9 |
+
- Grinold & Kahn 2000: "Active Portfolio Management" (Barra model)
|
| 10 |
+
- Menchero et al. 2010: "The Barra US Equity Model (USE4)"
|
| 11 |
+
- Connor et al. 2010: "The Structure of Factor Risk Premiums"
|
| 12 |
+
"""
|
| 13 |
+
import numpy as np, pandas as pd
|
| 14 |
+
from scipy.linalg import eigh
|
| 15 |
+
|
| 16 |
+
class FactorRiskModel:
    """Barra-style statistical factor risk model.

    The model is estimated by PCA on an exponentially weighted second-moment
    matrix of asset returns.  The leading eigenvectors act as factor loadings,
    the matching eigenvalues form a diagonal factor covariance, and the
    variance of the unexplained residual returns gives each asset's specific
    variance.

    Attributes set by ``fit``:
        factor_loadings: (n_assets, k) array of orthonormal eigenvectors.
        factor_cov: (k, k) diagonal array of the leading eigenvalues.
        specific_var: (n_assets,) array of residual variances.
        factor_names: list of ``"PC1"``, ``"PC2"``, ... labels, one per factor.
    """

    def __init__(self, n_factors=20, halflife=126):
        """Store hyper-parameters; no estimation happens until ``fit``.

        Args:
            n_factors: Maximum number of principal components to keep.
            halflife: Number of observations after which an observation's
                weight has decayed to one half.
        """
        self.n_factors = n_factors
        self.halflife = halflife
        self.factor_cov = None
        self.factor_loadings = None
        self.specific_var = None
        self.factor_names = None

    def _exp_weights(self, n):
        """Return ``n`` exponentially decaying weights that sum to one.

        The last (most recent) observation receives the largest weight.

        Raises:
            ValueError: If ``halflife`` is not positive.
        """
        if self.halflife <= 0:
            raise ValueError("halflife must be positive")
        lambda_ = 0.5 ** (1.0 / self.halflife)
        # Exponent n-1 for the oldest row down to 0 for the newest row.
        w = lambda_ ** np.arange(n - 1, -1, -1, dtype=float)
        return w / w.sum()

    def _require_fitted(self):
        """Raise a clear error when risk is requested before ``fit``."""
        parts = (self.factor_loadings, self.factor_cov,
                 self.specific_var, self.factor_names)
        if any(part is None for part in parts):
            raise RuntimeError(
                "FactorRiskModel is not fitted; call fit(returns) first")

    def fit(self, returns):
        """Fit factor model via PCA with exponential weighting.

        Args:
            returns: DataFrame (or anything ``pd.DataFrame`` accepts) with one
                row per period and one column per asset.  Rows containing any
                NaN are dropped.  Returns are not de-meaned.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If no complete rows/columns remain or ``n_factors < 1``.
        """
        r = pd.DataFrame(returns).dropna()
        T, N = r.shape
        if T == 0 or N == 0:
            raise ValueError(
                "returns must contain at least one complete row and one asset")
        if self.n_factors < 1:
            raise ValueError("n_factors must be at least 1")

        x = r.to_numpy(dtype=float)
        w = self._exp_weights(T)

        # Weighted covariance.  The weights already sum to one, so no further
        # normalisation is needed.
        rw = x * np.sqrt(w[:, None])
        cov = rw.T @ rw

        # PCA: eigh returns ascending eigenvalues, so reorder to descending.
        eigvals, eigvecs = eigh(cov)
        idx = np.argsort(eigvals)[::-1]
        # Round-off can leave tiny negative eigenvalues on a rank-deficient
        # matrix; clip them so variances never go negative.
        eigvals = np.clip(eigvals[idx], 0.0, None)
        eigvecs = eigvecs[:, idx]

        # Cannot extract more factors than there are assets; keep the names
        # aligned with the number of loadings actually retained.
        k = min(self.n_factors, N)
        self.factor_loadings = eigvecs[:, :k]
        self.factor_cov = np.diag(eigvals[:k])
        self.factor_names = [f"PC{i+1}" for i in range(k)]

        # Specific risk from residuals left after projecting onto the factors.
        factor_rets = x @ self.factor_loadings
        residuals = x - factor_rets @ self.factor_loadings.T
        self.specific_var = np.var(residuals, axis=0)
        return self

    def portfolio_risk(self, weights):
        """Decompose portfolio risk into factor + specific.

        Args:
            weights: One portfolio weight per asset, in the column order used
                for ``fit``.

        Returns:
            dict with ``total_vol``, ``factor_vol``, ``specific_vol`` (square
            roots of the respective variances), ``factor_pct`` and
            ``specific_pct`` (shares of total variance, in percent) and
            ``factor_exposures`` (factor name -> portfolio exposure).

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        self._require_fitted()
        w = np.asarray(weights, dtype=float).reshape(-1)

        # Factor risk
        factor_exposure = w @ self.factor_loadings
        factor_var = factor_exposure @ self.factor_cov @ factor_exposure

        # Specific risk (assumed uncorrelated across assets)
        specific_var = np.sum((w ** 2) * self.specific_var)
        total_var = factor_var + specific_var

        # The small epsilon avoids division by zero for an all-zero portfolio.
        denom = total_var + 1e-10
        return {
            'total_vol': float(np.sqrt(total_var)),
            'factor_vol': float(np.sqrt(factor_var)),
            'specific_vol': float(np.sqrt(specific_var)),
            'factor_pct': float(factor_var / denom * 100),
            'specific_pct': float(specific_var / denom * 100),
            'factor_exposures': dict(zip(self.factor_names, factor_exposure.tolist()))
        }

    def marginal_risk_contrib(self, weights):
        """Risk contribution per asset.

        Computes ``w_i * (Sigma @ w)_i / sigma`` for each asset, i.e. the
        weight times the marginal volatility, so the values sum to
        (approximately) the portfolio volatility.

        Args:
            weights: One portfolio weight per asset.

        Returns:
            ``pd.Series`` indexed ``Asset_0``, ``Asset_1``, ...

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        w = np.asarray(weights, dtype=float).reshape(-1)
        sigma = self.portfolio_risk(w)['total_vol']

        # Gradient of variance w.r.t. weights (up to a factor of 2), applied
        # factor-by-factor so the dense N x N covariance is never built.
        loadings = self.factor_loadings
        grad = loadings @ (self.factor_cov @ (loadings.T @ w)) + self.specific_var * w

        mrc = w * grad / (sigma + 1e-10)
        return pd.Series(mrc, index=[f'Asset_{i}' for i in range(len(w))])

    def risk_budget(self, weights, target_risk=None, max_iter=100, tol=1e-10):
        """Risk budgeting: find weights such that each asset contributes equally.

        Starts from equal weights and repeatedly rescales each weight by
        ``1 / sqrt(risk contribution)``.  The square root damps the step: an
        undamped ``1 / contribution`` update alternates between two weight
        vectors when the covariance is (near) diagonal and never settles.

        Args:
            weights: Only its length (the number of assets) is used.
            target_risk: If not ``None``, the long-only weights (which sum to
                one) are rescaled so the portfolio volatility equals this value.
            max_iter: Upper bound on the number of iterations.
            tol: Stop once no weight changes by more than this amount.

        Returns:
            ``np.ndarray`` of weights.
        """
        n = len(weights)
        w0 = np.ones(n) / n

        for _ in range(max_iter):
            contrib = self.marginal_risk_contrib(w0).to_numpy()
            # Floor at a tiny positive value: keeps the square root real and
            # pushes weight towards assets whose contribution is non-positive.
            contrib = np.maximum(contrib, 1e-10)
            w_next = w0 / np.sqrt(contrib)
            w_next = w_next / w_next.sum()
            converged = np.max(np.abs(w_next - w0)) < tol
            w0 = w_next
            if converged:
                break

        if target_risk is not None:
            vol = self.portfolio_risk(w0)['total_vol']
            w0 = w0 * (target_risk / (vol + 1e-10))
        return w0

    def risk_report(self, weights):
        """Human-readable risk decomposition.

        Args:
            weights: One portfolio weight per asset.

        Returns:
            Markdown string with the factor/specific split, the five largest
            absolute factor exposures and the five largest risk contributors.
        """
        risk = self.portfolio_risk(weights)
        mrc = self.marginal_risk_contrib(weights)

        header = f"""## Factor Risk Decomposition

| Risk Component | Volatility | % of Total |
|----------------|-----------|------------|
| Total | {risk['total_vol']*100:.2f}% | 100% |
| Factor (Systematic) | {risk['factor_vol']*100:.2f}% | {risk['factor_pct']:.1f}% |
| Specific (Idiosyncratic) | {risk['specific_vol']*100:.2f}% | {risk['specific_pct']:.1f}% |

**Top Factor Exposures:**
"""
        top = sorted(risk['factor_exposures'].items(),
                     key=lambda x: abs(x[1]), reverse=True)[:5]
        exposure_lines = "".join(f"- {name}: {exp:.3f}\n" for name, exp in top)

        top_mrc = mrc.sort_values(ascending=False).head(5)
        contrib_lines = "".join(
            f"- {asset}: {contrib*100:.2f}%\n" for asset, contrib in top_mrc.items())

        return header + exposure_lines + "\n**Top Risk Contributors:**\n" + contrib_lines
|
| 124 |
+
|
| 125 |
+
if __name__ == '__main__':
    # Demo: fit the model on simulated i.i.d. normal returns and print the
    # risk report for an equally weighted portfolio.
    np.random.seed(42)
    n_days, n_stocks = 500, 10

    simulated = np.random.normal(0.0003, 0.015, (n_days, n_stocks))
    tickers = [f'Stock_{i}' for i in range(n_stocks)]
    business_days = pd.date_range('2022-01-01', periods=n_days, freq='B')
    returns = pd.DataFrame(simulated, columns=tickers, index=business_days)

    model = FactorRiskModel(n_factors=5)
    model.fit(returns)

    # Ten equal weights of 10% each.
    weights = np.full(n_stocks, 0.1)
    print(model.risk_report(weights))
|