Upload factor_decomposition.py
Browse files- factor_decomposition.py +81 -0
factor_decomposition.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Factor Decomposition Engine - Break returns into style factors."""
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from typing import Dict, Optional
|
| 5 |
+
import warnings
|
| 6 |
+
# NOTE: installs a global "ignore" filter with no category or module restriction,
# so every warning is silenced for the whole process once this module is imported.
warnings.filterwarnings('ignore')
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class FactorDecomposition:
    """Decompose returns into style factors.

    Workflow:
      1. ``compute_factor_returns`` runs one cross-sectional regression per
         date to estimate daily returns for five style factors plus an
         equal-weighted market factor.
      2. ``compute_exposures`` estimates rolling, per-factor ridge betas of a
         single return series against those factor returns.
      3. ``attribution`` splits a return series into factor-explained and
         residual parts.
    """

    # Number of prior rows averaged for the momentum score (current row excluded).
    MOMENTUM_LOOKBACK = 20
    # Number of prior rows used for each asset's trailing volatility.
    VOLATILITY_LOOKBACK = 21
    # Minimum number of non-NaN assets on a date to run the regression.
    MIN_ASSETS = 5
    # Order of the style-factor columns produced by the regression.
    _STYLE_FACTORS = ('momentum', 'value', 'size', 'volatility', 'quality')

    def __init__(self):
        self.factor_names = ['momentum', 'value', 'size', 'volatility', 'quality', 'market']
        self.factor_returns = None  # set by compute_factor_returns
        self.exposures = None  # set by compute_exposures

    def compute_factor_returns(self, returns_df: pd.DataFrame) -> pd.DataFrame:
        """Estimate per-date factor returns from a (dates x assets) return table.

        For every row, the non-NaN asset returns are regressed (least squares)
        on five standardised cross-sectional scores:

        * momentum   - mean of the asset's previous ``MOMENTUM_LOOKBACK`` rows
        * value      - ``1 / (1 + |r|)`` of the same-row return
        * size       - ``1 / (trailing_vol + 0.01)``
        * volatility - ``-trailing_vol``
        * quality    - 1.0 if the same-row return is positive, else 0.0

        ``trailing_vol`` is each asset's standard deviation over its previous
        ``VOLATILITY_LOOKBACK`` rows (0.01 when fewer than two observations
        exist). Note that value and quality are derived from the very return
        being explained.

        Args:
            returns_df: Returns with one row per date and one column per asset.

        Returns:
            DataFrame indexed like ``returns_df`` with columns ``market``
            (row-wise mean return) followed by the five style factors. Rows
            with fewer than ``MIN_ASSETS`` valid returns, or whose regression
            fails, get 0 for every style factor. The result is also stored
            on ``self.factor_returns``.
        """
        factors = pd.DataFrame(index=returns_df.index)
        factors['market'] = returns_df.mean(axis=1)

        # Per-asset trailing statistics over time, shifted by one row so the
        # current row's return never feeds its own momentum/volatility score.
        trailing_mean = returns_df.rolling(self.MOMENTUM_LOOKBACK, min_periods=1).mean().shift(1)
        trailing_vol = returns_df.rolling(self.VOLATILITY_LOOKBACK, min_periods=2).std().shift(1)

        n_rows = len(returns_df)
        style_returns = np.zeros((n_rows, len(self._STYLE_FACTORS)), dtype=float)

        # Positional iteration: robust to unsorted or duplicated index labels.
        for row in range(n_rows):
            day_returns = returns_df.iloc[row].dropna()
            if len(day_returns) < self.MIN_ASSETS:
                continue  # too few assets for a 5-factor regression; keep zeros

            assets = day_returns.index
            momentum_score = trailing_mean.iloc[row].reindex(assets).fillna(0)
            asset_vol = trailing_vol.iloc[row].reindex(assets).fillna(0.01)

            scores = pd.DataFrame({
                'momentum': momentum_score,
                'value': 1.0 / (1 + day_returns.abs()),
                'size': 1.0 / (asset_vol + 0.01),
                'volatility': -asset_vol,
                'quality': (day_returns > 0).astype(float),
            }).fillna(0)

            # A column whose values are all identical carries no
            # cross-sectional information; force it to exact zeros so that
            # rounding noise is not blown up by the division below.
            is_constant = scores.max() == scores.min()
            scores = (scores - scores.mean()) / (scores.std() + 1e-8)
            scores.loc[:, is_constant] = 0.0

            try:
                coefs = np.linalg.lstsq(scores.values, day_returns.values, rcond=None)[0]
            except (np.linalg.LinAlgError, ValueError):
                continue  # regression failed for this date; keep zeros
            style_returns[row] = coefs

        for col, name in enumerate(self._STYLE_FACTORS):
            factors[name] = style_returns[:, col]

        self.factor_returns = factors
        return factors

    @staticmethod
    def _as_float_array(values, length: int) -> np.ndarray:
        """Return ``values`` as a float array of exactly ``length`` items (NaN-padded)."""
        arr = np.asarray(values, dtype=float)[:length]
        if arr.shape[0] < length:
            arr = np.concatenate([arr, np.full(length - arr.shape[0], np.nan)])
        return arr

    @staticmethod
    def _rolling_ridge_slope(x: np.ndarray, y: np.ndarray, window: int, alpha: float) -> np.ndarray:
        """Ridge slope of ``y`` on ``x`` for every window ``[i - window, i)``, i = window..n-1."""
        # Drop the last window: it ends at the final observation and would
        # belong to position n, which does not exist.
        x_win = np.lib.stride_tricks.sliding_window_view(x, window)[:-1]
        y_win = np.lib.stride_tricks.sliding_window_view(y, window)[:-1]
        with np.errstate(invalid='ignore', divide='ignore', over='ignore'):
            x_c = x_win - x_win.mean(axis=1, keepdims=True)
            y_c = y_win - y_win.mean(axis=1, keepdims=True)
            slope = (x_c * y_c).sum(axis=1) / ((x_c * x_c).sum(axis=1) + alpha)
        # Windows containing NaN/inf (or a 0/0 division) yield no usable estimate.
        slope[~np.isfinite(slope)] = 0.0
        return slope

    def compute_exposures(self, asset_returns: pd.Series, factor_returns: pd.DataFrame,
                          window: int = 63, alpha: float = 1.0) -> pd.DataFrame:
        """Estimate rolling univariate ridge betas of an asset to each factor.

        For every position ``i >= window`` and every factor column, the beta
        is the closed-form slope of a one-feature ridge regression with an
        unpenalised intercept, fitted on the ``window`` observations strictly
        before ``i``::

            beta = sum((x - mean(x)) * (y - mean(y))) / (sum((x - mean(x))**2) + alpha)

        Rows are paired by position, not by index label.

        Args:
            asset_returns: Return series of the asset or portfolio.
            factor_returns: Factor returns, one column per factor.
            window: Number of trailing observations per regression.
            alpha: L2 penalty applied to the slope.

        Returns:
            Float DataFrame indexed like ``asset_returns`` with one column per
            factor. The first ``window`` rows, and any row whose window holds
            a missing or non-finite value, are 0. The result is also stored
            on ``self.exposures``.
        """
        n_obs = len(asset_returns)
        n_factors = factor_returns.shape[1]
        betas = np.zeros((n_obs, n_factors), dtype=float)

        if window >= 1 and n_obs > window:
            y = self._as_float_array(asset_returns, n_obs)
            for col in range(n_factors):
                x = self._as_float_array(factor_returns.iloc[:, col], n_obs)
                betas[window:, col] = self._rolling_ridge_slope(x, y, window, alpha)

        # Build the frame in one go instead of chained ``.iloc[i][col] = ...``
        # writes, which do not reach the frame under pandas copy-on-write.
        exposures = pd.DataFrame(betas, index=asset_returns.index, columns=factor_returns.columns)
        self.exposures = exposures
        return exposures

    def attribution(self, portfolio_returns: pd.Series, factor_returns: pd.DataFrame,
                    exposures: pd.DataFrame) -> Dict:
        """Split portfolio returns into factor-explained and residual parts.

        Args:
            portfolio_returns: Realised return series.
            factor_returns: Factor returns; reindexed to ``exposures.index``
                with missing rows treated as 0.
            exposures: Factor exposures, columns matching ``factor_returns``.

        Returns:
            Dict with ``total_return``, ``factor_return`` and
            ``residual_return`` (sums over the portfolio's dates) and
            ``r_squared`` (``1 - var(residual) / var(portfolio)``, or 0 when
            the portfolio variance is not positive).
        """
        # Exposure frames may arrive as object dtype; arithmetic needs floats.
        numeric_exposures = exposures.astype(float)
        aligned_factors = factor_returns.reindex(numeric_exposures.index).fillna(0)
        factor_contrib = numeric_exposures.multiply(aligned_factors).sum(axis=1)

        # Measure everything over the portfolio's own dates so that
        # total_return == factor_return + residual_return.
        if not factor_contrib.index.equals(portfolio_returns.index):
            factor_contrib = factor_contrib.reindex(portfolio_returns.index).fillna(0.0)

        residual = portfolio_returns - factor_contrib
        total_var = portfolio_returns.var()
        r_squared = 1 - residual.var() / total_var if total_var > 0 else 0.0
        return {
            'total_return': float(portfolio_returns.sum()),
            'factor_return': float(factor_contrib.sum()),
            'residual_return': float(residual.sum()),
            'r_squared': float(r_squared),
        }
|