Premchan369 committed
Commit de3f3f3 · verified · 1 Parent(s): 5d5ab7d

Upload meta_model.py

Files changed (1)
meta_model.py +271 -0
meta_model.py ADDED
@@ -0,0 +1,271 @@
"""Meta-Model: Learns which model/signal to trust dynamically.

This mimics how Renaissance Technologies combines signals — a meta-learner
weights LSTM, Transformer, XGBoost, and sentiment based on recent performance,
regime, and volatility state.
"""
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import GradientBoostingRegressor
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')


class MetaModel:
    """Meta-learner that dynamically weights base model predictions."""

    def __init__(self,
                 base_models: Optional[List[str]] = None,
                 meta_learner_type: str = 'xgb',
                 lookback_window: int = 63,
                 device: str = 'cpu'):
        """
        Args:
            base_models: Names of base models (e.g., ['lstm', 'transformer', 'xgboost', 'sentiment'])
            meta_learner_type: 'xgb', 'nn', or 'bayesian'
            lookback_window: How many days of past performance to use as features
        """
        self.base_models = base_models or ['lstm', 'transformer', 'xgboost', 'sentiment']
        self.meta_learner_type = meta_learner_type
        self.lookback_window = lookback_window
        self.device = torch.device(device)

        self.meta_model = None
        self.performance_history = {m: [] for m in self.base_models}
        self.weight_history = []
        self.is_fitted = False

    def _build_meta_features(self,
                             predictions: Dict[str, np.ndarray],
                             regime: Optional[str] = None,
                             volatility: Optional[float] = None,
                             recent_returns: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Build feature vector for meta-learner.

        Features include:
        - Raw predictions from each base model
        - Summary statistics (mean, std, latest) of each model's recent IC
        - Regime encoding
        - Volatility level
        - Recent market return
        """
        n_samples = len(list(predictions.values())[0])
        features = []

        # Raw predictions
        for model in self.base_models:
            if model in predictions:
                features.append(predictions[model])
            else:
                features.append(np.zeros(n_samples))

        # Recent performance (rolling IC over lookback window)
        for model in self.base_models:
            perf = self.performance_history.get(model, [0.0] * self.lookback_window)
            # Pad if needed
            perf = perf[-self.lookback_window:]
            while len(perf) < self.lookback_window:
                perf = [0.0] + perf
            # Summary stats of recent performance
            features.append(np.full(n_samples, np.mean(perf)))
            features.append(np.full(n_samples, np.std(perf) if len(perf) > 1 else 0.0))
            features.append(np.full(n_samples, perf[-1] if perf else 0.0))

        # Regime encoding
        if regime:
            regime_map = {'bull': 1.0, 'bear': -1.0, 'high_vol': 0.0, 'neutral': 0.5}
            regime_val = regime_map.get(regime, 0.5)
            features.append(np.full(n_samples, regime_val))
        else:
            features.append(np.zeros(n_samples))

        # Volatility (fall back to 0.2 only when it is not provided)
        features.append(np.full(n_samples, volatility if volatility is not None else 0.2))

        # Recent market return
        if recent_returns is not None and len(recent_returns) > 0:
            features.append(np.full(n_samples, np.mean(recent_returns[-5:])))
        else:
            features.append(np.zeros(n_samples))

        return np.column_stack(features)
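
    # Editor note (not part of the original commit): with the default four base
    # models, the method above returns 4 raw-prediction columns + 4 * 3 IC summary
    # columns + 1 regime + 1 volatility + 1 recent-return column = 19 features per sample.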

    def fit(self,
            predictions_train: Dict[str, np.ndarray],
            actual_train: np.ndarray,
            regime_train: Optional[List[str]] = None,
            volatility_train: Optional[np.ndarray] = None) -> Dict:
        """
        Train meta-learner to predict actual returns from base model predictions.

        The meta-learner learns optimal weights for combining base models.
        """
        n_samples = len(actual_train)

        # Build meta-features
        X_meta = self._build_meta_features(
            predictions_train,
            regime=regime_train[0] if regime_train else None,
            volatility=volatility_train[0] if volatility_train is not None else None
        )

        if self.meta_learner_type == 'xgb':
            self.meta_model = GradientBoostingRegressor(
                n_estimators=100,
                max_depth=4,
                learning_rate=0.05,
                subsample=0.8,
                random_state=42
            )
            self.meta_model.fit(X_meta, actual_train)

        elif self.meta_learner_type == 'nn':
            self.meta_model = self._build_nn_meta_model(X_meta.shape[1])
            self._train_nn_meta(X_meta, actual_train)

        elif self.meta_learner_type == 'bayesian':
            # Use XGB with quantile loss for uncertainty
            self.meta_model = GradientBoostingRegressor(
                n_estimators=100,
                max_depth=4,
                learning_rate=0.05,
                loss='quantile', alpha=0.5,
                random_state=42
            )
            self.meta_model.fit(X_meta, actual_train)

        self.is_fitted = True

        # Compute in-sample performance
        pred = self.predict_meta(predictions_train, regime_train, volatility_train)
        from scipy.stats import spearmanr
        ic, _ = spearmanr(pred, actual_train)
        mse = np.mean((pred - actual_train) ** 2)

        return {
            'meta_ic': ic,
            'meta_mse': mse,
            'n_samples': n_samples
        }

    def _build_nn_meta_model(self, input_size: int):
        """Build small neural network meta-learner."""
        class MetaNN(nn.Module):
            def __init__(self, input_size):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(input_size, 64),
                    nn.ReLU(),
                    nn.Dropout(0.2),
                    nn.Linear(64, 32),
                    nn.ReLU(),
                    nn.Linear(32, 1)
                )

            def forward(self, x):
                return self.net(x)

        return MetaNN(input_size).to(self.device)

    def _train_nn_meta(self, X: np.ndarray, y: np.ndarray, epochs: int = 50):
        """Train NN meta-learner."""
        X_t = torch.FloatTensor(X).to(self.device)
        y_t = torch.FloatTensor(y).unsqueeze(1).to(self.device)

        optimizer = torch.optim.Adam(self.meta_model.parameters(), lr=1e-3)
        criterion = nn.MSELoss()

        for epoch in range(epochs):
            self.meta_model.train()
            optimizer.zero_grad()
            pred = self.meta_model(X_t)
            loss = criterion(pred, y_t)
            loss.backward()
            optimizer.step()

    def predict_meta(self,
                     predictions: Dict[str, np.ndarray],
                     regimes: Optional[List[str]] = None,
                     volatilities: Optional[np.ndarray] = None) -> np.ndarray:
        """Generate meta-model predictions."""
        if not self.is_fitted:
            # Fallback: equal weight
            preds = [predictions.get(m, np.zeros(len(list(predictions.values())[0])))
                     for m in self.base_models]
            return np.mean(preds, axis=0)

        X_meta = self._build_meta_features(
            predictions,
            regime=regimes[0] if regimes else None,
            volatility=volatilities[0] if volatilities is not None else None
        )

        if self.meta_learner_type == 'nn':
            self.meta_model.eval()
            with torch.no_grad():
                X_t = torch.FloatTensor(X_meta).to(self.device)
                pred = self.meta_model(X_t).cpu().numpy().flatten()
        else:
            pred = self.meta_model.predict(X_meta)

        return pred

    def update_performance(self, model_name: str, prediction: np.ndarray, actual: np.ndarray):
        """Update rolling performance history for a base model."""
        from scipy.stats import spearmanr
        ic, _ = spearmanr(prediction, actual)
        if np.isnan(ic):
            ic = 0.0
        # setdefault guards against model names not registered at construction time
        self.performance_history.setdefault(model_name, []).append(ic)
        # Keep only lookback window
        self.performance_history[model_name] = self.performance_history[model_name][-self.lookback_window:]

    def get_model_weights(self) -> Dict[str, float]:
        """Get current implied weights from performance history."""
        weights = {}
        total_ic = 0.0
        for model in self.base_models:
            perf = self.performance_history.get(model, [0.0])
            avg_ic = np.mean(perf) if perf else 0.0
            # Use max(0, ic) to avoid negative weights, or use signed weights
            weight = max(avg_ic, 0.0)
            weights[model] = weight
            total_ic += weight

        if total_ic > 0:
            weights = {k: v / total_ic for k, v in weights.items()}
        else:
            # Equal weight fallback
            weights = {k: 1.0 / len(self.base_models) for k in self.base_models}

        return weights
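
    # Editor note (not part of the original commit): as a worked example of the
    # normalization above, mean ICs of {'lstm': 0.04, 'transformer': 0.02,
    # 'xgboost': 0.03, 'sentiment': -0.01} clip the negative value to 0 and
    # normalize the rest, giving weights of roughly
    # {'lstm': 0.44, 'transformer': 0.22, 'xgboost': 0.33, 'sentiment': 0.0}.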

    def adaptive_predict(self,
                         predictions: Dict[str, np.ndarray],
                         actual_prev: Optional[np.ndarray] = None,
                         regime: Optional[str] = None) -> Tuple[np.ndarray, Dict[str, float]]:
        """
        Adaptive prediction that updates weights based on recent performance.

        Returns:
            final_predictions, current_weights
        """
        # Update performance if previous actuals available
        if actual_prev is not None:
            for model, pred in predictions.items():
                if len(pred) == len(actual_prev):
                    self.update_performance(model, pred, actual_prev)

        # Get adaptive weights
        weights = self.get_model_weights()
        self.weight_history.append(weights)

        # Weighted combination
        final_pred = np.zeros(len(list(predictions.values())[0]))
        for model, weight in weights.items():
            if model in predictions:
                final_pred += weight * predictions[model]

        return final_pred, weights
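
A minimal usage sketch (not part of the commit): it assumes base-model predictions are
already available as NumPy arrays keyed by model name; the random placeholder data,
array lengths, and the 'bull' regime label below are illustrative only.

import numpy as np
from meta_model import MetaModel

rng = np.random.default_rng(0)
models = ['lstm', 'transformer', 'xgboost', 'sentiment']

# Placeholder inputs -- in practice these come from the trained base models.
preds_train = {m: rng.normal(0.0, 0.01, 500) for m in models}
actual_train = rng.normal(0.0, 0.01, 500)

meta = MetaModel(base_models=models, meta_learner_type='xgb', lookback_window=63)
stats = meta.fit(preds_train, actual_train,
                 regime_train=['bull'], volatility_train=np.array([0.18]))
print(stats)  # {'meta_ic': ..., 'meta_mse': ..., 'n_samples': 500}

# Score the latest predictions against realized returns, then combine
# today's predictions with the IC-weighted adaptive weights.
preds_today = {m: rng.normal(0.0, 0.01, 20) for m in models}
actual_prev = rng.normal(0.0, 0.01, 20)
combined, weights = meta.adaptive_predict(preds_today, actual_prev=actual_prev, regime='bull')
print(weights)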