Premchan369 committed on
Commit
2065fdc
·
verified ·
1 Parent(s): 72c80ab

Add ML options pricing with neural network and mispricing detection

Browse files
Files changed (1) hide show
  1. options_pricer.py +306 -0
options_pricer.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Options Pricing with ML - Neural network for option pricing/IV prediction."""
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch.utils.data import Dataset, DataLoader
7
+ from typing import Dict, Tuple, Optional
8
+ import warnings
9
+ warnings.filterwarnings('ignore')
10
+
11
+
12
class OptionDataset(Dataset):
    """Torch dataset pairing option feature rows with their target values.

    Args:
        X: Feature matrix with one row per sample.
        y: One target per row of ``X``, given either as a flat vector or as
            an ``(n, 1)`` column.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray):
        self.X = torch.tensor(np.asarray(X), dtype=torch.float32)
        # Targets are always stored as an (n, 1) column. A blind unsqueeze(1)
        # would turn an (n, 1) input into (n, 1, 1), which then broadcasts
        # silently against (batch, 1) predictions inside the loss.
        self.y = torch.tensor(np.asarray(y), dtype=torch.float32).reshape(-1, 1)

        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]
23
+
24
+
25
class OptionPricingNN(nn.Module):
    """Fully connected regression network with a single scalar output.

    Every hidden layer is ``Linear -> ReLU -> Dropout``; a final ``Linear``
    maps the last hidden width to one output value.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Widths of the hidden layers, in order. ``None`` selects
            the default ``[256, 128, 64, 32]``.
        dropout: Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_size: int, hidden_sizes: Optional[list] = None,
                 dropout: float = 0.2):
        super().__init__()

        # Resolve the default here rather than in the signature so no list
        # object is shared between calls.
        if hidden_sizes is None:
            hidden_sizes = [256, 128, 64, 32]

        layers = []
        prev_size = input_size
        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            prev_size = hidden_size

        layers.append(nn.Linear(prev_size, 1))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the network output."""
        return self.network(x)
45
+
46
+
47
class BlackScholes:
    """Analytical Black-Scholes formulas used as a baseline pricer.

    All methods are static. Throughout, ``S`` is the underlying price, ``K``
    the strike, ``T`` the time to expiry, ``r`` the risk-free rate and
    ``sigma`` the volatility (``T``, ``r`` and ``sigma`` must share one time
    unit). ``d1``, ``d2``, ``call_price`` and ``put_price`` are written with
    NumPy/SciPy element-wise functions, so they accept scalars or arrays;
    ``implied_volatility`` works on scalars only.
    """

    @staticmethod
    def d1(S, K, T, r, sigma):
        """Return the Black-Scholes ``d1`` term."""
        return (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))

    @staticmethod
    def d2(S, K, T, r, sigma):
        """Return the Black-Scholes ``d2`` term, ``d1 - sigma * sqrt(T)``."""
        return BlackScholes.d1(S, K, T, r, sigma) - sigma * np.sqrt(T)

    @staticmethod
    def call_price(S, K, T, r, sigma):
        """Return the Black-Scholes price of a European call."""
        from scipy.stats import norm
        d1 = BlackScholes.d1(S, K, T, r, sigma)
        d2 = BlackScholes.d2(S, K, T, r, sigma)
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)

    @staticmethod
    def put_price(S, K, T, r, sigma):
        """Return the Black-Scholes price of a European put."""
        from scipy.stats import norm
        d1 = BlackScholes.d1(S, K, T, r, sigma)
        d2 = BlackScholes.d2(S, K, T, r, sigma)
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

    @staticmethod
    def implied_volatility(price, S, K, T, r, option_type='call', tol=1e-5, max_iter=100):
        """Solve for the volatility whose Black-Scholes price equals ``price``.

        Uses Newton-Raphson steps safeguarded by a bracket: a Newton step is
        only accepted when it stays inside the current bracket and does not
        more than double sigma; otherwise the bracket is bisected (or sigma is
        doubled while no upper bound is known yet). A plain Newton iteration
        diverges when vega is tiny at the starting guess, e.g. for options far
        from the money.

        Args:
            price: Observed option price (scalar).
            S, K, T, r: Scalar contract inputs, see the class docstring.
            option_type: ``'call'`` prices a call; any other value prices a put.
            tol: Absolute price error at which the search stops.
            max_iter: Maximum number of pricing evaluations.

        Returns:
            The volatility estimate, never below the 0.001 floor. If ``price``
            cannot be matched within ``max_iter`` evaluations (for example
            because it lies outside the attainable price range) the last
            iterate is returned as a best effort. ``nan`` is returned when
            ``price`` is not finite or when ``S``, ``K`` or ``T`` is not
            strictly positive, because the formula is undefined there.
        """
        from scipy.stats import norm

        if not (np.isfinite(price) and S > 0 and K > 0 and T > 0):
            return float('nan')

        if option_type == 'call':
            price_fn = BlackScholes.call_price
        else:
            price_fn = BlackScholes.put_price

        lo, hi = 0.001, float('inf')  # bracket around the root; hi unknown at first
        sigma = 0.2  # Initial guess
        for _ in range(max_iter):
            diff = price_fn(S, K, T, r, sigma) - price
            if abs(diff) < tol:
                return sigma

            # The price increases with sigma, so the sign of the error tells
            # which side of the root the current iterate is on.
            if diff > 0:
                hi = sigma
            else:
                lo = sigma
            if hi - lo < 1e-12:
                # Bracket collapsed (e.g. price below the value at the floor).
                break

            bounded = np.isfinite(hi)
            ceiling = hi if bounded else 2.0 * sigma

            vega = S * norm.pdf(BlackScholes.d1(S, K, T, r, sigma)) * np.sqrt(T)
            candidate = sigma - diff / vega if vega >= 1e-10 else None

            if candidate is None or not (lo < candidate < ceiling):
                # Newton step unusable: bisect, or grow sigma geometrically
                # until an upper bound has been found.
                candidate = 0.5 * (lo + hi) if bounded else ceiling
            sigma = candidate

        return sigma
99
+
100
+
101
class MLOptionsPricer:
    """ML-based options pricing engine.

    Wraps an ``OptionPricingNN`` regressor (built in ``fit``) together with a
    ``BlackScholes`` helper used for implied-volatility inversion and for
    generating synthetic training data.

    Args:
        hidden_sizes: Hidden layer widths handed to ``OptionPricingNN``.
            ``None`` selects ``[256, 128, 64, 32]``.
        device: Torch device string on which the model runs.
    """

    def __init__(self, hidden_sizes: Optional[list] = None,
                 device: str = 'cpu'):
        # Resolve the default here so no list object is shared across instances.
        self.hidden_sizes = [256, 128, 64, 32] if hidden_sizes is None else list(hidden_sizes)
        self.device = torch.device(device)
        self.model = None  # created by fit()
        self.bs = BlackScholes()

    def prepare_features(self, options_df: pd.DataFrame) -> np.ndarray:
        """Build the model's feature matrix from an options table.

        Required columns: ``S``, ``K``, ``T``, ``r``, ``sigma_hist`` and
        ``option_type``. Columns ``S_lag_1`` .. ``S_lag_20`` are optional and
        each one present is appended in numeric order.

        Column order of the result: S, K, T, r, sigma_hist, S / K, T * 252,
        call indicator (1.0 where ``option_type == 'call'``, else 0.0), then
        the available lag columns.

        NOTE: the width of the result depends on which lag columns exist, so
        training and prediction frames must carry the same lag columns.
        """
        features = [
            options_df['S'].values,
            options_df['K'].values,
            options_df['T'].values,
            options_df['r'].values,
            options_df['sigma_hist'].values,
            (options_df['S'] / options_df['K']).values,  # Moneyness
            options_df['T'].values * 252,  # Days to expiry
            (options_df['option_type'] == 'call').astype(float).values,
        ]

        # Lag features (past underlying prices), only those actually present.
        for i in range(1, 21):
            col = f'S_lag_{i}'
            if col in options_df.columns:
                features.append(options_df[col].values)

        return np.column_stack(features)

    def fit(self, X_train: np.ndarray, y_train: np.ndarray,
            X_val: Optional[np.ndarray] = None, y_val: Optional[np.ndarray] = None,
            epochs: int = 100, batch_size: int = 256, lr: float = 1e-3) -> Dict:
        """Train a fresh network with Adam on an MSE loss.

        Args:
            X_train: 2-D training feature matrix.
            y_train: Training targets, one per row of ``X_train``.
            X_val, y_val: Optional validation set. Validation metrics, the
                learning-rate scheduler and the progress print are only
                active when both are given.
            epochs: Number of passes over the training data.
            batch_size: Mini-batch size.
            lr: Initial Adam learning rate.

        Returns:
            Dict with per-epoch lists ``train_loss``, ``val_loss`` and
            ``val_mae`` (the latter two stay empty without validation data).

        Raises:
            ValueError: If ``X_train`` is not a non-empty 2-D array or its
                length differs from ``y_train``.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train).reshape(-1)
        if X_train.ndim != 2 or len(X_train) == 0:
            raise ValueError("X_train must be a non-empty 2-D array")
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same number of samples")

        input_size = X_train.shape[1]
        self.model = OptionPricingNN(input_size, self.hidden_sizes).to(self.device)

        train_dataset = OptionDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10)
        criterion = nn.MSELoss()

        has_val = X_val is not None and y_val is not None
        if has_val:
            # Built once, outside the epoch loop. The target is reshaped to an
            # (n, 1) column so it lines up with the (n, 1) predictions; an
            # (n,) target would broadcast to an (n, n) error matrix and
            # corrupt both val_loss and val_mae.
            X_val_t = torch.as_tensor(np.asarray(X_val), dtype=torch.float32).to(self.device)
            y_val_t = torch.as_tensor(
                np.asarray(y_val), dtype=torch.float32
            ).reshape(-1, 1).to(self.device)

        metrics = {'train_loss': [], 'val_loss': [], 'val_mae': []}

        for epoch in range(epochs):
            self.model.train()
            epoch_loss = 0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(self.device), y_batch.to(self.device)
                optimizer.zero_grad()
                pred = self.model(X_batch)
                loss = criterion(pred, y_batch)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            avg_train_loss = epoch_loss / len(train_loader)
            metrics['train_loss'].append(avg_train_loss)

            if not has_val:
                continue

            # Validation
            self.model.eval()
            with torch.no_grad():
                val_pred = self.model(X_val_t)
                val_loss = criterion(val_pred, y_val_t).item()
                val_mae = torch.mean(torch.abs(val_pred - y_val_t)).item()
            metrics['val_loss'].append(val_loss)
            metrics['val_mae'].append(val_mae)

            scheduler.step(val_loss)

            if epoch % 10 == 0:
                print(f" Epoch {epoch}: train_loss={avg_train_loss:.6f}, "
                      f"val_loss={val_loss:.6f}, val_mae={val_mae:.4f}")

        return metrics

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the model output for each row of ``X`` as a flat array.

        Raises:
            ValueError: If ``fit`` has not been called yet.
        """
        if self.model is None:
            raise ValueError("Model must be trained before prediction")

        self.model.eval()
        with torch.no_grad():
            X_t = torch.as_tensor(np.asarray(X), dtype=torch.float32).to(self.device)
            pred = self.model(X_t).cpu().numpy().flatten()

        return pred

    def predict_iv(self, options_df: pd.DataFrame, market_prices: np.ndarray) -> np.ndarray:
        """Estimate implied volatility as a blend of market and model views.

        For every row, the Black-Scholes implied volatility is computed twice:
        once from the market price and once from the model's predicted price.
        The result is the equal-weight average of the two.

        Raises:
            ValueError: If ``market_prices`` and ``options_df`` differ in length.
        """
        # Positional array: a pandas Series would otherwise be indexed by label.
        market_prices = np.asarray(market_prices, dtype=float).reshape(-1)
        if len(market_prices) != len(options_df):
            raise ValueError("market_prices must have one entry per row of options_df")

        contracts = list(zip(
            options_df['S'].values,
            options_df['K'].values,
            options_df['T'].values,
            options_df['r'].values,
            options_df['option_type'].values,
        ))

        # Get ML prediction
        ml_price = self.predict(self.prepare_features(options_df))

        # IV implied by the observed market prices
        bs_iv = np.array([
            self.bs.implied_volatility(price, *contract)
            for price, contract in zip(market_prices, contracts)
        ])

        # IV implied by the model's prices
        ml_iv = np.array([
            self.bs.implied_volatility(price, *contract)
            for price, contract in zip(ml_price, contracts)
        ])

        # Ensemble: weighted average
        return 0.5 * bs_iv + 0.5 * ml_iv

    def detect_mispricing(self, options_df: pd.DataFrame,
                          market_prices: np.ndarray,
                          threshold: float = 0.05) -> pd.DataFrame:
        """Label every option by how far the model price is from the market.

        Returns a copy of ``options_df`` (all rows are kept) with four added
        columns: ``ml_price``, ``market_price``, ``mispricing_pct`` equal to
        ``(ml_price - market_price) / market_price * 100``, and ``signal``,
        which is ``'OVERPRICED'`` when the relative difference exceeds
        ``threshold``, ``'UNDERPRICED'`` when it is below ``-threshold`` and
        ``'FAIR'`` otherwise. A zero market price yields an infinite or NaN
        ratio.

        Raises:
            ValueError: If ``market_prices`` and ``options_df`` differ in length.
        """
        # Positional array: a pandas Series would otherwise be aligned on its
        # index when assigned into the result frame.
        market_prices = np.asarray(market_prices, dtype=float).reshape(-1)
        if len(market_prices) != len(options_df):
            raise ValueError("market_prices must have one entry per row of options_df")

        ml_prices = self.predict(self.prepare_features(options_df))

        with np.errstate(divide='ignore', invalid='ignore'):
            mispricing = (ml_prices - market_prices) / market_prices

        result = options_df.copy()
        result['ml_price'] = ml_prices
        result['market_price'] = market_prices
        result['mispricing_pct'] = mispricing * 100
        # NOTE(review): 'OVERPRICED' is emitted when the MODEL price is above
        # the market price, i.e. when the market quote looks cheap relative to
        # the model. Confirm that downstream consumers read the label this way.
        result['signal'] = np.where(
            mispricing > threshold, 'OVERPRICED',
            np.where(mispricing < -threshold, 'UNDERPRICED', 'FAIR')
        )

        return result

    def generate_synthetic_options(self, n_samples: int = 10000,
                                   S_range: Tuple[float, float] = (50, 200),
                                   K_range: Tuple[float, float] = (50, 200),
                                   T_range: Tuple[float, float] = (0.01, 1.0),
                                   r_range: Tuple[float, float] = (0.01, 0.05),
                                   sigma_range: Tuple[float, float] = (0.1, 0.5),
                                   seed: Optional[int] = 42) -> pd.DataFrame:
        """Generate synthetic option data for training.

        Contract inputs are drawn uniformly from the given ranges and the
        option type is chosen at random between ``'call'`` and ``'put'``.
        Twenty ``S_lag_i`` columns are ``S`` perturbed by 1% Gaussian noise.
        ``price`` is the Black-Scholes price times ``1 + N(0, 0.02)`` noise,
        floored at 0.01.

        Args:
            n_samples: Number of rows to generate.
            S_range, K_range, T_range, r_range, sigma_range: ``(low, high)``
                bounds of the uniform draws.
            seed: Seed of the private random generator; ``None`` gives a
                non-reproducible draw.

        Returns:
            DataFrame with columns ``S``, ``K``, ``T``, ``r``, ``sigma_hist``,
            ``option_type``, ``price`` and ``S_lag_1`` .. ``S_lag_20``.
        """
        # A private generator keeps the process-wide NumPy RNG state untouched.
        rng = np.random.RandomState(seed)

        S = rng.uniform(*S_range, n_samples)
        K = rng.uniform(*K_range, n_samples)
        T = rng.uniform(*T_range, n_samples)
        r = rng.uniform(*r_range, n_samples)
        sigma = rng.uniform(*sigma_range, n_samples)
        option_type = rng.choice(['call', 'put'], n_samples)

        # Generate lag features (simulated price history)
        lags = {}
        for i in range(1, 21):
            lags[f'S_lag_{i}'] = S * (1 + rng.normal(0, 0.01, n_samples))

        # Black-Scholes prices for all rows at once, then multiplicative noise.
        clean = np.where(
            option_type == 'call',
            self.bs.call_price(S, K, T, r, sigma),
            self.bs.put_price(S, K, T, r, sigma),
        )
        noise = rng.normal(0, 0.02, n_samples)
        prices = np.maximum(clean * (1 + noise), 0.01)

        df = pd.DataFrame({
            'S': S,
            'K': K,
            'T': T,
            'r': r,
            'sigma_hist': sigma,
            'option_type': option_type,
            'price': prices,
            **lags
        })

        return df