Add market microstructure features: Kyle's lambda, VPIN, Roll measure, OFI, Amihud
Browse files- market_microstructure.py +382 -0
market_microstructure.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Market Microstructure Features
|
| 2 |
+
|
| 3 |
+
Based on the work of Marcos Lopez de Prado and on the mlfinlab library.
|
| 4 |
+
|
| 5 |
+
This is what separates retail technical analysis from institutional quant.
|
| 6 |
+
Order flow, liquidity, and market impact contain genuine alpha.
|
| 7 |
+
"""
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from typing import Dict, List, Optional, Tuple
|
| 11 |
+
import warnings
|
| 12 |
+
warnings.filterwarnings('ignore')
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class MicrostructureFeatures:
    """
    Extract market microstructure features from tick-level data.

    Key insight: The market is not a continuous price stream.
    It is a series of discrete transactions driven by informed vs.
    uninformed traders. Microstructure features detect this.

    Every method is a ``@staticmethod``; the class is only a namespace.
    Unless stated otherwise, inputs are equally-indexed ``pd.Series`` (one
    row per tick) and the result is a Series on the same index.
    """

    @staticmethod
    def bid_ask_spread(bid: pd.Series, ask: pd.Series) -> pd.Series:
        """
        Raw (quoted) bid-ask spread, ``ask - bid``, in price units.

        Wider spreads = lower liquidity, higher execution cost.
        """
        return ask - bid

    @staticmethod
    def relative_spread(bid: pd.Series, ask: pd.Series,
                        mid: Optional[pd.Series] = None) -> pd.Series:
        """
        Quoted spread as a fraction of the mid price: ``(ask - bid) / mid``.

        The result is a plain ratio (it is NOT multiplied by 100).
        When ``mid`` is omitted it is computed as ``(bid + ask) / 2``.
        """
        if mid is None:
            mid = (bid + ask) / 2
        return (ask - bid) / mid

    @staticmethod
    def effective_spread(price: pd.Series, bid: pd.Series,
                         ask: pd.Series) -> pd.Series:
        """
        Relative effective spread = 2 * |trade_price - mid| / mid.

        Measures actual execution cost vs. quoted spread. The value is
        normalised by the mid price, so it is a fraction, not a price.
        """
        mid = (bid + ask) / 2
        return 2 * np.abs(price - mid) / mid

    @staticmethod
    def realized_spread(price: pd.Series, bid: pd.Series, ask: pd.Series,
                        future_mid: pd.Series) -> pd.Series:
        """
        Relative realized spread = 2 * |trade_price - future_mid| / mid.

        ``future_mid`` is the mid price at some later horizon, supplied by
        the caller; ``mid`` in the denominator is the mid at trade time.
        The result is unsigned (absolute value) and expressed as a fraction
        of the contemporaneous mid.

        Measures adverse selection. If realized spread > effective spread,
        your trade moved the market against you.
        """
        mid = (bid + ask) / 2
        return 2 * np.abs(price - future_mid) / mid

    @staticmethod
    def price_impact(price: pd.Series, volume: pd.Series,
                     bid: pd.Series, ask: pd.Series,
                     window: int = 100) -> pd.Series:
        """
        Kyle's Lambda — price impact coefficient estimated on mid prices.

        delta_mid = lambda * signed_volume + noise

        The slope is estimated as a rolling
        ``Cov(delta_mid, signed_volume) / Var(signed_volume)`` over
        ``window`` ticks. Trades are signed by comparing the trade price
        with the PREVIOUS mid quote: above -> buy (+volume),
        below -> sell (-volume), equal or unknown -> 0.

        Higher lambda = less liquid market, your orders move prices more.
        Windows whose signed-volume variance is zero yield NaN.
        """
        mid = (bid + ask) / 2
        mid_change = mid.diff()
        prev_mid = mid.shift(1)

        # np.where works positionally; re-attach the caller's index so the
        # rolling covariance below aligns with mid_change instead of with a
        # fresh RangeIndex.
        signed_vol = pd.Series(
            np.where(price > prev_mid, volume,
                     np.where(price < prev_mid, -volume, 0)),
            index=price.index,
        )

        # Rolling regression slope via covariance / variance ratio.
        cov = mid_change.rolling(window).cov(signed_vol)
        var = signed_vol.rolling(window).var()
        return cov / var.replace(0, np.nan)

    @staticmethod
    def order_flow_imbalance(bid_size: pd.Series, ask_size: pd.Series) -> pd.Series:
        """
        OFI = (bid_size - ask_size) / (bid_size + ask_size).

        Positive = more resting size on the bid than on the ask.
        A 1e-10 term is added to the denominator so an empty book yields 0
        instead of a division by zero.
        """
        return (bid_size - ask_size) / (bid_size + ask_size + 1e-10)

    @staticmethod
    def volume_imbalance(buy_volume: pd.Series, sell_volume: pd.Series) -> pd.Series:
        """
        Volume imbalance = (buy_vol - sell_vol) / (buy_vol + sell_vol).

        The buy/sell classification (tick test, quote test, ...) is the
        caller's responsibility. A 1e-10 term in the denominator avoids
        division by zero when both volumes are 0.
        """
        return (buy_volume - sell_volume) / (buy_volume + sell_volume + 1e-10)

    @staticmethod
    def trade_sign_classification(price: pd.Series,
                                  bid: pd.Series,
                                  ask: pd.Series) -> pd.Series:
        """
        Lee-Ready style trade direction classification.

        If trade price > mid → buy  (+1, aggressor is buyer)
        If trade price < mid → sell (-1, aggressor is seller)
        If trade price = mid → use tick test (compare to previous trade)
        If the tick test is also flat → reuse the last known sign

        Returns an integer Series of +1 / -1 on ``price.index``; 0 only
        appears at the start of the series, before any sign is known.
        """
        mid = (bid + ask) / 2

        # Quote test.
        quote_sign = np.where(price > mid, 1, np.where(price < mid, -1, 0))

        # Tick test for trades that printed exactly at the mid.
        price_change = price.diff()
        tick_sign = np.where(
            price_change > 0, 1,
            np.where(price_change < 0, -1, 0)
        )

        # Use the tick test where the quote test is inconclusive.
        sign = pd.Series(
            np.where(quote_sign == 0, tick_sign, quote_sign),
            index=price.index,
        )

        # Carry the last known direction forward. Zeros must first be turned
        # into NaN: a forward-fill only fills missing values, never zeros.
        return sign.where(sign != 0).ffill().fillna(0).astype(int)

    @staticmethod
    def amihud_illiquidity(price: pd.Series, volume: pd.Series,
                           window: int = 21) -> pd.Series:
        """
        Amihud illiquidity = mean(|return| / (price * volume)) * 1e6.

        The mean is a rolling mean over ``window`` observations.
        Higher = less liquid.

        Observations with zero dollar volume are treated as missing (NaN)
        rather than producing ``inf``, so any window containing one is NaN.
        """
        abs_returns = price.pct_change().abs()
        # Zero dollar volume would give inf (or NaN for 0/0); mark as missing.
        dollar_volume = (price * volume).replace(0, np.nan)

        return (abs_returns / dollar_volume).rolling(window).mean() * 1e6

    @staticmethod
    def kyles_lambda(price: pd.Series, volume: pd.Series,
                     trade_sign: pd.Series, window: int = 100) -> pd.Series:
        """
        Kyle's Lambda — price impact per unit of order flow.

        Lambda = Cov(delta_price, signed_volume) / Var(signed_volume)

        computed over a rolling ``window``, where
        ``signed_volume = trade_sign * volume`` and ``delta_price`` is the
        first difference of the trade price. Windows with zero
        signed-volume variance yield NaN.
        """
        delta_price = price.diff()
        signed_volume = trade_sign * volume

        cov = delta_price.rolling(window).cov(signed_volume)
        var = signed_volume.rolling(window).var()

        return cov / var.replace(0, np.nan)

    @staticmethod
    def vpin_approximation(price: pd.Series, volume: pd.Series,
                           bucket_vol: float = 10000) -> float:
        """
        VPIN — Volume-Synchronized Probability of Informed Trading.

        Simplified approximation using equal-volume buckets:

        1. A trade is a buy when its price exceeds the previous 2-tick
           average price, otherwise it is a sell.
        2. Total volume is cut into ``int(total / bucket_vol)`` equal
           buckets; each trade is assigned, whole, to the bucket that its
           cumulative volume falls into.
        3. VPIN = mean over buckets of |buy - sell| / (buy + sell).

        Buckets that receive no trade contribute 0 to the mean.

        Returns NaN when the input is empty, ``bucket_vol`` is not
        positive, or fewer than 10 buckets can be formed.

        High VPIN = high probability of informed trading = adverse selection risk.
        """
        # Work on plain arrays: positional indexing such as [-1] is not
        # reliable on a pandas Series whose index is not positional.
        vol = np.asarray(volume, dtype=float)
        if vol.size == 0 or bucket_vol <= 0:
            return np.nan

        # Classify trades against the lagged 2-tick average price.
        ref_price = price.rolling(2).mean().shift(1)
        trade_sign = np.where(np.asarray(price > ref_price), 1, -1)

        signed_volume = trade_sign * vol
        buy_volume = np.where(signed_volume > 0, vol, 0.0)
        sell_volume = np.where(signed_volume < 0, vol, 0.0)

        # Create volume buckets.
        cumulative = np.cumsum(vol)
        total_volume = cumulative[-1]
        n_buckets = int(total_volume / bucket_vol)

        if n_buckets < 10:
            return np.nan

        bucket_boundaries = np.linspace(0, total_volume, n_buckets + 1)

        # Bucket i holds trades with boundaries[i] <= cumulative < boundaries[i+1].
        # The clip puts trades sitting exactly on the final boundary (always
        # the last trade) into the last bucket instead of dropping them.
        bucket_idx = np.searchsorted(bucket_boundaries, cumulative, side='right') - 1
        bucket_idx = np.clip(bucket_idx, 0, n_buckets - 1)

        bucket_buy = np.bincount(bucket_idx, weights=buy_volume, minlength=n_buckets)
        bucket_sell = np.bincount(bucket_idx, weights=sell_volume, minlength=n_buckets)
        bucket_volume = bucket_buy + bucket_sell

        # VPIN = average |buy - sell| / volume
        vpin_values = np.abs(bucket_buy - bucket_sell) / (bucket_volume + 1e-10)

        return float(np.mean(vpin_values))

    @staticmethod
    def roll_measure(price: pd.Series, window: int = 20) -> pd.Series:
        """
        Roll's measure — estimate bid-ask spread from serial covariance.

        Spread = 2 * sqrt(-Cov(delta_price_t, delta_price_{t-1}))

        The covariance is computed over a rolling ``window``. The estimator
        is only defined when the covariance is negative; a non-negative
        covariance is clamped so the result is 0 instead of NaN.
        """
        delta = price.diff()
        cov = delta.rolling(window).cov(delta.shift(1))

        # Roll's measure (negative part of the covariance only).
        roll = 2 * np.sqrt(np.maximum(-cov, 0))

        return roll

    @staticmethod
    def hasbrouck_lambda(price: pd.Series, volume: pd.Series,
                         window: int = 100) -> pd.Series:
        """
        Hasbrouck's Lambda — information-based price impact (simplified).

        Implemented as the rolling correlation, over ``window`` ticks,
        between the current return and the PREVIOUS tick's signed volume.
        Trades are signed with the tick rule (sign of the price change).
        The result is a correlation, not a regression slope.
        """
        # Simplified: correlation of returns with lagged signed volume
        returns = price.pct_change()
        trade_sign = np.sign(price.diff().fillna(0))
        signed_volume = trade_sign * volume

        return returns.rolling(window).corr(signed_volume.shift(1))
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def compute_all_microstructure_features(df: pd.DataFrame,
                                        vpin_bucket_vol: float = 10000) -> pd.DataFrame:
    """
    Compute all microstructure features from a tick DataFrame.

    Required columns: price, volume, bid, ask, bid_size, ask_size

    Args:
        df: Tick data, one row per tick.
        vpin_bucket_vol: Volume per bucket handed to
            ``MicrostructureFeatures.vpin_approximation``.

    Returns:
        A DataFrame on ``df.index`` with one column per feature. Infinite
        values are replaced by NaN, gaps are forward-filled, and whatever
        is still missing (e.g. rolling warm-up rows, or a VPIN that could
        not be computed) is set to 0.

    Raises:
        ValueError: If a required column is missing.
    """
    required = ['price', 'volume', 'bid', 'ask', 'bid_size', 'ask_size']
    for col in required:
        if col not in df.columns:
            raise ValueError(f"Missing required column: {col}")

    features = pd.DataFrame(index=df.index)

    # Basic spread
    features['spread'] = MicrostructureFeatures.bid_ask_spread(df['bid'], df['ask'])
    features['relative_spread'] = MicrostructureFeatures.relative_spread(
        df['bid'], df['ask']
    )

    # Effective spread
    features['effective_spread'] = MicrostructureFeatures.effective_spread(
        df['price'], df['bid'], df['ask']
    )

    # Order flow imbalance
    features['ofi'] = MicrostructureFeatures.order_flow_imbalance(
        df['bid_size'], df['ask_size']
    )

    # Trade sign classification
    features['trade_sign'] = MicrostructureFeatures.trade_sign_classification(
        df['price'], df['bid'], df['ask']
    )

    # Signed volume
    features['signed_volume'] = features['trade_sign'] * df['volume']
    # Per-tick imbalance: each tick's volume is entirely buy or entirely sell.
    features['volume_imbalance'] = MicrostructureFeatures.volume_imbalance(
        np.where(features['trade_sign'] > 0, df['volume'], 0),
        np.where(features['trade_sign'] < 0, df['volume'], 0)
    )

    # Amihud illiquidity (using daily approximation from intraday)
    features['amihud_illiquidity'] = MicrostructureFeatures.amihud_illiquidity(
        df['price'], df['volume']
    )

    # Kyle's lambda
    features['kyle_lambda'] = MicrostructureFeatures.kyles_lambda(
        df['price'], df['volume'], features['trade_sign']
    )

    # Roll's measure
    features['roll_measure'] = MicrostructureFeatures.roll_measure(df['price'])

    # Hasbrouck lambda
    features['hasbrouck_lambda'] = MicrostructureFeatures.hasbrouck_lambda(
        df['price'], df['volume']
    )

    # VPIN is a single scalar for the whole sample, broadcast to every row.
    vpin = MicrostructureFeatures.vpin_approximation(
        df['price'], df['volume'], vpin_bucket_vol
    )
    features['vpin'] = vpin

    # .ffill() replaces fillna(method='ffill'), whose `method` keyword is
    # deprecated in recent pandas releases.
    return features.replace([np.inf, -np.inf], np.nan).ffill().fillna(0)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def generate_synthetic_tick_data(n_ticks: int = 10000,
                                 base_price: float = 100.0,
                                 volatility: float = 0.001,
                                 spread_bps: float = 1.0,
                                 seed: Optional[int] = 42) -> pd.DataFrame:
    """
    Generate synthetic tick-level data for testing microstructure features.

    Args:
        n_ticks: Number of ticks (rows) to generate; must be >= 1.
        base_price: Starting price and mean-reversion level.
        volatility: Per-tick shock standard deviation, as a fraction of
            ``base_price``.
        spread_bps: Full quoted spread in basis points of the price.
        seed: Seed for a private random generator. The default of 42
            reproduces the series the function has always produced; pass
            ``None`` for non-deterministic data.

    Returns:
        DataFrame indexed by ``timestamp`` (1-second steps starting at
        2024-01-01 09:30) with columns price, bid, ask, bid_size,
        ask_size and volume.

    Raises:
        ValueError: If ``n_ticks`` is smaller than 1.
    """
    if n_ticks < 1:
        raise ValueError("n_ticks must be at least 1")

    # A private legacy RandomState yields the same stream as
    # np.random.seed(seed) followed by np.random.* calls, but leaves the
    # caller's global NumPy random state untouched.
    rng = np.random.RandomState(seed)

    # Price process: random walk with slight mean reversion.
    # The shocks are drawn first, in one call, exactly as many as the walk needs.
    shocks = rng.randn(n_ticks - 1) * volatility * base_price
    prices = np.empty(n_ticks, dtype=float)
    prices[0] = base_price
    for i, shock in enumerate(shocks, start=1):
        previous = prices[i - 1]
        # Small random step, pulled 1% of the way back toward base_price.
        change = shock - 0.01 * (previous - base_price)
        # Floor at 0.01 so the price can never reach zero or go negative.
        prices[i] = max(previous + change, 0.01)

    # Bid-ask spread: spread_bps / 10000 of the price, half on each side.
    half_spread = prices * spread_bps / 20000  # bps to dollars
    bid = prices - half_spread
    ask = prices + half_spread

    # Sizes (log-normal: few large orders, many small)
    bid_size = rng.lognormal(8, 1.5, n_ticks).astype(int)
    ask_size = rng.lognormal(8, 1.5, n_ticks).astype(int)

    # Traded volume per tick (the trade price is the centre of the quotes).
    volume = rng.lognormal(6, 1.2, n_ticks).astype(int)

    # Timestamp
    times = pd.date_range('2024-01-01 09:30', periods=n_ticks, freq='1s')

    return pd.DataFrame({
        'timestamp': times,
        'price': prices,
        'bid': bid,
        'ask': ask,
        'bid_size': bid_size,
        'ask_size': ask_size,
        'volume': volume
    }).set_index('timestamp')
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
if __name__ == '__main__':
    # Smoke test: build a synthetic tick stream, derive every feature from
    # it, and print a short report.
    tick_data = generate_synthetic_tick_data(n_ticks=5000)
    features = compute_all_microstructure_features(tick_data)

    # Gather everything to be reported before printing anything.
    n_rows = len(tick_data)
    n_features = len(features.columns)
    summary = features.describe().round(6)
    first_vpin = features['vpin'].iloc[0]
    preview_columns = ['spread', 'relative_spread', 'ofi', 'kyle_lambda',
                       'amihud_illiquidity']
    preview = features[preview_columns].tail().round(6)

    print("Market Microstructure Features")
    print("=" * 60)
    print(f"\nDataset: {n_rows} ticks")
    print(f"Features computed: {n_features}")
    print(f"\nFeature Summary:")
    print(summary)

    # VPIN is one value repeated on every row, so the first row suffices.
    print(f"\nVPIN (Volume-Synchronized Probability of Informed Trading):")
    print(f"  {first_vpin:.4f}")

    print(f"\nSample Features (last 5 ticks):")
    print(preview)
|