""" Example 5: Complete GeoBotv1 Framework - Final Features This example demonstrates the final critical components that complete GeoBotv1 to 100% research-grade capability: 1. Vector Autoregression (VAR/SVAR/DFM) - Econometric time-series analysis 2. Hawkes Processes - Conflict contagion and self-exciting dynamics 3. Quasi-Experimental Methods - Causal inference without randomization - Synthetic Control Method (SCM) - Difference-in-Differences (DiD) - Regression Discontinuity Design (RDD) - Instrumental Variables (IV) These methods are essential for: - Multi-country forecasting with spillovers (VAR) - Modeling conflict escalation and contagion (Hawkes) - Estimating policy effects and counterfactuals (quasi-experimental) GeoBotv1 is now COMPLETE with all research-grade mathematical components! """ import numpy as np import sys sys.path.append('..') from datetime import datetime, timedelta # Time-series models from geobot.timeseries import ( VARModel, SVARModel, DynamicFactorModel, GrangerCausality, UnivariateHawkesProcess, MultivariateHawkesProcess, ConflictContagionModel ) # Quasi-experimental methods from geobot.models import ( SyntheticControlMethod, DifferenceinDifferences, RegressionDiscontinuity, InstrumentalVariables ) def demo_var_model(): """Demonstrate Vector Autoregression for multi-country forecasting.""" print("\n" + "="*80) print("1. Vector Autoregression (VAR) - Multi-Country Spillovers") print("="*80) # Simulate data for 3 countries # Country dynamics with interdependencies np.random.seed(42) T = 100 n_vars = 3 # Generate VAR(2) data # Y_t = A_1 Y_{t-1} + A_2 Y_{t-2} + noise A1 = np.array([ [0.5, 0.2, 0.1], # Country 1: affected by all [0.1, 0.6, 0.15], # Country 2: strong self-dependence [0.05, 0.1, 0.55] # Country 3: weak spillovers ]) A2 = np.array([ [0.2, 0.05, 0.0], [0.1, 0.1, 0.05], [0.0, 0.05, 0.2] ]) # Simulate data = np.zeros((T, n_vars)) data[0] = np.random.randn(n_vars) * 0.1 data[1] = np.random.randn(n_vars) * 0.1 for t in range(2, T): data[t] = (A1 @ data[t-1] + A2 @ data[t-2] + np.random.randn(n_vars) * 0.1) print(f"\nSimulated {T} time periods for {n_vars} countries") print(f"Variables: GDP growth, Military spending, Stability index\n") # Fit VAR model var = VARModel(n_lags=2) variable_names = ['GDP_growth', 'Military_spend', 'Stability'] results = var.fit(data, variable_names) print(f"VAR({results.n_lags}) Estimation Results:") print(f" Log-likelihood: {results.log_likelihood:.2f}") print(f" AIC: {results.aic:.2f}") print(f" BIC: {results.bic:.2f}") # Forecast forecast = var.forecast(results, steps=10) print(f"\n10-step ahead forecast:") print(f" GDP growth: {forecast[-1, 0]:.3f}") print(f" Military spending: {forecast[-1, 1]:.3f}") print(f" Stability: {forecast[-1, 2]:.3f}") # Granger causality print("\nGranger Causality Tests:") for i in range(n_vars): for j in range(n_vars): if i != j: gc_result = var.granger_causality(results, i, j) if gc_result['p_value'] < 0.05: print(f" {variable_names[j]} → {variable_names[i]}: " f"F={gc_result['f_statistic']:.2f}, p={gc_result['p_value']:.3f} ✓") # Impulse response functions irf_result = var.impulse_response(results, steps=10) print("\nImpulse Response Functions computed (10 steps)") print(f" Shock to Military spending → GDP growth at t=5: {irf_result.irf[0, 1, 5]:.4f}") # Forecast error variance decomposition fevd = var.forecast_error_variance_decomposition(results, steps=10) print("\nForecast Error Variance Decomposition (horizon=10):") for i, var_name in enumerate(variable_names): contributions = fevd[i, :, -1] print(f" {var_name} variance explained by:") for j, source_name in enumerate(variable_names): print(f" {source_name}: {contributions[j]:.1%}") print("\n✓ VAR model demonstrates multi-country interdependencies!") def demo_hawkes_process(): """Demonstrate Hawkes processes for conflict contagion.""" print("\n" + "="*80) print("2. Hawkes Processes - Conflict Escalation and Contagion") print("="*80) # Simulate conflict events print("\nSimulating conflict events with self-excitation...") hawkes = UnivariateHawkesProcess() # Parameters: baseline=0.3, excitation=0.6, decay=1.2 # Branching ratio = 0.6/1.2 = 0.5 (stable, subcritical) events = hawkes.simulate(mu=0.3, alpha=0.6, beta=1.2, T=100.0) print(f"Generated {len(events)} conflict events over 100 time units") print(f"Average rate: {len(events) / 100.0:.2f} events/unit\n") # Fit model result = hawkes.fit(events, T=100.0) print("Estimated Hawkes Parameters:") print(f" Baseline intensity (μ): {result.params.mu:.3f}") print(f" Excitation (α): {result.params.alpha:.3f}") print(f" Decay rate (β): {result.params.beta:.3f}") print(f" Branching ratio: {result.params.branching_ratio:.3f}") print(f" Process is {'STABLE' if result.params.is_stable else 'EXPLOSIVE'}") # Predict intensity t_future = 105.0 intensity = hawkes.predict_intensity(events, result.params, t_future) print(f"\nPredicted conflict intensity at t={t_future}: {intensity:.3f}") # Multivariate: conflict contagion between countries print("\n" + "-"*80) print("Multivariate Hawkes: Cross-Country Conflict Contagion") print("-"*80) countries = ['Syria', 'Iraq', 'Lebanon'] contagion_model = ConflictContagionModel(countries=countries) # Simulate with cross-excitation mu = np.array([0.5, 0.3, 0.2]) # Different baseline rates alpha = np.array([ [0.3, 0.15, 0.1], # Syria: high self-excitation, moderate contagion [0.2, 0.25, 0.1], # Iraq: affected by Syria [0.15, 0.1, 0.2] # Lebanon: affected by both ]) beta = np.ones((3, 3)) * 1.5 multi_hawkes = MultivariateHawkesProcess(n_dimensions=3) events_multi = multi_hawkes.simulate(mu=mu, alpha=alpha, beta=beta, T=100.0) print(f"\nSimulated events:") for i, country in enumerate(countries): print(f" {country}: {len(events_multi[i])} events") # Fit multivariate model events_dict = {country: events_multi[i] for i, country in enumerate(countries)} fit_result = contagion_model.fit(events_dict, T=100.0) print(f"\nFitted contagion model:") print(f" Spectral radius: {fit_result['spectral_radius']:.3f} (< 1 = stable)") print(f" Most contagious source: {fit_result['most_contagious_source']}") print(f" Most vulnerable target: {fit_result['most_vulnerable_target']}") # Identify contagion pathways pathways = contagion_model.identify_contagion_pathways(fit_result, threshold=0.1) print("\nSignificant contagion pathways (branching ratio > 0.1):") for source, target, strength in pathways[:5]: print(f" {source} → {target}: {strength:.3f}") # Risk assessment risks = contagion_model.contagion_risk(events_dict, fit_result, t=105.0, horizon=5.0) print("\nConflict risk over next 5 time units:") for country, risk in risks.items(): print(f" {country}: {risk:.1%}") print("\n✓ Hawkes processes capture conflict escalation dynamics!") def demo_synthetic_control(): """Demonstrate Synthetic Control Method.""" print("\n" + "="*80) print("3. Synthetic Control Method - Policy Impact Estimation") print("="*80) # Scenario: Estimate effect of sanctions on target country's GDP print("\nScenario: Economic sanctions imposed on Country A at t=50") print("Question: What is the causal effect on GDP growth?\n") # Generate data np.random.seed(42) T = 100 J = 10 # 10 control countries # Pre-treatment: all countries follow similar trends time = np.arange(T) trend = 0.02 * time + np.random.randn(T) * 0.1 # Control countries control_outcomes = np.zeros((T, J)) for j in range(J): control_outcomes[:, j] = trend + np.random.randn(T) * 0.15 + np.random.randn() * 0.5 # Treated country (matches controls pre-treatment) treated_outcome = trend + np.random.randn(T) * 0.15 # Treatment effect: negative shock starting at t=50 treatment_time = 50 true_effect = -0.8 treated_outcome[treatment_time:] += true_effect + np.random.randn(T - treatment_time) * 0.1 # Fit SCM scm = SyntheticControlMethod() result = scm.fit( treated_outcome=treated_outcome, control_outcomes=control_outcomes, treatment_time=treatment_time, control_names=[f"Country_{j+1}" for j in range(J)] ) print("Synthetic Control Results:") print(f" Pre-treatment fit (RMSPE): {result.pre_treatment_fit:.4f}") print(f"\nSynthetic Country A is weighted combination of:") for j, weight in enumerate(result.weights): if weight > 0.01: # Only show significant weights print(f" {result.control_units[j]}: {weight:.1%}") # Treatment effects avg_effect = np.mean(result.treatment_effect[treatment_time:]) print(f"\nEstimated treatment effect (post-sanctions):") print(f" Average: {avg_effect:.3f} (true effect: {true_effect:.3f})") print(f" Final period: {result.treatment_effect[-1]:.3f}") # Placebo test p_value = scm.placebo_test(treated_outcome, control_outcomes, treatment_time, n_permutations=J) print(f"\nPlacebo test p-value: {p_value:.3f}") if p_value < 0.05: print(" ✓ Effect is statistically significant (unusual compared to placebos)") else: print(" ✗ Effect not significant (could be random)") print("\n✓ Synthetic control provides credible counterfactual!") def demo_difference_in_differences(): """Demonstrate Difference-in-Differences.""" print("\n" + "="*80) print("4. Difference-in-Differences (DiD) - Regime Change Analysis") print("="*80) # Scenario: Regime change in treated country print("\nScenario: Regime change in Country T at t=50") print("Compare to similar countries without regime change\n") np.random.seed(42) # Pre-treatment (similar trends) treated_pre = 3.0 + np.random.randn(50) * 0.5 control_pre = 3.2 + np.random.randn(50) * 0.5 # Post-treatment (treatment effect = +1.5 on outcome) true_effect = 1.5 treated_post = 3.0 + true_effect + np.random.randn(50) * 0.5 control_post = 3.2 + np.random.randn(50) * 0.5 # No effect # Estimate DiD did = DifferenceinDifferences() result = did.estimate(treated_pre, treated_post, control_pre, control_post) print("Difference-in-Differences Results:") print(f"\n Pre-treatment difference: {result.pre_treatment_diff:.3f}") print(f" Post-treatment difference: {result.post_treatment_diff:.3f}") print(f"\n Average Treatment Effect (ATT): {result.att:.3f}") print(f" Standard error: {result.se:.3f}") print(f" t-statistic: {result.t_stat:.3f}") print(f" p-value: {result.p_value:.4f}") if result.p_value < 0.05: print(f"\n ✓ Regime change had significant effect (true effect: {true_effect:.3f})") else: print("\n ✗ Effect not statistically significant") # Assumption check if abs(result.pre_treatment_diff) < 0.5: print("\n ✓ Parallel trends assumption plausible (small pre-treatment diff)") else: print("\n ⚠ Parallel trends questionable (large pre-treatment diff)") print("\n✓ DiD isolates causal effect of regime change!") def demo_regression_discontinuity(): """Demonstrate Regression Discontinuity Design.""" print("\n" + "="*80) print("5. Regression Discontinuity Design (RDD) - Election Effects") print("="*80) # Scenario: Effect of winning election on military policy print("\nScenario: Effect of hawkish candidate winning election") print("Running variable: Vote share (cutoff = 50%)") print("Outcome: Military spending increase\n") np.random.seed(42) n = 500 # Vote share (running variable) vote_share = np.random.uniform(0.3, 0.7, n) # Outcome: military spending # Smooth function of vote share + discontinuity at 50% outcome = 2.0 + 1.5 * vote_share + np.random.randn(n) * 0.3 # Treatment effect: +0.8 if vote > 50% true_effect = 0.8 outcome[vote_share >= 0.5] += true_effect # Estimate RDD rdd = RegressionDiscontinuity(cutoff=0.5) result = rdd.estimate_sharp( running_var=vote_share, outcome=outcome, bandwidth=0.15, # 15% bandwidth kernel='triangular' ) print("Regression Discontinuity Results:") print(f"\n Bandwidth: {result.bandwidth:.3f}") print(f" Observations below cutoff: {result.n_left}") print(f" Observations above cutoff: {result.n_right}") print(f"\n Treatment effect (LATE): {result.treatment_effect:.3f}") print(f" Standard error: {result.se:.3f}") print(f" t-statistic: {result.t_stat:.3f}") print(f" p-value: {result.p_value:.4f}") if result.p_value < 0.05: print(f"\n ✓ Winning election causes increase in military spending") print(f" (true effect: {true_effect:.3f})") else: print("\n ✗ Effect not statistically significant") print("\n✓ RDD exploits threshold-based treatment assignment!") def demo_instrumental_variables(): """Demonstrate Instrumental Variables.""" print("\n" + "="*80) print("6. Instrumental Variables (IV) - Trade and Conflict") print("="*80) # Scenario: Effect of trade on conflict (trade is endogenous) print("\nScenario: Does trade reduce conflict?") print("Problem: Trade is endogenous (reverse causality, omitted variables)") print("Instrument: Geographic distance to major trade routes\n") np.random.seed(42) n = 300 # Instrument: distance (exogenous) distance = np.random.uniform(100, 1000, n) # Unobserved confounders unobserved = np.random.randn(n) # Trade (endogenous): affected by distance and confounders trade = 50 - 0.03 * distance + 2.0 * unobserved + np.random.randn(n) * 5 # Conflict: true effect of trade = -0.15, but also affected by confounders true_effect = -0.15 conflict = 10 + true_effect * trade - 1.5 * unobserved + np.random.randn(n) * 2 # Estimate with IV iv = InstrumentalVariables() result = iv.estimate_2sls( outcome=conflict, endogenous=trade, instrument=distance ) print("Instrumental Variables (2SLS) Results:") print(f"\n First stage F-statistic: {result.first_stage_f:.2f}") if result.weak_instrument: print(" ⚠ Warning: Weak instrument (F < 10)") else: print(" ✓ Strong instrument (F > 10)") print(f"\n OLS estimate (biased): {result.beta_ols[0]:.4f}") print(f" IV estimate (consistent): {result.beta_iv[0]:.4f}") print(f" IV standard error: {result.se_iv[0]:.4f}") print(f"\n True causal effect: {true_effect:.4f}") # Hausman test (informal) if abs(result.beta_ols[0] - result.beta_iv[0]) > 0.05: print("\n ✓ OLS and IV differ substantially → endogeneity present") print(" IV corrects for bias!") else: print("\n OLS and IV similar → endogeneity may be small") print("\n✓ IV isolates causal effect using exogenous variation!") def demo_dynamic_factor_model(): """Demonstrate Dynamic Factor Model for nowcasting.""" print("\n" + "="*80) print("7. Dynamic Factor Model (DFM) - High-Dimensional Nowcasting") print("="*80) # Scenario: Nowcast geopolitical tension from many indicators print("\nScenario: Nowcast regional tension from 50 economic/political indicators") print("DFM extracts common latent factors driving all indicators\n") np.random.seed(42) T = 200 n_indicators = 50 n_factors = 3 # True factors (latent tensions) true_factors = np.zeros((T, n_factors)) for k in range(n_factors): # AR(1) dynamics for t in range(1, T): true_factors[t, k] = 0.8 * true_factors[t-1, k] + np.random.randn() * 0.5 # Factor loadings (how indicators load on factors) true_loadings = np.random.randn(n_indicators, n_factors) # Observed indicators = factors * loadings + idiosyncratic noise data = true_factors @ true_loadings.T + np.random.randn(T, n_indicators) * 0.5 # Fit DFM dfm = DynamicFactorModel(n_factors=3, n_lags=1) model = dfm.fit(data) print(f"Dynamic Factor Model Results:") print(f"\n Number of indicators: {n_indicators}") print(f" Number of factors: {n_factors}") print(f" Explained variance: {model['explained_variance_ratio']:.1%}") # Extracted factors factors = model['factors'] print(f"\n Extracted factor dimensions: {factors.shape}") print(f" Factor 1 final value: {factors[-1, 0]:.3f}") print(f" Factor 2 final value: {factors[-1, 1]:.3f}") print(f" Factor 3 final value: {factors[-1, 2]:.3f}") # Forecast forecast = dfm.forecast(model, steps=10) print(f"\n 10-step ahead forecast dimensions: {forecast.shape}") print(f" Average forecasted indicator value: {np.mean(forecast[-1]):.3f}") # Correlation with true factors corr_0 = np.corrcoef(true_factors[:, 0], factors[:, 0])[0, 1] print(f"\n Factor recovery (correlation with true): {abs(corr_0):.3f}") print("\n✓ DFM reduces dimensionality while preserving information!") def main(): """Run all demonstrations of final features.""" print("=" * 80) print("GeoBotv1 - COMPLETE FRAMEWORK DEMONSTRATION") print("=" * 80) print("\nThis example showcases the final components that complete GeoBotv1:") print("• Vector Autoregression (VAR/SVAR/DFM)") print("• Hawkes Processes for conflict contagion") print("• Quasi-Experimental Causal Inference") print(" - Synthetic Control Method") print(" - Difference-in-Differences") print(" - Regression Discontinuity Design") print(" - Instrumental Variables") # Run all demonstrations demo_var_model() demo_hawkes_process() demo_synthetic_control() demo_difference_in_differences() demo_regression_discontinuity() demo_instrumental_variables() demo_dynamic_factor_model() print("\n" + "=" * 80) print("GeoBotv1 Framework is NOW 100% COMPLETE!") print("=" * 80) print("\n🎉 All Research-Grade Mathematical Components Implemented:") print("\n📊 CORE FRAMEWORKS:") print(" ✓ Optimal Transport (Wasserstein, Kantorovich, Sinkhorn)") print(" ✓ Causal Inference (DAGs, SCMs, Do-Calculus)") print(" ✓ Bayesian Inference (MCMC, Particle Filters, VI)") print(" ✓ Stochastic Processes (SDEs, Jump-Diffusion)") print(" ✓ Time-Series Models (Kalman, HMM, VAR, Hawkes)") print(" ✓ Quasi-Experimental Methods (SCM, DiD, RDD, IV)") print(" ✓ Machine Learning (GNNs, Risk Scoring, Embeddings)") print("\n📈 SPECIALIZED CAPABILITIES:") print(" ✓ Multi-country interdependency modeling (VAR)") print(" ✓ Conflict contagion and escalation (Hawkes)") print(" ✓ Policy counterfactuals (Synthetic Control)") print(" ✓ Regime change effects (Difference-in-Differences)") print(" ✓ Election outcomes impact (Regression Discontinuity)") print(" ✓ Trade-conflict nexus (Instrumental Variables)") print(" ✓ High-dimensional nowcasting (Dynamic Factor Models)") print("\n🔬 MATHEMATICAL RIGOR:") print(" ✓ Measure-theoretic probability foundations") print(" ✓ Continuous-time dynamics (SDEs)") print(" ✓ Causal identification strategies") print(" ✓ Structural econometric methods") print(" ✓ Point process theory") print(" ✓ Optimal transport geometry") print("\n💡 GeoBotv1 is ready for production geopolitical forecasting!") print("=" * 80 + "\n") if __name__ == "__main__": main()