GeoBot-Forecasting-Framework / examples /05_complete_framework.py
clarkkitchen22's picture
Initial GeoBot Forecasting Framework commit
484e3bc
"""
Example 5: Complete GeoBotv1 Framework - Final Features
This example demonstrates the final critical components that complete GeoBotv1
to 100% research-grade capability:
1. Vector Autoregression (VAR/SVAR/DFM) - Econometric time-series analysis
2. Hawkes Processes - Conflict contagion and self-exciting dynamics
3. Quasi-Experimental Methods - Causal inference without randomization
- Synthetic Control Method (SCM)
- Difference-in-Differences (DiD)
- Regression Discontinuity Design (RDD)
- Instrumental Variables (IV)
These methods are essential for:
- Multi-country forecasting with spillovers (VAR)
- Modeling conflict escalation and contagion (Hawkes)
- Estimating policy effects and counterfactuals (quasi-experimental)
GeoBotv1 is now COMPLETE with all research-grade mathematical components!
"""
import numpy as np
import sys
sys.path.append('..')
from datetime import datetime, timedelta
# Time-series models
from geobot.timeseries import (
VARModel,
SVARModel,
DynamicFactorModel,
GrangerCausality,
UnivariateHawkesProcess,
MultivariateHawkesProcess,
ConflictContagionModel
)
# Quasi-experimental methods
from geobot.models import (
SyntheticControlMethod,
DifferenceinDifferences,
RegressionDiscontinuity,
InstrumentalVariables
)
def demo_var_model():
"""Demonstrate Vector Autoregression for multi-country forecasting."""
print("\n" + "="*80)
print("1. Vector Autoregression (VAR) - Multi-Country Spillovers")
print("="*80)
# Simulate data for 3 countries
# Country dynamics with interdependencies
np.random.seed(42)
T = 100
n_vars = 3
# Generate VAR(2) data
# Y_t = A_1 Y_{t-1} + A_2 Y_{t-2} + noise
A1 = np.array([
[0.5, 0.2, 0.1], # Country 1: affected by all
[0.1, 0.6, 0.15], # Country 2: strong self-dependence
[0.05, 0.1, 0.55] # Country 3: weak spillovers
])
A2 = np.array([
[0.2, 0.05, 0.0],
[0.1, 0.1, 0.05],
[0.0, 0.05, 0.2]
])
# Simulate
data = np.zeros((T, n_vars))
data[0] = np.random.randn(n_vars) * 0.1
data[1] = np.random.randn(n_vars) * 0.1
for t in range(2, T):
data[t] = (A1 @ data[t-1] + A2 @ data[t-2] +
np.random.randn(n_vars) * 0.1)
print(f"\nSimulated {T} time periods for {n_vars} countries")
print(f"Variables: GDP growth, Military spending, Stability index\n")
# Fit VAR model
var = VARModel(n_lags=2)
variable_names = ['GDP_growth', 'Military_spend', 'Stability']
results = var.fit(data, variable_names)
print(f"VAR({results.n_lags}) Estimation Results:")
print(f" Log-likelihood: {results.log_likelihood:.2f}")
print(f" AIC: {results.aic:.2f}")
print(f" BIC: {results.bic:.2f}")
# Forecast
forecast = var.forecast(results, steps=10)
print(f"\n10-step ahead forecast:")
print(f" GDP growth: {forecast[-1, 0]:.3f}")
print(f" Military spending: {forecast[-1, 1]:.3f}")
print(f" Stability: {forecast[-1, 2]:.3f}")
# Granger causality
print("\nGranger Causality Tests:")
for i in range(n_vars):
for j in range(n_vars):
if i != j:
gc_result = var.granger_causality(results, i, j)
if gc_result['p_value'] < 0.05:
print(f" {variable_names[j]} β†’ {variable_names[i]}: "
f"F={gc_result['f_statistic']:.2f}, p={gc_result['p_value']:.3f} βœ“")
# Impulse response functions
irf_result = var.impulse_response(results, steps=10)
print("\nImpulse Response Functions computed (10 steps)")
print(f" Shock to Military spending β†’ GDP growth at t=5: {irf_result.irf[0, 1, 5]:.4f}")
# Forecast error variance decomposition
fevd = var.forecast_error_variance_decomposition(results, steps=10)
print("\nForecast Error Variance Decomposition (horizon=10):")
for i, var_name in enumerate(variable_names):
contributions = fevd[i, :, -1]
print(f" {var_name} variance explained by:")
for j, source_name in enumerate(variable_names):
print(f" {source_name}: {contributions[j]:.1%}")
print("\nβœ“ VAR model demonstrates multi-country interdependencies!")
def demo_hawkes_process():
"""Demonstrate Hawkes processes for conflict contagion."""
print("\n" + "="*80)
print("2. Hawkes Processes - Conflict Escalation and Contagion")
print("="*80)
# Simulate conflict events
print("\nSimulating conflict events with self-excitation...")
hawkes = UnivariateHawkesProcess()
# Parameters: baseline=0.3, excitation=0.6, decay=1.2
# Branching ratio = 0.6/1.2 = 0.5 (stable, subcritical)
events = hawkes.simulate(mu=0.3, alpha=0.6, beta=1.2, T=100.0)
print(f"Generated {len(events)} conflict events over 100 time units")
print(f"Average rate: {len(events) / 100.0:.2f} events/unit\n")
# Fit model
result = hawkes.fit(events, T=100.0)
print("Estimated Hawkes Parameters:")
print(f" Baseline intensity (ΞΌ): {result.params.mu:.3f}")
print(f" Excitation (Ξ±): {result.params.alpha:.3f}")
print(f" Decay rate (Ξ²): {result.params.beta:.3f}")
print(f" Branching ratio: {result.params.branching_ratio:.3f}")
print(f" Process is {'STABLE' if result.params.is_stable else 'EXPLOSIVE'}")
# Predict intensity
t_future = 105.0
intensity = hawkes.predict_intensity(events, result.params, t_future)
print(f"\nPredicted conflict intensity at t={t_future}: {intensity:.3f}")
# Multivariate: conflict contagion between countries
print("\n" + "-"*80)
print("Multivariate Hawkes: Cross-Country Conflict Contagion")
print("-"*80)
countries = ['Syria', 'Iraq', 'Lebanon']
contagion_model = ConflictContagionModel(countries=countries)
# Simulate with cross-excitation
mu = np.array([0.5, 0.3, 0.2]) # Different baseline rates
alpha = np.array([
[0.3, 0.15, 0.1], # Syria: high self-excitation, moderate contagion
[0.2, 0.25, 0.1], # Iraq: affected by Syria
[0.15, 0.1, 0.2] # Lebanon: affected by both
])
beta = np.ones((3, 3)) * 1.5
multi_hawkes = MultivariateHawkesProcess(n_dimensions=3)
events_multi = multi_hawkes.simulate(mu=mu, alpha=alpha, beta=beta, T=100.0)
print(f"\nSimulated events:")
for i, country in enumerate(countries):
print(f" {country}: {len(events_multi[i])} events")
# Fit multivariate model
events_dict = {country: events_multi[i] for i, country in enumerate(countries)}
fit_result = contagion_model.fit(events_dict, T=100.0)
print(f"\nFitted contagion model:")
print(f" Spectral radius: {fit_result['spectral_radius']:.3f} (< 1 = stable)")
print(f" Most contagious source: {fit_result['most_contagious_source']}")
print(f" Most vulnerable target: {fit_result['most_vulnerable_target']}")
# Identify contagion pathways
pathways = contagion_model.identify_contagion_pathways(fit_result, threshold=0.1)
print("\nSignificant contagion pathways (branching ratio > 0.1):")
for source, target, strength in pathways[:5]:
print(f" {source} β†’ {target}: {strength:.3f}")
# Risk assessment
risks = contagion_model.contagion_risk(events_dict, fit_result, t=105.0, horizon=5.0)
print("\nConflict risk over next 5 time units:")
for country, risk in risks.items():
print(f" {country}: {risk:.1%}")
print("\nβœ“ Hawkes processes capture conflict escalation dynamics!")
def demo_synthetic_control():
"""Demonstrate Synthetic Control Method."""
print("\n" + "="*80)
print("3. Synthetic Control Method - Policy Impact Estimation")
print("="*80)
# Scenario: Estimate effect of sanctions on target country's GDP
print("\nScenario: Economic sanctions imposed on Country A at t=50")
print("Question: What is the causal effect on GDP growth?\n")
# Generate data
np.random.seed(42)
T = 100
J = 10 # 10 control countries
# Pre-treatment: all countries follow similar trends
time = np.arange(T)
trend = 0.02 * time + np.random.randn(T) * 0.1
# Control countries
control_outcomes = np.zeros((T, J))
for j in range(J):
control_outcomes[:, j] = trend + np.random.randn(T) * 0.15 + np.random.randn() * 0.5
# Treated country (matches controls pre-treatment)
treated_outcome = trend + np.random.randn(T) * 0.15
# Treatment effect: negative shock starting at t=50
treatment_time = 50
true_effect = -0.8
treated_outcome[treatment_time:] += true_effect + np.random.randn(T - treatment_time) * 0.1
# Fit SCM
scm = SyntheticControlMethod()
result = scm.fit(
treated_outcome=treated_outcome,
control_outcomes=control_outcomes,
treatment_time=treatment_time,
control_names=[f"Country_{j+1}" for j in range(J)]
)
print("Synthetic Control Results:")
print(f" Pre-treatment fit (RMSPE): {result.pre_treatment_fit:.4f}")
print(f"\nSynthetic Country A is weighted combination of:")
for j, weight in enumerate(result.weights):
if weight > 0.01: # Only show significant weights
print(f" {result.control_units[j]}: {weight:.1%}")
# Treatment effects
avg_effect = np.mean(result.treatment_effect[treatment_time:])
print(f"\nEstimated treatment effect (post-sanctions):")
print(f" Average: {avg_effect:.3f} (true effect: {true_effect:.3f})")
print(f" Final period: {result.treatment_effect[-1]:.3f}")
# Placebo test
p_value = scm.placebo_test(treated_outcome, control_outcomes, treatment_time, n_permutations=J)
print(f"\nPlacebo test p-value: {p_value:.3f}")
if p_value < 0.05:
print(" βœ“ Effect is statistically significant (unusual compared to placebos)")
else:
print(" βœ— Effect not significant (could be random)")
print("\nβœ“ Synthetic control provides credible counterfactual!")
def demo_difference_in_differences():
"""Demonstrate Difference-in-Differences."""
print("\n" + "="*80)
print("4. Difference-in-Differences (DiD) - Regime Change Analysis")
print("="*80)
# Scenario: Regime change in treated country
print("\nScenario: Regime change in Country T at t=50")
print("Compare to similar countries without regime change\n")
np.random.seed(42)
# Pre-treatment (similar trends)
treated_pre = 3.0 + np.random.randn(50) * 0.5
control_pre = 3.2 + np.random.randn(50) * 0.5
# Post-treatment (treatment effect = +1.5 on outcome)
true_effect = 1.5
treated_post = 3.0 + true_effect + np.random.randn(50) * 0.5
control_post = 3.2 + np.random.randn(50) * 0.5 # No effect
# Estimate DiD
did = DifferenceinDifferences()
result = did.estimate(treated_pre, treated_post, control_pre, control_post)
print("Difference-in-Differences Results:")
print(f"\n Pre-treatment difference: {result.pre_treatment_diff:.3f}")
print(f" Post-treatment difference: {result.post_treatment_diff:.3f}")
print(f"\n Average Treatment Effect (ATT): {result.att:.3f}")
print(f" Standard error: {result.se:.3f}")
print(f" t-statistic: {result.t_stat:.3f}")
print(f" p-value: {result.p_value:.4f}")
if result.p_value < 0.05:
print(f"\n βœ“ Regime change had significant effect (true effect: {true_effect:.3f})")
else:
print("\n βœ— Effect not statistically significant")
# Assumption check
if abs(result.pre_treatment_diff) < 0.5:
print("\n βœ“ Parallel trends assumption plausible (small pre-treatment diff)")
else:
print("\n ⚠ Parallel trends questionable (large pre-treatment diff)")
print("\nβœ“ DiD isolates causal effect of regime change!")
def demo_regression_discontinuity():
"""Demonstrate Regression Discontinuity Design."""
print("\n" + "="*80)
print("5. Regression Discontinuity Design (RDD) - Election Effects")
print("="*80)
# Scenario: Effect of winning election on military policy
print("\nScenario: Effect of hawkish candidate winning election")
print("Running variable: Vote share (cutoff = 50%)")
print("Outcome: Military spending increase\n")
np.random.seed(42)
n = 500
# Vote share (running variable)
vote_share = np.random.uniform(0.3, 0.7, n)
# Outcome: military spending
# Smooth function of vote share + discontinuity at 50%
outcome = 2.0 + 1.5 * vote_share + np.random.randn(n) * 0.3
# Treatment effect: +0.8 if vote > 50%
true_effect = 0.8
outcome[vote_share >= 0.5] += true_effect
# Estimate RDD
rdd = RegressionDiscontinuity(cutoff=0.5)
result = rdd.estimate_sharp(
running_var=vote_share,
outcome=outcome,
bandwidth=0.15, # 15% bandwidth
kernel='triangular'
)
print("Regression Discontinuity Results:")
print(f"\n Bandwidth: {result.bandwidth:.3f}")
print(f" Observations below cutoff: {result.n_left}")
print(f" Observations above cutoff: {result.n_right}")
print(f"\n Treatment effect (LATE): {result.treatment_effect:.3f}")
print(f" Standard error: {result.se:.3f}")
print(f" t-statistic: {result.t_stat:.3f}")
print(f" p-value: {result.p_value:.4f}")
if result.p_value < 0.05:
print(f"\n βœ“ Winning election causes increase in military spending")
print(f" (true effect: {true_effect:.3f})")
else:
print("\n βœ— Effect not statistically significant")
print("\nβœ“ RDD exploits threshold-based treatment assignment!")
def demo_instrumental_variables():
"""Demonstrate Instrumental Variables."""
print("\n" + "="*80)
print("6. Instrumental Variables (IV) - Trade and Conflict")
print("="*80)
# Scenario: Effect of trade on conflict (trade is endogenous)
print("\nScenario: Does trade reduce conflict?")
print("Problem: Trade is endogenous (reverse causality, omitted variables)")
print("Instrument: Geographic distance to major trade routes\n")
np.random.seed(42)
n = 300
# Instrument: distance (exogenous)
distance = np.random.uniform(100, 1000, n)
# Unobserved confounders
unobserved = np.random.randn(n)
# Trade (endogenous): affected by distance and confounders
trade = 50 - 0.03 * distance + 2.0 * unobserved + np.random.randn(n) * 5
# Conflict: true effect of trade = -0.15, but also affected by confounders
true_effect = -0.15
conflict = 10 + true_effect * trade - 1.5 * unobserved + np.random.randn(n) * 2
# Estimate with IV
iv = InstrumentalVariables()
result = iv.estimate_2sls(
outcome=conflict,
endogenous=trade,
instrument=distance
)
print("Instrumental Variables (2SLS) Results:")
print(f"\n First stage F-statistic: {result.first_stage_f:.2f}")
if result.weak_instrument:
print(" ⚠ Warning: Weak instrument (F < 10)")
else:
print(" βœ“ Strong instrument (F > 10)")
print(f"\n OLS estimate (biased): {result.beta_ols[0]:.4f}")
print(f" IV estimate (consistent): {result.beta_iv[0]:.4f}")
print(f" IV standard error: {result.se_iv[0]:.4f}")
print(f"\n True causal effect: {true_effect:.4f}")
# Hausman test (informal)
if abs(result.beta_ols[0] - result.beta_iv[0]) > 0.05:
print("\n βœ“ OLS and IV differ substantially β†’ endogeneity present")
print(" IV corrects for bias!")
else:
print("\n OLS and IV similar β†’ endogeneity may be small")
print("\nβœ“ IV isolates causal effect using exogenous variation!")
def demo_dynamic_factor_model():
"""Demonstrate Dynamic Factor Model for nowcasting."""
print("\n" + "="*80)
print("7. Dynamic Factor Model (DFM) - High-Dimensional Nowcasting")
print("="*80)
# Scenario: Nowcast geopolitical tension from many indicators
print("\nScenario: Nowcast regional tension from 50 economic/political indicators")
print("DFM extracts common latent factors driving all indicators\n")
np.random.seed(42)
T = 200
n_indicators = 50
n_factors = 3
# True factors (latent tensions)
true_factors = np.zeros((T, n_factors))
for k in range(n_factors):
# AR(1) dynamics
for t in range(1, T):
true_factors[t, k] = 0.8 * true_factors[t-1, k] + np.random.randn() * 0.5
# Factor loadings (how indicators load on factors)
true_loadings = np.random.randn(n_indicators, n_factors)
# Observed indicators = factors * loadings + idiosyncratic noise
data = true_factors @ true_loadings.T + np.random.randn(T, n_indicators) * 0.5
# Fit DFM
dfm = DynamicFactorModel(n_factors=3, n_lags=1)
model = dfm.fit(data)
print(f"Dynamic Factor Model Results:")
print(f"\n Number of indicators: {n_indicators}")
print(f" Number of factors: {n_factors}")
print(f" Explained variance: {model['explained_variance_ratio']:.1%}")
# Extracted factors
factors = model['factors']
print(f"\n Extracted factor dimensions: {factors.shape}")
print(f" Factor 1 final value: {factors[-1, 0]:.3f}")
print(f" Factor 2 final value: {factors[-1, 1]:.3f}")
print(f" Factor 3 final value: {factors[-1, 2]:.3f}")
# Forecast
forecast = dfm.forecast(model, steps=10)
print(f"\n 10-step ahead forecast dimensions: {forecast.shape}")
print(f" Average forecasted indicator value: {np.mean(forecast[-1]):.3f}")
# Correlation with true factors
corr_0 = np.corrcoef(true_factors[:, 0], factors[:, 0])[0, 1]
print(f"\n Factor recovery (correlation with true): {abs(corr_0):.3f}")
print("\nβœ“ DFM reduces dimensionality while preserving information!")
def main():
"""Run all demonstrations of final features."""
print("=" * 80)
print("GeoBotv1 - COMPLETE FRAMEWORK DEMONSTRATION")
print("=" * 80)
print("\nThis example showcases the final components that complete GeoBotv1:")
print("β€’ Vector Autoregression (VAR/SVAR/DFM)")
print("β€’ Hawkes Processes for conflict contagion")
print("β€’ Quasi-Experimental Causal Inference")
print(" - Synthetic Control Method")
print(" - Difference-in-Differences")
print(" - Regression Discontinuity Design")
print(" - Instrumental Variables")
# Run all demonstrations
demo_var_model()
demo_hawkes_process()
demo_synthetic_control()
demo_difference_in_differences()
demo_regression_discontinuity()
demo_instrumental_variables()
demo_dynamic_factor_model()
print("\n" + "=" * 80)
print("GeoBotv1 Framework is NOW 100% COMPLETE!")
print("=" * 80)
print("\nπŸŽ‰ All Research-Grade Mathematical Components Implemented:")
print("\nπŸ“Š CORE FRAMEWORKS:")
print(" βœ“ Optimal Transport (Wasserstein, Kantorovich, Sinkhorn)")
print(" βœ“ Causal Inference (DAGs, SCMs, Do-Calculus)")
print(" βœ“ Bayesian Inference (MCMC, Particle Filters, VI)")
print(" βœ“ Stochastic Processes (SDEs, Jump-Diffusion)")
print(" βœ“ Time-Series Models (Kalman, HMM, VAR, Hawkes)")
print(" βœ“ Quasi-Experimental Methods (SCM, DiD, RDD, IV)")
print(" βœ“ Machine Learning (GNNs, Risk Scoring, Embeddings)")
print("\nπŸ“ˆ SPECIALIZED CAPABILITIES:")
print(" βœ“ Multi-country interdependency modeling (VAR)")
print(" βœ“ Conflict contagion and escalation (Hawkes)")
print(" βœ“ Policy counterfactuals (Synthetic Control)")
print(" βœ“ Regime change effects (Difference-in-Differences)")
print(" βœ“ Election outcomes impact (Regression Discontinuity)")
print(" βœ“ Trade-conflict nexus (Instrumental Variables)")
print(" βœ“ High-dimensional nowcasting (Dynamic Factor Models)")
print("\nπŸ”¬ MATHEMATICAL RIGOR:")
print(" βœ“ Measure-theoretic probability foundations")
print(" βœ“ Continuous-time dynamics (SDEs)")
print(" βœ“ Causal identification strategies")
print(" βœ“ Structural econometric methods")
print(" βœ“ Point process theory")
print(" βœ“ Optimal transport geometry")
print("\nπŸ’‘ GeoBotv1 is ready for production geopolitical forecasting!")
print("=" * 80 + "\n")
if __name__ == "__main__":
main()