"""
Bayesian Inference Engine

Provides a principled way to update beliefs as new intelligence, rumors,
events, or data arrive.

Components:
- Priors: baseline beliefs before new evidence is seen
- Likelihood: how probable the observed evidence is under each candidate belief
- Posteriors: updated beliefs after combining prior and likelihood

Necessary for:
- Real-time updates
- Intelligence feeds
- Event-driven recalibration
- Uncertainty tracking

Combining Monte Carlo simulation with Bayesian updating yields forecasts that
carry explicit, continuously recalibrated uncertainty.
"""

import numpy as np
import pandas as pd
from typing import Dict, List, Optional, Callable, Any, Tuple
from dataclasses import dataclass
from scipy import stats


@dataclass
class Prior:
    """
    Represents a prior distribution.

    Attributes
    ----------
    name : str
        Name of the variable
    distribution : Any
        Prior distribution (scipy.stats distribution)
    parameters : dict
        Distribution parameters
    """
    name: str
    distribution: Any
    parameters: Dict[str, float]

    def sample(self, n_samples: int = 1) -> np.ndarray:
        """
        Sample from prior.

        Parameters
        ----------
        n_samples : int
            Number of samples

        Returns
        -------
        np.ndarray
            Samples from prior
        """
        return self.distribution.rvs(size=n_samples, **self.parameters)

    def pdf(self, x: np.ndarray) -> np.ndarray:
        """
        Evaluate prior probability density.

        Parameters
        ----------
        x : np.ndarray
            Points to evaluate

        Returns
        -------
        np.ndarray
            Probability densities
        """
        return self.distribution.pdf(x, **self.parameters)

    def log_pdf(self, x: np.ndarray) -> np.ndarray:
        """
        Evaluate log prior probability density.

        Parameters
        ----------
        x : np.ndarray
            Points to evaluate

        Returns
        -------
        np.ndarray
            Log probability densities
        """
        return self.distribution.logpdf(x, **self.parameters)
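

# Illustrative sketch (not part of the original API): how a Prior might be built
# and queried. The helper name `_example_prior_usage` and the Beta(2, 5) numbers
# are assumptions for demonstration only.
def _example_prior_usage() -> np.ndarray:
    """Build a Beta(2, 5) prior over a probability and draw samples from it."""
    prior = Prior(
        name='coup_probability',
        distribution=stats.beta,
        parameters={'a': 2.0, 'b': 5.0},
    )
    _ = prior.pdf(np.array([0.2, 0.5]))  # prior densities at two candidate values
    return prior.sample(1000)            # 1000 Monte Carlo draws from the prior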


@dataclass
class Evidence:
    """
    Represents evidence/observation.

    Attributes
    ----------
    observation : Any
        Observed data
    likelihood_fn : Callable
        Likelihood function mapping (candidate values, observation) to likelihood values
    timestamp : float
        Time of observation
    confidence : float
        Confidence in observation (0-1)
    """
    observation: Any
    likelihood_fn: Callable
    timestamp: float
    confidence: float = 1.0
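

# Illustrative sketch (not part of the original API): a Gaussian likelihood is one
# common choice for `likelihood_fn`. Given candidate values `x` and an observation
# `obs`, it returns how probable that observation is under each candidate value.
# The helper name `_example_gaussian_evidence` and the noise scale are assumptions.
def _example_gaussian_evidence(observation: float, noise_std: float = 0.1) -> Evidence:
    """Wrap a noisy scalar observation as Evidence with a Gaussian likelihood."""
    def likelihood_fn(x: np.ndarray, obs: float) -> np.ndarray:
        # Density of observing `obs` if the true value were `x` (vectorized over x).
        return stats.norm.pdf(obs, loc=x, scale=noise_std)

    return Evidence(
        observation=observation,
        likelihood_fn=likelihood_fn,
        timestamp=pd.Timestamp.now().timestamp(),
        confidence=0.9,  # arbitrary illustrative confidence
    )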


class BayesianEngine:
    """
    Bayesian inference engine for belief updating.

    This engine maintains and updates probability distributions
    as new evidence arrives, enabling real-time forecasting with
    uncertainty quantification.
    """

    def __init__(self):
        """Initialize Bayesian engine."""
        self.priors: Dict[str, Prior] = {}
        self.posteriors: Dict[str, np.ndarray] = {}
        self.evidence_history: List[Evidence] = []

    def set_prior(self, prior: Prior) -> None:
        """
        Set prior distribution for a variable.

        Parameters
        ----------
        prior : Prior
            Prior distribution
        """
        self.priors[prior.name] = prior

    def update(
        self,
        variable: str,
        evidence: Evidence,
        method: str = 'grid'
    ) -> np.ndarray:
        """
        Update beliefs given evidence.

        Parameters
        ----------
        variable : str
            Variable to update
        evidence : Evidence
            New evidence
        method : str
            Update method ('grid', 'mcmc', 'analytical')

        Returns
        -------
        np.ndarray
            Posterior samples
        """
        if variable not in self.priors:
            raise ValueError(f"No prior set for {variable}")

        self.evidence_history.append(evidence)

        if method == 'grid':
            posterior = self._grid_update(variable, evidence)
        elif method == 'mcmc':
            posterior = self._mcmc_update(variable, evidence)
        elif method == 'analytical':
            posterior = self._analytical_update(variable, evidence)
        else:
            raise ValueError(f"Unknown method: {method}")

        self.posteriors[variable] = posterior
        return posterior

    def _grid_update(self, variable: str, evidence: Evidence) -> np.ndarray:
        """
        Grid approximation for Bayesian update.

        Parameters
        ----------
        variable : str
            Variable name
        evidence : Evidence
            Evidence

        Returns
        -------
        np.ndarray
            Posterior samples
        """
        prior = self.priors[variable]

        # Resolution of the grid over the parameter space
        n_grid = 1000

        # Choose grid bounds that cover the bulk of the prior's support
        dist_name = prior.distribution.name if hasattr(prior.distribution, 'name') else 'unknown'

        if dist_name == 'beta':
            # Beta distributions live on [0, 1]
            grid = np.linspace(0, 1, n_grid)
        elif dist_name == 'gamma':
            # Cover roughly the prior mean plus four standard deviations
            shape = prior.parameters.get('a', 1)
            scale = prior.parameters.get('scale', 1)
            mean = shape * scale
            std = np.sqrt(shape) * scale
            grid = np.linspace(0, mean + 4 * std, n_grid)
        elif dist_name == 'uniform':
            # Uniform support is [loc, loc + scale]
            loc = prior.parameters.get('loc', 0)
            scale = prior.parameters.get('scale', 1)
            grid = np.linspace(loc, loc + scale, n_grid)
        else:
            # Default: assume a roughly normal prior and cover +/- 4 standard deviations
            mean = prior.parameters.get('loc', 0)
            std = prior.parameters.get('scale', 1)
            grid = np.linspace(mean - 4 * std, mean + 4 * std, n_grid)

        # Evaluate prior and likelihood on the grid
        prior_vals = prior.pdf(grid)
        likelihood_vals = evidence.likelihood_fn(grid, evidence.observation)

        # Temper the likelihood by the confidence in the observation
        likelihood_vals = likelihood_vals ** evidence.confidence

        # Unnormalized posterior: prior times (tempered) likelihood.
        # Note: each call combines the original prior with the latest evidence only;
        # earlier evidence is stored in evidence_history but not re-applied here.
        posterior_vals = prior_vals * likelihood_vals

        # Normalize so the grid values form a proper probability vector
        posterior_vals /= posterior_vals.sum()

        # Draw posterior samples by resampling grid points with posterior weights
        n_samples = 10000
        posterior_samples = np.random.choice(grid, size=n_samples, p=posterior_vals)

        return posterior_samples

    def _mcmc_update(
        self,
        variable: str,
        evidence: Evidence,
        n_samples: int = 10000
    ) -> np.ndarray:
        """
        MCMC-based Bayesian update (random-walk Metropolis).

        Parameters
        ----------
        variable : str
            Variable name
        evidence : Evidence
            Evidence
        n_samples : int
            Number of MCMC samples

        Returns
        -------
        np.ndarray
            Posterior samples
        """
        prior = self.priors[variable]

        def log_posterior(x):
            # Log of (prior * tempered likelihood); the small constant guards against log(0)
            log_prior = prior.log_pdf(np.array([x]))[0]
            log_likelihood = np.log(evidence.likelihood_fn(np.array([x]), evidence.observation)[0] + 1e-10)
            return log_prior + evidence.confidence * log_likelihood

        # Random-walk Metropolis sampler, initialized from a draw from the prior
        samples = []
        current = prior.sample(1)[0]
        current_log_p = log_posterior(current)

        for _ in range(n_samples):
            # Propose a small Gaussian step around the current state
            proposal = current + np.random.normal(0, 0.1)
            proposal_log_p = log_posterior(proposal)

            # Metropolis acceptance: accept with probability min(1, posterior ratio)
            log_alpha = proposal_log_p - current_log_p
            if np.log(np.random.uniform()) < log_alpha:
                current = proposal
                current_log_p = proposal_log_p

            samples.append(current)

        # No burn-in or thinning is applied; early samples may still reflect the start point
        return np.array(samples)

    def _analytical_update(self, variable: str, evidence: Evidence) -> np.ndarray:
        """
        Analytical Bayesian update (for conjugate priors).

        Conjugate closed-form updates are not implemented yet; this currently
        falls back to the grid approximation.

        Parameters
        ----------
        variable : str
            Variable name
        evidence : Evidence
            Evidence

        Returns
        -------
        np.ndarray
            Posterior samples
        """
        # Placeholder: defer to the grid approximation until conjugate updates are added
        return self._grid_update(variable, evidence)

    def get_posterior_summary(self, variable: str) -> Dict[str, float]:
        """
        Get summary statistics of posterior.

        Parameters
        ----------
        variable : str
            Variable name

        Returns
        -------
        dict
            Summary statistics
        """
        if variable not in self.posteriors:
            raise ValueError(f"No posterior for {variable}")

        samples = self.posteriors[variable]

        return {
            'mean': np.mean(samples),
            'median': np.median(samples),
            'std': np.std(samples),
            'q5': np.percentile(samples, 5),
            'q25': np.percentile(samples, 25),
            'q75': np.percentile(samples, 75),
            'q95': np.percentile(samples, 95)
        }

    def get_credible_interval(
        self,
        variable: str,
        alpha: float = 0.05
    ) -> Tuple[float, float]:
        """
        Get credible interval for posterior.

        Parameters
        ----------
        variable : str
            Variable name
        alpha : float
            Significance level (0.05 gives a 95% credible interval)

        Returns
        -------
        tuple
            (lower, upper) bounds of credible interval
        """
        if variable not in self.posteriors:
            raise ValueError(f"No posterior for {variable}")

        samples = self.posteriors[variable]
        lower = np.percentile(samples, 100 * alpha / 2)
        upper = np.percentile(samples, 100 * (1 - alpha / 2))

        return lower, upper

    def compute_bayes_factor(
        self,
        variable: str,
        hypothesis1: Callable,
        hypothesis2: Callable
    ) -> float:
        """
        Compute the posterior odds of two hypotheses about the variable.

        This is the ratio of posterior probabilities P(H1 | data) / P(H2 | data),
        which equals the Bayes factor when the two hypotheses have equal prior
        probability.

        Parameters
        ----------
        variable : str
            Variable name
        hypothesis1 : callable
            First hypothesis (maps a sample to bool)
        hypothesis2 : callable
            Second hypothesis (maps a sample to bool)

        Returns
        -------
        float
            Odds ratio (> 1 favors hypothesis1)
        """
        if variable not in self.posteriors:
            raise ValueError(f"No posterior for {variable}")

        samples = self.posteriors[variable]

        # Estimate each hypothesis' posterior probability as the fraction of samples satisfying it
        p1 = np.mean([hypothesis1(x) for x in samples])
        p2 = np.mean([hypothesis2(x) for x in samples])

        if p2 == 0:
            return np.inf
        return p1 / p2
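

# Illustrative sketch (not part of the original API): a single grid-based update of
# a Beta prior after a report near 0.7. The helper name `_example_engine_update`
# and all numeric values are assumptions for demonstration only.
def _example_engine_update() -> Dict[str, float]:
    """Run one grid update on a toy belief and return the posterior summary."""
    engine = BayesianEngine()
    engine.set_prior(Prior(
        name='escalation_risk',
        distribution=stats.beta,
        parameters={'a': 2.0, 'b': 8.0},  # prior mean 0.2
    ))

    def likelihood_fn(x: np.ndarray, obs: float) -> np.ndarray:
        # Gaussian measurement model centered on the reported value.
        return stats.norm.pdf(obs, loc=x, scale=0.1)

    evidence = Evidence(
        observation=0.7,
        likelihood_fn=likelihood_fn,
        timestamp=pd.Timestamp.now().timestamp(),
        confidence=0.8,
    )
    engine.update('escalation_risk', evidence, method='grid')
    return engine.get_posterior_summary('escalation_risk')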


class BeliefUpdater:
    """
    High-level interface for updating geopolitical beliefs.

    This class provides domain-specific methods for updating
    beliefs based on intelligence, events, and rumors.
    """

    def __init__(self):
        """Initialize belief updater."""
        self.engine = BayesianEngine()
        self.beliefs: Dict[str, Dict[str, Any]] = {}

    def initialize_belief(
        self,
        name: str,
        prior_mean: float,
        prior_std: float,
        belief_type: str = 'continuous'
    ) -> None:
        """
        Initialize a belief with a prior.

        Parameters
        ----------
        name : str
            Belief name
        prior_mean : float
            Prior mean
        prior_std : float
            Prior standard deviation
        belief_type : str
            Type of belief ('continuous', 'probability')
        """
        if belief_type == 'continuous':
            distribution = stats.norm
            parameters = {'loc': prior_mean, 'scale': prior_std}
        elif belief_type == 'probability':
            # Beta prior on [0, 1]; convert mean/std to shape parameters
            # via the method of moments.
            mean = np.clip(prior_mean, 0.01, 0.99)
            # Cap the variance so the implied Beta shape parameters stay positive.
            var = min(prior_std ** 2, 0.99 * mean * (1 - mean))
            alpha = mean * (mean * (1 - mean) / var - 1)
            beta = (1 - mean) * (mean * (1 - mean) / var - 1)
            distribution = stats.beta

            parameters = {'a': alpha, 'b': beta}
        else:
            distribution = stats.norm
            parameters = {'loc': prior_mean, 'scale': prior_std}

        prior = Prior(
            name=name,
            distribution=distribution,
            parameters=parameters
        )

        self.engine.set_prior(prior)
        self.beliefs[name] = {
            'type': belief_type,
            'initialized': True
        }

    def update_from_intelligence(
        self,
        belief: str,
        observation: float,
        reliability: float = 0.8
    ) -> Dict[str, float]:
        """
        Update belief from an intelligence report.

        Parameters
        ----------
        belief : str
            Belief name
        observation : float
            Observed value from intelligence
        reliability : float
            Reliability of intelligence source (0-1)

        Returns
        -------
        dict
            Posterior summary
        """
        def likelihood_fn(x, obs):
            # More reliable sources imply a tighter (lower-variance) likelihood
            # around the reported value.
            std = 1.0 / reliability
            return stats.norm.pdf(x, loc=obs, scale=std)

        evidence = Evidence(
            observation=observation,
            likelihood_fn=likelihood_fn,
            timestamp=pd.Timestamp.now().timestamp(),
            confidence=reliability
        )

        self.engine.update(belief, evidence, method='grid')
        return self.engine.get_posterior_summary(belief)

    def update_from_event(
        self,
        belief: str,
        event_impact: float,
        event_certainty: float = 1.0
    ) -> Dict[str, float]:
        """
        Update belief from an observed event.

        Parameters
        ----------
        belief : str
            Belief name
        event_impact : float
            Impact of event (shift in belief)
        event_certainty : float
            Certainty that event occurred (0-1)

        Returns
        -------
        dict
            Posterior summary
        """
        # Anchor the evidence at the current belief shifted by the event impact
        if belief not in self.engine.posteriors:
            # No posterior yet, so start from samples of the prior
            current_samples = self.engine.priors[belief].sample(10000)
        else:
            current_samples = self.engine.posteriors[belief]

        current_mean = np.mean(current_samples)

        # Treat the event as evidence centered on the shifted belief
        observation = current_mean + event_impact

        def likelihood_fn(x, obs):
            # Narrow likelihood around the shifted value; the floor keeps the
            # scale positive when event_impact is zero.
            return stats.norm.pdf(x, loc=obs, scale=max(abs(event_impact) * 0.1, 1e-3))

        evidence = Evidence(
            observation=observation,
            likelihood_fn=likelihood_fn,
            timestamp=pd.Timestamp.now().timestamp(),
            confidence=event_certainty
        )

        self.engine.update(belief, evidence, method='grid')
        return self.engine.get_posterior_summary(belief)

    def get_belief_probability(
        self,
        belief: str,
        threshold: float,
        direction: str = 'greater'
    ) -> float:
        """
        Get probability that belief exceeds threshold.

        Parameters
        ----------
        belief : str
            Belief name
        threshold : float
            Threshold value
        direction : str
            'greater' or 'less'

        Returns
        -------
        float
            Probability
        """
        if belief not in self.engine.posteriors:
            samples = self.engine.priors[belief].sample(10000)
        else:
            samples = self.engine.posteriors[belief]

        if direction == 'greater':
            return np.mean(samples > threshold)
        else:
            return np.mean(samples < threshold)

    def compare_beliefs(self, belief1: str, belief2: str) -> Dict[str, float]:
        """
        Compare two beliefs.

        Parameters
        ----------
        belief1 : str
            First belief
        belief2 : str
            Second belief

        Returns
        -------
        dict
            Comparison results
        """
        if belief1 not in self.engine.posteriors:
            samples1 = self.engine.priors[belief1].sample(10000)
        else:
            samples1 = self.engine.posteriors[belief1]

        if belief2 not in self.engine.posteriors:
            samples2 = self.engine.priors[belief2].sample(10000)
        else:
            samples2 = self.engine.posteriors[belief2]

        # Elementwise comparisons assume both sample arrays have the same length
        # (10000 by default for the prior draws and the grid/MCMC posteriors).
        return {
            'p_belief1_greater': np.mean(samples1 > samples2),
            'mean_difference': np.mean(samples1 - samples2),
            'correlation': np.corrcoef(samples1, samples2)[0, 1]
        }
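

if __name__ == '__main__':
    # Illustrative usage sketch (assumed workflow, not part of the original module):
    # initialize a probability-type belief, fold in one intelligence report and one
    # observed event, then read off the posterior summary and a tail probability.
    updater = BeliefUpdater()
    updater.initialize_belief(
        name='conflict_probability',
        prior_mean=0.2,
        prior_std=0.1,
        belief_type='probability',
    )

    summary = updater.update_from_intelligence(
        belief='conflict_probability',
        observation=0.45,
        reliability=0.7,
    )
    print('After intelligence report:', summary)

    summary = updater.update_from_event(
        belief='conflict_probability',
        event_impact=0.1,
        event_certainty=0.9,
    )
    print('After event:', summary)

    p_high = updater.get_belief_probability(
        belief='conflict_probability',
        threshold=0.5,
        direction='greater',
    )
    print(f'P(conflict_probability > 0.5) = {p_high:.3f}')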