from abc import ABC, abstractmethod

import numpy as np


class YFDistribution(ABC):
    """Abstract class that defines a distribution over Y and F random variables.

    Subclasses provide the means and variances of F and Y, the covariance of
    F and Y, and the covariances involving the squared variables.
    ``correlation`` is derived from those quantities.
    """

    @abstractmethod
    def mu_f(self) -> float:
        """Calculate the mean of F."""
        pass

    @abstractmethod
    def mu_y(self) -> float:
        """Calculate the mean of Y."""
        pass

    @abstractmethod
    def variance_f(self) -> float:
        """Calculate the variance of F."""
        pass

    @abstractmethod
    def variance_y(self) -> float:
        """Calculate the variance of Y."""
        pass

    @abstractmethod
    def covariance_f_y(self) -> float:
        """Calculate the covariance of F and Y."""
        pass

    @abstractmethod
    def covariance_f2_y(self) -> float:
        """Calculate the covariance of F^2 and Y."""
        pass

    @abstractmethod
    def covariance_f_y2(self) -> float:
        """Calculate the covariance of F and Y^2."""
        pass

    @abstractmethod
    def covariance_f2_y2(self) -> float:
        """Calculate the covariance of F^2 and Y^2."""
        pass

    def correlation(self) -> float:
        """Calculate the correlation of F and Y.

        Returns:
            Cov(F, Y) / sqrt(Var(F) * Var(Y)).

        Note:
            No guard is applied for a zero variance; the quotient is
            undefined in that case.
        """
        return self.covariance_f_y() / np.sqrt(self.variance_f() * self.variance_y())


class MultivariateGaussianYFDistribution(YFDistribution):
    """Multivariate Gaussian distribution for Y and F random variables."""

    def __init__(
        self, mu_y: float, mu_f: float, var_y: float, var_f: float, cov_y_f: float
    ) -> None:
        """
        Initialize multivariate Gaussian Y and F distribution.

        Args:
            mu_y: Mean of Y
            mu_f: Mean of F
            var_y: Variance of Y
            var_f: Variance of F
            cov_y_f: Covariance between Y and F

        Raises:
            ValueError: If ``var_y`` or ``var_f`` is not strictly positive,
                or if the implied correlation lies outside [-1, 1]. NaN
                variances and a NaN covariance are rejected as well.
        """
        # The checks are written in negated form on purpose: every ordering
        # comparison with NaN is False, so `var <= 0` would let NaN through,
        # whereas `not var > 0` rejects it.
        if not var_y > 0:
            raise ValueError("var_y must be positive")
        if not var_f > 0:
            raise ValueError("var_f must be positive")

        # Check that covariance matrix is positive semi-definite
        # (equivalently, that |correlation| <= 1).
        correlation = cov_y_f / (var_y * var_f) ** 0.5
        if not abs(correlation) <= 1:
            raise ValueError("Covariance matrix must be positive semi-definite")

        self._mu_y = mu_y
        self._mu_f = mu_f
        self._var_y = var_y
        self._var_f = var_f
        self._cov_y_f = cov_y_f

    def mu_f(self) -> float:
        """Return the mean of F supplied at construction."""
        return self._mu_f

    def mu_y(self) -> float:
        """Return the mean of Y supplied at construction."""
        return self._mu_y

    def variance_f(self) -> float:
        """Return the variance of F supplied at construction."""
        return self._var_f

    def variance_y(self) -> float:
        """Return the variance of Y supplied at construction."""
        return self._var_y

    def covariance_f_y(self) -> float:
        """Return the covariance of F and Y supplied at construction."""
        return self._cov_y_f

    def covariance_f2_y(self) -> float:
        """Calculate Cov(F^2,Y) for multivariate Gaussian."""
        # For multivariate Gaussian (X,Y), Cov(X^2,Y) = 2*μ_X*Cov(X,Y)
        return 2 * self._mu_f * self._cov_y_f

    def covariance_f_y2(self) -> float:
        """Calculate Cov(F,Y^2) for multivariate Gaussian."""
        # For multivariate Gaussian (X,Y), Cov(X,Y^2) = 2*μ_Y*Cov(X,Y)
        return 2 * self._mu_y * self._cov_y_f

    def covariance_f2_y2(self) -> float:
        """Calculate Cov(F^2,Y^2) for multivariate Gaussian."""
        # For multivariate Gaussian (X,Y),
        # Cov(X^2,Y^2) = 4*μ_X*μ_Y*Cov(X,Y) + 2*Cov(X,Y)^2
        return 4 * self._mu_f * self._mu_y * self._cov_y_f + 2 * self._cov_y_f**2


class BinaryYFDistribution(YFDistribution):
    """Concrete instance where Y and F are both binary random variables."""

    def __init__(self, p_f: float, p_y_given_f1: float, p_y_given_f0: float) -> None:
        """
        Initialize binary Y and F distribution.

        Args:
            p_f: Probability of F = 1
            p_y_given_f1: Probability of Y = 1 given F = 1
            p_y_given_f0: Probability of Y = 1 given F = 0

        Raises:
            ValueError: If any of the three probabilities is outside [0, 1].
        """
        if not (0 <= p_f <= 1):
            raise ValueError("p_f must be between 0 and 1")
        if not (0 <= p_y_given_f1 <= 1):
            raise ValueError("p_y_given_f1 must be between 0 and 1")
        if not (0 <= p_y_given_f0 <= 1):
            raise ValueError("p_y_given_f0 must be between 0 and 1")

        self.p_f = p_f
        self.p_y_given_f1 = p_y_given_f1
        self.p_y_given_f0 = p_y_given_f0

        # Calculate P(Y=1) using law of total probability
        self.p_y = p_y_given_f1 * p_f + p_y_given_f0 * (1 - p_f)

    def mu_f(self) -> float:
        """Return E[F], which equals P(F=1) for binary F."""
        return self.p_f

    def mu_y(self) -> float:
        """Return E[Y], which equals P(Y=1) for binary Y."""
        return self.p_y

    def variance_f(self) -> float:
        """Calculate Var(F) = P(F=1) * (1 - P(F=1)) for binary F."""
        return self.p_f * (1 - self.p_f)

    def variance_y(self) -> float:
        """Calculate Var(Y) = P(Y=1) * (1 - P(Y=1)) for binary Y."""
        return self.p_y * (1 - self.p_y)

    def covariance_f_y(self) -> float:
        """Calculate Cov(F,Y) = E[FY] - E[F]E[Y]."""
        # E[FY] = P(F=1, Y=1) = P(Y=1|F=1) * P(F=1)
        e_fy = self.p_y_given_f1 * self.p_f
        e_f = self.p_f
        e_y = self.p_y
        return e_fy - e_f * e_y

    def covariance_f2_y(self) -> float:
        """Calculate Cov(F^2,Y). For binary F: F^2 = F, so this equals Cov(F,Y)."""
        return self.covariance_f_y()

    def covariance_f_y2(self) -> float:
        """Calculate Cov(F,Y^2). For binary Y: Y^2 = Y, so this equals Cov(F,Y)."""
        return self.covariance_f_y()

    def covariance_f2_y2(self) -> float:
        """Calculate Cov(F^2,Y^2).

        For binary F,Y: F^2 = F, Y^2 = Y, so this equals Cov(F,Y).
        """
        return self.covariance_f_y()