| | import pandas as pd
|
| |
|
| | from core.estimation.inference.ci import (
|
| | ci_mean_analytic,
|
| | ci_median_analytic,
|
| | ci_deviation_analytic,
|
| | ci_mean_bootstrap,
|
| | ci_median_bootstrap,
|
| | ci_deviation_bootstrap,
|
| | )
|
| |
|
| | from core.estimation.inference.pi import (
|
| | pi_mean,
|
| | pi_median,
|
| | pi_iqr,
|
| | pi_bootstrap,
|
| | )
|
| |
|
| | from core.estimation.inference.confidence_regions import confidence_regions
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | def select_distribution(mean_estimator: str, sigma_estimator: str) -> str:
|
| | if mean_estimator == "Sample Mean" and sigma_estimator == "Deviation (1 ddof)":
|
| | return "t"
|
| | return "norm"
|
| |
|
| |
|
| | def validate_deviation_estimator(*, sigma_estimator: str, n: int):
|
| | if sigma_estimator == "Range (bias corrected)" and n > 25:
|
| | raise ValueError(
|
| | "Range-based confidence intervals require n ≤ 25. "
|
| | "Use another estimator or bootstrap."
|
| | )
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | def run_confidence_intervals(
|
| | *,
|
| | data,
|
| | alpha,
|
| | mean_estimator,
|
| | median_estimator,
|
| | sigma_estimator,
|
| | trim_param=None,
|
| | winsor_limits=None,
|
| | weights=None,
|
| | bootstrap_mean=False,
|
| | bootstrap_median=False,
|
| | bootstrap_deviation=False,
|
| | bootstrap_samples=1000,
|
| | ):
|
| | n = len(data)
|
| |
|
| | validate_deviation_estimator(
|
| | sigma_estimator=sigma_estimator,
|
| | n=n,
|
| | )
|
| |
|
| | dist = select_distribution(mean_estimator, sigma_estimator)
|
| |
|
| |
|
| | if bootstrap_mean:
|
| | mean_ci = ci_mean_bootstrap(
|
| | data=data,
|
| | estimator=mean_estimator,
|
| | alpha=alpha,
|
| | B=bootstrap_samples,
|
| | trim_param=trim_param,
|
| | winsor_limits=winsor_limits,
|
| | weights=weights,
|
| | )
|
| | else:
|
| | mean_ci = ci_mean_analytic(
|
| | data=data,
|
| | estimator=mean_estimator,
|
| | alpha=alpha,
|
| | dist=dist,
|
| | sigma_estimator=sigma_estimator,
|
| | trim_param=trim_param,
|
| | winsor_limits=winsor_limits,
|
| | weights=weights,
|
| | )
|
| |
|
| |
|
| | if bootstrap_median:
|
| | median_ci = ci_median_bootstrap(
|
| | data=data,
|
| | alpha=alpha,
|
| | B=bootstrap_samples,
|
| | )
|
| | else:
|
| | median_ci = ci_median_analytic(
|
| | data=data,
|
| | alpha=alpha,
|
| | sigma_estimator=sigma_estimator,
|
| | )
|
| |
|
| |
|
| | if bootstrap_deviation:
|
| | sigma_ci = ci_deviation_bootstrap(
|
| | data=data,
|
| | alpha=alpha,
|
| | B=bootstrap_samples,
|
| | estimator=sigma_estimator,
|
| | )
|
| | else:
|
| | sigma_ci = ci_deviation_analytic(
|
| | data=data,
|
| | alpha=alpha,
|
| | estimator=sigma_estimator,
|
| | )
|
| |
|
| | table = pd.DataFrame(
|
| | [
|
| | ["Confidence", "Mean", *mean_ci],
|
| | ["Confidence", "Median", *median_ci],
|
| | ["Confidence", "Deviation", *sigma_ci],
|
| | ],
|
| | columns=["Interval Type", "Statistic", "Lower", "Upper"],
|
| | )
|
| |
|
| | return table, mean_ci, sigma_ci, median_ci
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | def run_prediction_intervals(
|
| | *,
|
| | data,
|
| | alpha,
|
| | mean_estimator,
|
| | median_estimator,
|
| | sigma_estimator,
|
| | trim_param=None,
|
| | winsor_limits=None,
|
| | weights=None,
|
| | bootstrap=False,
|
| | bootstrap_samples=1000,
|
| | ):
|
| | dist = select_distribution(mean_estimator, sigma_estimator)
|
| |
|
| | rows = []
|
| |
|
| |
|
| | mean_pi = pi_mean(
|
| | data=data,
|
| | alpha=alpha,
|
| | estimator=mean_estimator,
|
| | dist=dist,
|
| | sigma_estimator=sigma_estimator,
|
| | trim_param=trim_param,
|
| | winsor_limits=winsor_limits,
|
| | weights=weights,
|
| | )
|
| | rows.append(["Prediction", "Mean", *mean_pi])
|
| |
|
| |
|
| | median_pi = pi_median(
|
| | data=data,
|
| | alpha=alpha,
|
| | sigma_estimator=sigma_estimator,
|
| | )
|
| | rows.append(["Prediction", "Median", *median_pi])
|
| |
|
| |
|
| | iqr_pi = pi_iqr(
|
| | data=data,
|
| | alpha=alpha,
|
| | )
|
| | rows.append(["Prediction", "IQR", *iqr_pi])
|
| |
|
| |
|
| | if bootstrap:
|
| | boot_pi = pi_bootstrap(
|
| | data=data,
|
| | alpha=alpha,
|
| | B=bootstrap_samples,
|
| | )
|
| | rows.append(["Prediction", "Bootstrap", *boot_pi])
|
| |
|
| | return pd.DataFrame(
|
| | rows,
|
| | columns=["Interval Type", "Statistic", "Lower", "Upper"],
|
| | )
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | def run_confidence_regions(
|
| | *,
|
| | data,
|
| | alpha,
|
| | mean_estimator,
|
| | median_estimator,
|
| | sigma_estimator,
|
| | trim_param,
|
| | winsor_limits,
|
| | weights,
|
| | bootstrap_mean,
|
| | bootstrap_median,
|
| | bootstrap_deviation,
|
| | bootstrap_samples,
|
| | mu_ci_source,
|
| | probs,
|
| | eps_mu,
|
| | eps_sigma,
|
| | add_ci_box,
|
| | ):
|
| | """
|
| | Use the CI machinery to compute CIs for mean, median and deviation,
|
| | then choose which CI to use for μ (mean-based or median-based) and
|
| | pass that CI plus the σ CI into the likelihood-based confidence
|
| | regions function.
|
| | """
|
| |
|
| | ci_table, mean_ci, sigma_ci, median_ci = run_confidence_intervals(
|
| | data=data,
|
| | alpha=alpha,
|
| | mean_estimator=mean_estimator,
|
| | median_estimator=median_estimator,
|
| | sigma_estimator=sigma_estimator,
|
| | trim_param=trim_param,
|
| | winsor_limits=winsor_limits,
|
| | weights=weights,
|
| | bootstrap_mean=bootstrap_mean,
|
| | bootstrap_median=bootstrap_median,
|
| | bootstrap_deviation=bootstrap_deviation,
|
| | bootstrap_samples=bootstrap_samples,
|
| | )
|
| |
|
| | if mu_ci_source == "Median-based CI":
|
| | mu_ci = median_ci
|
| | else:
|
| |
|
| | mu_ci = mean_ci
|
| |
|
| | fig = confidence_regions(
|
| | data=data,
|
| | mean_ci=mu_ci,
|
| | sigma_ci=sigma_ci,
|
| | probs=probs,
|
| | eps_mu=eps_mu,
|
| | eps_sigma=eps_sigma,
|
| | add_ci_box=add_ci_box,
|
| | )
|
| |
|
| | return fig
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | def run_intervals(
|
| | *,
|
| | data,
|
| | alpha,
|
| | mean_estimator,
|
| | median_estimator,
|
| | sigma_estimator,
|
| | bootstrap_mean,
|
| | bootstrap_median,
|
| | bootstrap_deviation,
|
| | bootstrap_samples,
|
| | ):
|
| | ci_table, mean_ci, sigma_ci = run_confidence_intervals(
|
| | data=data,
|
| | alpha=alpha,
|
| | mean_estimator=mean_estimator,
|
| | median_estimator=median_estimator,
|
| | sigma_estimator=sigma_estimator,
|
| | bootstrap_mean=bootstrap_mean,
|
| | bootstrap_median=bootstrap_median,
|
| | bootstrap_deviation=bootstrap_deviation,
|
| | bootstrap_samples=bootstrap_samples,
|
| | )
|
| |
|
| | pi_table = run_prediction_intervals(
|
| | data=data,
|
| | alpha=alpha,
|
| | mean_estimator=mean_estimator,
|
| | median_estimator=median_estimator,
|
| | sigma_estimator=sigma_estimator,
|
| | bootstrap=bootstrap_mean,
|
| | bootstrap_samples=bootstrap_samples,
|
| | )
|
| |
|
| | combined = pd.concat([ci_table, pi_table], ignore_index=True)
|
| |
|
| | return ci_table, pi_table, combined
|
| |
|