Oguzz07
/

causal-discovery-algorithm-selection

Model card Files Files and versions

xet

Community

Oguzz07 committed 8 days ago

Commit

a47b09f

verified ·

1 Parent(s): eabf58d

Add causal_selection/benchmark.py

Browse files

Files changed (1) hide show

causal_selection/benchmark.py +249 -0

causal_selection/benchmark.py ADDED Viewed

	@@ -0,0 +1,249 @@

+"""
+Main benchmark runner: orchestrates data generation, algorithm runs, feature extraction,
+and result collection into a meta-dataset.
+"""
+import os
+import json
+import time
+import numpy as np
+import pandas as pd
+import logging
+import warnings
+from datetime import datetime
+from causal_selection.data.generator import (
+    load_bn_model, get_true_dag_adjmat, dag_to_cpdag, sample_dataset,
+    SMALL_NETWORKS, MEDIUM_NETWORKS, LARGE_NETWORKS, ALL_NETWORKS,
+    SAMPLE_SIZES, SEEDS_PER_CONFIG, get_network_tier
+)
+from causal_selection.discovery.algorithms import run_algorithm, ALGORITHM_POOL
+from causal_selection.discovery.evaluator import evaluate_algorithm_result
+from causal_selection.features.extractor import extract_all_features, FEATURE_NAMES
+warnings.filterwarnings('ignore')  # NOTE: silences every warning for the whole process once this module is imported
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
+logger = logging.getLogger(__name__)  # module-level logger used by every function below
+RESULTS_DIR = '/app/causal_selection/data/results'  # directory that receives the CSV/JSON outputs written by this module
+ALGO_NAMES = list(ALGORITHM_POOL.keys())  # algorithm names; also the column order of the SHD matrices
+# Timeout per algorithm per dataset (seconds), keyed by the tier returned by get_network_tier()
+TIMEOUT_MAP = {
+    'small': 60,      # 1 min for small networks
+    'medium': 180,    # 3 min for medium networks
+    'large': 300,     # 5 min for large networks
+}
+def run_single_config(network, n_samples, seed, timeout_sec=300):
+    """Run all algorithms on a single (network, n_samples, seed) configuration.
+    Returns:
+        dict with:
+            - 'meta_features': dict of feature values
+            - 'metrics': dict of algo_name -> metrics dict
+            - 'config': dict with network, n_samples, seed
+    """
+    logger.info(f"=== {network} N={n_samples} seed={seed} ===")
+    # Load network and ground truth
+    model = load_bn_model(network)
+    true_dag, node_names = get_true_dag_adjmat(model)
+    true_cpdag = dag_to_cpdag(true_dag)
+    # Sample data
+    t0 = time.time()
+    df = sample_dataset(model, n_samples, seed=seed)
+    sample_time = time.time() - t0
+    logger.info(f"  Sampled {df.shape} in {sample_time:.1f}s")
+    # Extract meta-features
+    t0 = time.time()
+    features = extract_all_features(df, n_probe_triplets=100)
+    feat_time = time.time() - t0
+    logger.info(f"  Extracted {len(features)} features in {feat_time:.1f}s")
+    # Run all algorithms
+    algo_metrics = {}
+    for algo_name in ALGO_NAMES:
+        t0 = time.time()
+        result = run_algorithm(algo_name, df, timeout_sec=timeout_sec)
+        metrics = evaluate_algorithm_result(result, true_cpdag)
+        algo_metrics[algo_name] = metrics
+        status_str = metrics['status']
+        if status_str == 'success':
+            logger.info(f"  {algo_name:15s}: SHD={metrics['shd']:3d} F1={metrics['skeleton_f1']:.3f} "
+                       f"time={metrics['runtime']:.1f}s")
+        else:
+            logger.info(f"  {algo_name:15s}: {status_str} time={metrics['runtime']:.1f}s")
+    return {
+        'meta_features': features,
+        'metrics': algo_metrics,
+        'config': {
+            'network': network,
+            'n_samples': n_samples,
+            'seed': seed,
+            'n_variables': len(node_names),
+            'n_true_edges': int(((true_cpdag + true_cpdag.T) > 0).sum() // 2),
+        }
+    }
+def build_meta_dataset(networks=None, save_intermediate=True):
+    """Run full benchmark and build meta-dataset.
+    Returns:
+        X: pd.DataFrame of meta-features
+        Y_shd: pd.DataFrame of SHD per algorithm (columns = algo names)
+        Y_nshd: pd.DataFrame of normalized SHD
+        configs: list of config dicts
+        full_results: list of full result dicts
+    """
+    if networks is None:
+        networks = ALL_NETWORKS
+    all_features = []
+    all_shd = []
+    all_nshd = []
+    all_configs = []
+    full_results = []
+    total_configs = 0
+    for net in networks:
+        tier = get_network_tier(net)
+        n_sizes = len(SAMPLE_SIZES[tier])
+        total_configs += n_sizes * SEEDS_PER_CONFIG
+    logger.info(f"Starting benchmark: {len(networks)} networks, ~{total_configs} configs")
+    config_idx = 0
+    for network in networks:
+        tier = get_network_tier(network)
+        sample_sizes = SAMPLE_SIZES[tier]
+        timeout = TIMEOUT_MAP[tier]
+        for n_samples in sample_sizes:
+            for seed in range(SEEDS_PER_CONFIG):
+                config_idx += 1
+                logger.info(f"\n[{config_idx}/{total_configs}] "
+                           f"{network} N={n_samples} seed={seed}")
+                try:
+                    result = run_single_config(network, n_samples, seed,
+                                              timeout_sec=timeout)
+                    # Extract feature vector
+                    feat_row = {name: result['meta_features'].get(name, 0.0)
+                               for name in FEATURE_NAMES}
+                    all_features.append(feat_row)
+                    # Extract SHD vector
+                    shd_row = {}
+                    nshd_row = {}
+                    for algo in ALGO_NAMES:
+                        m = result['metrics'][algo]
+                        shd_row[algo] = m['shd']
+                        nshd_row[algo] = m['normalized_shd']
+                    all_shd.append(shd_row)
+                    all_nshd.append(nshd_row)
+                    # Config info
+                    all_configs.append(result['config'])
+                    full_results.append(result)
+                except Exception as e:
+                    logger.error(f"FAILED config {network} N={n_samples} seed={seed}: {e}")
+                    continue
+                # Save intermediate results periodically
+                if save_intermediate and config_idx % 5 == 0:
+                    _save_intermediate(all_features, all_shd, all_nshd, all_configs)
+    # Build final DataFrames
+    X = pd.DataFrame(all_features, columns=FEATURE_NAMES)
+    Y_shd = pd.DataFrame(all_shd, columns=ALGO_NAMES)
+    Y_nshd = pd.DataFrame(all_nshd, columns=ALGO_NAMES)
+    configs_df = pd.DataFrame(all_configs)
+    # Save final results
+    os.makedirs(RESULTS_DIR, exist_ok=True)
+    X.to_csv(os.path.join(RESULTS_DIR, 'meta_features.csv'), index=False)
+    Y_shd.to_csv(os.path.join(RESULTS_DIR, 'shd_matrix.csv'), index=False)
+    Y_nshd.to_csv(os.path.join(RESULTS_DIR, 'normalized_shd_matrix.csv'), index=False)
+    configs_df.to_csv(os.path.join(RESULTS_DIR, 'configs.csv'), index=False)
+    # Save full results as JSON
+    _save_full_results(full_results)
+    logger.info(f"\n=== BENCHMARK COMPLETE ===")
+    logger.info(f"Total configs: {len(all_features)}")
+    logger.info(f"Meta-feature matrix: {X.shape}")
+    logger.info(f"SHD matrix: {Y_shd.shape}")
+    logger.info(f"Results saved to {RESULTS_DIR}")
+    return X, Y_shd, Y_nshd, configs_df, full_results
+def _save_intermediate(features, shds, nshds, configs):
+    """Save intermediate results."""
+    os.makedirs(RESULTS_DIR, exist_ok=True)
+    pd.DataFrame(features).to_csv(os.path.join(RESULTS_DIR, 'meta_features_partial.csv'), index=False)
+    pd.DataFrame(shds).to_csv(os.path.join(RESULTS_DIR, 'shd_matrix_partial.csv'), index=False)
+    pd.DataFrame(nshds).to_csv(os.path.join(RESULTS_DIR, 'normalized_shd_partial.csv'), index=False)
+    pd.DataFrame(configs).to_csv(os.path.join(RESULTS_DIR, 'configs_partial.csv'), index=False)
+def _save_full_results(results):
+    """Save full results (without numpy arrays)."""
+    serializable = []
+    for r in results:
+        entry = {
+            'config': r['config'],
+            'meta_features': {k: float(v) if isinstance(v, (np.floating, np.integer)) else v
+                             for k, v in r['meta_features'].items()},
+            'metrics': {}
+        }
+        for algo, m in r['metrics'].items():
+            entry['metrics'][algo] = {
+                k: float(v) if isinstance(v, (np.floating, np.integer)) else v
+                for k, v in m.items()
+            }
+        serializable.append(entry)
+    with open(os.path.join(RESULTS_DIR, 'full_results.json'), 'w') as f:
+        json.dump(serializable, f, indent=2)
+if __name__ == '__main__':
+    import sys
+    # Allow selecting network tier from command line
+    tier = sys.argv[1] if len(sys.argv) > 1 else 'small'
+    if tier == 'small':
+        networks = SMALL_NETWORKS
+    elif tier == 'medium':
+        networks = MEDIUM_NETWORKS
+    elif tier == 'large':
+        networks = LARGE_NETWORKS
+    elif tier == 'all':
+        networks = ALL_NETWORKS
+    else:
+        networks = [tier]  # single network name
+    logger.info(f"Running benchmark for tier: {tier} ({networks})")
+    X, Y_shd, Y_nshd, configs, results = build_meta_dataset(networks=networks)
+    # Print summary
+    print("\n" + "=" * 80)
+    print("BENCHMARK SUMMARY")
+    print("=" * 80)
+    print(f"\nMeta-feature matrix: {X.shape}")
+    print(f"SHD matrix: {Y_shd.shape}")
+    print(f"\nMean SHD per algorithm:")
+    print(Y_shd.mean().sort_values().to_string())
+    print(f"\nBest algorithm per config:")
+    best = Y_shd.idxmin(axis=1)
+    print(best.value_counts().to_string())