Oguzz07 commited on
Commit
fbf08d1
·
verified ·
1 Parent(s): a47b09f

Add run_benchmark.py

Browse files
Files changed (1) hide show
  1. run_benchmark.py +212 -0
run_benchmark.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Resume benchmark from partial results, then run medium and large networks too.
3
+ Optimized for CPU: reduced timeouts, skip heavy combos.
4
+ """
5
+ import os
6
+ import sys
7
+ import time
8
+ import numpy as np
9
+ import pandas as pd
10
+ import json
11
+ import logging
12
+ import warnings
13
+
14
+ warnings.filterwarnings('ignore')
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Suppress the verbose BOSS/GRaSP output
19
+ logging.getLogger('causallearn').setLevel(logging.WARNING)
20
+
21
+ sys.path.insert(0, '/app')
22
+ from causal_selection.data.generator import (
23
+ load_bn_model, get_true_dag_adjmat, dag_to_cpdag, sample_dataset,
24
+ SMALL_NETWORKS, MEDIUM_NETWORKS, LARGE_NETWORKS, get_network_tier
25
+ )
26
+ from causal_selection.discovery.algorithms import run_algorithm, ALGORITHM_POOL
27
+ from causal_selection.discovery.evaluator import evaluate_algorithm_result
28
+ from causal_selection.features.extractor import extract_all_features, FEATURE_NAMES
29
+
30
# All algorithm names in the pool, in a fixed order used for result columns.
ALGO_NAMES = list(ALGORITHM_POOL.keys())
# Directory where all benchmark CSV outputs (and '_partial' checkpoints) go.
RESULTS_DIR = '/app/causal_selection/data/results'

# More aggressive sample sizes - fewer but covering range
# (larger network tiers get fewer sample sizes to bound total runtime).
SAMPLE_SIZES_FAST = {
    'small': [500, 1000, 2000, 5000],
    'medium': [500, 1000, 2000],
    'large': [500, 1000],
}

# Per-algorithm timeout (seconds) - algorithm-specific!
# Keyed by algorithm name, then by network tier ('small'/'medium'/'large').
ALGO_TIMEOUT = {
    'PC_discrete': {'small': 30, 'medium': 120, 'large': 300},
    'FCI': {'small': 30, 'medium': 120, 'large': 300},
    'GES': {'small': 30, 'medium': 120, 'large': 300},
    'BOSS': {'small': 30, 'medium': 120, 'large': 300},
    'GRaSP': {'small': 30, 'medium': 120, 'large': 300},
    'HC': {'small': 30, 'medium': 60, 'large': 120},
    'Tabu': {'small': 30, 'medium': 60, 'large': 120},
    'MMHC': {'small': 30, 'medium': 60, 'large': 120},
    'K2': {'small': 20, 'medium': 30, 'large': 60},
}

# Number of random seeds per (network, sample size) combination.
SEEDS = 2  # Reduced from 3 to speed up
54
+
55
+
56
def load_existing_results():
    """Return the set of (network, n_samples, seed) keys already benchmarked.

    Scans both the periodic checkpoint (``configs_partial.csv``) and the
    final output (``configs.csv``) so an interrupted run can resume without
    re-running completed configurations.

    Returns:
        set[tuple[str, int, int]]: keys of completed configurations.
    """
    existing = set()
    candidate_paths = [
        os.path.join(RESULTS_DIR, 'configs_partial.csv'),
        os.path.join(RESULTS_DIR, 'configs.csv'),
    ]

    for path in candidate_paths:
        if not os.path.exists(path):
            continue
        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A checkpoint interrupted mid-write can leave a zero-byte CSV;
            # treat it as "nothing done" instead of crashing the resume.
            continue
        for _, row in df.iterrows():
            key = (row['network'], int(row['n_samples']), int(row['seed']))
            existing.add(key)

    return existing
70
+
71
+
72
def _load_first_existing(prefixes):
    """Return the rows of the first existing ``RESULTS_DIR/<prefix>.csv``.

    The checkpoint name should be listed before the final name so a resumed
    run prefers the most recent data.  Returns a list of row dicts, or []
    when none of the files exist.
    """
    for prefix in prefixes:
        path = os.path.join(RESULTS_DIR, f'{prefix}.csv')
        if os.path.exists(path):
            return pd.read_csv(path).to_dict('records')
    return []


def run_benchmark():
    """Run the full benchmark with resume capability.

    Skips every (network, n_samples, seed) combination already recorded in
    the saved configs, runs each discovery algorithm with a tier-specific
    timeout on the remaining ones, checkpoints every 3 configs, and prints
    a summary of mean SHD and per-algorithm win counts at the end.
    """
    existing = load_existing_results()
    logger.info(f"Found {len(existing)} existing configs")

    # Reload any previously checkpointed rows so new results append to them.
    all_features = _load_first_existing(['meta_features_partial', 'meta_features'])
    all_shd = _load_first_existing(['shd_matrix_partial', 'shd_matrix'])
    # BUGFIX: _save_results writes 'normalized_shd_matrix_partial.csv'; the
    # old prefix 'normalized_shd_partial' never matched, so checkpointed
    # normalized-SHD rows were silently dropped (and the four tables
    # desynchronized) on resume.
    all_nshd = _load_first_existing(['normalized_shd_matrix_partial', 'normalized_shd_matrix'])
    all_configs = _load_first_existing(['configs_partial', 'configs'])

    logger.info(f"Starting with {len(all_configs)} existing results")

    # Enumerate every (network, n_samples, seed) combination not yet done.
    all_networks = SMALL_NETWORKS + MEDIUM_NETWORKS + LARGE_NETWORKS
    configs_to_run = []

    for net in all_networks:
        tier = get_network_tier(net)
        for n_samples in SAMPLE_SIZES_FAST[tier]:
            for seed in range(SEEDS):
                if (net, n_samples, seed) not in existing:
                    configs_to_run.append((net, n_samples, seed, tier))

    logger.info(f"Configs to run: {len(configs_to_run)}")

    total = len(configs_to_run)
    for idx, (network, n_samples, seed, tier) in enumerate(configs_to_run):
        logger.info(f"\n[{idx+1}/{total}] {network} N={n_samples} seed={seed}")

        try:
            # Ground truth: DAG adjacency and its CPDAG (evaluation target).
            model = load_bn_model(network)
            true_dag, node_names = get_true_dag_adjmat(model)
            true_cpdag = dag_to_cpdag(true_dag)

            # Simulated dataset for this (network, size, seed) combination.
            df = sample_dataset(model, n_samples, seed=seed)

            # Meta-features describing the dataset (input for algo selection).
            features = extract_all_features(df, n_probe_triplets=80)

            # Run every algorithm with its tier-specific timeout.
            algo_metrics = {}
            for algo_name in ALGO_NAMES:
                timeout = ALGO_TIMEOUT[algo_name][tier]
                result = run_algorithm(algo_name, df, timeout_sec=timeout)
                metrics = evaluate_algorithm_result(result, true_cpdag)
                algo_metrics[algo_name] = metrics

                s = metrics['status']
                if s == 'success':
                    logger.info(f" {algo_name:12s}: SHD={metrics['shd']:3d} F1={metrics['skeleton_f1']:.3f} t={metrics['runtime']:.1f}s")
                else:
                    logger.info(f" {algo_name:12s}: {s} t={metrics['runtime']:.1f}s")

            # Store results; missing features default to 0.0 so every row has
            # the same columns.
            feat_row = {name: features.get(name, 0.0) for name in FEATURE_NAMES}
            all_features.append(feat_row)

            all_shd.append({algo: algo_metrics[algo]['shd'] for algo in ALGO_NAMES})
            all_nshd.append({algo: algo_metrics[algo]['normalized_shd'] for algo in ALGO_NAMES})

            all_configs.append({
                'network': network,
                'n_samples': n_samples,
                'seed': seed,
                'n_variables': len(node_names),
                # Undirected edge count: symmetrize the CPDAG, halve the sum.
                'n_true_edges': int(((true_cpdag + true_cpdag.T) > 0).sum() // 2),
            })

            # Checkpoint every 3 configs so an interrupted run can resume.
            if (idx + 1) % 3 == 0:
                _save_results(all_features, all_shd, all_nshd, all_configs, partial=True)

        except Exception:
            # One bad config must not kill the whole benchmark;
            # logger.exception records the full traceback.
            logger.exception(f"FAILED {network} N={n_samples} seed={seed}")

    # Final (non-partial) save.
    _save_results(all_features, all_shd, all_nshd, all_configs, partial=False)

    # Summary: mean SHD per algorithm and how often each algorithm wins.
    Y_shd = pd.DataFrame(all_shd)
    configs_df = pd.DataFrame(all_configs)

    print("\n" + "=" * 80)
    print("BENCHMARK COMPLETE")
    print("=" * 80)
    print(f"Total configs: {len(all_configs)}")
    print(f"Networks: {configs_df['network'].unique()}")
    print(f"\nMean SHD per algorithm:")
    print(Y_shd.mean().sort_values())
    print(f"\nBest algorithm count:")
    print(Y_shd.idxmin(axis=1).value_counts())
200
+
201
+
202
def _save_results(features, shds, nshds, configs, partial=True):
    """Persist the four benchmark result tables to RESULTS_DIR as CSVs.

    When *partial* is True the filenames carry a '_partial' suffix so
    periodic checkpoints never clobber the final outputs.
    """
    os.makedirs(RESULTS_DIR, exist_ok=True)
    suffix = '_partial' if partial else ''
    tables = {
        'meta_features': features,
        'shd_matrix': shds,
        'normalized_shd_matrix': nshds,
        'configs': configs,
    }
    for stem, rows in tables.items():
        out_path = os.path.join(RESULTS_DIR, f'{stem}{suffix}.csv')
        pd.DataFrame(rows).to_csv(out_path, index=False)
209
+
210
+
211
# Entry point: run (or resume) the full benchmark when executed as a script.
if __name__ == '__main__':
    run_benchmark()