"""SDR Composition Analysis v3 — using cached retina.npz.

Loads the ``sdr`` array from the cached retina archive, samples a subset of
tokens, and reports three statistics on their active-bit sets:

* pairwise Jaccard similarity between sampled tokens,
* how well the union of two tokens' codes matches some third token,
* how many active bits two distinct random tokens share.

The summary is printed and written to ``results_sdr_composition.json`` in
``OUT_DIR``.
"""
import itertools
import json
import os
from pathlib import Path

import numpy as np


OUT_DIR = Path(__file__).resolve().parents[1] / "docs"
RETINA = Path.home() / ".cache" / "autoresearch" / "retina.npz"

SEED = 42
SAMPLE_SIZE = 500            # tokens sampled for all statistics
N_UNION_TRIALS = 100         # random pair draws for the union test
N_INTERSECTION_SAMPLES = 500  # random pairs for the intersection test
UNION_MATCH_THRESHOLD = 0.3  # best Jaccard above this counts as a "match"


def jaccard(a: set, b: set) -> float:
    """Return |a & b| / |a | b|, or 0.0 when both sets are empty."""
    union_size = len(a | b)
    if union_size == 0:
        return 0.0
    return len(a & b) / union_size


def active_bit_sets(sdr, rows) -> list:
    """Return, for each row index in *rows*, the set of nonzero column indices."""
    return [set(np.flatnonzero(sdr[r]).tolist()) for r in rows]


def pairwise_jaccards(codes: list):
    """Return the Jaccard similarity of every unordered pair in *codes*.

    Pairs are visited in the order (0, 1), (0, 2), ..., (n-2, n-1).
    """
    return np.array([jaccard(a, b) for a, b in itertools.combinations(codes, 2)])


def best_union_match(codes: list, i: int, j: int) -> float:
    """Return the best Jaccard between ``codes[i] | codes[j]`` and any other code.

    Codes at positions *i* and *j* are excluded from the search, so *codes*
    must contain at least one further entry.
    """
    union = codes[i] | codes[j]
    return max(
        jaccard(union, code)
        for k, code in enumerate(codes)
        if k != i and k != j
    )


def random_distinct_pair(rng, n: int) -> tuple:
    """Draw an ordered pair of distinct indices uniformly from ``range(n)``.

    The second index is drawn from ``n - 1`` slots and shifted past the first,
    which avoids both rejection loops and self-pairs. Requires ``n >= 2``.
    """
    first = int(rng.randint(n))
    second = int(rng.randint(n - 1))
    if second >= first:
        second += 1
    return first, second


def main() -> None:
    """Run the analysis, print the summary, and write the JSON report."""
    print("[SDR] Loading retina...")
    # np.load on an .npz returns a lazily-read archive; indexing materializes
    # the array, so the archive can be closed right after.
    with np.load(RETINA) as data:
        sdr = data["sdr"]
    n_tok, n_bits = sdr.shape
    if n_tok < 3:
        # The union test compares a pair against a *third* token.
        raise ValueError(f"need at least 3 tokens for the analysis, got {n_tok}")
    # Truncated mean row sum; the density below is derived from this integer.
    n_active = int(sdr.sum(axis=1).mean())
    print(f"[SDR] {n_tok} tokens x {n_bits} bits, ~{n_active} active/token ({n_active/n_bits*100:.2f}% density)")

    # Sample tokens (clamped so small retinas do not make rng.choice fail).
    rng = np.random.RandomState(SEED)
    sample_n = min(SAMPLE_SIZE, n_tok)
    idx = rng.choice(n_tok, sample_n, replace=False)
    codes = active_bit_sets(sdr, idx)

    # Pairwise Jaccard over all sampled pairs.
    jaccards = pairwise_jaccards(codes)
    print(f"[SDR] Jaccard: mean={jaccards.mean():.4f} median={np.median(jaccards):.4f} "
          f"P95={np.percentile(jaccards,95):.4f} any_overlap={ (jaccards>0).mean()*100:.1f}%")

    # Union generalization: does the union of two codes resemble a third token?
    pair_results = []
    for _ in range(N_UNION_TRIALS):
        i, j = rng.randint(sample_n, size=2)
        if i == j:
            # Self-pairs are skipped, so fewer than N_UNION_TRIALS may be recorded.
            continue
        best = best_union_match(codes, i, j)
        pair_results.append({"i": int(idx[i]), "j": int(idx[j]), "best_union_jaccard": float(best)})

    mean_best = np.mean([p["best_union_jaccard"] for p in pair_results])
    pct_match = sum(1 for p in pair_results if p["best_union_jaccard"] > UNION_MATCH_THRESHOLD) / len(pair_results) * 100
    print(f"[SDR] Union: mean_best={mean_best:.4f} pct_match_third_token={pct_match:.1f}%")

    # Intersection: shared active bits between two *distinct* random tokens.
    # Pairing a token with itself would report its full code size as "shared".
    inters = []
    for _ in range(N_INTERSECTION_SAMPLES):
        a, b = random_distinct_pair(rng, sample_n)
        inters.append(len(codes[a] & codes[b]))
    print(f"[SDR] Intersection: mean={np.mean(inters):.1f} bits median={np.median(inters):.1f} max={max(inters)}")

    results = {
        "pairwise_jaccard": {
            "mean": float(jaccards.mean()), "median": float(np.median(jaccards)),
            "p95": float(np.percentile(jaccards, 95)), "min": float(jaccards.min()), "max": float(jaccards.max()),
            "pct_with_any_overlap": float((jaccards > 0).mean() * 100),
        },
        "union_generalization": {
            "n_pairs": len(pair_results), "mean_best_union_jaccard": float(mean_best),
            "pct_union_matches_third_token": float(pct_match),
        },
        "intersection": {
            "mean_active_shared": float(np.mean(inters)),
            "median_active_shared": float(np.median(inters)),
            "max_active_shared": int(max(inters)),
        },
        "sparsity": {
            "n_tokens": int(n_tok), "sdr_dim": int(n_bits),
            "active_bits": int(n_active), "density_pct": float(n_active / n_bits * 100),
        },
    }
    # The docs directory may not exist on a fresh checkout.
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    (OUT_DIR / "results_sdr_composition.json").write_text(json.dumps(results, indent=2), encoding="utf-8")
    print("[SDR] Saved results_sdr_composition.json")


if __name__ == "__main__":
    main()