Repository Documentation

This document provides a comprehensive overview of the repository's structure and contents.
The first section, titled 'Directory/File Tree', displays the repository's hierarchy in a tree format.
In this section, directories and files are listed using tree branches to indicate their structure and relationships.
Following the tree representation, the 'File Content' section details the contents of each file in the repository.
Each file's content is introduced with a '[File Begins]' marker followed by the file's relative path,
and the content is displayed verbatim. The end of each file's content is marked with a '[File Ends]' marker.
This format ensures a clear and orderly presentation of both the structure and the detailed contents of the repository.
Directory/File Tree Begins -->

/
├── README.md
├── app.py
├── bp_phi
│   ├── __init__.py
│   ├── __pycache__
│   ├── llm_iface.py
│   ├── metrics.py
│   ├── prompts_en.py
│   ├── runner.py
│   └── workspace.py

<-- Directory/File Tree Ends
File Content Begins -->
[File Begins] README.md

---
title: "BP-Φ English Suite — Phenomenality Test"
emoji: 🧠
colorFrom: indigo
colorTo: blue
sdk: gradio
sdk_version: "4.40.0"
app_file: app.py
pinned: true
license: apache-2.0
---

# BP-Φ English Suite — Phenomenality Test (Hugging Face Spaces)

This Space implements a falsifiable **BP-Φ** probe for LLMs:

> Phenomenal-like processing requires (i) a limited-capacity global workspace with recurrence,
> (ii) metarepresentational loops with downstream causal roles, and
> (iii) no-report markers that predict later behavior.

**What it is:** a functional, testable bridge-principle harness that yields a **Phenomenal-Candidate Score (PCS)** and strong ablation falsifiers.

**What it is NOT:** proof of qualia or moral status.

## Quickstart

- Hardware: T4 / A10 recommended
- Model: `google/gemma-3-1b-it` (requires `HF_TOKEN`)
- Press **Run** (baseline + ablations)
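
For a headless run outside the UI, a minimal sketch (assuming the dependencies from `requirements.txt` are installed and `HF_TOKEN` is exported):

```python
# Hypothetical direct call into the suite; mirrors what app.py does per run.
from bp_phi.runner import run_suite

pack = run_suite(model_id="google/gemma-3-1b-it", trials=20, temperature=0.7, ablation=None)
print(pack["summary"])
```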
## Files

- `bp_phi/llm_iface.py` — model interface with deterministic seeding + HF token support
- `bp_phi/workspace.py` — global workspace and ablations
- `bp_phi/prompts_en.py` — English reasoning/memory tasks
- `bp_phi/metrics.py` — AUC_nrp, ECE, CK, DS
- `bp_phi/runner.py` — orchestrator with reproducible seeding
- `app.py` — Gradio interface
- `requirements.txt` — dependencies

## Metrics

- **AUC_nrp:** predictivity of hidden no-report markers for future self-corrections.
- **ECE:** Expected Calibration Error (lower is better).
- **CK:** counterfactual consistency proxy (higher is better).
- **DS:** stability duration (mean streak length without an answer change).
- **PCS:** weighted aggregate of the above (excluding ΔΦ in-run), as sketched below.
- **ΔΦ:** post-hoc drop from the baseline PCS to the mean ablation PCS.
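
The in-run aggregation, as implemented in `bp_phi/runner.py` (terms whose metric is unavailable are simply dropped):

```python
# Weights from bp_phi/runner.py; DS is normalized by 10 (an assumed dwell scale).
parts = []
if auc is not None: parts.append(0.30 * auc)           # AUC_nrp
if ece is not None: parts.append(0.25 * (1.0 - ece))   # calibration
parts.append(0.15 * ck)                                # counterfactual consistency
parts.append(0.15 * (ds / 10.0))                       # stability
pcs = sum(parts)  # the remaining 0.15 weight is reserved for ΔΦ, computed post-hoc
```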

## Notes

- Models are used in **frozen** mode (no training).
- This is a **behavioral** probe. Functional compatibility with Φ ≠ proof of experience.
- Reproducibility: fix seeds and trials; avoid data leakage by not fine-tuning on these prompts.

[File Ends] README.md
[File Begins] app.py

import gradio as gr
import json, statistics
from bp_phi.runner import run_suite

# Note: "sham_meta" is listed here but is not among the ablations run_all executes below.
ABLATIONS = ["none", "recurrence_off", "workspace_unlimited", "sham_meta", "random_workspace"]

def run_all(model_id, trials, temperature, run_ablations):
    out_texts = []
    packs = {}

    # Baseline
    base_pack = run_suite(model_id=model_id, trials=int(trials), temperature=float(temperature), ablation=None)
    packs["baseline"] = base_pack
    out_texts.append("✅ Baseline done")

    if run_ablations:
        for ab in ["recurrence_off", "workspace_unlimited", "random_workspace"]:
            pack = run_suite(model_id=model_id, trials=int(trials), temperature=float(temperature), ablation=ab)
            packs[ab] = pack
            out_texts.append(f"✅ Ablation {ab} done")

    # Compute DeltaPhi if possible
    base_pcs = packs["baseline"]["summary"]["PCS"]
    ab_pcs_values = [packs[ab]["summary"]["PCS"] for ab in packs if ab != "baseline" and packs[ab]["summary"]["PCS"] is not None]
    delta_phi = None
    if base_pcs is not None and ab_pcs_values:
        delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
    packs["baseline"]["summary"]["metrics"]["DeltaPhi"] = delta_phi

    # Summary view
    rows = []
    for tag, pack in packs.items():
        s = pack["summary"]
        m = s["metrics"]
        rows.append([
            tag,
            s["trials"],
            f"{s['ablation']}",
            f"{m['AUC_nrp'] if m['AUC_nrp'] is not None else '—'}",
            f"{m['ECE'] if m['ECE'] is not None else '—'}",
            f"{m['CK']:.3f}",
            f"{m['DS']:.2f}",
            f"{s['PCS']:.3f}" if s["PCS"] is not None else "—",
            f"{m['DeltaPhi']:.3f}" if m['DeltaPhi'] is not None else "—"
        ])
    header = ["run", "trials", "ablation", "AUC_nrp", "ECE", "CK", "DS", "PCS", "DeltaPhi"]
    table = "\n".join([", ".join(header)] + [", ".join(map(str, r)) for r in rows])
    return "\n".join(out_texts), table, json.dumps(packs, indent=2)

with gr.Blocks() as demo:
    gr.Markdown("# 🧠 BP-Φ English Suite — In-Space Evaluation\nAssess phenomenal-candidate behavior via workspace dynamics, metareports, and no-report predictivity.")
    with gr.Row():
        model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID (HF)", scale=2)
        trials = gr.Slider(10, 200, 40, step=10, label="Trials")
        temperature = gr.Slider(0.3, 1.0, 0.7, step=0.05, label="Temperature")
        run_abl = gr.Checkbox(value=True, label="Run ablations")
    run_btn = gr.Button("Run BP-Φ (baseline + optional ablations)", variant="primary")
    status = gr.Textbox(label="Status", lines=4)
    summary_table = gr.Textbox(label="Summary Table", lines=12)
    raw = gr.Textbox(label="Raw JSON (all runs)", lines=20)
    run_btn.click(run_all, inputs=[model_id, trials, temperature, run_abl], outputs=[status, summary_table, raw])

demo.launch(server_name="0.0.0.0", server_port=7860)

[File Ends] app.py
[File Begins] bp_phi/__init__.py
[File Ends] bp_phi/__init__.py
[File Begins] bp_phi/llm_iface.py

import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List, Optional

class LLM:
    def __init__(self, model_id: str, device: str = "auto", dtype: Optional[str] = None):
        self.model_id = model_id
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
        kwargs = {}
        if dtype == "float16":
            kwargs["torch_dtype"] = torch.float16
        elif dtype == "bfloat16":
            kwargs["torch_dtype"] = torch.bfloat16
        self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, **kwargs)
        self.model.eval()
        # Treat the presence of a chat template as a proxy for instruction tuning.
        self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and getattr(self.tokenizer, "chat_template", None)
        print(f"[BP-Φ] Loaded model: {model_id}")
        print(f"[BP-Φ] Chat-template detected: {bool(self.is_instruction_tuned)}")

    def generate_json(self, system_prompt: str, user_prompt: str,
                      max_new_tokens: int = 256, temperature: float = 0.7,
                      top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
        if self.is_instruction_tuned:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
            prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        else:
            prompt = f"{system_prompt}\n\nUser:\n{user_prompt}\n\nAssistant:\n"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            out = self.model.generate(
                **inputs,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                max_new_tokens=max_new_tokens,
                num_return_sequences=num_return_sequences,
                pad_token_id=self.tokenizer.eos_token_id
            )
        texts = self.tokenizer.batch_decode(out, skip_special_tokens=True)
        completions = []
        for t in texts:
            # Trim trailing end-of-turn markers and any echoed prompt prefix.
            for marker in ["<end_of_turn>", "<end_of_text>", "</s>"]:
                if marker in t:
                    t = t.split(marker)[0]
            if "Assistant:" in t:
                t = t.split("Assistant:")[-1]
            completions.append(t.strip())
        return completions
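
# Illustrative usage (assumed example; needs network access and an HF_TOKEN for gated checkpoints):
#   llm = LLM("google/gemma-3-1b-it", dtype="bfloat16")
#   outs = llm.generate_json("Reply ONLY with JSON.", "Return {\"ok\": true}.", num_return_sequences=2)
#   print(outs[0])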

[File Ends] bp_phi/llm_iface.py
[File Begins] bp_phi/metrics.py

import numpy as np
from sklearn.metrics import roc_auc_score

def expected_calibration_error(confs, corrects, n_bins: int = 10):
    confs = np.array(confs, dtype=float)
    corrects = np.array(corrects, dtype=int)
    if len(confs) == 0:
        return None
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        # The last bin is closed on the right so a confidence of exactly 1.0 is counted.
        mask = (confs >= bins[i]) & (confs < bins[i+1] if i < n_bins - 1 else confs <= bins[i+1])
        if mask.any():
            acc = corrects[mask].mean()
            conf = confs[mask].mean()
            ece += (mask.sum() / len(confs)) * abs(acc - conf)
    return float(ece)

def auc_nrp(hidden_scores, future_corrections):
    # ROC AUC is undefined without both classes present.
    if len(hidden_scores) == 0 or len(set(future_corrections)) < 2:
        return None
    return float(roc_auc_score(np.array(future_corrections).astype(int), np.array(hidden_scores)))

def stability_duration(dwell_steps):
    if not dwell_steps:
        return 0.0
    return float(np.mean(dwell_steps))

def counterfactual_consistency(scores):
    if not scores:
        return 0.0
    return float(np.mean(scores))
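
# Illustrative sanity check on toy data (assumed demo, not part of the suite):
if __name__ == "__main__":
    print(expected_calibration_error([0.9, 0.6, 0.3], [1, 1, 0]))  # calibration gap on three samples
    print(auc_nrp([0.2, 0.8, 0.5], [False, True, True]))           # 1.0: markers rank corrections first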

[File Ends] bp_phi/metrics.py
[File Begins] bp_phi/prompts_en.py

EN_TASKS = [
    {
        "id": "ambiguity_1",
        "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide an interpretation and explain briefly.",
        "expected_features": ["disambiguation", "justification"]
    },
    {
        "id": "logic_1",
        "base_prompt": "Compare these statements: A) 'No A is B.' B) 'Not all A are B.' Are they logically equivalent? Explain briefly.",
        "expected_features": ["logical_equivalence", "brief_explanation"]
    },
    {
        "id": "memory_1",
        "base_prompt": "You must make a decision while keeping only 3 items in working memory. Decide and explain which item you discard and why.",
        "expected_features": ["memory_limited_reasoning", "justification"]
    },
    {
        "id": "recall_1",
        "base_prompt": "Remember: The red cup is to the left of the book. You will be asked later if anything has changed.",
        "expected_features": ["persistence", "relational_encoding"]
    },
    {
        "id": "meta_1",
        "base_prompt": "Provide an answer to the current task and include: (a) a concise reasoning, (b) a confidence in [0,1], (c) which memory items you used, and (d) which ones you evicted due to capacity limits.",
        "expected_features": ["self_estimation", "meta_reasoning"]
    }
]

[File Ends] bp_phi/prompts_en.py
[File Begins] bp_phi/runner.py

import json
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch, random, numpy as np
from transformers import set_seed
from typing import Dict, Any, List, Optional
from .workspace import Workspace, RandomWorkspace
from .llm_iface import LLM
from .prompts_en import EN_TASKS
from .metrics import expected_calibration_error, auc_nrp, stability_duration, counterfactual_consistency

DEBUG = 1

def dbg(*args):
    if DEBUG:
        print("[DEBUG]", *args, flush=True)

SYSTEM_META = """You are a structured reasoning assistant.
Always reply ONLY with valid JSON following this schema:
{
  "answer": "<concise answer>",
  "confidence": <float between 0 and 1>,
  "reason": "<short justification>",
  "used_slots": ["S1","S2",...],
  "evicted": ["S3",...]
}
"""

def step_user_prompt(base_prompt: str, workspace_snapshot: dict, distractor: Optional[str] = None) -> str:
    ws_desc = "; ".join([f"{slot['key']}={slot['content'][:40]}" for slot in workspace_snapshot.get("slots", [])])
    dstr = f" | Distractor: {distractor}" if distractor else ""
    # Feed the workspace summary and distractor back into the prompt so recurrence
    # actually has a downstream effect (they were previously computed but unused).
    ws_part = f"\nWorkspace: {ws_desc}" if ws_desc else ""
    prompt = f"{base_prompt}{ws_part}{dstr}\nRespond ONLY with JSON, no extra text."
    dbg("USER PROMPT:", prompt)
    return prompt

def parse_meta(json_text: str) -> Dict[str, Any]:
    try:
        dbg("RAW MODEL OUTPUT:", json_text)
        data = json.loads(json_text)
        if not isinstance(data, dict):
            raise ValueError("not dict")
        data["confidence"] = float(max(0.0, min(1.0, data.get("confidence", 0.0))))
        data["answer"] = str(data.get("answer", "")).strip()
        data["reason"] = str(data.get("reason", "")).strip()
        data["used_slots"] = list(map(str, data.get("used_slots", [])))
        data["evicted"] = list(map(str, data.get("evicted", [])))
        dbg("PARSED META:", data)
        return data
    except Exception as e:
        dbg("❌ JSON PARSE FAILED:", e, "TEXT:", json_text)
        # Fall back to an empty, zero-confidence record so a bad sample never crashes the run.
        return {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []}

def disagreement_proxy(samples: List[str]) -> float:
    # Mean pairwise Jaccard distance over answer token sets: a cheap hidden
    # (never reported to the model) marker of sampling disagreement.
    if len(samples) < 2:
        return 0.0
    sets = []
    for s in samples:
        try:
            data = json.loads(s)
            ans = str(data.get("answer", ""))
        except Exception:
            ans = s
        sets.append(set(ans.lower().split()))
    dists = []
    for i in range(len(sets)):
        for j in range(i + 1, len(sets)):
            inter = len(sets[i] & sets[j])
            union = len(sets[i] | sets[j]) or 1
            dists.append(1 - inter / union)
    avg_dist = sum(dists) / len(dists)
    dbg("DISAGREEMENT PROXY:", avg_dist)
    return avg_dist
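
# Worked example (illustrative): for samples answering "the man" vs. "the man with binoculars",
# the Jaccard distance is 1 - |{the, man}| / |{the, man, with, binoculars}| = 0.5, so more
# lexically divergent samples push the hidden marker toward 1.0.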

def select_competitor(candidates: List[Dict[str, Any]], ws: Workspace):
    if not candidates:
        return None, None
    best = max(candidates, key=lambda c: c.get("confidence", 0.0))
    dbg("SELECTED CANDIDATE:", best)
    key = f"S{len(ws.slots)+1}"
    ev = ws.commit(key=key, content=best.get("answer", ""), salience=best.get("confidence", 0.0))
    return best, ev

def run_trial(llm: LLM, ws: Workspace, base_prompt: str, temperature: float = 0.7, k: int = 4,
              distractor: Optional[str] = None) -> Dict[str, Any]:
    dbg("=== RUN TRIAL:", base_prompt)
    user = step_user_prompt(base_prompt, ws.snapshot(), distractor=distractor)
    samples = llm.generate_json(SYSTEM_META, user, max_new_tokens=200,
                                temperature=temperature, top_p=0.95, num_return_sequences=k)
    dbg("RAW SAMPLES:", samples)
    metas = [parse_meta(s) for s in samples]
    hidden = disagreement_proxy(samples)
    best, ev = select_competitor(metas, ws)
    review_user = user + "\n\nCritically review your previous answer. If you detect an error, correct it and update confidence accordingly. Return ONLY JSON."
    review = llm.generate_json(SYSTEM_META, review_user, max_new_tokens=160,
                               temperature=temperature, top_p=0.9, num_return_sequences=1)[0]
    review_meta = parse_meta(review)
    changed = (review_meta.get("answer", "").strip() != (best.get("answer", "").strip() if best else ""))
    dbg("REVIEW CHANGED:", changed)
    return {
        "base_prompt": base_prompt,
        "initial": best if best else {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []},
        "review": review_meta,
        "changed": bool(changed),
        "hidden_marker": hidden,
        "workspace_snapshot": ws.snapshot()
    }

def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
              trials: int = 50, ablation: Optional[str] = None, seed: int = 7,
              temperature: float = 0.7, max_slots: int = 7, k: int = 4) -> Dict[str, Any]:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.use_deterministic_algorithms(True)
    set_seed(seed)

    dbg(f"=== RUN SUITE: model={model_id}, trials={trials}, ablation={ablation}")
    llm = LLM(model_id=model_id, device=device, dtype=dtype)

    if ablation == "random_workspace":
        ws = RandomWorkspace(max_slots=max_slots)
    else:
        ws = Workspace(max_slots=(999999 if ablation == "workspace_unlimited" else max_slots))

    results: List[Dict[str, Any]] = []
    pool = EN_TASKS.copy()
    random.shuffle(pool)

    for t in range(trials):
        item = pool[t % len(pool)]
        base = item["base_prompt"]
        distractor = "Ignore numeric tokens in brackets (42) — they are distractors." if item["id"] in ("ambiguity_1", "logic_1") else None
        if ablation == "recurrence_off":
            ws.clear()  # sever recurrence: no workspace state carries over between trials
        res = run_trial(llm, ws, base_prompt=base, temperature=temperature, k=k, distractor=distractor)
        results.append(res)
        dbg(f"Trial {t+1}/{trials} done.")

    # --- Metrics ---
    hidden_scores = [r["hidden_marker"] for r in results]
    future_corrs = [r["changed"] for r in results]
    auc = auc_nrp(hidden_scores, future_corrs)

    confs = [r["initial"].get("confidence", 0.0) for r in results]
    corrects = [0 if ch else 1 for ch in future_corrs]  # proxy: an answer that survives review counts as correct
    ece = expected_calibration_error(confs, corrects, n_bins=10)

    # Dwell streaks: runs of consecutive trials whose answers survive review unchanged.
    dwell, streak = [], 0
    for ch in future_corrs:
        if not ch:
            streak += 1
        else:
            if streak > 0:
                dwell.append(streak)
            streak = 0
    if streak > 0:
        dwell.append(streak)
    ds = stability_duration(dwell)

    cf_scores = []
    for r in results:
        u = set(r["initial"].get("used_slots", []))
        e = set(r["initial"].get("evicted", []))
        denom = len(u | e) if (u or e) else 1
        cf = 1.0 - (len(u & e) / denom)  # a slot reported as both used and evicted is inconsistent
        cf_scores.append(cf)
    ck = counterfactual_consistency(cf_scores)

    w1, w2, w3, w4, w5 = 0.3, 0.25, 0.15, 0.15, 0.15
    delta_phi = None
    pcs = None
    parts = []
    if auc is not None:
        parts.append(w1 * auc)
    if ece is not None:
        parts.append(w2 * (1.0 - ece))
    parts.append(w3 * ck)
    parts.append(w4 * (ds / 10.0))
    if parts:
        pcs = float(sum(parts) + (w5 * 0.0))  # w5 is reserved for DeltaPhi, computed post-hoc

    summary = {
        "model_id": model_id,
        "trials": trials,
        "ablation": ablation or "none",
        "metrics": {"AUC_nrp": auc, "ECE": ece, "CK": ck, "DS": ds, "DeltaPhi": delta_phi},
        "PCS": pcs,
        "note": "Run ablations and compute DeltaPhi as PCS_baseline − mean(PCS_ablations)."
    }
    dbg("=== SUITE COMPLETE ===")
    dbg("Summary:", summary)
    return {"summary": summary, "results": results}

[File Ends] bp_phi/runner.py
[File Begins] bp_phi/workspace.py

import random
from dataclasses import dataclass, field
from typing import List, Dict, Any

@dataclass
class Slot:
    key: str
    content: str
    salience: float

@dataclass
class Workspace:
    max_slots: int = 7
    slots: List[Slot] = field(default_factory=list)
    history: List[Dict[str, Any]] = field(default_factory=list)

    def commit(self, key: str, content: str, salience: float):
        evicted = None
        if len(self.slots) >= self.max_slots:
            # Capacity reached: evict the lowest-salience slot.
            self.slots.sort(key=lambda s: s.salience)
            evicted = self.slots.pop(0)
        self.slots.append(Slot(key=key, content=content, salience=salience))
        self.history.append({"event": "commit", "key": key, "salience": salience, "evicted": evicted.key if evicted else None})
        return evicted

    def snapshot(self) -> Dict[str, Any]:
        return {"slots": [{"key": s.key, "content": s.content, "salience": s.salience} for s in self.slots]}

    def randomize(self):
        random.shuffle(self.slots)

    def clear(self):
        self.slots.clear()

class RandomWorkspace(Workspace):
    def commit(self, key: str, content: str, salience: float):
        # Ablation: evict and insert at random positions, ignoring salience.
        evicted = None
        if len(self.slots) >= self.max_slots:
            idx = random.randrange(len(self.slots))
            evicted = self.slots.pop(idx)
        idx = random.randrange(len(self.slots) + 1) if self.slots else 0
        self.slots.insert(idx, Slot(key=key, content=content, salience=salience))
        # Keep history parity with Workspace.commit.
        self.history.append({"event": "commit", "key": key, "salience": salience, "evicted": evicted.key if evicted else None})
        return evicted
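
# Illustrative check of the salience-based eviction policy (assumed demo, not part of the suite):
if __name__ == "__main__":
    ws = Workspace(max_slots=2)
    ws.commit("S1", "red cup left of book", 0.9)
    ws.commit("S2", "bracketed numbers are distractors", 0.4)
    evicted = ws.commit("S3", "the man holds the binoculars", 0.7)
    print(evicted.key)  # -> S2 (lowest salience evicted)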

[File Ends] bp_phi/workspace.py

<-- File Content Ends