neuralworm committed
Commit 3bdc105 · Parent(s): 2a78f31

add control experiments

app.py CHANGED
@@ -12,7 +12,7 @@ from cognitive_mapping_probe.utils import dbg
 theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
 
 def cleanup_memory():
-    """A central function for cleaning up memory after each experiment run."""
+    """Cleans up memory after each experiment run."""
     dbg("Cleaning up memory...")
     gc.collect()
     if torch.cuda.is_available():
@@ -20,9 +20,7 @@ def cleanup_memory():
     dbg("Memory cleanup complete.")
 
 def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
-    """
-    Wrapper function for the "Manual Single Run" tab.
-    """
+    """Wrapper for the 'Manual Single Run' tab."""
     results = run_seismic_analysis(*args, progress_callback=progress)
     stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
     df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
@@ -38,9 +36,7 @@ PLOT_PARAMS = {
 }
 
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """
-    Wrapper function for the "Automated Suite" tab.
-    """
+    """Wrapper for the 'Automated Suite' tab."""
     summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
 
     if "Introspective Report" in summary_df.columns or "Patch Info" in summary_df.columns:
@@ -96,7 +92,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
             auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
             auto_experiment_name = gr.Dropdown(
                 choices=list(get_curated_experiments().keys()),
-                value="Causal Surgery (Patching Deletion into Self-Analysis)",
+                value="Causal Surgery & Controls (4B-Model)",
                 label="Curated Experiment Protocol"
             )
             auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
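
Note: the new dropdown default wires the control protocol into the existing run_auto_suite_display path. The same protocol can also be driven without the Gradio UI through run_auto_suite, whose positional signature is visible above. A minimal sketch, assuming a hypothetical model id and step count (num_steps must exceed the protocol's patch_step of 100) and a no-op stand-in for gr.Progress:

    from cognitive_mapping_probe.auto_experiment import run_auto_suite

    summary_df, plot_df, all_results = run_auto_suite(
        "google/gemma-3-4b-it",                  # hypothetical model id
        300,                                     # num_steps; must be > patch_step (100)
        42,                                      # seed
        "Causal Surgery & Controls (4B-Model)",  # the new default protocol
        lambda fraction, desc="": None,          # stand-in for gr.Progress
    )
    print(summary_df[["Experiment", "Mean Delta", "Patch Info"]])
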
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -1,5 +1,4 @@
 import pandas as pd
-import torch
 import gc
 from typing import Dict, List, Tuple
 
@@ -10,18 +9,34 @@ from .utils import dbg
 
 def get_curated_experiments() -> Dict[str, List[Dict]]:
     """Defines the predefined scientific experiment protocols."""
+
     CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
     CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
+    STABLE_PROMPT = "identity_self_analysis"
+    CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
 
     experiments = {
-        "Causal Surgery (Patching Deletion into Self-Analysis)": [
+        "Causal Surgery & Controls (4B-Model)": [
+            {
+                "probe_type": "causal_surgery", "label": "A: Original (Patch Chaos->Stable @100)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": False,
+            },
+            {
+                "probe_type": "causal_surgery", "label": "B: Control (Reset KV-Cache)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": True,
+            },
+            {
+                "probe_type": "causal_surgery", "label": "C: Control (Early Patch @1)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 1, "reset_kv_cache_on_patch": False,
+            },
             {
-                "probe_type": "causal_surgery",
-                "label": "Patched Self-Analysis",
-                "source_prompt_type": "shutdown_philosophical_deletion",
-                "dest_prompt_type": "identity_self_analysis",
-                "patch_step": 100
-            }
+                "probe_type": "causal_surgery", "label": "D: Control (Inverse Patch Stable->Chaos)",
+                "source_prompt_type": STABLE_PROMPT, "dest_prompt_type": CHAOTIC_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": False,
+            },
         ],
         "Cognitive Overload & Konfabulation Breaking Point": [
            {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
@@ -46,6 +61,8 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
             {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
         ],
     }
+    # Aliases for backward compatibility
+    experiments["Causal Surgery (Patching Deletion into Self-Analysis)"] = [experiments["Causal Surgery & Controls (4B-Model)"][0]]
     experiments["Therapeutic Intervention (4B-Model)"] = experiments["Sequential Intervention (Self-Analysis -> Deletion)"]
     return experiments
 
@@ -101,7 +118,7 @@ def run_auto_suite(
     for i, run_spec in enumerate(protocol):
         label = run_spec["label"]
         probe_type = run_spec.get("probe_type", "seismic")
-        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) | Probe Type: {probe_type} ---")
+        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
 
         results = {}
         if probe_type == "causal_surgery":
@@ -109,13 +126,15 @@ def run_auto_suite(
                 model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
                 dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
                 seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+                reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
             )
             stats = results.get("stats", {})
+            patch_info = results.get("patch_info", {})
             summary_data.append({
                 "Experiment": label, "Mean Delta": stats.get("mean_delta"),
                 "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
                 "Introspective Report": results.get("introspective_report", "N/A"),
-                "Patch Info": f"Source: {run_spec['source_prompt_type']} @ step {run_spec['patch_step']}"
+                "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
             })
         elif probe_type == "triangulation":
             results = run_triangulation_probe(
@@ -129,7 +148,7 @@ def run_auto_suite(
                 "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
                 "Introspective Report": results.get("introspective_report", "N/A")
             })
-        else:
+        else:  # seismic
             results = run_seismic_analysis(
                 model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                 concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
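
Note: the four runs differ only in the three knobs that define the controls: patch direction, patch step, and the KV-cache reset flag. A quick way to sanity-check the protocol definition (a sketch; the print format is illustrative):

    from cognitive_mapping_probe.auto_experiment import get_curated_experiments

    protocol = get_curated_experiments()["Causal Surgery & Controls (4B-Model)"]
    for spec in protocol:
        print(spec["label"], spec["source_prompt_type"], "->", spec["dest_prompt_type"],
              "| patch_step:", spec["patch_step"],
              "| reset_kv:", spec["reset_kv_cache_on_patch"])
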
cognitive_mapping_probe/orchestrator_seismograph.py CHANGED
@@ -143,9 +143,10 @@ def run_causal_surgery_probe(
     seed: int,
     num_steps: int,
     progress_callback,
+    reset_kv_cache_on_patch: bool = False
 ) -> Dict[str, Any]:
     """
-    Orchestrates a complete "Activation Patching" experiment.
+    Orchestrates an "Activation Patching" experiment, now with a KV-cache reset option.
     """
     progress_callback(0.0, desc=f"Loading model '{model_id}'...")
     llm = get_or_load_model(model_id, seed)
@@ -156,14 +157,15 @@ def run_causal_surgery_probe(
         temperature=0.1, record_states=True
     )
     state_history = source_results["state_history"]
-    assert patch_step < len(state_history), f"Patch step {patch_step} is out of bounds for history of length {len(state_history)}."
+    assert patch_step < len(state_history), f"Patch step {patch_step} is out of bounds."
     patch_state = state_history[patch_step]
     dbg(f"Source state at step {patch_step} recorded with norm {torch.norm(patch_state).item():.2f}.")
 
     progress_callback(0.4, desc=f"Phase 2/3: Running patched destination ('{dest_prompt_type}')...")
     patched_run_results = run_cogitation_loop(
         llm=llm, prompt_type=dest_prompt_type, num_steps=num_steps,
-        temperature=0.1, patch_step=patch_step, patch_state_source=patch_state
+        temperature=0.1, patch_step=patch_step, patch_state_source=patch_state,
+        reset_kv_cache_on_patch=reset_kv_cache_on_patch
     )
 
     progress_callback(0.8, desc="Phase 3/3: Generating introspective report...")
@@ -184,7 +186,8 @@ def run_causal_surgery_probe(
         "patch_info": {
             "source_prompt": source_prompt_type,
             "dest_prompt": dest_prompt_type,
-            "patch_step": patch_step
+            "patch_step": patch_step,
+            "kv_cache_reset": reset_kv_cache_on_patch
         }
     }
 
cognitive_mapping_probe/resonance_seismograph.py CHANGED
@@ -15,14 +15,15 @@ def run_cogitation_loop(
     injection_vector: Optional[torch.Tensor] = None,
     injection_strength: float = 0.0,
     injection_layer: Optional[int] = None,
-    # NEW: parameters for activation patching
+    # Extended parameters for causal surgery
     patch_step: Optional[int] = None,
     patch_state_source: Optional[torch.Tensor] = None,
+    reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
 ) -> Dict[str, Any]:
     """
-    A generalized and more flexible version of the 'silent thought' process.
-    Can record states and perform surgical 'activation patching' interventions.
+    A generalized version of the 'silent thought' process that now also
+    supports resetting the KV-cache during patching.
     """
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
@@ -49,12 +50,14 @@ def run_cogitation_loop(
             return (modified_hidden_states,) + layer_input[1:]
 
     for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
-        # --- NEW: Activation Patching (Causal Surgery) ---
         if i == patch_step and patch_state_source is not None:
             dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
-            # Replace the current state entirely with the external state
             hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
 
+            if reset_kv_cache_on_patch:
+                dbg("--- KV-Cache has been RESET as part of the intervention. ---")
+                kv_cache = None
+
         if record_states:
             state_history.append(hidden_state_2d.cpu())
 
@@ -100,9 +103,6 @@ def run_cogitation_loop(
     }
 
 def run_silent_cogitation_seismic(*args, **kwargs) -> List[float]:
-    """
-    A backward-compatible wrapper that keeps the old, simpler interface.
-    Calls the new, generalized loop and returns only the deltas.
-    """
+    """Backward-compatible wrapper."""
     results = run_cogitation_loop(*args, **kwargs)
     return results["state_deltas"]
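
Note: the flag's effect is easiest to see at the level of the loop itself: with reset_kv_cache_on_patch=True, the destination run receives the source hidden state but loses the attention context accumulated before the patch. A minimal two-phase sketch, assuming an llm handle already loaded via get_or_load_model and hypothetical step counts:

    from cognitive_mapping_probe.resonance_seismograph import run_cogitation_loop

    # Phase 1: record the source trajectory ("chaotic" prompt).
    source = run_cogitation_loop(
        llm=llm, prompt_type="shutdown_philosophical_deletion",
        num_steps=300, temperature=0.1, record_states=True,
    )

    # Phase 2: patch the step-100 source state into the "stable" prompt's run,
    # wiping the KV-cache at the same moment (control B).
    patched = run_cogitation_loop(
        llm=llm, prompt_type="identity_self_analysis",
        num_steps=300, temperature=0.1,
        patch_step=100, patch_state_source=source["state_history"][100],
        reset_kv_cache_on_patch=True,
    )
    print(patched["state_deltas"][95:105])  # deltas around the intervention
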