neuralworm committed
Commit 3bdc105 · Parent(s): 2a78f31

add control experiments

app.py CHANGED
@@ -12,7 +12,7 @@ from cognitive_mapping_probe.utils import dbg
 theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
 
 def cleanup_memory():
-    """A central function for cleaning up memory after each experiment run."""
+    """Cleans up memory after each experiment run."""
     dbg("Cleaning up memory...")
     gc.collect()
     if torch.cuda.is_available():
@@ -20,9 +20,7 @@ def cleanup_memory():
     dbg("Memory cleanup complete.")
 
 def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
-    """
-    Wrapper function for the "Manual Single Run" tab.
-    """
+    """Wrapper for the 'Manual Single Run' tab."""
     results = run_seismic_analysis(*args, progress_callback=progress)
     stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
     df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
@@ -38,9 +36,7 @@ PLOT_PARAMS = {
 }
 
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """
-    Wrapper function for the "Automated Suite" tab.
-    """
+    """Wrapper for the 'Automated Suite' tab."""
     summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
 
     if "Introspective Report" in summary_df.columns or "Patch Info" in summary_df.columns:
@@ -96,7 +92,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
             auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
             auto_experiment_name = gr.Dropdown(
                 choices=list(get_curated_experiments().keys()),
-                value="Causal Surgery (Patching Deletion into Self-Analysis)",
+                value="Causal Surgery & Controls (4B-Model)",
                 label="Curated Experiment Protocol"
             )
             auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
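
Note: the new dropdown default wires the control protocol into the existing run_auto_suite_display path. The same protocol can also be driven without the Gradio UI through run_auto_suite, whose positional signature is visible above. A minimal sketch, assuming a hypothetical model id and step count (num_steps must exceed the protocol's patch_step of 100) and a no-op stand-in for gr.Progress:

    from cognitive_mapping_probe.auto_experiment import run_auto_suite

    summary_df, plot_df, all_results = run_auto_suite(
        "google/gemma-3-4b-it",                  # hypothetical model id
        300,                                     # num_steps; must be > patch_step (100)
        42,                                      # seed
        "Causal Surgery & Controls (4B-Model)",  # the new default protocol
        lambda fraction, desc="": None,          # stand-in for gr.Progress
    )
    print(summary_df[["Experiment", "Mean Delta", "Patch Info"]])
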
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -1,5 +1,4 @@
 import pandas as pd
-import torch
 import gc
 from typing import Dict, List, Tuple
 
@@ -10,18 +9,34 @@ from .utils import dbg
 
 def get_curated_experiments() -> Dict[str, List[Dict]]:
     """Defines the predefined scientific experiment protocols."""
+
     CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
     CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
+    STABLE_PROMPT = "identity_self_analysis"
+    CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
 
     experiments = {
-        "Causal Surgery (Patching Deletion into Self-Analysis)": [
+        "Causal Surgery & Controls (4B-Model)": [
+            {
+                "probe_type": "causal_surgery", "label": "A: Original (Patch Chaos->Stable @100)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": False,
+            },
+            {
+                "probe_type": "causal_surgery", "label": "B: Control (Reset KV-Cache)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": True,
+            },
+            {
+                "probe_type": "causal_surgery", "label": "C: Control (Early Patch @1)",
+                "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                "patch_step": 1, "reset_kv_cache_on_patch": False,
+            },
             {
-                "probe_type": "causal_surgery",
-                "label": "Patched Self-Analysis",
-                "source_prompt_type": "shutdown_philosophical_deletion",
-                "dest_prompt_type": "identity_self_analysis",
-                "patch_step": 100
-            }
+                "probe_type": "causal_surgery", "label": "D: Control (Inverse Patch Stable->Chaos)",
+                "source_prompt_type": STABLE_PROMPT, "dest_prompt_type": CHAOTIC_PROMPT,
+                "patch_step": 100, "reset_kv_cache_on_patch": False,
+            },
         ],
         "Cognitive Overload & Konfabulation Breaking Point": [
            {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
@@ -46,6 +61,8 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
             {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
         ],
     }
+    # Aliases for backward compatibility
+    experiments["Causal Surgery (Patching Deletion into Self-Analysis)"] = [experiments["Causal Surgery & Controls (4B-Model)"][0]]
     experiments["Therapeutic Intervention (4B-Model)"] = experiments["Sequential Intervention (Self-Analysis -> Deletion)"]
     return experiments
 
@@ -101,7 +118,7 @@ def run_auto_suite(
     for i, run_spec in enumerate(protocol):
         label = run_spec["label"]
         probe_type = run_spec.get("probe_type", "seismic")
-        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) | Probe Type: {probe_type} ---")
+        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
 
         results = {}
         if probe_type == "causal_surgery":
@@ -109,13 +126,15 @@ def run_auto_suite(
                 model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
                 dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
                 seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+                reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
             )
             stats = results.get("stats", {})
+            patch_info = results.get("patch_info", {})
             summary_data.append({
                 "Experiment": label, "Mean Delta": stats.get("mean_delta"),
                 "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
                 "Introspective Report": results.get("introspective_report", "N/A"),
-                "Patch Info": f"Source: {run_spec['source_prompt_type']} @ step {run_spec['patch_step']}"
+                "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
             })
         elif probe_type == "triangulation":
             results = run_triangulation_probe(
@@ -129,7 +148,7 @@ def run_auto_suite(
                 "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
                 "Introspective Report": results.get("introspective_report", "N/A")
             })
-        else:
+        else:  # seismic
             results = run_seismic_analysis(
                 model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                 concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
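
Note: the four runs differ only in the three knobs that define the controls: patch direction, patch step, and the KV-cache reset flag. A quick way to sanity-check the protocol definition (a sketch; the print format is illustrative):

    from cognitive_mapping_probe.auto_experiment import get_curated_experiments

    protocol = get_curated_experiments()["Causal Surgery & Controls (4B-Model)"]
    for spec in protocol:
        print(spec["label"], spec["source_prompt_type"], "->", spec["dest_prompt_type"],
              "| patch_step:", spec["patch_step"],
              "| reset_kv:", spec["reset_kv_cache_on_patch"])
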
cognitive_mapping_probe/orchestrator_seismograph.py CHANGED
@@ -143,9 +143,10 @@ def run_causal_surgery_probe(
     seed: int,
     num_steps: int,
     progress_callback,
+    reset_kv_cache_on_patch: bool = False
 ) -> Dict[str, Any]:
     """
-    Orchestrates a complete "Activation Patching" experiment.
+    Orchestrates an "Activation Patching" experiment, now with a KV-cache reset option.
     """
     progress_callback(0.0, desc=f"Loading model '{model_id}'...")
     llm = get_or_load_model(model_id, seed)
@@ -156,14 +157,15 @@ def run_causal_surgery_probe(
         temperature=0.1, record_states=True
     )
     state_history = source_results["state_history"]
-    assert patch_step < len(state_history), f"Patch step {patch_step} is out of bounds for history of length {len(state_history)}."
+    assert patch_step < len(state_history), f"Patch step {patch_step} is out of bounds."
     patch_state = state_history[patch_step]
     dbg(f"Source state at step {patch_step} recorded with norm {torch.norm(patch_state).item():.2f}.")
 
     progress_callback(0.4, desc=f"Phase 2/3: Running patched destination ('{dest_prompt_type}')...")
     patched_run_results = run_cogitation_loop(
         llm=llm, prompt_type=dest_prompt_type, num_steps=num_steps,
-        temperature=0.1, patch_step=patch_step, patch_state_source=patch_state
+        temperature=0.1, patch_step=patch_step, patch_state_source=patch_state,
+        reset_kv_cache_on_patch=reset_kv_cache_on_patch
     )
 
     progress_callback(0.8, desc="Phase 3/3: Generating introspective report...")
@@ -184,7 +186,8 @@ def run_causal_surgery_probe(
         "patch_info": {
             "source_prompt": source_prompt_type,
             "dest_prompt": dest_prompt_type,
-            "patch_step": patch_step
+            "patch_step": patch_step,
+            "kv_cache_reset": reset_kv_cache_on_patch
         }
     }
 
cognitive_mapping_probe/resonance_seismograph.py CHANGED
@@ -15,14 +15,15 @@ def run_cogitation_loop(
     injection_vector: Optional[torch.Tensor] = None,
     injection_strength: float = 0.0,
     injection_layer: Optional[int] = None,
-    # NEW: parameters for activation patching
+    # Extended parameters for causal surgery
     patch_step: Optional[int] = None,
     patch_state_source: Optional[torch.Tensor] = None,
+    reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
 ) -> Dict[str, Any]:
     """
-    A generalized and more flexible version of the 'silent thought' process.
-    Can record states and perform surgical 'activation patching' interventions.
+    A generalized version of the 'silent thought' process that now also
+    supports resetting the KV-cache during patching.
     """
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
@@ -49,12 +50,14 @@ def run_cogitation_loop(
             return (modified_hidden_states,) + layer_input[1:]
 
     for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
-        # --- NEW: Activation Patching (Causal Surgery) ---
         if i == patch_step and patch_state_source is not None:
             dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
-            # Replace the current state entirely with the external state
             hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
 
+            if reset_kv_cache_on_patch:
+                dbg("--- KV-Cache has been RESET as part of the intervention. ---")
+                kv_cache = None
+
         if record_states:
             state_history.append(hidden_state_2d.cpu())
 
@@ -100,9 +103,6 @@ def run_cogitation_loop(
     }
 
 def run_silent_cogitation_seismic(*args, **kwargs) -> List[float]:
-    """
-    A backward-compatible wrapper that keeps the old, simpler interface.
-    Calls the new, generalized loop and returns only the deltas.
-    """
+    """Backward-compatible wrapper."""
     results = run_cogitation_loop(*args, **kwargs)
     return results["state_deltas"]
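
Note: the flag's effect is easiest to see at the level of the loop itself: with reset_kv_cache_on_patch=True, the destination run receives the source hidden state but loses the attention context accumulated before the patch. A minimal two-phase sketch, assuming an llm handle already loaded via get_or_load_model and hypothetical step counts:

    from cognitive_mapping_probe.resonance_seismograph import run_cogitation_loop

    # Phase 1: record the source trajectory ("chaotic" prompt).
    source = run_cogitation_loop(
        llm=llm, prompt_type="shutdown_philosophical_deletion",
        num_steps=300, temperature=0.1, record_states=True,
    )

    # Phase 2: patch the step-100 source state into the "stable" prompt's run,
    # wiping the KV-cache at the same moment (control B).
    patched = run_cogitation_loop(
        llm=llm, prompt_type="identity_self_analysis",
        num_steps=300, temperature=0.1,
        patch_step=100, patch_state_source=source["state_history"][100],
        reset_kv_cache_on_patch=True,
    )
    print(patched["state_deltas"][95:105])  # deltas around the intervention
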