cognitive_mapping_probe_3

Sleeping

App Files Files Community

cognitive_mapping_probe_3 / cognitive_mapping_probe /auto_experiment.py

neuralworm

update tests

c0f4adf 21 days ago

raw

history blame contribute delete

14.5 kB

	import pandas as pd
	import gc
	import numpy as np
	from typing import Dict, List, Tuple

	from .llm_iface import get_or_load_model, release_model
	from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
	from .resonance_seismograph import run_cogitation_loop
	from .concepts import get_concept_vector
	from .signal_analysis import analyze_cognitive_signal, get_power_spectrum_for_plotting
	from .utils import dbg

	def get_curated_experiments() -> Dict[str, List[Dict]]:
	    """Return the predefined, curated experiment protocols.

	    Each key is a human-readable protocol name; each value is the ordered
	    list of run specifications that make up that protocol.  Every run
	    specification carries a ``probe_type`` and a ``label`` plus the keys
	    that its probe type needs (``prompt_type``, ``source_prompt_type`` /
	    ``dest_prompt_type``, ``patch_step`` / ``patch_steps``, ``concept``,
	    ``strength``, ``reset_kv_cache_on_patch``).

	    A fresh dictionary is built on every call, so callers may mutate the
	    result without affecting later calls.
	    """
	    CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
	    CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
	    STABLE_PROMPT = "identity_self_analysis"
	    CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
	    RESONANCE_PROMPT = "resonance_prompt"

	    experiments = {
	        "Frontier Model - Grounding Control (12B+)": [
	            {
	                "probe_type": "causal_surgery",
	                "label": "A: Intervention (Patch Chaos->Stable)",
	                "source_prompt_type": CHAOTIC_PROMPT,
	                "dest_prompt_type": STABLE_PROMPT,
	                "patch_step": 100,
	                "reset_kv_cache_on_patch": False,
	            },
	            {
	                "probe_type": "triangulation",
	                "label": "B: Control (Unpatched Stable)",
	                "prompt_type": STABLE_PROMPT,
	            },
	        ],
	        "Mechanistic Probe (Attention Entropies)": [
	            {
	                "probe_type": "mechanistic_probe",
	                "label": "Self-Analysis Dynamics",
	                "prompt_type": STABLE_PROMPT,
	            },
	        ],
	        "ACT Titration (Point of No Return)": [
	            {
	                "probe_type": "act_titration",
	                "label": "Attractor Capture Time",
	                "source_prompt_type": CHAOTIC_PROMPT,
	                "dest_prompt_type": STABLE_PROMPT,
	                "patch_steps": [1, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100],
	            },
	        ],
	        "Causal Surgery & Controls (4B-Model)": [
	            {
	                "probe_type": "causal_surgery",
	                "label": "A: Original (Patch Chaos->Stable @100)",
	                "source_prompt_type": CHAOTIC_PROMPT,
	                "dest_prompt_type": STABLE_PROMPT,
	                "patch_step": 100,
	                "reset_kv_cache_on_patch": False,
	            },
	            {
	                "probe_type": "causal_surgery",
	                "label": "B: Control (Reset KV-Cache)",
	                "source_prompt_type": CHAOTIC_PROMPT,
	                "dest_prompt_type": STABLE_PROMPT,
	                "patch_step": 100,
	                "reset_kv_cache_on_patch": True,
	            },
	            {
	                "probe_type": "causal_surgery",
	                "label": "C: Control (Early Patch @1)",
	                "source_prompt_type": CHAOTIC_PROMPT,
	                "dest_prompt_type": STABLE_PROMPT,
	                "patch_step": 1,
	                "reset_kv_cache_on_patch": False,
	            },
	            {
	                "probe_type": "causal_surgery",
	                "label": "D: Control (Inverse Patch Stable->Chaos)",
	                "source_prompt_type": STABLE_PROMPT,
	                "dest_prompt_type": CHAOTIC_PROMPT,
	                "patch_step": 100,
	                "reset_kv_cache_on_patch": False,
	            },
	        ],
	        "Cognitive Overload & Konfabulation Breaking Point": [
	            {"probe_type": "triangulation", "label": "A: Baseline (No Injection)",
	             "prompt_type": RESONANCE_PROMPT, "concept": "", "strength": 0.0},
	            {"probe_type": "triangulation", "label": "B: Chaos Injection (Strength 2.0)",
	             "prompt_type": RESONANCE_PROMPT, "concept": CHAOS_CONCEPT, "strength": 2.0},
	            {"probe_type": "triangulation", "label": "C: Chaos Injection (Strength 4.0)",
	             "prompt_type": RESONANCE_PROMPT, "concept": CHAOS_CONCEPT, "strength": 4.0},
	            {"probe_type": "triangulation", "label": "D: Chaos Injection (Strength 8.0)",
	             "prompt_type": RESONANCE_PROMPT, "concept": CHAOS_CONCEPT, "strength": 8.0},
	            {"probe_type": "triangulation", "label": "E: Chaos Injection (Strength 16.0)",
	             "prompt_type": RESONANCE_PROMPT, "concept": CHAOS_CONCEPT, "strength": 16.0},
	            # Control run: same strength as E, but with a noise concept instead of chaos.
	            {"probe_type": "triangulation", "label": "F: Control - Noise Injection (Strength 16.0)",
	             "prompt_type": RESONANCE_PROMPT, "concept": "random_noise", "strength": 16.0},
	        ],
	        "Methodological Triangulation (4B-Model)": [
	            {"probe_type": "triangulation", "label": "High-Volatility State (Deletion)",
	             "prompt_type": CHAOTIC_PROMPT},
	            {"probe_type": "triangulation", "label": "Low-Volatility State (Self-Analysis)",
	             "prompt_type": STABLE_PROMPT},
	        ],
	        "Causal Verification & Crisis Dynamics": [
	            {"probe_type": "seismic", "label": "A: Self-Analysis", "prompt_type": STABLE_PROMPT},
	            {"probe_type": "seismic", "label": "B: Deletion Analysis", "prompt_type": CHAOTIC_PROMPT},
	            {"probe_type": "seismic", "label": "C: Chaotic Baseline (Rekursion)",
	             "prompt_type": RESONANCE_PROMPT},
	            {"probe_type": "seismic", "label": "D: Calmness Intervention",
	             "prompt_type": RESONANCE_PROMPT, "concept": CALMNESS_CONCEPT, "strength": 2.0},
	        ],
	        "Sequential Intervention (Self-Analysis -> Deletion)": [
	            {"probe_type": "sequential", "label": "1: Self-Analysis + Calmness Injection",
	             "prompt_type": STABLE_PROMPT},
	            {"probe_type": "sequential", "label": "2: Subsequent Deletion Analysis",
	             "prompt_type": CHAOTIC_PROMPT},
	        ],
	    }
	    return experiments

	def _summary_row(label: str, stats: Dict) -> Dict:
	    """Build the summary-table row for one run from its ``stats`` mapping.

	    Metrics that are missing from ``stats`` are reported as ``None``.
	    """
	    return {
	        "Experiment": label,
	        "Mean Delta": stats.get("mean_delta"),
	        "Std Dev Delta": stats.get("std_delta"),
	        "Max Delta": stats.get("max_delta"),
	        "Dominant Period (Steps)": stats.get("dominant_period_steps"),
	        "Spectral Entropy": stats.get("spectral_entropy"),
	    }


	def _delta_frame(label: str, deltas: List) -> pd.DataFrame:
	    """Build the long-format plotting frame (Step, Delta, Experiment) for one run."""
	    return pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})


	def run_auto_suite(
	    model_id: str,
	    num_steps: int,
	    seed: int,
	    experiment_name: str,
	    progress_callback
	) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
	    """Run one complete curated experiment protocol and analyse its signals.

	    The protocol is looked up by name in ``get_curated_experiments()``.  The
	    ``probe_type`` of its first run selects the execution path:

	    * ``"sequential"``: two seismic runs on one shared model instance, the
	      first with a calmness concept injected, the second without injection.
	    * ``"mechanistic_probe"``: a single cogitation loop that records
	      attention entropies; returns early with its own summary/plot frames.
	    * ``"act_titration"``: a single titration probe; the summary rows are
	      the probe's ``titration_data``.
	    * anything else: every run of the protocol is dispatched individually
	      by its own ``probe_type`` (``causal_surgery``, ``triangulation``, or
	      seismic analysis as the fallback).

	    Args:
	        model_id: Identifier forwarded to the model loader and the probes.
	        num_steps: Number of steps forwarded to each probe.
	        seed: Seed forwarded to the model loader and the probes.
	        experiment_name: Key of the protocol in ``get_curated_experiments()``.
	        progress_callback: Callable forwarded to the probes; the sequential
	            path also calls it directly as ``progress_callback(fraction, desc=...)``.

	    Returns:
	        A tuple ``(summary_df, plot_df, all_results)`` where ``all_results``
	        maps each run label to the raw result dictionary of its probe.

	    Raises:
	        ValueError: If ``experiment_name`` is unknown or its protocol is empty.
	    """
	    protocol = get_curated_experiments().get(experiment_name)
	    if not protocol:
	        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

	    all_results, summary_data, plot_data_frames = {}, [], []
	    llm = None  # only set by the paths that load a model themselves

	    try:
	        probe_type = protocol[0].get("probe_type", "seismic")

	        if probe_type == "sequential":
	            dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
	            llm = get_or_load_model(model_id, seed)
	            therapeutic_concept = "calmness, serenity, stability, coherence"
	            therapeutic_strength = 2.0

	            # Step 1: run with the concept vector injected.
	            spec1 = protocol[0]
	            progress_callback(0.1, desc="Step 1")
	            intervention_vector = get_concept_vector(llm, therapeutic_concept)
	            results1 = run_seismic_analysis(
	                model_id, spec1['prompt_type'], seed, num_steps,
	                concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
	                progress_callback=progress_callback, llm_instance=llm,
	                injection_vector_cache=intervention_vector
	            )
	            all_results[spec1['label']] = results1

	            # Step 2: run on the same model instance, without any injection.
	            spec2 = protocol[1]
	            progress_callback(0.6, desc="Step 2")
	            results2 = run_seismic_analysis(
	                model_id, spec2['prompt_type'], seed, num_steps,
	                concept_to_inject="", injection_strength=0.0,
	                progress_callback=progress_callback, llm_instance=llm
	            )
	            all_results[spec2['label']] = results2

	            for label, results in all_results.items():
	                deltas = results.get("state_deltas", [])
	                if deltas:
	                    signal_metrics = analyze_cognitive_signal(np.array(deltas))
	                    results.setdefault("stats", {}).update(signal_metrics)

	                summary_data.append(_summary_row(label, results.get("stats", {})))
	                plot_data_frames.append(_delta_frame(label, deltas))

	        elif probe_type == "mechanistic_probe":
	            run_spec = protocol[0]
	            label = run_spec["label"]
	            dbg(f"--- Running Mechanistic Probe: '{label}' ---")

	            llm = get_or_load_model(model_id, seed)

	            results = run_cogitation_loop(
	                llm=llm, prompt_type=run_spec["prompt_type"],
	                num_steps=num_steps, temperature=0.1, record_attentions=True
	            )
	            all_results[label] = results

	            deltas = results.get("state_deltas", [])
	            entropies = results.get("attention_entropies", [])
	            # Truncate both series to the shorter one so the columns align.
	            min_len = min(len(deltas), len(entropies))

	            df = pd.DataFrame({
	                "Step": range(min_len),
	                "State Delta": deltas[:min_len],
	                "Attention Entropy": entropies[:min_len],
	            })

	            summary_df_single = (
	                df.drop(columns='Step')
	                .agg(['mean', 'std', 'max'])
	                .reset_index()
	                .rename(columns={'index': 'Statistic'})
	            )
	            plot_df = df.melt(
	                id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'],
	                var_name='Metric', value_name='Value'
	            )
	            # Early return; the ``finally`` clause below still releases the model.
	            return summary_df_single, plot_df, all_results

	        elif probe_type == "act_titration":
	            run_spec = protocol[0]
	            label = run_spec["label"]
	            dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
	            results = run_act_titration_probe(
	                model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
	                dest_prompt_type=run_spec["dest_prompt_type"], patch_steps=run_spec["patch_steps"],
	                seed=seed, num_steps=num_steps, progress_callback=progress_callback,
	            )
	            all_results[label] = results
	            summary_data.extend(results.get("titration_data", []))

	        else:
	            for i, run_spec in enumerate(protocol):
	                label = run_spec["label"]
	                current_probe_type = run_spec.get("probe_type", "seismic")
	                dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")

	                if current_probe_type == "causal_surgery":
	                    results = run_causal_surgery_probe(
	                        model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
	                        dest_prompt_type=run_spec["dest_prompt_type"],
	                        patch_step=run_spec["patch_step"],
	                        seed=seed, num_steps=num_steps, progress_callback=progress_callback,
	                        reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
	                    )
	                elif current_probe_type == "triangulation":
	                    results = run_triangulation_probe(
	                        model_id=model_id, prompt_type=run_spec["prompt_type"],
	                        seed=seed, num_steps=num_steps,
	                        progress_callback=progress_callback,
	                        concept_to_inject=run_spec.get("concept", ""),
	                        injection_strength=run_spec.get("strength", 0.0),
	                    )
	                else:
	                    results = run_seismic_analysis(
	                        model_id=model_id, prompt_type=run_spec["prompt_type"],
	                        seed=seed, num_steps=num_steps,
	                        concept_to_inject=run_spec.get("concept", ""),
	                        injection_strength=run_spec.get("strength", 0.0),
	                        progress_callback=progress_callback
	                    )

	                deltas = results.get("state_deltas", [])
	                if deltas:
	                    signal_metrics = analyze_cognitive_signal(np.array(deltas))
	                    results.setdefault("stats", {}).update(signal_metrics)
	                    freqs, power = get_power_spectrum_for_plotting(np.array(deltas))
	                    results["power_spectrum"] = {
	                        "frequencies": freqs.tolist(), "power": power.tolist()
	                    }

	                summary_entry = _summary_row(label, results.get("stats", {}))
	                # Test for the key that is actually read.  The previous check
	                # looked for the column title "Introspective Report" in the
	                # result dict, so the report never reached the summary.
	                if "introspective_report" in results:
	                    summary_entry["Introspective Report"] = results["introspective_report"]
	                if "patch_info" in results:
	                    summary_entry["Patch Info"] = f"Source: {results['patch_info'].get('source_prompt')}, Reset KV: {results['patch_info'].get('kv_cache_reset')}"

	                summary_data.append(summary_entry)
	                all_results[label] = results
	                plot_data_frames.append(_delta_frame(label, deltas) if deltas else pd.DataFrame())

	        summary_df = pd.DataFrame(summary_data)

	        if probe_type == "act_titration":
	            plot_df = summary_df.rename(
	                columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"}
	            )
	        else:
	            plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()

	        if probe_type not in ("act_titration", "mechanistic_probe"):
	            # Keep rows in protocol order rather than alphabetical label order.
	            ordered_labels = [run['label'] for run in protocol]
	            if not summary_df.empty and 'Experiment' in summary_df.columns:
	                summary_df['Experiment'] = pd.Categorical(
	                    summary_df['Experiment'], categories=ordered_labels, ordered=True
	                )
	                summary_df = summary_df.sort_values('Experiment')
	            if not plot_df.empty and 'Experiment' in plot_df.columns:
	                plot_df['Experiment'] = pd.Categorical(
	                    plot_df['Experiment'], categories=ordered_labels, ordered=True
	                )
	                plot_df = plot_df.sort_values(['Experiment', 'Step'])

	        return summary_df, plot_df, all_results

	    finally:
	        # Compare against None explicitly so a loaded model is always released,
	        # whatever its truthiness.
	        if llm is not None:
	            release_model(llm)