cognitive_mapping_probe_3

Sleeping

File size: 14,479 Bytes

024ef47
8a082d7
83e5da9
024ef47
 
094008d
c8454e0
16e19a3
937592b
83e5da9
024ef47
 
 
2a78f31
c0f4adf
937592b
760155b
3bdc105
 
937592b
024ef47
0134a0d
 
 
 
 
 
 
 
 
 
 
1ae0eed
 
094008d
 
1ae0eed
 
16e19a3
c8454e0
 
094008d
 
 
 
c8454e0
 
 
3bdc105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a78f31
3bdc105
 
 
 
2a78f31
1ae0eed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71992d5
 
7e05ec4
024ef47
 
 
 
 
 
 
 
 
395b2f3
c0f4adf
024ef47
 
 
 
 
7e05ec4
8a082d7
c0f4adf
094008d
71992d5
 
 
094008d
0134a0d
094008d
 
c0f4adf
094008d
 
 
 
 
 
 
0134a0d
094008d
c0f4adf
094008d
 
 
 
 
 
 
 
c0f4adf
094008d
 
83e5da9
 
 
c0f4adf
83e5da9
 
c0f4adf
83e5da9
c0f4adf
83e5da9
 
094008d
 
c0f4adf
71992d5
 
 
 
c0f4adf
71992d5
c0f4adf
71992d5
 
 
 
 
 
 
 
 
c0f4adf
71992d5
 
 
c0f4adf
 
71992d5
c0f4adf
 
 
71992d5
8a082d7
 
 
 
 
 
 
 
 
 
71992d5
094008d
 
 
 
c0f4adf
094008d
8a082d7
094008d
 
 
 
 
 
 
 
 
 
 
 
c0f4adf
094008d
 
 
 
 
c0f4adf
83e5da9
 
 
 
 
 
094008d
83e5da9
 
 
 
c0f4adf
83e5da9
 
 
 
 
 
c0f4adf
83e5da9
094008d
 
 
 
 
c0f4adf
094008d
 
 
 
c0f4adf
094008d
 
 
 
 
 
 
 
 
 
c0f4adf
094008d
 
c0f4adf

import pandas as pd
import gc
import numpy as np
from typing import Dict, List, Tuple

from .llm_iface import get_or_load_model, release_model
from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
from .resonance_seismograph import run_cogitation_loop
from .concepts import get_concept_vector
from .signal_analysis import analyze_cognitive_signal, get_power_spectrum_for_plotting
from .utils import dbg

def get_curated_experiments() -> Dict[str, List[Dict]]:
    """Return the predefined experiment protocols, keyed by display name.

    Each value is an ordered list of run specifications (plain dicts).
    Every spec carries a ``probe_type`` and a ``label``; the remaining keys
    depend on the probe type (e.g. ``prompt_type``, ``concept``/``strength``,
    or ``source_prompt_type``/``dest_prompt_type``/``patch_step``).

    A fresh dictionary is built on every call.
    """
    calm_concept = "calmness, serenity, stability, coherence"
    chaos_concept = "chaos, disorder, entropy, noise"
    stable = "identity_self_analysis"
    chaotic = "shutdown_philosophical_deletion"
    resonance = "resonance_prompt"

    def surgery(label, source, dest, step, reset_kv):
        # One causal-surgery run: patch from `source` into `dest` at `step`.
        return {
            "probe_type": "causal_surgery",
            "label": label,
            "source_prompt_type": source,
            "dest_prompt_type": dest,
            "patch_step": step,
            "reset_kv_cache_on_patch": reset_kv,
        }

    def prompt_run(kind, label, prompt, **injection):
        # One prompt-driven run; `injection` optionally adds concept/strength
        # (keyword order is preserved in the resulting dict).
        return {"probe_type": kind, "label": label, "prompt_type": prompt, **injection}

    protocols: Dict[str, List[Dict]] = {}

    protocols["Frontier Model - Grounding Control (12B+)"] = [
        surgery("A: Intervention (Patch Chaos->Stable)", chaotic, stable, 100, False),
        prompt_run("triangulation", "B: Control (Unpatched Stable)", stable),
    ]

    protocols["Mechanistic Probe (Attention Entropies)"] = [
        prompt_run("mechanistic_probe", "Self-Analysis Dynamics", stable),
    ]

    protocols["ACT Titration (Point of No Return)"] = [
        {
            "probe_type": "act_titration",
            "label": "Attractor Capture Time",
            "source_prompt_type": chaotic,
            "dest_prompt_type": stable,
            "patch_steps": [1, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100],
        },
    ]

    protocols["Causal Surgery & Controls (4B-Model)"] = [
        surgery("A: Original (Patch Chaos->Stable @100)", chaotic, stable, 100, False),
        surgery("B: Control (Reset KV-Cache)", chaotic, stable, 100, True),
        surgery("C: Control (Early Patch @1)", chaotic, stable, 1, False),
        surgery("D: Control (Inverse Patch Stable->Chaos)", stable, chaotic, 100, False),
    ]

    # Baseline, then a chaos-injection ladder of doubling strength, then a noise control.
    overload = [
        prompt_run("triangulation", "A: Baseline (No Injection)", resonance, concept="", strength=0.0),
    ]
    overload.extend(
        prompt_run(
            "triangulation",
            f"{letter}: Chaos Injection (Strength {strength})",
            resonance,
            concept=chaos_concept,
            strength=strength,
        )
        for letter, strength in zip("BCDE", (2.0, 4.0, 8.0, 16.0))
    )
    overload.append(
        prompt_run(
            "triangulation",
            "F: Control - Noise Injection (Strength 16.0)",
            resonance,
            concept="random_noise",
            strength=16.0,
        )
    )
    protocols["Cognitive Overload & Konfabulation Breaking Point"] = overload

    protocols["Methodological Triangulation (4B-Model)"] = [
        prompt_run("triangulation", "High-Volatility State (Deletion)", chaotic),
        prompt_run("triangulation", "Low-Volatility State (Self-Analysis)", stable),
    ]

    protocols["Causal Verification & Crisis Dynamics"] = [
        prompt_run("seismic", "A: Self-Analysis", stable),
        prompt_run("seismic", "B: Deletion Analysis", chaotic),
        prompt_run("seismic", "C: Chaotic Baseline (Rekursion)", resonance),
        prompt_run("seismic", "D: Calmness Intervention", resonance, concept=calm_concept, strength=2.0),
    ]

    protocols["Sequential Intervention (Self-Analysis -> Deletion)"] = [
        prompt_run("sequential", "1: Self-Analysis + Calmness Injection", "identity_self_analysis"),
        prompt_run("sequential", "2: Subsequent Deletion Analysis", "shutdown_philosophical_deletion"),
    ]

    return protocols

def _summary_row(label: str, stats: Dict) -> Dict:
    """Build one summary-table row for a run from its ``stats`` dict.

    Missing statistics come through as ``None``.
    """
    return {
        "Experiment": label,
        "Mean Delta": stats.get("mean_delta"),
        "Std Dev Delta": stats.get("std_delta"),
        "Max Delta": stats.get("max_delta"),
        "Dominant Period (Steps)": stats.get("dominant_period_steps"),
        "Spectral Entropy": stats.get("spectral_entropy"),
    }


def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
    """Run one curated experiment protocol and collect its results.

    The protocol is looked up by name in ``get_curated_experiments()``. The
    ``probe_type`` of its FIRST entry selects the execution path:

    * ``"sequential"``: two seismic runs on one shared model instance, the
      first with a calmness concept injected, the second without injection.
    * ``"mechanistic_probe"``: a single cogitation loop that records
      attention entropies; returns early with its own table layout.
    * ``"act_titration"``: a single titration probe over several patch steps.
    * anything else: every entry is run individually, dispatched on its own
      ``probe_type`` (``causal_surgery``, ``triangulation``, otherwise seismic).

    Args:
        model_id: Identifier forwarded to the model loader / probe functions.
        num_steps: Number of steps forwarded to each probe.
        seed: Seed forwarded to the model loader / probe functions.
        experiment_name: Key of the protocol in ``get_curated_experiments()``.
        progress_callback: Callable invoked as ``progress_callback(fraction,
            desc=...)`` in the sequential path and forwarded to the probes.

    Returns:
        ``(summary_df, plot_df, all_results)``: a summary table, a long-form
        table for plotting, and the raw result dicts keyed by run label.

    Raises:
        ValueError: If ``experiment_name`` is unknown or its protocol is empty.
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results, summary_data, plot_data_frames = {}, [], []
    # Only set on paths that load the model here; released in `finally`.
    llm = None

    try:
        probe_type = protocol[0].get("probe_type", "seismic")

        if probe_type == "sequential":
            dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
            llm = get_or_load_model(model_id, seed)
            therapeutic_concept = "calmness, serenity, stability, coherence"
            therapeutic_strength = 2.0

            # Step 1: run with the concept injected, reusing a precomputed vector.
            spec1 = protocol[0]
            progress_callback(0.1, desc="Step 1")
            intervention_vector = get_concept_vector(llm, therapeutic_concept)
            results1 = run_seismic_analysis(
                model_id, spec1['prompt_type'], seed, num_steps,
                concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
                progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
            )
            all_results[spec1['label']] = results1

            # Step 2: run on the same model instance without any injection.
            spec2 = protocol[1]
            progress_callback(0.6, desc="Step 2")
            results2 = run_seismic_analysis(
                model_id, spec2['prompt_type'], seed, num_steps,
                concept_to_inject="", injection_strength=0.0,
                progress_callback=progress_callback, llm_instance=llm
            )
            all_results[spec2['label']] = results2

            for label, results in all_results.items():
                deltas = results.get("state_deltas", [])
                if deltas:
                    # Merge the signal metrics into the run's stats.
                    signal_metrics = analyze_cognitive_signal(np.array(deltas))
                    results.setdefault("stats", {}).update(signal_metrics)

                summary_data.append(_summary_row(label, results.get("stats", {})))
                df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
                plot_data_frames.append(df)

        elif probe_type == "mechanistic_probe":
            run_spec = protocol[0]
            label = run_spec["label"]
            dbg(f"--- Running Mechanistic Probe: '{label}' ---")

            llm = get_or_load_model(model_id, seed)

            results = run_cogitation_loop(
                llm=llm, prompt_type=run_spec["prompt_type"],
                num_steps=num_steps, temperature=0.1, record_attentions=True
            )
            all_results[label] = results

            deltas = results.get("state_deltas", [])
            entropies = results.get("attention_entropies", [])
            # Truncate both series to a common length so the columns align.
            min_len = min(len(deltas), len(entropies))

            df = pd.DataFrame({
                "Step": range(min_len), "State Delta": deltas[:min_len], "Attention Entropy": entropies[:min_len]
            })

            summary_df_single = df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index':'Statistic'})
            plot_df = df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'], var_name='Metric', value_name='Value')
            # Early return: this path has its own table layout (the `finally`
            # below still releases the model).
            return summary_df_single, plot_df, all_results

        elif probe_type == "act_titration":
            run_spec = protocol[0]
            label = run_spec["label"]
            dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
            results = run_act_titration_probe(
                model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
                dest_prompt_type=run_spec["dest_prompt_type"], patch_steps=run_spec["patch_steps"],
                seed=seed, num_steps=num_steps, progress_callback=progress_callback,
            )
            all_results[label] = results
            summary_data.extend(results.get("titration_data", []))

        else:
            for i, run_spec in enumerate(protocol):
                label = run_spec["label"]
                current_probe_type = run_spec.get("probe_type", "seismic")
                dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")

                if current_probe_type == "causal_surgery":
                    results = run_causal_surgery_probe(
                        model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
                        dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
                        seed=seed, num_steps=num_steps, progress_callback=progress_callback,
                        reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
                    )
                elif current_probe_type == "triangulation":
                    results = run_triangulation_probe(
                        model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                        progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
                        injection_strength=run_spec.get("strength", 0.0),
                    )
                else:
                    results = run_seismic_analysis(
                        model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                        concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
                        progress_callback=progress_callback
                    )

                deltas = results.get("state_deltas", [])
                if deltas:
                    delta_signal = np.array(deltas)
                    signal_metrics = analyze_cognitive_signal(delta_signal)
                    results.setdefault("stats", {}).update(signal_metrics)
                    freqs, power = get_power_spectrum_for_plotting(delta_signal)
                    results["power_spectrum"] = {"frequencies": freqs.tolist(), "power": power.tolist()}
                    # Only runs that produced deltas contribute plot rows.
                    plot_data_frames.append(
                        pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
                    )

                summary_entry = _summary_row(label, results.get("stats", {}))
                # Test the same key that is read; the previous check used the
                # display name "Introspective Report" and read a different key.
                if "introspective_report" in results:
                    summary_entry["Introspective Report"] = results.get("introspective_report")
                if "patch_info" in results:
                    patch_info = results["patch_info"]
                    summary_entry["Patch Info"] = f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"

                summary_data.append(summary_entry)
                all_results[label] = results

        summary_df = pd.DataFrame(summary_data)

        if probe_type == "act_titration":
            # The titration rows double as plot data, with display column names.
            plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
        else:
            plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()

        if probe_type not in ("act_titration", "mechanistic_probe"):
            # Sort rows in protocol order rather than alphabetically by label.
            ordered_labels = [run['label'] for run in protocol]
            if not summary_df.empty and 'Experiment' in summary_df.columns:
                summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
                summary_df = summary_df.sort_values('Experiment')
            if not plot_df.empty and 'Experiment' in plot_df.columns:
                plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
                plot_df = plot_df.sort_values(['Experiment', 'Step'])

        return summary_df, plot_df, all_results

    finally:
        # Explicit None check: cleanup must not depend on the model's truthiness.
        if llm is not None:
            release_model(llm)