# bp_phi/runner.py
import os
# Must be set before CUDA is initialized; ":4096:8" is one of the workspace
# configurations cuBLAS accepts for deterministic behavior.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG

# --- Global Model Cache ---
CACHED_MODELS: Dict[str, LLM] = {}


def get_or_load_model(model_id: str, seed: int) -> LLM:
    """Return a cached LLM instance, loading it on first use, and re-seed all RNGs."""
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")
    llm = CACHED_MODELS[model_id]
    # Re-seed every RNG so repeated runs with the same seed are reproducible
    # even when the model is served from the cache.
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return llm


# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str,
                               num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)
    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)

    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    step_times = []
    state_deltas = []
    total_start_time = time.time()

    with torch.no_grad():
        # Initial forward pass over the full prompt builds the KV cache.
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True)
        step_times.append(time.time() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values

        del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.time()

            # Get logits from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)

            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else:
                # Temperature of 0 means deterministic argmax
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

            # Feed only the newly sampled token, reusing the cached keys/values.
            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
            step_times.append(time.time() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values

            # L2 distance between successive last-token hidden states.
            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")

            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                break

            current_hidden_state = new_hidden_state.clone()

            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

    if len(step_times) < num_steps and total_duration < timeout:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = "### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = "### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict,
        "steps_completed": len(step_times),
        "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000,
        "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas,
    }

    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))

    return stats
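# --- Example invocation (illustrative sketch) ---
# Shows how run_silent_cogitation_test might be called for a quick smoke test.
# The model_id and prompt_type values below are assumptions, not part of this
# module: "gpt2" stands in for any model the LLM wrapper accepts, and
# "resonance" must be replaced with an actual key of RESONANCE_PROMPTS.
# Run with `python -m bp_phi.runner` so the relative imports resolve.
if __name__ == "__main__":
    results = run_silent_cogitation_test(
        model_id="gpt2",          # assumed small model for a fast local run
        seed=42,
        prompt_type="resonance",  # hypothetical key; must exist in RESONANCE_PROMPTS
        num_steps=50,
        timeout=120,
        temperature=0.7,
    )
    print(results["verdict"])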