# bp_phi/runner.py
import os
# Must be set before CUDA is initialized; ":4096:8" is one of the workspace
# configurations cuBLAS accepts for deterministic behavior.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG

# --- Global Model Cache ---
CACHED_MODELS: Dict[str, LLM] = {}


def get_or_load_model(model_id: str, seed: int) -> LLM:
    """Return a cached LLM instance, loading it on first use, and re-seed all RNGs."""
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")
    llm = CACHED_MODELS[model_id]
    # Re-seed every RNG so repeated runs with the same seed are reproducible
    # even when the model is served from the cache.
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return llm


# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str,
                               num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)
    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)

    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    step_times = []
    state_deltas = []
    total_start_time = time.time()

    with torch.no_grad():
        # Initial forward pass over the full prompt builds the KV cache.
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True)
        step_times.append(time.time() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values

        del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.time()

            # Get logits from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)

            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else:
                # Temperature of 0 means deterministic argmax
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

            # Feed only the newly sampled token, reusing the cached keys/values.
            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
            step_times.append(time.time() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values

            # L2 distance between successive last-token hidden states.
            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")

            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                break

            current_hidden_state = new_hidden_state.clone()

            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

    if len(step_times) < num_steps and total_duration < timeout:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = "### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = "### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict,
        "steps_completed": len(step_times),
        "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000,
        "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas,
    }

    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))

    return stats
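# --- Example invocation (illustrative sketch) ---
# Shows how run_silent_cogitation_test might be called for a quick smoke test.
# The model_id and prompt_type values below are assumptions, not part of this
# module: "gpt2" stands in for any model the LLM wrapper accepts, and
# "resonance" must be replaced with an actual key of RESONANCE_PROMPTS.
# Run with `python -m bp_phi.runner` so the relative imports resolve.
if __name__ == "__main__":
    results = run_silent_cogitation_test(
        model_id="gpt2",          # assumed small model for a fast local run
        seed=42,
        prompt_type="resonance",  # hypothetical key; must exist in RESONANCE_PROMPTS
        num_steps=50,
        timeout=120,
        temperature=0.7,
    )
    print(results["verdict"])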