# core/eil_processor.py
# MEC EIL Processor – World-Class Signal Normalization Edition

import yaml
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

class EILProcessor:
    def __init__(self, codex_informer, softmax_threshold=0.6,
                 crosswalk_path='config/crosswalk.yaml'):
        """Build the lookup tables and load the classification models.

        Args:
            codex_informer: Object providing ``build_alias_lookup()`` (called
                here) and ``resolve_emotion_family(code)`` (called by
                ``infer_emotion``).
            softmax_threshold: Minimum top-class probability for the emotion
                model's prediction to be kept; below it the label is
                replaced by ``'neutral'``.
            crosswalk_path: Location of the crosswalk YAML file. Defaults to
                the path that was previously hard-coded, which is resolved
                relative to the current working directory.

        Raises:
            KeyError: If the YAML document has no ``crosswalk`` key, or an
                entry lacks ``phrase``/``pattern``/``emotion_code``.
        """
        self.codex_informer = codex_informer
        self.softmax_threshold = softmax_threshold

        # Build alias lookup from Codex
        self.alias_lookup = self.codex_informer.build_alias_lookup()
        print(f"[EILProcessor] Alias map loaded with {len(self.alias_lookup)} entries")

        # Load crosswalk.yaml. safe_load returns None for an empty document,
        # so fall back to {} and let the missing 'crosswalk' key surface as a
        # KeyError instead of an opaque TypeError.
        with open(crosswalk_path, 'r', encoding='utf-8') as f:
            yaml_data = yaml.safe_load(f) or {}
        crosswalk_data = yaml_data['crosswalk']
        # 'story_patterns' is optional; also tolerate an explicit null value.
        story_pattern_data = yaml_data.get('story_patterns') or []

        # Build crosswalk lookup (keys are normalized exactly like user input
        # so that infer_emotion can do a direct dictionary hit)
        self.crosswalk_lookup = {
            self.normalize_text(entry['phrase']): entry['emotion_code']
            for entry in crosswalk_data
        }

        # Build story_patterns lookup
        self.story_patterns_lookup = {
            self.normalize_text(entry['pattern']): entry['emotion_code']
            for entry in story_pattern_data
        }

        print(f"[EILProcessor] Crosswalk loaded with {len(self.crosswalk_lookup)} entries")
        print(f"[EILProcessor] Story Patterns loaded with {len(self.story_patterns_lookup)} entries")

        # Emotion keyword dictionary for signal normalization/blending
        self.emotion_keyword_map = {
            "FAM-ANG": ["anger", "angry", "hate", "furious", "rage", "resentment"],
            "FAM-HEL": ["helpless", "powerless", "can't", "unable", "trapped", "stuck", "overwhelmed", "overwhelm"],
            "FAM-SAD": ["sad", "down", "unhappy", "miserable", "depressed", "blue", "empty"],
            "FAM-FEA": ["afraid", "scared", "fear", "terrified", "worried", "nervous", "anxious", "can't sleep"],
            "FAM-LOV": ["love", "loved", "loving", "caring", "affection", "proud"],
            "FAM-JOY": ["joy", "happy", "excited", "delighted", "content", "proud"],
            "FAM-SUR": ["surprised", "amazed", "astonished", "shocked"],
            "FAM-DIS": ["disgust", "disgusted", "gross", "revolted"],
            "FAM-SHA": ["ashamed", "shame", "embarrassed", "humiliated"],
            "FAM-GUI": ["guilty", "guilt", "remorse", "regret"],
            # Add more as needed
        }

        # For sentiment-to-emotion mapping of ambiguous/indirect language
        self.sentiment_cue_map = [
            # (sentiment, regex or cue, mapped emotion)
            ("negative", r"can.?t sleep|insomnia|restless|wake up", "FAM-FEA"),
            ("negative", r"too much|overwhelmed|can.?t cope|can.?t deal", "FAM-HEL"),
            ("negative", r"nothing feels right|empty|pointless|no purpose", "FAM-SAD"),
            ("negative", r"don't care|apathy|numb", "FAM-LON"),
            ("positive", r"did it|proud|relieved", "FAM-JOY"),
            ("neutral", r"just tired|exhausted", "FAM-HEL"),
            # ...add more for coverage
        ]

        # Load emotion and sentiment models
        self.tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
        self.model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
        self.sentiment_tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
        self.sentiment_model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')

    def normalize_text(self, text):
        """Return a canonical lookup form of ``text``.

        The text is lower-cased and trimmed, at most one leading
        self-reference prefix ("i am feeling ", "i feel ", ...) is removed,
        the characters ``.``, ``!`` and ``?`` are deleted, and surrounding
        whitespace is trimmed again so that inputs such as "I feel sad ."
        normalize to "sad" rather than "sad ".

        Args:
            text: Raw phrase (str).

        Returns:
            The normalized string (possibly empty).
        """
        # Checked in order; only the first matching prefix is removed.
        prefixes = (
            "i am feeling ",
            "i feel ",
            "feeling ",
            "i'm feeling ",
            "i am ",
            "i'm ",
        )
        text = text.lower().strip()
        for prefix in prefixes:
            if text.startswith(prefix):
                text = text[len(prefix):]
                break
        text = re.sub(r'[.!?]', '', text)
        # Prefix/punctuation removal can expose leading or trailing blanks
        # that would defeat the exact-match dictionary lookups downstream.
        return text.strip()

    def is_story_input(self, text):
        """Heuristically decide whether ``text`` is a multi-clause story.

        A text counts as a story when it has more than 12 whitespace-separated
        tokens, contains ``,``, ``;`` or ``.``, or contains one of the clause
        words (but, because, so that, which, when, while) as a whole word.
        Whole-word matching prevents words like "butterfly" or "attribute"
        from triggering story mode through the embedded "but".

        Args:
            text: Phrase to inspect (str).

        Returns:
            True if the text looks like a story, otherwise False.
        """
        if len(text.split()) > 12:
            return True
        if any(mark in text for mark in (',', ';', '.')):
            return True
        clause_words = r'\b(?:but|because|so\s+that|which|when|while)\b'
        return re.search(clause_words, text, flags=re.IGNORECASE) is not None

    def chunk_story(self, text):
        """Split ``text`` into non-empty, trimmed clause fragments.

        Boundaries are the punctuation marks ``. , ; ! ?`` and the whole
        words and/but/because/so/although/though/while/when, matched
        case-insensitively. The separators themselves are discarded.

        Args:
            text: Text to split (str).

        Returns:
            List of fragments in their original order; empty if nothing
            but separators and whitespace was present.
        """
        splitter = re.compile(
            r'[.,;!?]|\b(?:and|but|because|so|although|though|while|when)\b',
            flags=re.IGNORECASE,
        )
        fragments = []
        for piece in splitter.split(text):
            trimmed = piece.strip() if piece else ''
            if trimmed:
                fragments.append(trimmed)
        return fragments

    def detect_emotion_blend_with_negation(self, norm_text):
        """Count non-negated emotion keywords per family in ``norm_text``.

        Each keyword in ``self.emotion_keyword_map`` contributes 1.0 to its
        family when it occurs as a whole word (or whole phrase) and is not
        preceded anywhere in the text by one of the negators "not",
        "no longer", "never", "no" or "without".

        Matching is done on word boundaries so that a keyword embedded in an
        unrelated word is ignored ("hate" inside "whatever", "rage" inside
        "garage") and so that an inflected form is not counted twice
        ("loved" no longer also matches "love"). Negators are likewise
        matched as whole words ("knot sad" is not a negation).

        Args:
            norm_text: Normalized (lower-cased) text to scan.

        Returns:
            Dict mapping family code to its raw hit count as a float;
            empty when no keyword is present.
        """
        negators = r"(?:not|no longer|never|no|without)"
        blend = {}
        for fam, keywords in self.emotion_keyword_map.items():
            for kw in keywords:
                # Keywords are literal text, so escape them before embedding.
                kw_pattern = re.escape(kw)
                if re.search(rf"\b{negators}\s+{kw_pattern}\b", norm_text):
                    # A negated keyword is skipped entirely, even if it also
                    # appears un-negated elsewhere in the text.
                    continue
                if re.search(rf"\b{kw_pattern}\b", norm_text):
                    blend[fam] = blend.get(fam, 0) + 1.0
        return blend

    def get_sentiment(self, norm_text):
        tokens = self.sentiment_tokenizer(norm_text, return_tensors='pt')
        with torch.no_grad():
            logits = self.sentiment_model(**tokens).logits
            probs = F.softmax(logits, dim=-1).squeeze()
            top_prob, top_idx = torch.max(probs, dim=-1)
        sentiment_label = self.sentiment_model.config.id2label[top_idx.item()]
        return sentiment_label.lower(), top_prob.item()

    def infer_emotion(self, input_text):
        """Resolve ``input_text`` to an emotion packet via a tiered cascade.

        The normalized text is tried against, in order: the story-pattern
        table, story mode (chunking plus recursive inference on each chunk),
        the crosswalk table, the alias table, keyword blend detection,
        sentiment-cue mapping, and finally the emotion classification model.
        The first tier that matches produces the result.

        Story mode is only entered when chunking actually changes the text.
        If ``is_story_input`` is true but ``chunk_story`` hands back the
        text unchanged (for example more than 12 words with no delimiter, or
        a marker word the splitter does not split on), the text is processed
        as a single phrase. Previously this case recursed on the identical
        text until ``RecursionError``.

        Args:
            input_text: Raw user text (str).

        Returns:
            Dict with keys ``phrases``, ``emotion_candidates``, ``metadata``,
            ``emotion_family``, ``primary_emotion_code``, ``arc``,
            ``resonance``, ``blend`` and ``trajectory``. Packets produced by
            the model tier additionally carry ``confidence`` (top-level and
            inside ``metadata``).
        """
        norm_text = self.normalize_text(input_text)

        # 1️⃣ Story Pattern Override
        if norm_text in self.story_patterns_lookup:
            primary_emotion_code = self.story_patterns_lookup[norm_text]
            emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
            print(f"[EILProcessor] Story Pattern match: '{norm_text}' → {primary_emotion_code}")
            resolved_code = emotion_data['primary_emotion_code']
            return self._build_packet(
                [input_text],
                [{'phrase': input_text, 'candidate_emotion': primary_emotion_code}],
                'EILProcessor (story pattern)', 'story', emotion_data,
                {resolved_code: 1.0}, [resolved_code],
            )

        # 2️⃣ Story detection (chunking and blend aggregation)
        if self.is_story_input(norm_text):
            chunks = self.chunk_story(norm_text)
            # Recurse only when chunking made progress; every chunk is then
            # strictly shorter than norm_text, so the recursion terminates.
            if chunks != [norm_text]:
                print(f"[EILProcessor] Story mode activated for input: '{norm_text}'")
                return self._infer_story(input_text, chunks)

        # Everything below treats the text as one phrase.
        input_type = 'phrase'

        # 3️⃣ Crosswalk check
        if norm_text in self.crosswalk_lookup:
            primary_emotion_code = self.crosswalk_lookup[norm_text]
            emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
            print(f"[EILProcessor] Crosswalk match: '{norm_text}' → {primary_emotion_code}")
            resolved_code = emotion_data['primary_emotion_code']
            return self._build_packet(
                [input_text],
                [{'phrase': input_text, 'candidate_emotion': primary_emotion_code}],
                'EILProcessor (crosswalk)', input_type, emotion_data,
                {resolved_code: 1.0}, [resolved_code],
            )

        # 4️⃣ Alias lookup
        if norm_text in self.alias_lookup:
            variant_code = self.alias_lookup[norm_text]
            # NOTE(review): assumes variant codes contain at least one '-'
            # with the family as the second segment; confirm against the
            # Codex alias format.
            emotion_family = variant_code.split('-')[1]
            family_code = f"FAM-{emotion_family}"
            print(f"[EILProcessor] Alias match: '{norm_text}' → {variant_code}")
            # Alias hits are not resolved through the Codex here, so arc and
            # resonance are placeholders.
            alias_data = {
                'emotion_family': family_code,
                'primary_emotion_code': variant_code,
                'arc': 'Pending',
                'resonance': 'Pending',
            }
            return self._build_packet(
                [input_text],
                [{'phrase': input_text, 'candidate_emotion': variant_code}],
                'EILProcessor (alias match)', input_type, alias_data,
                {variant_code: 1.0}, [variant_code],
            )

        # 5️⃣ Signal normalization - blend detection & negation
        blend = self.detect_emotion_blend_with_negation(norm_text)
        if blend:
            total = sum(blend.values())
            for k in blend:
                blend[k] /= total
            primary_code = max(blend.items(), key=lambda x: x[1])[0]
            emotion_data = self.codex_informer.resolve_emotion_family(primary_code)
            print(f"[EILProcessor] Signal normalization keyword blend: {blend} (primary: {primary_code})")
            return self._build_packet(
                [input_text],
                [{'phrase': input_text, 'candidate_emotion': primary_code}],
                'EILProcessor (signal normalization)', input_type, emotion_data,
                blend, [primary_code],
            )

        # 6️⃣ Sentiment-to-emotion mapping for non-EI language
        sentiment, sentiment_conf = self.get_sentiment(norm_text)
        print(f"[EILProcessor] Sentiment fallback: {sentiment} ({sentiment_conf:.2f})")
        for sent, cue, fam in self.sentiment_cue_map:
            if sent == sentiment and re.search(cue, norm_text):
                emotion_data = self.codex_informer.resolve_emotion_family(fam)
                return self._build_packet(
                    [input_text],
                    [{'phrase': input_text, 'candidate_emotion': fam}],
                    'EILProcessor (sentiment-to-emotion)', input_type, emotion_data,
                    {fam: 1.0}, [fam],
                )

        # 7️⃣ Model fallback (last resort)
        print(f"[EILProcessor] No crosswalk/alias/keyword/sentiment match — running model on: '{norm_text}'")
        tokens = self.tokenizer(norm_text, return_tensors='pt')
        with torch.no_grad():
            logits = self.model(**tokens).logits
            probs = F.softmax(logits, dim=-1).squeeze()
            top_prob, top_idx = torch.max(probs, dim=-1)
        predicted_label = self.model.config.id2label[top_idx.item()]
        confidence = top_prob.item()

        if confidence < self.softmax_threshold:
            predicted_label = 'neutral'
            print(f"[EILProcessor] Low confidence ({confidence:.2f}) — setting to 'neutral'")

        print(f"[EILProcessor] Model prediction: {predicted_label} ({confidence:.2f})")
        # Labels missing from this table fall back to FAM-NEU below.
        model_to_codex_map = {
            "joy": "FAM-JOY",
            "anger": "FAM-ANG",
            "sadness": "FAM-SAD",
            "fear": "FAM-FEA",
            "love": "FAM-LOV",
            "surprise": "FAM-SUR",
            "disgust": "FAM-DIS",
            "neutral": "FAM-NEU"
        }
        primary_emotion_code = model_to_codex_map.get(predicted_label.lower(), "FAM-NEU")
        emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
        resolved_code = emotion_data['primary_emotion_code']
        # The candidate carries the raw model label, not the Codex code.
        return self._build_packet(
            [input_text],
            [{'phrase': input_text, 'candidate_emotion': predicted_label}],
            'EILProcessor (model)', input_type, emotion_data,
            {resolved_code: 1.0}, [resolved_code],
            confidence=confidence,
        )

    def _infer_story(self, input_text, chunks):
        """Run inference on each chunk and merge the results into one story packet."""
        chunk_results = [self.infer_emotion(chunk) for chunk in chunks]  # RECURSIVE CALL

        blend_accum = {}
        trajectory = []
        for sub_result in chunk_results:
            # Accumulate blends, weighted by confidence when the chunk
            # packet carries one (only model-tier packets do).
            conf = sub_result.get('confidence', 1.0)
            for fam, val in sub_result.get('blend', {}).items():
                blend_accum[fam] = blend_accum.get(fam, 0) + val * conf
            if 'primary_emotion_code' in sub_result:
                trajectory.append(sub_result['primary_emotion_code'])

        # Normalize blend; a zero total (no chunks, or all weights zero)
        # cannot be normalized and is reported as neutral.
        total = sum(blend_accum.values())
        if blend_accum and total > 0:
            for k in blend_accum:
                blend_accum[k] /= total
            dominant_family = max(blend_accum.items(), key=lambda x: x[1])[0]
        else:
            dominant_family = "FAM-NEU"
            blend_accum = {"FAM-NEU": 1.0}
            trajectory = ["FAM-NEU"]

        emotion_data = self.codex_informer.resolve_emotion_family(dominant_family)
        return self._build_packet(
            [input_text] + [r['phrases'][0] for r in chunk_results],
            [
                {
                    'phrase': r['phrases'][0],
                    'candidate_emotion': r.get('primary_emotion_code', 'FAM-NEU'),
                }
                for r in chunk_results
            ],
            'EILProcessor (story mode)', 'story', emotion_data,
            blend_accum, trajectory,
        )

    def _build_packet(self, phrases, candidates, source, input_type,
                      emotion_data, blend, trajectory, confidence=None):
        """Assemble the result dict shared by every inference tier."""
        metadata = {'source': source, 'input_type': input_type}
        packet = {
            'phrases': phrases,
            'emotion_candidates': candidates,
            'metadata': metadata,
            'emotion_family': emotion_data['emotion_family'],
            'primary_emotion_code': emotion_data['primary_emotion_code'],
            'arc': emotion_data['arc'],
            'resonance': emotion_data['resonance'],
            'blend': blend,
            'trajectory': trajectory,
        }
        if confidence is not None:
            metadata['confidence'] = confidence
            packet['confidence'] = confidence
        return packet