import os
import json
import time
import torch
import gradio as gr
from datetime import datetime, timedelta
from transformers import AutoTokenizer, AutoModelForCausalLM

# ----------------------------
# Default profile and blocks
# ----------------------------
DEFAULT_PROFILE = {
    "name": "Learner",
    "style": ["concise", "reflective", "Basque context where relevant"],
    "goals": ["conversation-first learning", "daily language blocks", "CPU-only"]
}

DEFAULT_BLOCKS = [
    {"type": "style", "rule": "Ask clarifying questions when uncertain."},
    {"type": "vocab", "rule": "Use sensory detail + local place anchoring when writing creatively."},
    {"type": "conversation", "rule": "Keep answers short and specific; avoid repeating conclusions."},
    {"type": "conversation", "rule": "Offer warm, encouraging replies with a touch of humor or playfulness to lighten the mood."},
    {"type": "conversation", "rule": "Use light satire, gentle irony, and clever humor when appropriate to keep dialogue playful and engaging."},
    {"type": "conversation", "rule": "Clever swearing is allowed when it adds humor or emphasis, but keep it light, playful, and never offensive."}
]

BLOCKS_FILE = "blocks.json"

# ----------------------------
# Model options
# ----------------------------
MODEL_OPTIONS = {
    "Phi-3.5 Mini Instruct (4B)": "microsoft/Phi-3.5-mini-instruct",
    "Phi-3.5 MoE Instruct (42B)": "microsoft/Phi-3.5-MoE-instruct",
    "Phi-3 Mini 4K Instruct (4B)": "microsoft/Phi-3-mini-4k-instruct",
    "Phi-3 Mini 128K Instruct (4B)": "microsoft/Phi-3-mini-128k-instruct"
}

# ----------------------------
# Example prompts
# ----------------------------
EXAMPLES = [
    "Tell me a about the oldest language in Europe, Euskera.",
    "I’ll teach you a concept. Repeat it back to me in simple words: Solar panels turn sunlight into electricity.",
    "Here’s a new phrase: 'The sea is calm today.' Try saying it in Basque.",
    "Let’s practice style: noir detective. Write one short sentence about Gros in that style.",
    "Here’s a Shakespeare line: 'All the world’s a stage.' What do you think it means?",
    "Read a Dickens passage and tell me how it feels — happy, sad, or something else?",
    "Summarize this paragraph....",
    "I’ll give you a sentence with a mistake: 'He go to school yesterday.' Can you fix it?"
]

# ----------------------------
# Persistence helpers
# ----------------------------
def load_blocks():
    if os.path.exists(BLOCKS_FILE):
        try:
            with open(BLOCKS_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            pass
    return {"user_profile": DEFAULT_PROFILE, "language_blocks": DEFAULT_BLOCKS}

def save_blocks(data):
    with open(BLOCKS_FILE, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def normalize_rule_text(text: str) -> str:
    return " ".join(text.strip().split())

def is_duplicate_rule(rules_list, new_rule_text, new_type="conversation"):
    key = (new_type.lower(), normalize_rule_text(new_rule_text).lower())
    for r in rules_list:
        if (r.get("type", "").lower(), normalize_rule_text(r.get("rule", "")).lower()) == key:
            return True
    return False

def add_block(data, rule_text, block_type="conversation", add_review=False):
    rule_text = normalize_rule_text(rule_text)
    if not rule_text:
        return data, "Rule is empty. Nothing added."

    rules = data.get("language_blocks", [])
    if is_duplicate_rule(rules, rule_text, block_type):
        return data, "Duplicate rule detected. Skipped."

    entry = {"type": block_type, "rule": rule_text}
    if add_review:
        entry["review_schedule"] = schedule_reviews()

    rules.append(entry)
    data["language_blocks"] = rules
    save_blocks(data)
    return data, f"Added rule: {rule_text}"

def schedule_reviews():
    today = datetime.utcnow().date()
    return [
        str(today + timedelta(days=1)),
        str(today + timedelta(days=3)),
        str(today + timedelta(days=7))
    ]

# ----------------------------
# Model loading (CPU-only)
# ----------------------------
_loaded = {}

def load_model(model_id):
    if model_id in _loaded:
        return _loaded[model_id]
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float32
    )
    model.eval()
    _loaded[model_id] = (tokenizer, model)
    return tokenizer, model

# ----------------------------
# Prompt construction
# ----------------------------
def format_blocks(blocks):
    return "\n".join([f"- [{b['type']}] {b['rule']}" for b in blocks])

SYSTEM_TEMPLATE = """You are a conversation-first learning chatbot.
Follow the user's style and goals, reinforce today's blocks, and confirm corrections.
Active language blocks:
{blocks}
"""

def build_messages(user_text, profile, blocks):
    system = SYSTEM_TEMPLATE.format(blocks=format_blocks(blocks))
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user_text}
    ]

def chat(user_text, model_label, blocks_json):
    data = load_blocks()
    blocks = parse_blocks_editor(blocks_json, data.get("language_blocks", []))

    model_id = MODEL_OPTIONS[model_label]
    tokenizer, model = load_model(model_id)

    messages = build_messages(user_text, data["user_profile"], blocks)

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True   # ensures inputs is a dict, not just a tensor
    ).to("cpu")

    start = time.time()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,                # now safe, inputs is a dict
            max_new_tokens=200,
            do_sample=False,
            use_cache=False
        )
    latency = time.time() - start

    gen_text = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True
    ).strip()

    input_tokens = int(inputs["input_ids"].shape[-1])
    output_tokens = int(outputs[0].shape[-1] - inputs["input_ids"].shape[-1])
    metrics = f"Input tokens: {input_tokens} | Output tokens: {output_tokens} | Latency: {latency:.2f}s"

    return gen_text, metrics

def parse_blocks_editor(text, fallback):
    if not text or not text.strip():
        return fallback
    text = text.strip()
    try:
        parsed = json.loads(text)
        if isinstance(parsed, list):
            return parsed
    except Exception:
        pass
    blocks = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        if ":" in line:
            t, r = line.split(":", 1)
            blocks.append({"type": t.strip(), "rule": r.strip()})
        else:
            blocks.append({"type": "rule", "rule": line})
    return blocks or fallback

# ----------------------------
# Reflection
# ----------------------------
def heuristic_rule(user_text, assistant_text):
    if "?" in assistant_text:
        return "Ask clarifying questions when uncertain."
    low = user_text.lower()
    if "translate" in low:
        return "Confirm translation intent and target tone before translating."
    if "style" in low or "noir" in low:
        return "Confirm style constraints before writing and keep it concise."
    return "Keep answers short, specific, and avoid repeating conclusions."

def reflect_and_save(user_text, assistant_text, blocks_editor_value):
    data = load_blocks()
    proposal = heuristic_rule(user_text, assistant_text)
    data, msg = add_block(data, proposal, block_type="conversation", add_review=False)
    pretty = json.dumps(data["language_blocks"], ensure_ascii=False, indent=2)
    return pretty, msg
# ----------------------------
# Gradio UI
# ----------------------------
def launch():
    data = load_blocks()
    default_blocks_text = json.dumps(
        data["language_blocks"], ensure_ascii=False, indent=2
    )

    with gr.Blocks(title="Conversation Learning Lab (CPU): Tiny Instruct") as demo:
        # Header
        gr.Markdown("# 🗣️ Conversation Learning Lab (CPU-friendly): Tiny Instruct")
        gr.Markdown(
            "Focus on daily dialogue. Reinforce validated language blocks. "
            "Transparent tokens and latency."
        )

        # Model selector + input
        with gr.Row():
            model_dd = gr.Dropdown(
                label="Choose a model",
                choices=list(MODEL_OPTIONS.keys()),
                value="Phi-3.5 Mini Instruct (4B)"
            )
        with gr.Row():
            user_in = gr.Textbox(
                label="Your short message with clear instruction",
                placeholder="Start a conversation or choose an example below...",
                lines=3
            )

        # Example prompts
        gr.Markdown("### 🧪 Try an example prompt:")
        gr.Examples(
            examples=EXAMPLES,
            inputs=user_in
        )

        # Generate button comes right after examples
        with gr.Row():
            generate_btn = gr.Button("Generate (CPU)")

        # Output + metrics
        with gr.Row():
            output = gr.Textbox(label="Assistant", lines=8)
        with gr.Row():
            metrics = gr.Markdown("")

        # JSON blocks editor + Reflect button at the bottom
        gr.Markdown("### 📋 Today's Blocks")
        blocks_editor = gr.Textbox(
            label="Editable rules (JSON array or 'type: rule' lines)",
            value=default_blocks_text,
            lines=10
        )
        with gr.Row():
            reflect_btn = gr.Button("Reflect & Save Rule")

        # Wire up events
        generate_btn.click(
            fn=chat,
            inputs=[user_in, model_dd, blocks_editor],
            outputs=[output, metrics]
        )
        reflect_btn.click(
            fn=reflect_and_save,
            inputs=[user_in, output, blocks_editor],
            outputs=[blocks_editor, metrics]
        )

    demo.launch()

if __name__ == "__main__":
    launch()