# ReadStream

import os
import json
import html
import gradio as gr
import pymupdf4llm
from pipeline import run_livestream_pipeline, extract_youtube_video_id, DEFAULT_HF_TOKEN

def load_template_with_data(video_id: str, chat_data: list, selection: str) -> str:
    """Render the theater template for one simulation and wrap it in an iframe.

    Reads ``theater_template.html`` from this module's directory, fills the
    ``{{VIDEO_ID}}``, ``{{CHAT_DATA_JSON}}`` and ``{{PROVENANCE_HTML}}``
    placeholders, and returns the result embedded in an ``<iframe srcdoc>``.

    Args:
        video_id: Inserted verbatim wherever ``{{VIDEO_ID}}`` appears.
        chat_data: JSON-serialisable chat log; serialised with
            ``ensure_ascii=False`` and inserted for ``{{CHAT_DATA_JSON}}``.
        selection: Simulation label, forwarded to ``get_provenance_html`` to
            build the ``{{PROVENANCE_HTML}}`` fragment.

    Returns:
        The ``<iframe ...></iframe>`` markup as a string.

    Raises:
        OSError: If the template file cannot be opened or read.
    """
    import re  # local import: ``re`` is not among this file's top-level imports

    template_path = os.path.join(os.path.dirname(__file__), "theater_template.html")
    with open(template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # "<", ">" and "&" only ever occur inside JSON string values, so replacing
    # them with \uXXXX escapes keeps the decoded data identical while making
    # sure chat text such as "</script>" or "<!--" cannot terminate or confuse
    # the surrounding markup.
    # NOTE(review): assumes the template embeds this value where it is parsed
    # as JSON/JavaScript (e.g. inside a <script>) - confirm against the template.
    chat_json = json.dumps(chat_data, ensure_ascii=False).translate(
        {ord("<"): "\\u003c", ord(">"): "\\u003e", ord("&"): "\\u0026"}
    )

    substitutions = {
        "{{VIDEO_ID}}": video_id,
        "{{CHAT_DATA_JSON}}": chat_json,
        "{{PROVENANCE_HTML}}": get_provenance_html(selection),
    }

    # Substitute every placeholder in ONE pass over the template. Chained
    # str.replace() calls would re-scan values that were already injected, so
    # a chat message containing the text "{{PROVENANCE_HTML}}" would itself be
    # replaced. A function replacement is also inserted literally, so the
    # backslashes in the JSON are not interpreted by ``re``.
    placeholder_pattern = re.compile("|".join(re.escape(key) for key in substitutions))
    inner_html = placeholder_pattern.sub(lambda match: substitutions[match.group(0)], template)

    # Escape HTML to be safely embedded inside srcdoc attribute of an iframe
    escaped_inner_html = html.escape(inner_html)

    # Wrap in iframe to ensure scripts execute correctly in Gradio 6+ without innerHTML restrictions
    iframe_code = (
        f'<iframe srcdoc="{escaped_inner_html}" style="width: 100%; height: 650px; '
        f'border: none; border-radius: 12px; background-color: #0b0c10; box-shadow: 0 4px 12px rgba(0,0,0,0.45);"></iframe>'
    )
    return iframe_code

def _load_demo_chat(filename: str) -> list:
    """Return the parsed chat log ``sample/<filename>``, or ``[]`` if it is absent.

    The file is opened directly instead of being probed with
    ``os.path.exists`` first, so there is no window between the check and the
    read. Malformed JSON still raises, as it did before.
    """
    demo_json_path = os.path.join(os.path.dirname(__file__), "sample", filename)
    try:
        with open(demo_json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Missing sample data is tolerated: the player is rendered with an
        # empty chat instead of failing.
        return []


def get_demo_html() -> str:
    """Return the player iframe for the "Steve Jobs 1983 Speech" demo."""
    return load_template_with_data(
        "xLljoibgUvk",
        _load_demo_chat("demo_chat_1.json"),
        "Steve Jobs 1983 Speech (Demo)",
    )


def get_demo_2_html() -> str:
    """Return the player iframe for the "Carl Sagan Demon-Haunted World" demo."""
    return load_template_with_data(
        "NtRf4icqE7o",
        _load_demo_chat("demo_chat_2.json"),
        "Carl Sagan Demon-Haunted World (Demo)",
    )


def get_demo_3_html() -> str:
    """Return the player iframe for the "Edward Teller - Schrödinger's Cat" demo."""
    return load_template_with_data(
        "g6eQMrA1_-I",
        _load_demo_chat("demo_chat_3.json"),
        "Edward Teller - Schrödinger's Cat (Demo)",
    )


def get_demo_4_html() -> str:
    """Return the player iframe for the "Sal Khan - Khanmigo AI Tutor" demo."""
    return load_template_with_data(
        "hJP5GqnTrNo",
        _load_demo_chat("demo_chat_4.json"),
        "Sal Khan - Khanmigo AI Tutor (Demo)",
    )

# Global store for custom simulations
# Holds the most recent result written by handle_generation() and is read back
# by play_simulation_selection() and get_provenance_html().
# NOTE(review): this is a single module-level dict, so every caller in this
# process reads and overwrites the same entry - presumably that means all
# browser sessions share one "custom simulation"; confirm this is intended.
custom_simulation_store = {
    "video_id": "",  # YouTube video ID of the last generated simulation ("" until one exists)
    "chat_data": None,  # value returned by run_livestream_pipeline (None until one exists)
    "pdf_name": None,  # basename of the uploaded reference file, if a file was used
    "has_pasted_text": False  # True when the reference came from the pasted-text box
}

def get_provenance_html(selection: str) -> str:
    """Build the "Provenance" banner markup for a simulation label.

    For the four built-in demos the banner carries a fixed APA-style citation
    with a DOI link. For ``"My Custom Simulation"`` it names the uploaded file
    (HTML-escaped) or mentions pasted text, based on the module-level
    ``custom_simulation_store``. For anything else, or when the custom store
    records no source, an empty string is returned.
    """

    def doi_link(url: str) -> str:
        # The DOI URL is used both as the link target and as its visible text.
        return (
            f"<a href='{url}' target='_blank' "
            f"style='color:#7f5af0; text-decoration:none;'>{url}</a>"
        )

    def banner(message: str) -> str:
        # Shared wrapper markup for every non-empty provenance banner.
        return (
            "<div class='provenance-container'>"
            "<span class='provenance-title'>📄 Provenance</span>"
            + message
            + "</div>"
        )

    demo_citations = {
        "Steve Jobs 1983 Speech (Demo)": (
            "Zohar, E., Bloom, P., & Inzlicht, M. (2026). Against frictionless AI. "
            "<i>Communications Psychology</i>, 4(1), Article 402. "
            + doi_link("https://doi.org/10.1038/s44271-026-00402-1")
        ),
        "Carl Sagan Demon-Haunted World (Demo)": (
            "Giroux, H. A. (2013). Beyond dystopian education in a neoliberal society. "
            "<i>Fast Capitalism</i>, 10(1). "
            + doi_link("https://doi.org/10.32855/fcapital.201301.010")
        ),
        "Edward Teller - Schrödinger's Cat (Demo)": (
            "Claeys, G. (2010). The origins of dystopia: Wells, Huxley and Orwell. "
            "In G. Claeys (Ed.), <i>The Cambridge Companion to Utopian Literature</i> (pp. 107–131). "
            "Cambridge University Press. "
            + doi_link("https://doi.org/10.1017/CCOL9780521886659.005")
        ),
        "Sal Khan - Khanmigo AI Tutor (Demo)": (
            "Pepple, D., & Muthuthantrige, N. (2026). Artificial intelligence, innovation and the new "
            "architecture of exploitation: Towards reconfiguring humanness in the age of algorithmic labour. "
            "<i>Journal of Innovation & Knowledge</i>, 11(1), 100878. "
            + doi_link("https://doi.org/10.1016/j.jik.2025.100878")
        ),
    }

    published = demo_citations.get(selection)
    if published is not None:
        return banner(f"Comments generated using AI from published works: {published}")

    if selection != "My Custom Simulation":
        return ""

    # Custom simulation: an uploaded file takes precedence over pasted text.
    uploaded_name = custom_simulation_store.get("pdf_name")
    if uploaded_name:
        origin = f"uploaded file <i>{html.escape(uploaded_name)}</i>"
    elif custom_simulation_store.get("has_pasted_text"):
        origin = "pasted custom reference text"
    else:
        return ""

    return banner(f"Comments generated using AI from {origin}.")


def play_simulation_selection(selection: str) -> str:
    """Return the player HTML for the simulation chosen in the selector.

    Args:
        selection: One of the selector labels. The four demo labels render
            their bundled demo; ``"My Custom Simulation"`` renders whatever is
            currently held in ``custom_simulation_store``.

    Returns:
        The iframe markup for the chosen simulation, a short notice when no
        custom simulation has been generated yet, or ``""`` for an
        unrecognised label.
    """
    if selection == "Steve Jobs 1983 Speech (Demo)":
        return get_demo_html()
    if selection == "Carl Sagan Demon-Haunted World (Demo)":
        return get_demo_2_html()
    if selection == "Edward Teller - Schrödinger's Cat (Demo)":
        return get_demo_3_html()
    if selection == "Sal Khan - Khanmigo AI Tutor (Demo)":
        return get_demo_4_html()
    if selection == "My Custom Simulation":
        # Both a video ID and a non-empty chat log are required to play.
        if custom_simulation_store["video_id"] and custom_simulation_store["chat_data"]:
            return load_template_with_data(
                custom_simulation_store["video_id"],
                custom_simulation_store["chat_data"],
                selection,
            )
        # The notice names the tab exactly as it is labelled in the UI
        # ("Configure Custom Simulation"); it previously said "Solution".
        return "<div style='color:#ff0055; text-align:center; padding:50px; font-family:sans-serif;'>No custom simulation has been generated yet. Please choose 'Configure Custom Simulation'.</div>"
    return ""


def handle_generation(yt_url: str, pdf_file, doc_text: str, srt_text: str, hf_token: str, use_ocr: bool = False):
    """Validate the generator form, run the pipeline and publish the result.

    Args:
        yt_url: YouTube URL or video ID, resolved with
            ``extract_youtube_video_id``.
        pdf_file: Uploaded reference file, or ``None``. Either an object with
            a ``.name`` path attribute or a plain path string is accepted.
        doc_text: Pasted reference text; only used when no file is uploaded.
        srt_text: Pasted timestamped transcript (required).
        hf_token: Hugging Face token; ``DEFAULT_HF_TOKEN`` is used when blank.
        use_ocr: Forwarded to the pipeline as ``use_ocr``.

    Returns:
        A 3-tuple ``(player, status_markdown, selector)``. On success it holds
        the new player HTML, a success message, and a selector update that
        selects "My Custom Simulation". On any failure the player and selector
        are left untouched (``gr.update()``) and only the status carries the
        error text.
    """

    def failure(message: str):
        # Only the status panel changes; player and selector keep their state.
        return gr.update(), message, gr.update()

    # 1. Validate YouTube Link
    video_id = extract_youtube_video_id(yt_url)
    if not video_id:
        return failure(
            "### ❌ Error\nInvalid YouTube URL. Please provide a valid YouTube link or 11-character Video ID."
        )

    # 2. Identify Document Source (an uploaded file wins over pasted text)
    doc_path = None
    document_content = None
    pdf_name = None
    has_pasted_text = False
    # Text inputs are normalised with `or ""` so a None value is treated like
    # an empty box instead of raising AttributeError on .strip().
    pasted_reference = (doc_text or "").strip()
    if pdf_file is not None:
        # NOTE(review): depending on the Gradio version/config the upload may
        # arrive as a path string or as an object exposing `.name`; both work.
        doc_path = getattr(pdf_file, "name", pdf_file)
        pdf_name = os.path.basename(doc_path)
    elif pasted_reference:
        document_content = pasted_reference
        has_pasted_text = True
    else:
        return failure(
            "### ❌ Error\nPlease upload a PDF/text file or paste some reference document text."
        )

    # 3. Clean manual transcript input; it is mandatory
    manual_transcript = (srt_text or "").strip()
    if not manual_transcript:
        return failure(
            "### ❌ Error\nPlease paste the timestamped video transcript. (Auto-fetching is disabled due to server IP blocks)."
        )

    # 4. Use provided token or default token
    token = (hf_token or "").strip() or DEFAULT_HF_TOKEN

    # 5. Run the pipeline
    try:
        chat_data = run_livestream_pipeline(
            video_id=video_id,
            doc_text=document_content,
            doc_path=doc_path,
            transcript_text=manual_transcript,
            token=token,
            use_ocr=use_ocr
        )

        # Save to global store in a single update so readers never observe a
        # half-written entry. This must happen before rendering below, because
        # the provenance banner for "My Custom Simulation" reads the store.
        custom_simulation_store.update(
            video_id=video_id,
            chat_data=chat_data,
            pdf_name=pdf_name,
            has_pasted_text=has_pasted_text,
        )

        success_msg = (
            "### 🎉 Success!\n"
            f"Livestream simulation generated successfully for video ID `{video_id}`!\n"
            "Navigate back to the **Theater Mode** tab and select **My Custom Simulation** to play it."
        )

        # Create updated HTML player
        new_html = load_template_with_data(video_id, chat_data, "My Custom Simulation")

        return (
            new_html,
            success_msg,
            gr.update(choices=["Steve Jobs 1983 Speech (Demo)", "Carl Sagan Demon-Haunted World (Demo)", "Edward Teller - Schrödinger's Cat (Demo)", "Sal Khan - Khanmigo AI Tutor (Demo)", "My Custom Simulation"], value="My Custom Simulation")
        )
    except Exception as e:
        # UI boundary: any pipeline/rendering failure is reported in the
        # status panel instead of propagating.
        error_msg = f"### ❌ Error running pipeline\n{e}"
        if "Content safety check failed" in str(e):
            error_msg += (
                "\n\n**Tip**: This combination of video and reference document was flagged by an "
                "automated safety check before any chat was generated. Try a different reference "
                "document or a different video."
            )
        return failure(error_msg)


# Gradio Theme
# Each colour override below is applied twice: once under its own name and
# once under the matching "<name>_dark" variable, so both modes get the same
# dark palette.
custom_theme = gr.themes.Default(
    primary_hue="purple",
    secondary_hue="indigo",
    neutral_hue="slate",
).set(
    **{
        f"{variable}{mode_suffix}": colour
        for variable, colour in {
            # Page
            "body_background_fill": "#0b0c10",
            "body_text_color": "#fffffe",
            "body_text_color_subdued": "#94a1b2",
            # Blocks
            "block_background_fill": "#161a23",
            "block_border_color": "rgba(255, 255, 255, 0.08)",
            "block_title_text_color": "#fffffe",
            "block_label_text_color": "#94a1b2",
            # Inputs
            "input_background_fill": "#11141a",
            "input_placeholder_color": "#94a1b2",
            "input_border_color": "rgba(255, 255, 255, 0.08)",
            # Checkbox / radio labels
            "checkbox_label_background_fill": "#11141a",
            "checkbox_label_background_fill_selected": "#161a23",
            "checkbox_label_text_color": "#fffffe",
            "checkbox_label_text_color_selected": "#fffffe",
            "checkbox_label_border_color": "rgba(255, 255, 255, 0.08)",
            "checkbox_label_border_color_selected": "#7f5af0",
            # Panels and generic borders
            "panel_background_fill": "#161a23",
            "panel_border_color": "rgba(255, 255, 255, 0.08)",
            "border_color_primary": "rgba(255, 255, 255, 0.08)",
            # Primary buttons
            "button_primary_background_fill": "#7f5af0",
            "button_primary_text_color": "#ffffff",
            "button_primary_background_fill_hover": "#9370db",
            # Secondary buttons
            "button_secondary_background_fill": "#161a23",
            "button_secondary_background_fill_hover": "rgba(255, 255, 255, 0.08)",
            "button_secondary_text_color": "#fffffe",
            "button_secondary_border_color": "rgba(255, 255, 255, 0.08)",
        }.items()
        for mode_suffix in ("", "_dark")
    }
)

# Extra stylesheet passed to demo.launch(css=...): gives tab buttons a subtle
# highlight and light text on hover.
# NOTE(review): several selector variants are listed for the same buttons -
# presumably to match differing Gradio DOM class names; confirm which are
# still needed.
custom_css = """
.tab-container button:hover, 
button[role="tab"]:hover,
.tab-wrapper button:hover,
.tabs button:hover,
.tab-nav button:hover,
.tabitem button:hover {
    background-color: rgba(255, 255, 255, 0.08) !important;
    color: #fffffe !important;
}
"""

# Application layout: a header plus two tabs - a player ("Theater Mode") and a
# form that generates a custom simulation. The components are created in
# display order, and the event wiring refers to components from both tabs.
with gr.Blocks(title="ReadStream") as demo:
    # Static page header.
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 20px; padding-top: 10px;">
            <h1 style="color: #fffffe; font-size: 2.2rem; font-weight: 700; margin-bottom: 5px; letter-spacing: -0.5px;">ReadStream</h1>
            <p style="color: #94a1b2; font-size: 1rem;">Fused commentary from reference video and document - Select 'Configure Custom Simulation' to create your own</p>
        </div>
        """
    )
    
    with gr.Tabs():
        # Tab 1: Theater Player
        with gr.TabItem("🎭 Theater Mode"):
            with gr.Row():
                # These labels are matched as exact strings by
                # play_simulation_selection() and get_provenance_html(), and the
                # same list is repeated in handle_generation(); keep them in sync.
                sim_selector = gr.Radio(
                    choices=["Steve Jobs 1983 Speech (Demo)", "Carl Sagan Demon-Haunted World (Demo)", "Edward Teller - Schrödinger's Cat (Demo)", "Sal Khan - Khanmigo AI Tutor (Demo)", "My Custom Simulation"],
                    value="Steve Jobs 1983 Speech (Demo)",
                    label="Choose Simulation to Play",
                    interactive=True
                )
            
            # The player frame
            # Initialised with the first demo; get_demo_html() runs while the
            # layout is being built, so the template file is read at that point.
            player_frame = gr.HTML(value=get_demo_html())
            
            # Trigger updates when selection changes
            sim_selector.change(
                fn=play_simulation_selection,
                inputs=[sim_selector],
                outputs=[player_frame]
            )
            
        # Tab 2: Generator Config
        with gr.TabItem("⚙️ Configure Custom Simulation"):
            gr.Markdown(
                """
                ### Configure Your Custom Simulation
                Input a YouTube link and upload a reference document 
                to generate a synchronized chat replay.
                """
            )
            
            with gr.Row():
                # Left column: video, token and reference-document inputs.
                with gr.Column(scale=1):
                    yt_url_input = gr.Textbox(
                        label="YouTube URL or Video ID",
                        placeholder="https://www.youtube.com/watch?v=...",
                        info="Maximum length: 10 minutes recommended."
                    )
                    
                    token_input = gr.Textbox(
                        label="Hugging Face Token (Recommended)",
                        placeholder="Leave blank to use default token...",
                        type="password",
                        info="Token used to contact Inference Provider."
                    )
                    
                    pdf_input = gr.File(
                        label="Upload Reference PDF/Text",
                        file_types=[".pdf", ".txt"],
                        file_count="single"
                    )
                    
                    # Only used by handle_generation() when no file is uploaded.
                    fallback_text_input = gr.Textbox(
                        label="Or Paste Reference Text",
                        placeholder="Alternative if not uploading a file...",
                        lines=4
                    )
                    
                # Right column: transcript, generate button and OCR toggle.
                with gr.Column(scale=1):
                    fallback_srt_input = gr.Textbox(
                        label="Paste Transcript (Required)",
                        placeholder="Paste the YouTube timestamped transcript here...",
                        lines=12
                    )
                    
                    generate_btn = gr.Button("🚀 Generate Simulation", variant="primary")
                    use_ocr_checkbox = gr.Checkbox(label="Enable OCR for PDFs (slow — use only for scanned/image-based PDFs)", value=False)
                    
            # Receives the success or error Markdown returned by handle_generation().
            status_output = gr.Markdown(value="*Awaiting configuration...*")
            
            # Link callback
            # The inputs are passed positionally, so their order must match
            # handle_generation(yt_url, pdf_file, doc_text, srt_text, hf_token, use_ocr);
            # the outputs match its (player, status, selector) return tuple.
            generate_btn.click(
                fn=handle_generation,
                inputs=[
                    yt_url_input,
                    pdf_input,
                    fallback_text_input,
                    fallback_srt_input,
                    token_input,
                    use_ocr_checkbox
                ],
                outputs=[
                    player_frame,
                    status_output,
                    sim_selector
                ]
            )

# Start the app only when this file is executed as a script; the theme and the
# extra CSS are supplied at launch time.
# NOTE(review): assumes the installed Gradio accepts `theme`/`css` on launch()
# rather than on gr.Blocks() - confirm against the pinned Gradio version.
if __name__ == "__main__":
    demo.launch(theme=custom_theme, css=custom_css)