Spaces:
Running
Running
| import os | |
| import json | |
| import html | |
| import gradio as gr | |
| import pymupdf4llm | |
| from pipeline import run_livestream_pipeline, extract_youtube_video_id, DEFAULT_HF_TOKEN | |
def load_template_with_data(video_id: str, chat_data: list, selection: str) -> str:
    """Render the theater template for one simulation and wrap it in an iframe.

    Reads ``theater_template.html`` from the directory of this file, fills in
    the ``{{VIDEO_ID}}``, ``{{CHAT_DATA_JSON}}`` and ``{{PROVENANCE_HTML}}``
    placeholders, and returns an ``<iframe srcdoc="...">`` snippet containing
    the rendered document.

    Args:
        video_id: Value substituted for ``{{VIDEO_ID}}``.
        chat_data: JSON-serialisable chat log substituted for
            ``{{CHAT_DATA_JSON}}``.
        selection: Simulation label, forwarded to ``get_provenance_html`` to
            build the ``{{PROVENANCE_HTML}}`` fragment.

    Returns:
        HTML for an iframe whose ``srcdoc`` holds the escaped, rendered
        template.

    Raises:
        OSError: If the template file cannot be read.
    """
    import re  # local import keeps this block independent of the file header

    template_path = os.path.join(os.path.dirname(__file__), "theater_template.html")
    with open(template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # "</" is rewritten to "<\/" so chat text containing "</script>" cannot
    # close a surrounding <script> element early; "\/" is a valid escape in
    # both JSON and JavaScript string literals, so the decoded data is the same.
    chat_json = json.dumps(chat_data, ensure_ascii=False).replace("</", "<\\/")

    substitutions = {
        "{{VIDEO_ID}}": video_id,
        "{{CHAT_DATA_JSON}}": chat_json,
        "{{PROVENANCE_HTML}}": get_provenance_html(selection),
    }

    # Substitute in a single pass over the template: injected values are never
    # re-scanned, so chat text that happens to contain a placeholder token is
    # left alone. A callable replacement is used so backslashes in the values
    # are inserted literally.
    placeholder_re = re.compile("|".join(re.escape(token) for token in substitutions))
    inner_html = placeholder_re.sub(lambda m: substitutions[m.group(0)], template)

    # Escape HTML to be safely embedded inside the srcdoc attribute of an iframe
    escaped_inner_html = html.escape(inner_html)

    # Wrap in iframe to ensure scripts execute correctly in Gradio 6+ without
    # innerHTML restrictions
    return (
        f'<iframe srcdoc="{escaped_inner_html}" style="width: 100%; height: 650px; '
        f'border: none; border-radius: 12px; background-color: #0b0c10; box-shadow: 0 4px 12px rgba(0,0,0,0.45);"></iframe>'
    )
def _load_demo_chat(filename: str) -> list:
    """Load one bundled demo chat log from the ``sample`` directory.

    Args:
        filename: File name inside the ``sample`` directory next to this file.

    Returns:
        The parsed JSON content, or an empty list when the file is missing.
    """
    demo_json_path = os.path.join(os.path.dirname(__file__), "sample", filename)
    # EAFP: open directly rather than check-then-open, so a file removed
    # between the check and the open cannot raise.
    try:
        with open(demo_json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return []


def get_demo_html() -> str:
    """Return the player HTML for the "Steve Jobs 1983 Speech" demo."""
    return load_template_with_data(
        "xLljoibgUvk",
        _load_demo_chat("demo_chat_1.json"),
        "Steve Jobs 1983 Speech (Demo)",
    )


def get_demo_2_html() -> str:
    """Return the player HTML for the "Carl Sagan Demon-Haunted World" demo."""
    return load_template_with_data(
        "NtRf4icqE7o",
        _load_demo_chat("demo_chat_2.json"),
        "Carl Sagan Demon-Haunted World (Demo)",
    )


def get_demo_3_html() -> str:
    """Return the player HTML for the "Edward Teller - Schrödinger's Cat" demo."""
    return load_template_with_data(
        "g6eQMrA1_-I",
        _load_demo_chat("demo_chat_3.json"),
        "Edward Teller - Schrödinger's Cat (Demo)",
    )


def get_demo_4_html() -> str:
    """Return the player HTML for the "Sal Khan - Khanmigo AI Tutor" demo."""
    return load_template_with_data(
        "hJP5GqnTrNo",
        _load_demo_chat("demo_chat_4.json"),
        "Sal Khan - Khanmigo AI Tutor (Demo)",
    )
# Global store for the most recently generated custom simulation.
# Written by handle_generation() after a successful pipeline run and read by
# play_simulation_selection() and get_provenance_html().
# NOTE(review): this is a single module-level dict, so every caller in this
# process reads and overwrites the same entry — confirm that is acceptable
# when several users generate simulations at the same time.
custom_simulation_store = {
    # Video ID of the last generated simulation; empty until one exists.
    "video_id": "",
    # Chat data returned by the pipeline; None until a simulation exists.
    "chat_data": None,
    # Base name of the uploaded reference file, or None if none was uploaded.
    "pdf_name": None,
    # True when the reference document was pasted as text instead of uploaded.
    "has_pasted_text": False
}
def get_provenance_html(selection: str) -> str:
    """Build the "Provenance" HTML fragment for a simulation label.

    For the bundled demos the fragment cites the published work the comments
    were generated from. For "My Custom Simulation" it names the uploaded file
    or states that pasted text was used, based on ``custom_simulation_store``.

    Args:
        selection: Simulation label as shown in the selector.

    Returns:
        An HTML ``<div class='provenance-container'>`` fragment, or an empty
        string when no provenance is known for ``selection``.
    """
    link_style = "color:#7f5af0; text-decoration:none;"

    def doi_link(url: str) -> str:
        # Anchor that opens the DOI in a new tab and shows the URL as its text.
        return f"<a href='{url}' target='_blank' style='{link_style}'>{url}</a>"

    def provenance_box(body: str) -> str:
        # Shared wrapper markup around every provenance message.
        return (
            "<div class='provenance-container'>"
            "<span class='provenance-title'>📄 Provenance</span>"
            f"{body}"
            "</div>"
        )

    published_sources = {
        "Steve Jobs 1983 Speech (Demo)": (
            "Zohar, E., Bloom, P., & Inzlicht, M. (2026). Against frictionless AI. "
            "<i>Communications Psychology</i>, 4(1), Article 402. "
            + doi_link("https://doi.org/10.1038/s44271-026-00402-1")
        ),
        "Carl Sagan Demon-Haunted World (Demo)": (
            "Giroux, H. A. (2013). Beyond dystopian education in a neoliberal society. "
            "<i>Fast Capitalism</i>, 10(1). "
            + doi_link("https://doi.org/10.32855/fcapital.201301.010")
        ),
        "Edward Teller - Schrödinger's Cat (Demo)": (
            "Claeys, G. (2010). The origins of dystopia: Wells, Huxley and Orwell. "
            "In G. Claeys (Ed.), <i>The Cambridge Companion to Utopian Literature</i> (pp. 107–131). "
            "Cambridge University Press. "
            + doi_link("https://doi.org/10.1017/CCOL9780521886659.005")
        ),
        "Sal Khan - Khanmigo AI Tutor (Demo)": (
            "Pepple, D., & Muthuthantrige, N. (2026). Artificial intelligence, innovation and the new "
            "architecture of exploitation: Towards reconfiguring humanness in the age of algorithmic labour. "
            "<i>Journal of Innovation & Knowledge</i>, 11(1), 100878. "
            + doi_link("https://doi.org/10.1016/j.jik.2025.100878")
        ),
    }

    # Bundled demos: cite the published source.
    citation = published_sources.get(selection)
    if citation is not None:
        return provenance_box(
            f"Comments generated using AI from published works: {citation}"
        )

    # Anything other than the custom simulation has no provenance to show.
    if selection != "My Custom Simulation":
        return ""

    uploaded_name = custom_simulation_store.get("pdf_name")
    if uploaded_name:
        # The file name comes from the user's upload, so escape it.
        origin = f"uploaded file <i>{html.escape(uploaded_name)}</i>"
    elif custom_simulation_store.get("has_pasted_text"):
        origin = "pasted custom reference text"
    else:
        return ""

    return provenance_box(f"Comments generated using AI from {origin}.")
def play_simulation_selection(selection: str) -> str:
    """Return the player HTML for the simulation chosen in the selector.

    Args:
        selection: Label of the chosen simulation.

    Returns:
        The rendered player HTML for a demo or for the stored custom
        simulation; a short notice when "My Custom Simulation" is chosen but
        nothing has been generated yet; an empty string for any other label.
    """
    if selection == "My Custom Simulation":
        video_id = custom_simulation_store["video_id"]
        chat_data = custom_simulation_store["chat_data"]
        if video_id and chat_data:
            return load_template_with_data(video_id, chat_data, selection)
        # The notice names the tab by its actual label,
        # "Configure Custom Simulation" (it previously said "Solution").
        return "<div style='color:#ff0055; text-align:center; padding:50px; font-family:sans-serif;'>No custom simulation has been generated yet. Please choose 'Configure Custom Simulation'.</div>"

    if selection == "Steve Jobs 1983 Speech (Demo)":
        return get_demo_html()
    if selection == "Carl Sagan Demon-Haunted World (Demo)":
        return get_demo_2_html()
    if selection == "Edward Teller - Schrödinger's Cat (Demo)":
        return get_demo_3_html()
    if selection == "Sal Khan - Khanmigo AI Tutor (Demo)":
        return get_demo_4_html()

    return ""
def handle_generation(yt_url: str, pdf_file, doc_text: str, srt_text: str, hf_token: str, use_ocr: bool = False):
    """Validate the form inputs, run the pipeline and update the UI.

    Args:
        yt_url: YouTube URL or video ID typed by the user.
        pdf_file: Uploaded reference file, or ``None``. Either an object with
            a ``name`` attribute holding the path, or the path itself.
        doc_text: Reference text pasted as an alternative to ``pdf_file``.
        srt_text: Timestamped transcript pasted by the user (required).
        hf_token: Hugging Face token; ``DEFAULT_HF_TOKEN`` is used when blank.
        use_ocr: Forwarded to the pipeline as ``use_ocr``.

    Returns:
        A 3-tuple matching the click handler's outputs: the new player HTML
        (or ``gr.update()`` to leave it unchanged), a Markdown status message,
        and an update for the simulation selector (or ``gr.update()``).
    """

    def failure(message: str):
        # Leave the player and the selector untouched; only show the message.
        return gr.update(), message, gr.update()

    # Text inputs may arrive as None rather than ""; normalise once so the
    # checks below cannot fail on ``None.strip()``.
    doc_text = (doc_text or "").strip()
    srt_text = (srt_text or "").strip()
    hf_token = (hf_token or "").strip()

    # 1. Validate YouTube Link
    video_id = extract_youtube_video_id(yt_url)
    if not video_id:
        return failure(
            "### ❌ Error\nInvalid YouTube URL. Please provide a valid YouTube link or 11-character Video ID."
        )

    # 2. Identify Document Source (an uploaded file wins over pasted text)
    doc_path = None
    document_content = None
    pdf_name = None
    has_pasted_text = False
    if pdf_file is not None:
        # Accept both a file-like object exposing ``.name`` and a plain path.
        doc_path = getattr(pdf_file, "name", pdf_file)
        pdf_name = os.path.basename(doc_path)
    elif doc_text:
        document_content = doc_text
        has_pasted_text = True
    else:
        return failure(
            "### ❌ Error\nPlease upload a PDF/text file or paste some reference document text."
        )

    # 3. The pasted transcript is mandatory
    if not srt_text:
        return failure(
            "### ❌ Error\nPlease paste the timestamped video transcript. (Auto-fetching is disabled due to server IP blocks)."
        )

    # 4. Use provided token or default token
    token = hf_token if hf_token else DEFAULT_HF_TOKEN

    # 5. Run the pipeline
    try:
        chat_data = run_livestream_pipeline(
            video_id=video_id,
            doc_text=document_content,
            doc_path=doc_path,
            transcript_text=srt_text,
            token=token,
            use_ocr=use_ocr
        )

        # Save to global store so the selector can replay this simulation
        custom_simulation_store["video_id"] = video_id
        custom_simulation_store["chat_data"] = chat_data
        custom_simulation_store["pdf_name"] = pdf_name
        custom_simulation_store["has_pasted_text"] = has_pasted_text

        success_msg = (
            f"### 🎉 Success!\n"
            f"Livestream simulation generated successfully for video ID `{video_id}`!\n"
            f"Navigate back to the **Theater Mode** tab and select **My Custom Simulation** to play it."
        )

        # Create updated HTML player
        new_html = load_template_with_data(video_id, chat_data, "My Custom Simulation")
        return (
            new_html,
            success_msg,
            gr.update(choices=["Steve Jobs 1983 Speech (Demo)", "Carl Sagan Demon-Haunted World (Demo)", "Edward Teller - Schrödinger's Cat (Demo)", "Sal Khan - Khanmigo AI Tutor (Demo)", "My Custom Simulation"], value="My Custom Simulation")
        )
    except Exception as e:
        # UI boundary: report any pipeline failure to the user instead of
        # letting it propagate out of the callback.
        error_msg = f"### ❌ Error running pipeline\n{e}"
        if "Content safety check failed" in str(e):
            error_msg += (
                "\n\n**Tip**: This combination of video and reference document was flagged by an "
                "automated safety check before any chat was generated. Try a different reference "
                "document or a different video."
            )
        return failure(error_msg)
# Gradio Theme
# Every colour below is applied twice: once under its own name and once under
# the matching "<name>_dark" variable, so both variants share one palette.
_THEME_PALETTE = {
    "body_background_fill": "#0b0c10",
    "body_text_color": "#fffffe",
    "body_text_color_subdued": "#94a1b2",
    "block_background_fill": "#161a23",
    "block_border_color": "rgba(255, 255, 255, 0.08)",
    "block_title_text_color": "#fffffe",
    "block_label_text_color": "#94a1b2",
    "input_background_fill": "#11141a",
    "input_placeholder_color": "#94a1b2",
    "input_border_color": "rgba(255, 255, 255, 0.08)",
    "checkbox_label_background_fill": "#11141a",
    "checkbox_label_background_fill_selected": "#161a23",
    "checkbox_label_text_color": "#fffffe",
    "checkbox_label_text_color_selected": "#fffffe",
    "checkbox_label_border_color": "rgba(255, 255, 255, 0.08)",
    "checkbox_label_border_color_selected": "#7f5af0",
    "panel_background_fill": "#161a23",
    "panel_border_color": "rgba(255, 255, 255, 0.08)",
    "border_color_primary": "rgba(255, 255, 255, 0.08)",
    "button_primary_background_fill": "#7f5af0",
    "button_primary_text_color": "#ffffff",
    "button_primary_background_fill_hover": "#9370db",
    "button_secondary_background_fill": "#161a23",
    "button_secondary_background_fill_hover": "rgba(255, 255, 255, 0.08)",
    "button_secondary_text_color": "#fffffe",
    "button_secondary_border_color": "rgba(255, 255, 255, 0.08)",
}

custom_theme = gr.themes.Default(
    primary_hue="purple",
    secondary_hue="indigo",
    neutral_hue="slate",
).set(
    # Expands to name=..., name_dark=... for each palette entry, in order.
    **{
        f"{variable}{suffix}": colour
        for variable, colour in _THEME_PALETTE.items()
        for suffix in ("", "_dark")
    }
)
# Extra CSS passed to demo.launch(): gives tab buttons a translucent hover
# background and light text. Several selectors are listed for the tab
# buttons; `!important` forces these values over other rules.
custom_css = """
.tab-container button:hover,
button[role="tab"]:hover,
.tab-wrapper button:hover,
.tabs button:hover,
.tab-nav button:hover,
.tabitem button:hover {
background-color: rgba(255, 255, 255, 0.08) !important;
color: #fffffe !important;
}
"""
# Top-level UI: a header plus two tabs — a player for the demos / custom
# simulation, and a form that generates a custom simulation.
with gr.Blocks(title="ReadStream") as demo:
    # Static page header.
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 20px; padding-top: 10px;">
        <h1 style="color: #fffffe; font-size: 2.2rem; font-weight: 700; margin-bottom: 5px; letter-spacing: -0.5px;">ReadStream</h1>
        <p style="color: #94a1b2; font-size: 1rem;">Fused commentary from reference video and document - Select 'Configure Custom Simulation' to create your own</p>
        </div>
        """
    )
    with gr.Tabs():
        # Tab 1: Theater Player
        with gr.TabItem("🎭 Theater Mode"):
            with gr.Row():
                # Labels must match the strings compared in
                # play_simulation_selection() and get_provenance_html().
                sim_selector = gr.Radio(
                    choices=["Steve Jobs 1983 Speech (Demo)", "Carl Sagan Demon-Haunted World (Demo)", "Edward Teller - Schrödinger's Cat (Demo)", "Sal Khan - Khanmigo AI Tutor (Demo)", "My Custom Simulation"],
                    value="Steve Jobs 1983 Speech (Demo)",
                    label="Choose Simulation to Play",
                    interactive=True
                )
            # The player frame; starts on the first demo, matching the
            # selector's initial value.
            player_frame = gr.HTML(value=get_demo_html())
            # Trigger updates when selection changes
            sim_selector.change(
                fn=play_simulation_selection,
                inputs=[sim_selector],
                outputs=[player_frame]
            )
        # Tab 2: Generator Config
        with gr.TabItem("⚙️ Configure Custom Simulation"):
            gr.Markdown(
                """
                ### Configure Your Custom Simulation
                Input a YouTube link and upload a reference document
                to generate a synchronized chat replay.
                """
            )
            with gr.Row():
                # Left column: video, token and reference-document inputs.
                with gr.Column(scale=1):
                    yt_url_input = gr.Textbox(
                        label="YouTube URL or Video ID",
                        placeholder="https://www.youtube.com/watch?v=...",
                        info="Maximum length: 10 minutes recommended."
                    )
                    token_input = gr.Textbox(
                        label="Hugging Face Token (Recommended)",
                        placeholder="Leave blank to use default token...",
                        type="password",
                        info="Token used to contact Inference Provider."
                    )
                    pdf_input = gr.File(
                        label="Upload Reference PDF/Text",
                        file_types=[".pdf", ".txt"],
                        file_count="single"
                    )
                    fallback_text_input = gr.Textbox(
                        label="Or Paste Reference Text",
                        placeholder="Alternative if not uploading a file...",
                        lines=4
                    )
                # Right column: the transcript, which handle_generation()
                # requires.
                with gr.Column(scale=1):
                    fallback_srt_input = gr.Textbox(
                        label="Paste Transcript (Required)",
                        placeholder="Paste the YouTube timestamped transcript here...",
                        lines=12
                    )
            generate_btn = gr.Button("🚀 Generate Simulation", variant="primary")
            use_ocr_checkbox = gr.Checkbox(label="Enable OCR for PDFs (slow — use only for scanned/image-based PDFs)", value=False)
            status_output = gr.Markdown(value="*Awaiting configuration...*")
            # Link callback. The input order must match handle_generation()'s
            # parameters, and the output order must match the 3-tuple it
            # returns (player HTML, status message, selector update).
            generate_btn.click(
                fn=handle_generation,
                inputs=[
                    yt_url_input,
                    pdf_input,
                    fallback_text_input,
                    fallback_srt_input,
                    token_input,
                    use_ocr_checkbox
                ],
                outputs=[
                    player_frame,
                    status_output,
                    sim_selector
                ]
            )
# Start the app only when this file is run as a script; the theme and the
# extra CSS defined above are supplied at launch time.
if __name__ == "__main__":
    demo.launch(theme=custom_theme, css=custom_css)