import os
import time
from pathlib import Path

import gradio as gr
import pymupdf
import requests
import spaces
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline

# -----------------------------------------------------------------------------
# Get default podcast materials: the top Daily paper and one downloaded PDF
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

PODCAST_SUBJECT = list(top_papers.values())[0]

os.makedirs("examples", exist_ok=True)
response = requests.get(
    "https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf"
)
with open("examples/Essay_Palantir.pdf", "wb") as f:
    f.write(response.content)

# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)


def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a podcast script delivered by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"""Here is the source material for the podcast. You will need to analyze it and bring profound insights.
{subject[:10000]}""",
        },
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    # The script proper starts at the first host tag; anything before it is preamble.
    assert "[JANE]" in full_text
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text


# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel(repo_id="hexgrad/Kokoro-82M").to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # American English voices

MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)


@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    # Pick the source material: uploaded PDF > webpage URL > default Daily paper.
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
        # r.jina.ai returns a clean, text-only rendering of the page
        response = requests.get(f"https://r.jina.ai/{url}")
        material_text = response.text
    else:
        material_text = PODCAST_SUBJECT

    # Generate the podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [line for line in podcast_script.strip().splitlines() if line.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000

    for line in lines:
        # Expect lines like "[JANE] ..." or "[MIKE] ..."
if line.startswith("[MIKE]"): pipeline_voice = pipeline_voice_male voice = MALE_VOICE utterance = line[len("[MIKE]"):].strip() elif line.startswith("[JANE]"): pipeline_voice = pipeline_voice_female voice = FEMALE_VOICE utterance = line[len("[JANE]"):].strip() else: # fallback pipeline_voice = pipeline_voice_female voice = FEMALE_VOICE utterance = line for _, ps, _ in pipeline(utterance, voice, speed): t0 = time.time() ref_s = pipeline_voice[len(ps) - 1] audio_numpy = kmodel(ps, ref_s, speed).numpy() yield (sr, audio_numpy) t1 = time.time() print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}") EXAMPLES = [ ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"], [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"], ] demo = gr.Interface( title="Open NotebookLM 🎙️", description=f"""Generates a podcast discussion between two hosts about the materials of your choice. If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**' Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""", fn=generate_podcast, inputs=[ gr.Textbox( label="🔗 Type a Webpage URL to discuss it (Optional)", placeholder="The URL you want to discuss the content for.", ), gr.File( label="Upload a PDF as discussion material (Optional)", file_types=[".pdf"], file_count="single", ), gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."), ], outputs=[ gr.Audio( label="Listen to your podcast! 🔊", format="wav", streaming=True, ), ], theme=gr.themes.Soft(), submit_btn="Generate podcast 🎙️", # clear_btn=gr.Button("🗑️"), examples=EXAMPLES, cache_examples=True, ) if __name__ == "__main__": demo.launch()