import os
import time
from pathlib import Path

import gradio as gr
import pymupdf
import requests
import spaces
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline
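# kokoro supplies the TTS stack used below: KModel is the Kokoro-82M speech
# model and KPipeline handles grapheme-to-phoneme conversion and voice loading.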
# -----------------------------------------------------------------------------
# Default podcast materials: today's top Daily Paper, plus one downloaded PDF example
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()
PODCAST_SUBJECT = list(top_papers.values())[0]

os.makedirs("examples", exist_ok=True)
response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
response.raise_for_status()  # fail fast rather than writing an error page to disk
with open("examples/Essay_Palantir.pdf", "wb") as f:
    f.write(response.content)
# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)
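# This assumes an HF_TOKEN environment variable with access to Inference
# Providers; provider="cerebras" routes the Llama-3.3-70B calls to Cerebras.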
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
{subject[:10000]}"""},
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    assert "[JANE]" in full_text
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text
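# Illustrative shape of the script the parser below expects (not real model output):
# [JANE] Welcome to the show! Today we're digging into the top trending paper...
# [MIKE] Thanks Jane. So what makes this one stand out?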
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel(repo_id="hexgrad/Kokoro-82M").to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # English voices

MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"
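# Kokoro voice ids encode language and gender: the leading "a" is American
# English and the second letter is f/m for female/male, hence af_heart / am_adam.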
# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
# ZeroGPU Spaces allocate a GPU only inside functions decorated with @spaces.GPU
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    # Source material priority: uploaded PDF > webpage URL > default Daily Paper
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
        response = requests.get(f"https://r.jina.ai/{url}")
        material_text = response.text
    else:
        material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)
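    # Synthesis below is yielded chunk by chunk, so Gradio can start playback
    # while later lines are still being generated.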
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000  # Kokoro's native sample rate

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..."
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: unlabeled lines default to the female voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            # The voice pack is indexed by phoneme-sequence length to pick the
            # matching reference style embedding for this utterance.
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            yield (sr, audio_numpy)
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1 - t0)} seconds. {audio_numpy.shape}")
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
]
demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice.
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'.
Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a webpage URL to discuss it (Optional)",
            placeholder="The URL of the page whose content you want to discuss.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
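    # With streaming=True, gr.Audio plays each (sample_rate, np_array) chunk
    # yielded by generate_podcast as soon as it arrives.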
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    examples=EXAMPLES,
    cache_examples=True,
)
if __name__ == "__main__":
    demo.launch()