import os
import re
import time
from pathlib import Path

import gradio as gr
import numpy as np
import soundfile as sf
import spaces
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline

# RSS publishing helpers (episode metadata + feed update)
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
# -----------------------------------------------------------------------------
# to-do
# - Add a field for the podcast title and description
# - Add a field for the script
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Get default podcast materials, from Daily papers and one download
# -----------------------------------------------------------------------------
from papers import PaperManager
paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()
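# Default subject: the content of today's #1 trending paper.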
PODCAST_SUBJECT = list(top_papers.values())[0]
# -----------------------------------------------------------------------------
# LLM that writes the script (unchanged)
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT
# client = InferenceClient(
#     "meta-llama/Llama-3.3-70B-Instruct",
#     provider="cerebras",
#     token=os.getenv("HF_TOKEN"),
# )
client = InferenceClient(
    "Qwen/Qwen3-32B",
    provider="hf-inference",
    token=os.getenv("HF_TOKEN"),
)
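# Note: requires an HF_TOKEN environment variable/secret with access to HF Inference.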
def sanitize_script(script: str) -> str:
    """Remove special characters like '*' from the script."""
    # Remove asterisks and other special formatting characters (add more as needed)
    return re.sub(r'[\*\_\~\`]', '', script)
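# e.g. sanitize_script("**Hello** _world_") -> "Hello world"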
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM to write a two-host podcast script about the given subject."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content

    # The dialogue proper starts at the first speaker tag; discard anything the
    # model emitted before it (e.g. a reasoning preamble).
    if "[JANE]" not in full_text:
        raise ValueError("Generated script is missing the expected '[JANE]' speaker tag.")
    podcast_text = full_text[full_text.find("[JANE]"):]
    return sanitize_script(podcast_text)
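# Expected script format: one speaker-tagged line per turn, e.g.
#   [JANE] Welcome back to the show!
#   [MIKE] Today we're looking at the top trending paper on Hugging Face.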
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()
kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a") # English voices
MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"
# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
@spaces.GPU
def generate_podcast(topic: str):
    material_text = PODCAST_SUBJECT

    # Generate the podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000  # Kokoro outputs 24 kHz audio

    for line in lines:
        # Route each line to the matching host's voice based on its speaker tag.
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: no speaker tag, default to Jane's voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            # Move the tensor to CPU before converting, in case the model runs on GPU.
            audio_numpy = kmodel(ps, ref_s, speed).cpu().numpy()
            t1 = time.time()
            # Stream each synthesized chunk to the client as soon as it is ready.
            yield (sr, audio_numpy)
            print(f"PROCESSED '{utterance}' in {t1 - t0:.2f} seconds. {audio_numpy.shape}")
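# -----------------------------------------------------------------------------
# Optional helper (sketch): save a whole episode as a single WAV
# -----------------------------------------------------------------------------
# A minimal sketch, not wired into the Gradio UI, showing how the streamed
# (sample_rate, chunk) pairs from generate_podcast could be stitched into one
# file, e.g. before publishing an episode with the update_rss helpers. The
# function name `save_full_episode` and the default `output_path` are
# illustrative assumptions, not part of the original app.
def save_full_episode(topic: str, output_path: str = "episode.wav") -> str:
    """Concatenate all streamed audio chunks into one WAV file and return its path."""
    sr = 24000  # matches the sample rate yielded by generate_podcast
    chunks = [chunk for _, chunk in generate_podcast(topic)]
    if not chunks:
        raise RuntimeError("No audio was generated.")
    sf.write(output_path, np.concatenate(chunks), sr)
    return output_path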
# Leftover from an earlier multi-input (URL / PDF / steering question) version
# of this Space; not passed to the Interface below.
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
]
demo = gr.Interface(
    title="Daily Paper Podcast 🎙️",
    description=f"""Generates a podcast discussion between two hosts about today's top trending paper on Hugging Face: '**{list(top_papers.keys())[0]}**'
Based on [Open NotebookLM](https://huggingface.co/spaces/m-ric/open-notebooklm), powered by [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) running on HF Inference.""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🤔 Do you have a specific aspect of the paper you'd like the hosts to focus on?",
            placeholder="You can leave this blank for a general discussion.",
        ),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
)
if __name__ == "__main__":
    demo.launch()