import queue
import threading
import spaces
import os
import io
import soundfile as sf
import gradio as gr
import numpy as np
import time
import pymupdf
import requests
from pathlib import Path
from pydub import AudioSegment
import tempfile
import re
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline
# -----------------------------------------------------------------------------
# To-do
# - Add field for the podcast title and description
# - Add field for the script
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Get default podcast materials, from Daily papers and one download
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

PODCAST_SUBJECT = list(top_papers.values())[0]
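# get_top_content() maps paper titles to their full text (the title is reused
# in the Interface description below); dicts preserve insertion order, so the
# first value is today's top trending paper.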
# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# client = InferenceClient(
#     "meta-llama/Llama-3.3-70B-Instruct",
#     provider="cerebras",
#     token=os.getenv("HF_TOKEN"),
# )
client = InferenceClient(
    "Qwen/Qwen3-32B",
    provider="hf-inference",
    token=os.getenv("HF_TOKEN"),
)
def sanitize_script(script: str) -> str:
    """Remove special formatting characters like '*' from the script."""
    # Strip markdown-style formatting characters (add more as needed)
    return re.sub(r"[*_~`]", "", script)
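# Illustrative example: sanitize_script("**Wow**, that's _wild_!") returns
# "Wow, that's wild!" — the TTS reads the text verbatim, so markdown markers
# would otherwise be spoken or garbled.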
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for the script of a podcast given by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
    ]
    if steering_question:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content

    # The model may emit preamble or reasoning before the dialogue starts:
    # keep only the text from the first speaker tag onwards.
    assert "[JANE]" in full_text
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    podcast_text = sanitize_script(podcast_text)
    return podcast_text
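# The parsing above assumes SYSTEM_PROMPT asks the model to write the dialogue
# as one line per turn, each prefixed with a speaker tag, e.g.:
#   [JANE] Welcome! Today we're covering the top paper on the daily list.
#   [MIKE] Thanks Jane. What struck me first about this paper is...
# (Illustrative sample only; the actual prompt lives in prompts.py.)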
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel(repo_id="hexgrad/Kokoro-82M").to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # "a" = American English voices

MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
@spaces.GPU  # request a ZeroGPU slice for TTS inference (the Space runs on Zero)
def generate_podcast(topic: str):
    material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [line for line in podcast_script.strip().splitlines() if line.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000

    for line in lines:
        # Route each line to the right voice based on its speaker tag
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: untagged lines default to the female voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            t1 = time.time()  # measure synthesis time before yielding, so consumer time isn't counted
            print(f"PROCESSED '{utterance}' in {int(t1 - t0)} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
]
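# NOTE: EXAMPLES is not wired into the Interface below, which takes a single
# textbox input; it appears to be left over from the multi-input Open
# NotebookLM version this Space is based on.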
demo = gr.Interface(
    title="Daily Paper Podcast 🎙️",
    description=f"""Generates a podcast discussion between two hosts about today's top trending paper on Hugging Face: '**{list(top_papers.keys())[0]}**'

Based on [Open NotebookLM](spaces/m-ric/open-notebooklm), powered by [Kokoro TTS](hexgrad/Kokoro-82M) and [Qwen3-32B](Qwen/Qwen3-32B) running on HF Inference.""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🤔 Do you have a specific aspect of the paper you'd like the hosts to focus on?",
            placeholder="You can leave this blank for a general discussion.",
        ),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
)
if __name__ == "__main__":
    demo.launch()