Spaces:

csccorner
/

URL-to-Audio-Summary

Running

File size: 3,388 Bytes

d9efe10
 
ff06deb
d9efe10
e9d5607
 
83f07d9
 
556278e
d9efe10
83f07d9
d9efe10
 
 
83f07d9
d9efe10
83f07d9
d9efe10
 
 
 
 
 
ffe9821
d9efe10
556278e
83f07d9
ff06deb
 
83f07d9
e9d5607
 
 
 
 
 
 
 
 
 
 
 
ff06deb
4714e38
261214f
 
 
 
556278e
ff06deb
556278e
 
261214f
4714e38
556278e
 
4714e38
ff06deb
261214f
 
 
f4064e9
261214f
 
 
556278e
261214f
 
d9efe10
261214f
 
556278e
ff06deb
 
261214f
d9efe10
 
556278e
ff06deb
 
 
 
d9efe10
 
556278e
261214f
d9efe10
83f07d9
 
d9efe10

import os
import shutil
import subprocess
import tempfile

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
from bs4 import BeautifulSoup
import requests
from TTS.api import TTS

# Setup summarization LLM: a FLAN-T5 text2text-generation pipeline, wrapped so
# that LangChain can drive it. device=-1 selects CPU inference in transformers
# (the UI description at the bottom of this file advertises "CPU-only").
# NOTE(review): this runs at import time, so importing the module presumably
# loads (and on first use downloads) the model weights.
summary_pipe = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
llm = HuggingFacePipeline(pipeline=summary_pipe)

# Prompt for a more engaging summary. The {text} placeholder is filled with the
# (truncated) article text that url_to_audio_summary passes as {"text": ...}.
summary_prompt = PromptTemplate.from_template("""
Summarize the following article content in a clear, warm, and motivational tone like a preacher speaking to an audience:

{text}

Summary:
""")

# Pipe the formatted prompt into the LLM; url_to_audio_summary calls the result.
summary_chain = summary_prompt | llm

# TTS model setup: the multilingual YourTTS model, with the progress bar and
# GPU use both switched off.
# NOTE(review): constructed at import time — presumably loads/downloads the
# model when the module is first imported.
tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
# Speakers reported by the model; these populate the speaker dropdown in the UI.
SPEAKER_LIST = tts_model.speakers
# Preselect the first speaker, or None when the speaker list is empty/missing.
DEFAULT_SPEAKER = SPEAKER_LIST[0] if SPEAKER_LIST else None

def extract_main_content(url):
    """Download a web page and return the text of its substantial paragraphs.

    Navigation, header, footer, sidebar, script, style and noscript elements
    are stripped before the ``<p>`` tags are collected. Paragraphs of 60
    characters or fewer are discarded as likely boilerplate (captions,
    bylines, cookie notices).

    Args:
        url: Address of the article to fetch.

    Returns:
        The remaining paragraphs joined with newlines, or a message starting
        with ``"Error extracting article content: "`` when the URL is blank,
        the request fails, the server answers with an HTTP error status, or
        no usable paragraph is found. The function never raises; callers
        recognise failure by that prefix.
    """
    error_prefix = "Error extracting article content: "

    if not isinstance(url, str) or not url.strip():
        return error_prefix + "no URL provided"

    try:
        response = requests.get(url.strip(), timeout=10)
        # Without this check a 404/500 error page would be scraped and
        # summarised as though it were the article.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")
        for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
            tag.decompose()

        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        content = "\n".join(t for t in paragraph_texts if len(t) > 60).strip()
    except Exception as e:
        # Deliberately broad: this is the boundary that turns any fetch or
        # parse failure into the in-band error string the caller expects.
        return f"{error_prefix}{e}"

    if not content:
        # An empty string would otherwise be handed to the summariser.
        return error_prefix + "no readable paragraph text found"
    return content

def generate_human_like_audio(text, speaker):
    """Synthesize *text* to speech and return paths to a WAV and an MP3 file.

    The WAV is produced by the module-level ``tts_model``; the MP3 is then
    transcoded from it with the ``ffmpeg`` command-line tool.

    Args:
        text: The text to speak. Blank or non-string input yields no audio.
        speaker: Speaker name forwarded to ``tts_model.tts_to_file``.

    Returns:
        A ``(wav_path, mp3_path)`` tuple. Both entries are ``None`` when
        synthesis fails or there is nothing to speak; only ``mp3_path`` is
        ``None`` when synthesis succeeds but the MP3 conversion fails (for
        example because ffmpeg is not installed). The function never raises.
    """
    if not isinstance(text, str) or not text.strip():
        return None, None

    out_dir = None
    try:
        # Save under the system tmp directory (accessible by Gradio), but in a
        # fresh per-call folder: with fixed file names a failed conversion
        # would hand back the previous request's audio, and concurrent
        # requests would overwrite each other's files.
        out_dir = tempfile.mkdtemp(prefix="summary_")
        wav_path = os.path.join(out_dir, "summary.wav")
        mp3_path = os.path.join(out_dir, "summary.mp3")

        tts_model.tts_to_file(text=text, speaker=speaker, file_path=wav_path)
    except Exception as e:
        print(f"TTS ERROR: {e}")
        if out_dir is not None:
            # Do not leave an empty or partial folder behind on failure.
            shutil.rmtree(out_dir, ignore_errors=True)
        return None, None

    if not os.path.exists(wav_path):
        shutil.rmtree(out_dir, ignore_errors=True)
        return None, None

    try:
        # Argument list instead of a shell string: no quoting problems when
        # the temp path contains spaces, and a non-zero exit status is
        # reported instead of being silently ignored.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-i", wav_path,
                "-codec:a", "libmp3lame",
                "-qscale:a", "4",
                mp3_path,
            ],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # The WAV is still usable, so only the MP3 download is dropped.
        print(f"MP3 CONVERSION ERROR: {e}")
        return wav_path, None

    return wav_path, (mp3_path if os.path.exists(mp3_path) else None)

def url_to_audio_summary(url, speaker):
    """Turn an article URL into a written summary plus spoken audio files.

    The article text is extracted, truncated to 1500 characters, summarised
    by ``summary_chain`` and then voiced by ``generate_human_like_audio``.

    Args:
        url: Address of the article to summarise.
        speaker: Speaker name forwarded to the TTS step.

    Returns:
        A ``(summary, wav_path, mp3_path)`` tuple. When the URL is blank,
        extraction fails, or the model produces no text, ``summary`` holds an
        explanatory message starting with ``"Error"`` and both paths are
        ``None``.
    """
    error_prefix = "Error extracting article content: "
    max_article_chars = 1500

    if not isinstance(url, str) or not url.strip():
        return error_prefix + "no URL provided", None, None

    article_text = extract_main_content(url)
    # Match the extractor's full error prefix rather than just "Error", so a
    # genuine article whose first word is "Error" is not mistaken for a failure.
    if article_text.startswith(error_prefix):
        return article_text, None, None
    if not article_text.strip():
        return error_prefix + "no readable paragraph text found", None, None

    # Truncate so the prompt stays within what the summarisation model accepts.
    if len(article_text) > max_article_chars:
        article_text = article_text[:max_article_chars] + "..."

    # Newer LangChain runnables expose .invoke(); older chains are called directly.
    run_chain = getattr(summary_chain, "invoke", summary_chain)
    summary = run_chain({"text": article_text})
    if isinstance(summary, dict):
        summary = summary.get("text")

    if not summary or not str(summary).strip():
        # Nothing to read aloud; skip the TTS step entirely.
        return "Error generating summary: the model returned no text", None, None

    wav_path, mp3_path = generate_human_like_audio(summary, speaker)
    return summary, wav_path, mp3_path

def interface_wrapper(url, speaker):
    """Gradio callback: forward the inputs to ``url_to_audio_summary``.

    Returns the ``(summary, wav_path, mp3_path)`` triple in the order the
    interface's output components expect.
    """
    result = url_to_audio_summary(url, speaker)
    text_out, wav_file, mp3_file = result
    return text_out, wav_file, mp3_file

# Gradio UI definition. The two inputs are passed positionally to
# interface_wrapper(url, speaker); the three outputs receive, in order, the
# (summary, wav_path, mp3_path) values it returns.
iface = gr.Interface(
    fn=interface_wrapper,
    inputs=[
        gr.Textbox(label="Article URL", placeholder="Paste a news/blog URL here..."),
        # Choices and default come from the speakers reported by the TTS model.
        gr.Dropdown(choices=SPEAKER_LIST, value=DEFAULT_SPEAKER, label="Select Speaker")
    ],
    outputs=[
        gr.Textbox(label="Summary"),
        # type="filepath": the component is given the path of the WAV file.
        gr.Audio(label="Preacher-style Audio Summary", type="filepath"),
        gr.File(label="Download MP3")
    ],
    title="Preaching-Style URL to Audio Agent",
    description="Summarizes article content and reads it aloud in a warm, preacher-style voice using YourTTS. CPU-only."
)

# Start the web app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()