# Hugging Face Space: siddhartharya/My_NotebookLM_Podcast_Generator
# Status: Running
# File size: 2,909 bytes

import gradio as gr
import PyPDF2
import docx
import requests
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
import os
import io

# Initialize the module-level Groq client used by generate_podcast_script.
# NOTE(review): no api_key is passed, so credentials presumably come from the
# environment (e.g. GROQ_API_KEY) — confirm against the Groq SDK docs.
groq_client = Groq()

def extract_text(file_or_url):
    """Extract plain text from a web page URL or an uploaded PDF/DOCX file.

    Args:
        file_or_url: Either a URL string (the page is fetched and its markup
            removed) or an uploaded-file object exposing ``name`` (used for
            the extension check) and ``file`` (the stream handed to the
            parser). ``None`` is accepted and yields no text.

    Returns:
        The extracted text, or ``""`` when the input is ``None``, a blank
        string, or a file whose extension is neither ``.pdf`` nor ``.docx``.

    Raises:
        requests.RequestException: If the URL is malformed, the request times
            out, or the server answers with an error status.
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):  # URL
        url = file_or_url.strip()
        if not url:
            # A blank textbox is "no input", not a malformed URL.
            return ""
        # Bound the wait so a stalled server cannot hang the request forever,
        # and fail loudly on 4xx/5xx instead of summarising an error page.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Script/style bodies are code, not readable content; drop them so
        # they do not pollute the text handed to the language model.
        for tag in soup.find_all(['script', 'style', 'noscript']):
            tag.decompose()
        # A separator keeps words from adjacent elements from fusing together.
        return soup.get_text(separator=' ', strip=True)

    # Compare extensions case-insensitively so "REPORT.PDF" is accepted too.
    filename = file_or_url.name.lower()
    if filename.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file_or_url.file)
        # Guard against pages that yield no text (None) so join() cannot fail.
        return ' '.join(page.extract_text() or '' for page in reader.pages)
    if filename.endswith('.docx'):
        doc = docx.Document(file_or_url.file)
        return ' '.join(para.text for para in doc.paragraphs)
    return ""

def generate_podcast_script(text, model="llama-3.1-70b-versatile"):
    """Ask the Groq chat model for a two-speaker podcast script about ``text``.

    Args:
        text: Source material the two hosts should discuss.
        model: Groq model identifier to use for the completion. Defaults to
            the LLaMa 3.1 70B model the app was written for.

    Returns:
        The generated script as a string (``""`` if the model returned no
        content). Every spoken line is requested in the form ``Man: ...`` or
        ``Woman: ...``, one per line, because that is the format the
        text-to-speech step in this file parses.
    """
    # The explicit formatting instruction matters: the speech synthesis step
    # only voices lines that begin with "Man:" or "Woman:", so a script using
    # names, markdown or other labels would produce no audio at all.
    prompt = f"""Generate a podcast script between a man and a woman discussing the following text:
    {text}
    The podcast should be informative and engaging, with a natural conversation flow.
    Limit the script to approximately 750 words to fit within a 5-minute podcast.
    Format every spoken line on its own line, starting with exactly "Man:" or "Woman:" followed by the dialogue. Do not use markdown, speaker names, or stage directions."""

    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates podcast scripts based on given text."},
            {"role": "user", "content": prompt}
        ],
        model=model,
        # NOTE(review): ~750 words is close to this token cap, so long scripts
        # may be cut off mid-sentence — consider raising it.
        max_tokens=1000,
        temperature=0.7
    )
    # Normalise a missing completion to an empty string for callers.
    return response.choices[0].message.content or ""

def _parse_dialogue_line(line):
    """Return ``(tld, spoken_text)`` for a "Man:"/"Woman:" line, else ``None``."""
    speaker, sep, spoken = line.strip().partition(':')
    if not sep:
        return None
    # Tolerate markdown emphasis and case differences around the label
    # (e.g. "**Man:** hi"), which chat models frequently emit.
    tld = {'man': 'co.uk', 'woman': 'com.au'}.get(speaker.strip('*_# ').lower())
    spoken = spoken.strip('*_ ')
    if tld is None or not spoken:
        # Unknown speaker, or a label with nothing to say.
        return None
    return tld, spoken


def text_to_speech(script, max_duration_ms=300_000):
    """Render a "Man:"/"Woman:" dialogue script to a single MP3 track.

    Each dialogue line is synthesised with gTTS, using the ``co.uk`` voice for
    the man and the ``com.au`` voice for the woman; lines without one of
    those speaker labels are ignored. The clips are concatenated in script
    order and trimmed to ``max_duration_ms``.

    Args:
        script: The podcast script, one spoken line per text line.
        max_duration_ms: Maximum length of the result in milliseconds.
            Defaults to 300,000 ms (5 minutes).

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the MP3 data.

    Raises:
        ValueError: If the script contains no usable "Man:"/"Woman:" lines,
            so there is nothing to synthesise.
    """
    audio_segments = []
    total_ms = 0
    for line in script.splitlines():
        parsed = _parse_dialogue_line(line)
        if parsed is None:
            continue
        tld, spoken = parsed
        tts = gTTS(spoken, lang='en', tld=tld)
        clip_buffer = io.BytesIO()
        tts.write_to_fp(clip_buffer)
        clip_buffer.seek(0)
        segment = AudioSegment.from_mp3(clip_buffer)
        audio_segments.append(segment)
        # Anything past the cap is trimmed below, so stop synthesising once
        # enough audio exists instead of making more TTS requests.
        total_ms += len(segment)
        if total_ms >= max_duration_ms:
            break

    if not audio_segments:
        # sum([]) would be the int 0 and fail on slicing with an opaque
        # TypeError; report the real problem instead.
        raise ValueError(
            "No 'Man:' or 'Woman:' dialogue lines found in the script."
        )

    final_audio = sum(audio_segments)[:max_duration_ms]
    buffer = io.BytesIO()
    final_audio.export(buffer, format="mp3")
    buffer.seek(0)
    return buffer

def generate_podcast(file_or_url, url=None):
    """Turn an uploaded document or a URL into podcast audio plus its script.

    The Gradio interface in this file declares two input components (a file
    upload and a URL textbox), so this callback must accept two values. The
    uploaded file takes precedence; the URL is used only when no file is given.
    Calling with a single positional argument still works.

    Args:
        file_or_url: The uploaded file object, a URL string, or ``None``.
        url: Optional URL string from the textbox; blank values are ignored.

    Returns:
        A ``(audio_path, script)`` tuple. ``audio_path`` is the path of a
        temporary ``.mp3`` file holding the generated audio. When no usable
        input or text is available, returns ``(None, <error message>)``.
    """
    import tempfile  # local import: only needed by this function

    source = file_or_url
    if isinstance(source, str):
        source = source.strip() or None
    if source is None and isinstance(url, str):
        source = url.strip() or None
    if source is None:
        return None, "Failed to extract text. Please check your input."

    text = extract_text(source)
    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."
    script = generate_podcast_script(text)
    audio_buffer = text_to_speech(script)

    # gr.Audio cannot consume an in-memory BytesIO, so persist the MP3 to a
    # temporary file and hand its path to the component instead.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_buffer.read())
    return tmp.name, script

# Input widgets: a document upload and a free-text URL field.
# NOTE(review): Gradio passes one argument per input component, so the
# callback receives two values — confirm generate_podcast accepts both.
_podcast_inputs = [
    gr.File(label="Upload PDF/DOC file"),
    gr.Textbox(label="Or enter URL"),
]

# Output widgets: the rendered audio and the script it was generated from.
_podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Textbox(label="Podcast Script"),
]

# Wire the widgets to the podcast pipeline and start serving the app.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=_podcast_inputs,
    outputs=_podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)",
)

iface.launch()