Spaces:

siddhartharya
/

My_NotebookLM_Podcast_Generator

Running

App Files Files Community

My_NotebookLM_Podcast_Generator / app.py

siddhartharya

Update app.py

773cc27 verified 11 days ago

raw

history blame contribute delete

No virus

3.78 kB

	import gradio as gr
	from utils import generate_script, generate_audio, truncate_text, extract_text_from_url
	from prompts import SYSTEM_PROMPT
	from pydub import AudioSegment
	import pypdf
	import os
	import tempfile

	def generate_podcast(file, url, tone, length):
	    """Build a podcast episode (MP3 file plus transcript) from a PDF or a URL.

	    Exactly one of ``file`` and ``url`` must be supplied. The source text is
	    extracted, shortened with ``truncate_text``, turned into a dialogue script
	    by ``generate_script``, and each dialogue line is synthesized with
	    ``generate_audio`` before the clips are joined into a single MP3.

	    Args:
	        file: The uploaded PDF, either an object exposing its path as
	            ``.name`` or a plain path string; ``None`` when nothing was
	            uploaded.
	        url: Address of a web page to read instead of a PDF. ``None``,
	            empty, or whitespace-only values count as "not provided".
	        tone: Tone selection, forwarded unchanged to ``generate_script``.
	        length: Length selection, forwarded unchanged to ``generate_script``.

	    Returns:
	        ``(audio_path, transcript)`` on success. On invalid input or on any
	        failure, ``(None, message)`` where ``message`` explains the problem;
	        exceptions are reported this way instead of propagating.
	    """
	    try:
	        # A whitespace-only URL is treated the same as no URL at all.
	        url = (url or "").strip()

	        if file and url:
	            return None, "Please provide either a PDF file or a URL, not both."

	        if file:
	            # Accept both upload objects (path in ``.name``) and bare paths.
	            pdf_path = getattr(file, "name", file)
	            if not str(pdf_path).lower().endswith(".pdf"):
	                return None, "Please upload a PDF file."

	            pdf_reader = pypdf.PdfReader(pdf_path)
	            # ``or ""`` guards against a page that yields no text.
	            text = "".join(
	                page.extract_text() or "" for page in pdf_reader.pages
	            )
	        elif url:
	            text = extract_text_from_url(url)
	        else:
	            return None, "Please provide either a PDF file or a URL."

	        # Nothing to talk about: stop before calling the script generator.
	        if not text or not text.strip():
	            return None, "No text could be extracted from the provided source."

	        truncated_text = truncate_text(text)
	        if len(truncated_text) < len(text):
	            print("Warning: The input text was truncated to fit within 2048 tokens.")

	        script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)

	        audio_segments = []
	        transcript_parts = []
	        try:
	            for item in script.dialogue:
	                audio_file = generate_audio(item.text, item.speaker)
	                try:
	                    audio_segments.append(AudioSegment.from_mp3(audio_file))
	                finally:
	                    # Remove the per-line clip even when decoding fails.
	                    os.remove(audio_file)
	                transcript_parts.append(f"{item.speaker}: {item.text}\n\n")
	        except Exception as e:
	            # Caught again by the outer handler, which returns it as a message.
	            raise gr.Error(f"Error generating audio: {str(e)}") from e

	        # sum() of an empty list is the integer 0, which cannot be exported.
	        if not audio_segments:
	            return None, "The generated script contained no dialogue to synthesize."

	        combined_audio = sum(audio_segments)

	        # delete=False: the file must outlive this call so it can be returned.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	            temp_audio_path = temp_audio.name
	        try:
	            combined_audio.export(temp_audio_path, format="mp3")
	        except Exception:
	            # Do not leave an unusable file behind when the export fails.
	            os.remove(temp_audio_path)
	            raise

	        return temp_audio_path, "".join(transcript_parts)

	    except Exception as e:
	        return None, f"An error occurred: {str(e)}"

	# Markdown usage guide for the app; passed to gr.Interface as `description`.
	instructions = """
	# Podcast Generator

	Welcome to the Podcast Generator project! This tool creates custom podcast episodes using AI-generated content.

	## Features
	* Generate podcast scripts from PDF content or web pages
	* Convert text to speech for a natural listening experience
	* Choose the tone of your podcast (Humorous, Casual, or Formal)
	* Export episodes as MP3 files

	## How to Use
	1. Upload a PDF file OR enter a URL (content will be truncated to 2048 tokens if longer)
	2. Select the desired tone:
	- Humorous: Expect jokes, puns, and playful banter
	- Casual: Colloquial language, like a conversation between college students
	- Formal: Professional podcast style with well-structured arguments
	3. Choose the podcast length
	4. Click "Generate" to create your podcast
	5. Listen to the generated audio and review the transcript

	Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for text-to-speech conversion. The podcast features Sarah (American accent) and Maria (British accent) as hosts.
	"""

	# Input components, in the positional order of generate_podcast's
	# parameters: file, url, tone, length.
	_podcast_inputs = [
	    gr.File(label="Upload PDF file (optional)", file_types=[".pdf"]),
	    gr.Textbox(label="OR Enter URL"),
	    gr.Radio(
	        ["humorous", "casual", "formal"],
	        label="Select podcast tone",
	        value="casual",
	    ),
	    gr.Radio(
	        ["Short (1-2 min)", "Medium (3-5 min)"],
	        label="Podcast length",
	        value="Medium (3-5 min)",
	    ),
	]

	# Output components for the (audio path, transcript) pair that
	# generate_podcast returns.
	_podcast_outputs = [
	    gr.Audio(label="Generated Podcast"),
	    gr.Markdown(label="Transcript"),
	]

	iface = gr.Interface(
	    fn=generate_podcast,
	    inputs=_podcast_inputs,
	    outputs=_podcast_outputs,
	    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
	    description=instructions,
	    allow_flagging="never",
	    theme=gr.themes.Soft(),
	)

	if __name__ == "__main__":
	    # Start the app only when run as a script, not when imported.
	    iface.launch()