# Scraped from a Hugging Face file view (page links: raw / history / blame).
# Author: yasserrmd
# Commit: 3a5956d (verified) - "Update app.py"
# Size: 4.2 kB
# app.py
import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces
def create_temp_session_directory(prefix=None, base_dir=None):
    """Create a fresh, uniquely named directory for one processing session.

    Args:
        prefix: Optional string the directory name should start with. When
            ``None`` (the default) ``tempfile.mkdtemp`` picks its own prefix,
            which is exactly what the zero-argument call did before.
        base_dir: Optional parent directory. When ``None`` the platform's
            default temporary directory is used.

    Returns:
        The absolute path of the newly created directory as a string. The
        caller owns the directory and is responsible for deleting it.
    """
    return tempfile.mkdtemp(prefix=prefix, dir=base_dir)
# Combined function to perform all steps sequentially
@spaces.GPU
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3: extract the PDF text, draft a transcript, rewrite it for TTS.

    Args:
        pdf_file: The value delivered by the "Upload PDF" file input. It is
            handed straight to ``shutil.copy``, so it is expected to be a
            filesystem path; it is falsy when nothing has been uploaded.

    Returns:
        A 5-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview, session_dir)``: a status message, the first 500
        characters of the extracted text, the unpickled transcript, the
        unpickled TTS-ready transcript, and the session directory that holds
        all intermediate files.

    Raises:
        gr.Error: If no PDF was uploaded.
        Exception: Any failure from the extraction/transcript steps is
            re-raised after the session directory has been removed.
    """
    # Fail with a readable UI message instead of a TypeError from shutil.copy.
    if not pdf_file:
        raise gr.Error("Please upload a PDF file before running steps 1-3.")

    session_dir = create_temp_session_directory()
    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
        clean_text_path = os.path.join(session_dir, "clean_text.txt")
        transcript_path = os.path.join(session_dir, "data.pkl")
        tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
        text_model = "llama3-70b-8192"

        # Step 1: Extract Text from PDF
        shutil.copy(pdf_file, pdf_path)
        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Display a preview of extracted text
        with open(clean_text_path, 'r', encoding='utf-8') as file:
            text_preview = file.read(500)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, text_model
        )
        transcript_path = processor.generate_transcript()

        # Load the generated transcript for preview.
        # NOTE(review): pickle.load is only acceptable because this file is
        # presumably written by TranscriptProcessor inside our own session
        # directory; never point it at a user-supplied pickle.
        with open(transcript_path, 'rb') as f:
            transcript_preview = pickle.load(f)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        with open(tts_ready_path, 'rb') as f:
            tts_ready_preview = pickle.load(f)
    except Exception:
        # The session directory is only useful if it reaches the caller;
        # remove it on failure so aborted runs do not pile up on disk.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )
# Final Step: Generate Audio after optional adjustments
@spaces.GPU
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Run step 4: persist the (possibly edited) transcript and synthesise audio.

    Args:
        tts_ready_text: The TTS-ready transcript as it currently stands in the
            editable textbox.
        session_dir: Session directory returned by ``process_pdf_to_podcast``;
            ``None`` if steps 1-3 have not been run in this session.

    Returns:
        A 2-tuple ``(status, audio_path)`` where ``audio_path`` is whatever
        ``TTSGenerator.generate_audio()`` returns.

    Raises:
        gr.Error: If there is no usable session directory.
    """
    # Without this guard a click before steps 1-3 dies in os.path.join(None, ...)
    # with a TypeError that means nothing to the user.
    if not session_dir or not os.path.isdir(session_dir):
        raise gr.Error(
            "No active session found. Run steps 1-3 on a PDF before generating audio."
        )

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path, audio_output_path)
    audio_path = tts_gen.generate_audio()
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path
# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Upload control and the button that kicks off steps 1-3.
    # NOTE(review): the original indentation was lost; it is assumed that only
    # the file input and the run button live inside the row - confirm.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")

    # Shared status line written by both callbacks.
    output_status = gr.Textbox(label="Status")

    # Read-only previews produced by steps 1 and 2.
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 output stays editable so it can be tweaked before synthesis.
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Step 4 trigger and its audio player.
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Carries the session directory from the first callback to the second.
    session_dir = gr.State()

    # Wire steps 1-3: one PDF in, status + three previews + session dir out.
    pipeline_outputs = [
        output_status,
        extracted_text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    ]
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=[pdf_input],
        outputs=pipeline_outputs,
    )

    # Wire step 4: edited transcript + session dir in, status + audio out.
    audio_inputs = [tts_ready_preview, session_dir]
    audio_outputs = [output_status, final_audio_output]
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=audio_inputs,
        outputs=audio_outputs,
    )

app.launch()