# app.py
"""Gradio front end that turns an uploaded PDF into podcast audio.

The flow has two user-triggered stages:

1. ``process_pdf_to_podcast`` runs steps 1-3 (extract text, generate a
   transcript, rewrite it for TTS) and shows previews.
2. ``generate_audio_from_modified_text`` runs step 4 on the (optionally
   edited) TTS-ready transcript and returns the audio file.

Intermediate files live in a per-session temporary directory whose path is
carried between the two stages in a ``gr.State``.
"""

import os
import pickle
import shutil
import tempfile

import gradio as gr
import spaces

from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator

# Model identifier handed to TranscriptProcessor.
TEXT_MODEL = "llama3-70b-8192"

# File names used inside each session directory. Both pipeline stages must
# agree on them, so they are defined once here.
PDF_FILENAME = "uploaded_pdf.pdf"
CLEAN_TEXT_FILENAME = "clean_text.txt"
TRANSCRIPT_FILENAME = "data.pkl"
TTS_READY_FILENAME = "podcast_ready_data.pkl"
AUDIO_FILENAME = "final_podcast_audio.mp3"

# Number of characters of extracted text shown in the preview box.
TEXT_PREVIEW_CHARS = 500


def create_temp_session_directory() -> str:
    """Create a fresh temporary directory and return its path."""
    return tempfile.mkdtemp()


# Combined function to perform all steps sequentially
@spaces.GPU(duration=120)
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3 on an uploaded PDF and return previews of each step.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. It is
            expected to be a filesystem path; an object exposing the path
            as ``.name`` is also accepted.

    Returns:
        A 5-tuple ``(status, text_preview, transcript, tts_ready,
        session_dir)`` matching the outputs wired to the "Run All Steps"
        button. ``session_dir`` is the temporary directory holding the
        intermediate files for this run.

    Raises:
        gr.Error: If no file was uploaded.
        Exception: Any error raised by the pipeline steps is re-raised
            after the session directory has been removed.
    """
    if pdf_file is None:
        # Without this guard shutil.copy(None, ...) raises a bare TypeError.
        raise gr.Error("Please upload a PDF before running steps 1-3.")

    # NOTE(review): some Gradio versions may pass a file wrapper instead of
    # a path string - confirm; falling back to the object itself keeps the
    # plain-path case unchanged.
    source_path = getattr(pdf_file, "name", pdf_file)

    session_dir = create_temp_session_directory()
    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, PDF_FILENAME)
        clean_text_path = os.path.join(session_dir, CLEAN_TEXT_FILENAME)
        transcript_path = os.path.join(session_dir, TRANSCRIPT_FILENAME)
        tts_ready_path = os.path.join(session_dir, TTS_READY_FILENAME)

        # Step 1: Extract Text from PDF
        shutil.copy(source_path, pdf_path)
        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Display a preview of extracted text
        with open(clean_text_path, "r", encoding="utf-8") as text_file:
            text_preview = text_file.read(TEXT_PREVIEW_CHARS)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, TEXT_MODEL
        )
        transcript_path = processor.generate_transcript()

        # Load the generated transcript for preview.
        # NOTE(security): pickle.load can execute arbitrary code from a
        # crafted file. The path comes from TranscriptProcessor, presumably
        # a file it wrote itself - verify before pointing this elsewhere.
        with open(transcript_path, "rb") as transcript_file:
            transcript = pickle.load(transcript_file)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        # (same pickle caveat as above).
        with open(tts_ready_path, "rb") as tts_ready_file:
            tts_ready = pickle.load(tts_ready_file)
    except Exception:
        # The directory is only useful if it can be handed to step 4;
        # remove it on failure so aborted runs do not accumulate on disk.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript,
        tts_ready,
        session_dir,
    )


# Final Step: Generate Audio after optional adjustments
@spaces.GPU(duration=300)
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Run step 4: save the edited transcript and synthesize audio from it.

    Args:
        tts_ready_text: The TTS-ready transcript as currently shown in the
            editable textbox.
        session_dir: Session directory returned by
            ``process_pdf_to_podcast``; ``None`` if steps 1-3 never ran.

    Returns:
        A 2-tuple ``(status, audio_path)`` where ``audio_path`` is whatever
        ``TTSGenerator.generate_audio()`` returned.

    Raises:
        gr.Error: If there is no usable session directory.
    """
    if not session_dir or not os.path.isdir(session_dir):
        # The state is empty until steps 1-3 have completed successfully.
        raise gr.Error(
            "No active session found. Run steps 1-3 before generating audio."
        )

    tts_ready_path = os.path.join(session_dir, TTS_READY_FILENAME)
    audio_output_path = os.path.join(session_dir, AUDIO_FILENAME)

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, "wb") as tts_ready_file:
        pickle.dump(tts_ready_text, tts_ready_file)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path, audio_output_path)
    audio_path = tts_gen.generate_audio()
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Single-click initiation of Steps 1-3
    # NOTE(review): the row is assumed to hold only the file input and the
    # run button - confirm against the intended layout.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")

    output_status = gr.Textbox(label="Status")

    # Step 1 Preview of Extracted Text
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Step 2 Preview of Generated Transcript
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 Editable Rewritten Transcript for TTS
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Button for generating audio with editable transcript
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Carries the session directory path from steps 1-3 to step 4.
    session_dir = gr.State()

    # Step 1-3 execution
    run_all_button.click(
        process_pdf_to_podcast,
        inputs=pdf_input,
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
            session_dir,
        ],
    )

    # Final step: Generate Audio from modified TTS-ready transcript
    generate_audio_button.click(
        generate_audio_from_modified_text,
        inputs=[tts_ready_preview, session_dir],
        outputs=[output_status, final_audio_output],
    )

app.launch()