File size: 3,944 Bytes
f08a3f5
 
 
 
 
 
 
 
c634737
191aa05
8284962
f08a3f5
c634737
 
 
 
f08a3f5
8284962
f08a3f5
c634737
 
 
 
 
 
 
 
 
3290c15
c634737
f08a3f5
191aa05
f08a3f5
3290c15
f08a3f5
 
 
 
 
 
 
5fdd625
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8284962
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# app.py

import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces


def create_temp_session_directory():
    """Create a fresh temporary directory for one processing session.

    Returns:
        The path of the newly created directory, as returned by
        ``tempfile.mkdtemp``. Nothing here removes the directory again.
    """
    session_dir = tempfile.mkdtemp()
    return session_dir

# Combined function to perform steps 1-3 sequentially
@spaces.GPU
def process_pdf_to_podcast(pdf_file):
    """Run text extraction, transcript generation and the TTS rewrite for a PDF.

    Args:
        pdf_file: The value delivered by the "Upload PDF" ``gr.File``
            component; ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview)``: a status message, the first 500 characters of
        the cleaned text, the unpickled transcript, and the unpickled
        TTS-ready transcript.

    Raises:
        gr.Error: If no PDF was uploaded.
    """
    # Fail with a readable message instead of an opaque TypeError from
    # shutil.copy when the button is clicked without an upload.
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file before running steps 1-3.")

    # NOTE(review): depending on the Gradio version the upload may arrive as a
    # path string or as a file-like object exposing its path via `.name` --
    # confirm against the pinned Gradio version.
    source_pdf = getattr(pdf_file, "name", pdf_file)

    # Each run works in its own directory; it is not removed by this function.
    session_dir = create_temp_session_directory()

    # Define paths within the session directory
    pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
    clean_text_path = os.path.join(session_dir, "clean_text.txt")
    transcript_path = os.path.join(session_dir, "data.pkl")
    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    text_model = "llama3-70b-8192"

    # Step 1: Extract Text from PDF
    shutil.copy(source_pdf, pdf_path)

    extractor = PDFTextExtractor(
        pdf_path, clean_text_path, transcript_path, tts_ready_path, text_model
    )
    # The extractor reports where it actually wrote the cleaned text.
    clean_text_path = extractor.clean_and_save_text()

    # Display a preview of extracted text
    with open(clean_text_path, 'r', encoding='utf-8') as file:
        text_preview = file.read(500)

    # Step 2: Generate Transcript
    processor = TranscriptProcessor(clean_text_path, text_model)
    transcript_path = processor.generate_transcript()

    # Load the generated transcript for preview (pickle written by step 2)
    with open(transcript_path, 'rb') as f:
        transcript_preview = pickle.load(f)

    # Step 3: Rewrite Transcript for TTS
    tts_ready_path = processor.rewrite_transcript()

    # Load the rewritten transcript for preview and editing
    with open(tts_ready_path, 'rb') as f:
        tts_ready_preview = pickle.load(f)

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
    )


# Final Step: Generate Audio after optional adjustments
@spaces.GPU
def generate_audio_from_modified_text(tts_ready_text):
    """Persist the (possibly edited) TTS-ready transcript and synthesize audio.

    Args:
        tts_ready_text: The transcript taken from the editable textbox.

    Returns:
        A 2-tuple ``(status, audio_path)`` where ``audio_path`` is whatever
        ``TTSGenerator.generate_audio()`` returns.
    """
    # The paths built in process_pdf_to_podcast are local to that function, so
    # this step needs a location of its own; without it `tts_ready_path` is an
    # undefined name and the call fails with NameError.
    session_dir = create_temp_session_directory()
    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path)
    audio_path = tts_gen.generate_audio()

    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown(value="# PDF to Podcast Conversion Application")

    # Top row: PDF upload, the trigger for steps 1-3, and the shared status line
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button(value="Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")

    # Read-only preview of the text extracted in step 1
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Read-only preview of the transcript generated in step 2
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 output; editable so it can be adjusted before audio generation
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Trigger and player for the final audio step
    generate_audio_button = gr.Button(value="Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Wire steps 1-3 to the first button
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=pdf_input,
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
        ],
    )

    # Wire the audio step to the second button, fed by the editable transcript
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=tts_ready_preview,
        outputs=[output_status, final_audio_output],
    )

app.launch()