yalrashed committed on
Commit
8638395
·
verified ·
1 Parent(s): 405a174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -26
app.py CHANGED
@@ -9,21 +9,34 @@ from src.format_script import format_script
9
  from src.eleven import generate_podcast as generate_audio
10
 
11
  def process_file(pdf_file, progress=gr.Progress()):
 
 
12
  # Create temp directory for intermediate files
13
  os.makedirs('temp', exist_ok=True)
14
 
15
  try:
16
  # Step 1: Process PDF
17
- progress(0.1, desc="Cleaning PDF...")
18
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_pdf:
19
- tmp_pdf.write(pdf_file.read())
20
- pdf_text = extract_text_from_pdf(tmp_pdf.name)
 
 
 
 
 
 
 
 
21
 
22
- # Clean and process text
23
  chunks = create_screenplay_chunks(pdf_text)
24
  cleaned_text = ""
 
 
25
  for i, chunk in enumerate(chunks):
26
- progress(0.1 + (0.2 * (i/len(chunks))), desc=f"Processing chunk {i+1}/{len(chunks)}...")
 
27
  processed = process_screenplay_chunk(chunk, i)
28
  cleaned_text += processed + "\n\n"
29
 
@@ -32,26 +45,26 @@ def process_file(pdf_file, progress=gr.Progress()):
32
  f.write(cleaned_text)
33
 
34
  # Step 2: Generate Initial Podcast Script
35
- progress(0.3, desc="Generating podcast script...")
36
  initial_script = generate_initial_podcast(cleaned_text)
37
  with open("temp/generated_podcast.pkl", "wb") as f:
38
  pickle.dump(initial_script, f)
39
 
40
  # Step 3: Dramatize Script
41
- progress(0.5, desc="Dramatizing script...")
42
  dramatized_script = dramatize_podcast(initial_script)
43
- with open("temp/dramatized_podcast.pkl", "wb") as f:
44
- pickle.dump(dramatized_script, f)
45
-
46
  # Step 4: Format Script
47
- progress(0.7, desc="Formatting script...")
48
  format_script("temp/dramatized_podcast.txt", "temp/formatted_podcast.txt")
49
  with open("temp/formatted_podcast.txt", "r") as f:
50
  formatted_script = f.read()
51
 
52
  # Step 5: Generate Audio
53
  progress(0.9, desc="Generating audio...")
54
- audio_file = generate_audio() # This will read formatted_podcast.txt
55
 
56
  progress(1.0, desc="Complete!")
57
 
@@ -65,10 +78,6 @@ def process_file(pdf_file, progress=gr.Progress()):
65
 
66
  except Exception as e:
67
  raise gr.Error(f"Error processing file: {str(e)}")
68
- finally:
69
- # Cleanup temp files if needed
70
- if os.path.exists(tmp_pdf.name):
71
- os.unlink(tmp_pdf.name)
72
 
73
  # Create Gradio interface
74
  with gr.Blocks(title="PDF to Podcast Converter") as demo:
@@ -140,15 +149,6 @@ with gr.Blocks(title="PDF to Podcast Converter") as demo:
140
  outputs=outputs,
141
  api_name="convert"
142
  )
143
-
144
- # Example section
145
- gr.Examples(
146
- examples=["example.pdf"],
147
- inputs=pdf_input,
148
- outputs=outputs,
149
- fn=process_file,
150
- cache_examples=True
151
- )
152
 
153
  if __name__ == "__main__":
154
  demo.launch()
 
9
  from src.eleven import generate_podcast as generate_audio
10
 
11
  def process_file(pdf_file, progress=gr.Progress()):
12
+ """Process uploaded PDF through the entire pipeline"""
13
+
14
  # Create temp directory for intermediate files
15
  os.makedirs('temp', exist_ok=True)
16
 
17
  try:
18
  # Step 1: Process PDF
19
+ progress(0.1, desc="Reading PDF...")
20
+ if pdf_file is None:
21
+ raise gr.Error("Please upload a PDF file")
22
+
23
+ # Read PDF content
24
+ pdf_content = pdf_file.read()
25
+
26
+ # Extract and clean text
27
+ progress(0.2, desc="Extracting text...")
28
+ pdf_text = extract_text_from_pdf(pdf_content)
29
+ if not pdf_text:
30
+ raise gr.Error("Failed to extract text from PDF")
31
 
32
+ # Process text in chunks
33
  chunks = create_screenplay_chunks(pdf_text)
34
  cleaned_text = ""
35
+ chunk_count = len(chunks)
36
+
37
  for i, chunk in enumerate(chunks):
38
+ progress(0.2 + (0.2 * (i/chunk_count)),
39
+ desc=f"Processing chunk {i+1}/{chunk_count}...")
40
  processed = process_screenplay_chunk(chunk, i)
41
  cleaned_text += processed + "\n\n"
42
 
 
45
  f.write(cleaned_text)
46
 
47
  # Step 2: Generate Initial Podcast Script
48
+ progress(0.4, desc="Generating podcast script...")
49
  initial_script = generate_initial_podcast(cleaned_text)
50
  with open("temp/generated_podcast.pkl", "wb") as f:
51
  pickle.dump(initial_script, f)
52
 
53
  # Step 3: Dramatize Script
54
+ progress(0.6, desc="Dramatizing script...")
55
  dramatized_script = dramatize_podcast(initial_script)
56
+ with open("temp/dramatized_podcast.txt", "w") as f:
57
+ f.write(dramatized_script)
58
+
59
  # Step 4: Format Script
60
+ progress(0.8, desc="Formatting script...")
61
  format_script("temp/dramatized_podcast.txt", "temp/formatted_podcast.txt")
62
  with open("temp/formatted_podcast.txt", "r") as f:
63
  formatted_script = f.read()
64
 
65
  # Step 5: Generate Audio
66
  progress(0.9, desc="Generating audio...")
67
+ audio_file = generate_audio()
68
 
69
  progress(1.0, desc="Complete!")
70
 
 
78
 
79
  except Exception as e:
80
  raise gr.Error(f"Error processing file: {str(e)}")
 
 
 
 
81
 
82
  # Create Gradio interface
83
  with gr.Blocks(title="PDF to Podcast Converter") as demo:
 
149
  outputs=outputs,
150
  api_name="convert"
151
  )
 
 
 
 
 
 
 
 
 
152
 
153
  if __name__ == "__main__":
154
  demo.launch()