siddhartharya committed on
Commit
db3e1e6
·
verified ·
1 Parent(s): ea2bf21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -37
app.py CHANGED
@@ -7,55 +7,85 @@ import os
7
  import tempfile
8
 
def generate_podcast(file, url, tone, length):
    """Turn a PDF upload or a web page into a podcast audio file plus transcript.

    Exactly one of ``file`` and ``url`` must be provided.

    Args:
        file: Uploaded file object whose ``.name`` is a path ending in ``.pdf``,
            or a falsy value when no file was uploaded.
        url: Address of a page to extract text from, or a falsy value.
        tone: Forwarded unchanged to ``generate_script``.
        length: Forwarded unchanged to ``generate_script``.

    Returns:
        A ``(wav_path, transcript)`` tuple. ``wav_path`` names a temporary
        ``.wav`` file created with ``delete=False``; ``transcript`` is a
        Markdown string with one ``**speaker**: text`` paragraph per line of
        dialogue.

    Raises:
        gr.Error: On invalid input, or when reading the source, generating the
            script, or generating the audio fails.
    """
    # Input validation up front: exactly one source is allowed.
    if file and url:
        raise gr.Error("Please provide either a PDF file or a URL, not both.")
    if not file and not url:
        raise gr.Error("Please provide either a PDF file or a URL.")

    if file:
        if not file.name.lower().endswith('.pdf'):
            raise gr.Error("Please upload a PDF file.")

        try:
            reader = pypdf.PdfReader(file.name)
            text = ""
            for pdf_page in reader.pages:
                text = text + pdf_page.extract_text()
        except Exception as e:
            raise gr.Error(f"Error reading the PDF file: {str(e)}")
    else:
        try:
            text = extract_text_from_url(url)
        except Exception as e:
            raise gr.Error(f"Error extracting text from URL: {str(e)}")

    truncated_text = truncate_text(text)
    was_truncated = len(truncated_text) < len(text)
    if was_truncated:
        print("Warning: The input text was truncated to fit within 2048 tokens.")

    try:
        script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)
    except Exception as e:
        raise gr.Error(f"Error generating script: {str(e)}")

    clips = []
    transcript_parts = []
    try:
        for line in script.dialogue:
            wav_path = generate_audio(line.text, line.speaker)
            clips.append(AudioSegment.from_wav(wav_path))
            transcript_parts.append(f"**{line.speaker}**: {line.text}\n\n")
            os.remove(wav_path)  # Clean up temporary audio file
    except Exception as e:
        raise gr.Error(f"Error generating audio: {str(e)}")

    transcript = "".join(transcript_parts)
    combined_audio = sum(clips)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        combined_audio.export(temp_audio.name, format="wav")
        return temp_audio.name, transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # The rest of the app.py file remains the same
 
 
7
  import tempfile
8
 
def generate_podcast(file, url, tone, length):
    """Turn a PDF upload or a web page into a podcast audio file plus transcript.

    Exactly one of ``file`` and ``url`` must be provided. Failures are reported
    through the second return value instead of being raised, so the caller
    always receives a two-element tuple.

    Args:
        file: Uploaded file object whose ``.name`` is a path ending in ``.pdf``,
            or a falsy value when no file was uploaded.
        url: Address of a page to extract text from, or a falsy value.
        tone: Forwarded unchanged to ``generate_script``.
        length: Forwarded unchanged to ``generate_script``.

    Returns:
        ``(wav_path, transcript)`` on success, where ``wav_path`` names a
        temporary ``.wav`` file created with ``delete=False`` and ``transcript``
        is Markdown with one ``**speaker**: text`` paragraph per dialogue line.
        ``(None, message)`` when the input is invalid or any step fails.
    """
    try:
        if file and url:
            return None, "Please provide either a PDF file or a URL, not both."

        if file:
            if not file.name.lower().endswith('.pdf'):
                return None, "Please upload a PDF file."

            pdf_reader = pypdf.PdfReader(file.name)
            # `or ""` keeps a page that yields no text from aborting the whole
            # document; join avoids repeated string reallocation.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        elif url:
            text = extract_text_from_url(url)
        else:
            return None, "Please provide either a PDF file or a URL."

        truncated_text = truncate_text(text)
        if len(truncated_text) < len(text):
            print("Warning: The input text was truncated to fit within 2048 tokens.")

        script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)

        audio_segments = []
        transcript_parts = []
        for item in script.dialogue:
            audio_file = generate_audio(item.text, item.speaker)
            try:
                audio_segments.append(AudioSegment.from_wav(audio_file))
            finally:
                # Remove the per-line temporary file even when decoding fails,
                # so a bad clip does not leave files behind.
                os.remove(audio_file)
            transcript_parts.append(f"**{item.speaker}**: {item.text}\n\n")

        if not audio_segments:
            # sum([]) would be the int 0, which has no .export(); report the
            # real cause instead of an AttributeError message.
            return None, "An error occurred: the generated script contained no dialogue."

        combined_audio = sum(audio_segments)

        # Reserve a path, then close the handle before exporting: writing to a
        # file that is still open by name fails on some platforms (Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio_path = temp_audio.name
        combined_audio.export(temp_audio_path, format="wav")

        return temp_audio_path, "".join(transcript_parts)

    except Exception as e:
        # UI boundary: surface any failure as a message in the transcript pane.
        return None, f"An error occurred: {str(e)}"
50
+
51
# Markdown shown as the interface description. The export format listed under
# "Features" is WAV because generate_podcast exports with format="wav".
instructions = """
# Podcast Generator

Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.

## Features
* Generate podcast scripts from PDF content or web pages
* Convert text to speech for a natural listening experience
* Choose the tone of your podcast
* Export episodes as WAV files

## How to Use
1. Upload a PDF file OR enter a URL (content will be truncated to 2048 tokens if longer)
2. Select the desired tone (humorous, casual, formal)
3. Choose the podcast length
4. Click "Generate" to create your podcast
5. Listen to the generated audio and review the transcript

Note: This tool uses the LLaMa 3.1 70B model for script generation and Voice RSS for text-to-speech conversion. The input is limited to 2048 tokens to ensure compatibility with the model. The podcast features John (Male, American accent) and Lily (Female, British accent) as hosts.
"""

# Inputs are passed positionally to generate_podcast(file, url, tone, length);
# its two return values fill the audio player and the transcript pane.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=[
        gr.File(label="Upload PDF file (optional)", file_types=[".pdf"]),
        gr.Textbox(label="OR Enter URL"),
        gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
        gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
    ],
    outputs=[
        gr.Audio(label="Generated Podcast"),
        gr.Markdown(label="Transcript")
    ],
    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft()
)

if __name__ == "__main__":
    iface.launch()