Files changed (4) hide show
  1. README.md +12 -1
  2. main.py → app.py +52 -24
  3. prompts.py +1 -1
  4. utils.py +6 -6
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  # Open NotebookLM
2
 
3
  ## Overview
@@ -38,7 +49,7 @@ To set up the project, follow these steps:
38
 
39
  2. **Run the application:**
40
  ```bash
41
- python main.py
42
  ```
43
  This will launch a Gradio interface in your web browser.
44
 
 
1
+ ---
2
+ title: Open NotebookLM
3
+ emoji: 🎙️
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
  # Open NotebookLM
13
 
14
  ## Overview
 
49
 
50
  2. **Run the application:**
51
  ```bash
52
+ python app.py
53
  ```
54
  This will launch a Gradio interface in your web browser.
55
 
main.py → app.py RENAMED
@@ -8,12 +8,10 @@ import os
8
  import time
9
  from pathlib import Path
10
  from tempfile import NamedTemporaryFile
11
- from typing import List, Literal, Tuple
12
 
13
  # Third-party imports
14
  import gradio as gr
15
- from fastapi import FastAPI
16
- from fastapi.staticfiles import StaticFiles
17
  from loguru import logger
18
  from pydantic import BaseModel
19
  from pypdf import PdfReader
@@ -23,11 +21,6 @@ from pydub import AudioSegment
23
  from prompts import SYSTEM_PROMPT
24
  from utils import generate_script, generate_audio
25
 
26
- app = FastAPI()
27
-
28
- app.mount("/static", StaticFiles(directory="static"), name="static")
29
-
30
-
31
  class DialogueItem(BaseModel):
32
  """A single dialogue item."""
33
 
@@ -39,30 +32,55 @@ class Dialogue(BaseModel):
39
  """The dialogue between the host and guest."""
40
 
41
  scratchpad: str
42
- participants: List[str]
43
  dialogue: List[DialogueItem]
44
 
45
 
46
- def generate_podcast(file: str) -> Tuple[str, str]:
47
  """Generate the audio and transcript from the PDF."""
 
 
 
 
48
  # Read the PDF file and extract text
49
- with Path(file).open("rb") as f:
50
- reader = PdfReader(f)
51
- text = "\n\n".join([page.extract_text() for page in reader.pages])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Call the LLM
54
- llm_output = generate_script(SYSTEM_PROMPT, text, Dialogue)
55
  logger.info(f"Generated dialogue: {llm_output}")
56
 
57
  # Process the dialogue
58
  audio_segments = []
59
- transcript = ""
60
  total_characters = 0
61
 
62
  for line in llm_output.dialogue:
63
  logger.info(f"Generating audio for {line.speaker}: {line.text}")
64
- transcript_line = f"{line.speaker}: {line.text}"
65
- transcript += transcript_line + "\n\n"
 
 
 
66
  total_characters += len(line.text)
67
 
68
  # Get audio file path
@@ -96,23 +114,33 @@ def generate_podcast(file: str) -> Tuple[str, str]:
96
 
97
 
98
  demo = gr.Interface(
99
- title="OpenPodcast",
100
- description="Convert your PDFs into podcasts with open-source AI models.",
101
  fn=generate_podcast,
102
  inputs=[
103
  gr.File(
104
  label="PDF",
 
 
 
 
 
 
 
 
 
 
 
105
  ),
106
  ],
107
  outputs=[
108
  gr.Audio(label="Audio", format="mp3"),
109
- gr.Textbox(label="Transcript"),
110
  ],
111
  allow_flagging="never",
112
- api_name=False,
 
113
  )
114
 
115
- app = gr.mount_gradio_app(app, demo, path="/")
116
-
117
  if __name__ == "__main__":
118
- demo.launch(show_api=False)
 
8
  import time
9
  from pathlib import Path
10
  from tempfile import NamedTemporaryFile
11
+ from typing import List, Literal, Tuple, Optional
12
 
13
  # Third-party imports
14
  import gradio as gr
 
 
15
  from loguru import logger
16
  from pydantic import BaseModel
17
  from pypdf import PdfReader
 
21
  from prompts import SYSTEM_PROMPT
22
  from utils import generate_script, generate_audio
23
 
 
 
 
 
 
24
  class DialogueItem(BaseModel):
25
  """A single dialogue item."""
26
 
 
32
  """The dialogue between the host and guest."""
33
 
34
  scratchpad: str
35
+ name_of_guest: str
36
  dialogue: List[DialogueItem]
37
 
38
 
39
+ def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
40
  """Generate the audio and transcript from the PDF."""
41
+ # Check if the file is a PDF
42
+ if not file.lower().endswith('.pdf'):
43
+ raise gr.Error("Please upload a PDF file.")
44
+
45
  # Read the PDF file and extract text
46
+ try:
47
+ with Path(file).open("rb") as f:
48
+ reader = PdfReader(f)
49
+ text = "\n\n".join([page.extract_text() for page in reader.pages])
50
+ except Exception as e:
51
+ raise gr.Error(f"Error reading the PDF file: {str(e)}")
52
+
53
+ # Check if the PDF has more than ~100,000 characters
54
+ if len(text) > 100000:
55
+ raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
56
+
57
+ # Modify the system prompt based on the chosen tone and length
58
+ modified_system_prompt = SYSTEM_PROMPT
59
+ if tone:
60
+ modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
61
+ if length:
62
+ length_instructions = {
63
+ "Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
64
+ "Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
65
+ }
66
+ modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
67
 
68
  # Call the LLM
69
+ llm_output = generate_script(modified_system_prompt, text, Dialogue)
70
  logger.info(f"Generated dialogue: {llm_output}")
71
 
72
  # Process the dialogue
73
  audio_segments = []
74
+ transcript = "" # start with an empty transcript
75
  total_characters = 0
76
 
77
  for line in llm_output.dialogue:
78
  logger.info(f"Generating audio for {line.speaker}: {line.text}")
79
+ if line.speaker == "Host (Jane)":
80
+ speaker = f"**Jane**: {line.text}"
81
+ else:
82
+ speaker = f"**{llm_output.name_of_guest}**: {line.text}"
83
+ transcript += speaker + "\n\n"
84
  total_characters += len(line.text)
85
 
86
  # Get audio file path
 
114
 
115
 
116
# Gradio UI: takes a PDF (plus tone/length options), returns podcast audio
# and a markdown transcript via generate_podcast.
demo = gr.Interface(
    title="Open NotebookLM",
    description="Convert your PDFs into podcasts with open-source AI models (Llama 3.1 405B and MeloTTS). \n \n Note: Only the text content of the PDF will be processed. Images and tables are not included. The PDF should be no more than 100,000 characters due to the context length of Llama 3.1 405B.",
    fn=generate_podcast,
    inputs=[
        gr.File(
            label="PDF",
            file_types=[".pdf", "file/*"],
        ),
        gr.Radio(
            choices=["Fun", "Formal"],
            label="Tone of the podcast",
            # Fix: the default was "casual", which is not one of the choices,
            # so no tone was preselected. Default to "Fun" instead.
            value="Fun",
        ),
        gr.Radio(
            choices=["Short (1-2 min)", "Medium (3-5 min)"],
            label="Length of the podcast",
            value="Medium (3-5 min)",
        ),
    ],
    outputs=[
        gr.Audio(label="Audio", format="mp3"),
        # Markdown output so the **speaker** bolding in the transcript renders.
        gr.Markdown(label="Transcript"),
    ],
    allow_flagging="never",
    api_name="generate_podcast",  # stable endpoint name in the Gradio API
    theme=gr.themes.Soft(),
)
144
 
 
 
145
if __name__ == "__main__":
    # Queue limits concurrent podcast generations to 3; the API stays enabled.
    # (Previous comment claimed a concurrency of 5, contradicting the code.)
    demo.queue(default_concurrency_limit=3).launch(show_api=True)
prompts.py CHANGED
@@ -27,7 +27,7 @@ Your focus is on extracting the most interesting and insightful content for a po
27
  - Ensure complex topics are explained clearly and simply.
28
  - Focus on maintaining an engaging and lively tone that would captivate listeners.
29
  - Rules:
30
- > The host should go first.
31
  > The host should ask the guest questions.
32
  > The host should summarize the key insights at the end.
33
  > Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
 
27
  - Ensure complex topics are explained clearly and simply.
28
  - Focus on maintaining an engaging and lively tone that would captivate listeners.
29
  - Rules:
30
+ > The host ALWAYS goes first and is interviewing the guest. The guest is the one who explains the topic.
31
  > The host should ask the guest questions.
32
  > The host should summarize the key insights at the end.
33
  > Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
utils.py CHANGED
@@ -23,19 +23,19 @@ client = OpenAI(
23
  hf_client = Client("mrfakename/MeloTTS")
24
 
25
 
26
- def generate_script(system_prompt: str, text: str, dialogue_format):
27
  """Get the dialogue from the LLM."""
28
  # Load as python object
29
  try:
30
- response = call_llm(system_prompt, text, dialogue_format)
31
- dialogue = dialogue_format.model_validate_json(
32
  response.choices[0].message.content
33
  )
34
  except ValidationError as e:
35
  error_message = f"Failed to parse dialogue JSON: {e}"
36
- system_prompt_with_error = f"{system_prompt}\n\n Please return a VALID JSON object. This was the earlier error: {error_message}"
37
- response = call_llm(system_prompt_with_error, text, dialogue_format)
38
- dialogue = dialogue_format.model_validate_json(
39
  response.choices[0].message.content
40
  )
41
  return dialogue
 
23
  hf_client = Client("mrfakename/MeloTTS")
24
 
25
 
26
def generate_script(system_prompt: str, input_text: str, output_model):
    """Ask the LLM for a dialogue and validate it against *output_model*.

    On a pydantic ValidationError, retries exactly once with the error
    message appended to the system prompt so the model can correct its JSON.
    """
    try:
        response = call_llm(system_prompt, input_text, output_model)
        return output_model.model_validate_json(
            response.choices[0].message.content
        )
    except ValidationError as err:
        # Single retry: feed the validation failure back to the model and
        # explicitly demand valid JSON. A second failure propagates.
        retry_prompt = (
            f"{system_prompt}\n\nPlease return a VALID JSON object. "
            f"This was the earlier error: Failed to parse dialogue JSON: {err}"
        )
        response = call_llm(retry_prompt, input_text, output_model)
        return output_model.model_validate_json(
            response.choices[0].message.content
        )