Spaces:
Running
on
T4
Running
on
T4
Update app.py
#2
by
mrfakename
- opened
- README.md +12 -1
- main.py → app.py +52 -24
- prompts.py +1 -1
- utils.py +6 -6
README.md
CHANGED
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Open NotebookLM
|
2 |
|
3 |
## Overview
|
@@ -38,7 +49,7 @@ To set up the project, follow these steps:
|
|
38 |
|
39 |
2. **Run the application:**
|
40 |
```bash
|
41 |
-
python
|
42 |
```
|
43 |
This will launch a Gradio interface in your web browser.
|
44 |
|
|
|
1 |
+
---
|
2 |
+
title: Open NotebookLM
|
3 |
+
emoji: 🎙️
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.44.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
# Open NotebookLM
|
13 |
|
14 |
## Overview
|
|
|
49 |
|
50 |
2. **Run the application:**
|
51 |
```bash
|
52 |
+
python app.py
|
53 |
```
|
54 |
This will launch a Gradio interface in your web browser.
|
55 |
|
main.py → app.py
RENAMED
@@ -8,12 +8,10 @@ import os
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
-
from typing import List, Literal, Tuple
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
15 |
-
from fastapi import FastAPI
|
16 |
-
from fastapi.staticfiles import StaticFiles
|
17 |
from loguru import logger
|
18 |
from pydantic import BaseModel
|
19 |
from pypdf import PdfReader
|
@@ -23,11 +21,6 @@ from pydub import AudioSegment
|
|
23 |
from prompts import SYSTEM_PROMPT
|
24 |
from utils import generate_script, generate_audio
|
25 |
|
26 |
-
app = FastAPI()
|
27 |
-
|
28 |
-
app.mount("/static", StaticFiles(directory="static"), name="static")
|
29 |
-
|
30 |
-
|
31 |
class DialogueItem(BaseModel):
|
32 |
"""A single dialogue item."""
|
33 |
|
@@ -39,30 +32,55 @@ class Dialogue(BaseModel):
|
|
39 |
"""The dialogue between the host and guest."""
|
40 |
|
41 |
scratchpad: str
|
42 |
-
|
43 |
dialogue: List[DialogueItem]
|
44 |
|
45 |
|
46 |
-
def generate_podcast(file: str) -> Tuple[str, str]:
|
47 |
"""Generate the audio and transcript from the PDF."""
|
|
|
|
|
|
|
|
|
48 |
# Read the PDF file and extract text
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Call the LLM
|
54 |
-
llm_output = generate_script(
|
55 |
logger.info(f"Generated dialogue: {llm_output}")
|
56 |
|
57 |
# Process the dialogue
|
58 |
audio_segments = []
|
59 |
-
transcript = ""
|
60 |
total_characters = 0
|
61 |
|
62 |
for line in llm_output.dialogue:
|
63 |
logger.info(f"Generating audio for {line.speaker}: {line.text}")
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
total_characters += len(line.text)
|
67 |
|
68 |
# Get audio file path
|
@@ -96,23 +114,33 @@ def generate_podcast(file: str) -> Tuple[str, str]:
|
|
96 |
|
97 |
|
98 |
demo = gr.Interface(
|
99 |
-
title="
|
100 |
-
description="Convert your PDFs into podcasts with open-source AI models.",
|
101 |
fn=generate_podcast,
|
102 |
inputs=[
|
103 |
gr.File(
|
104 |
label="PDF",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
),
|
106 |
],
|
107 |
outputs=[
|
108 |
gr.Audio(label="Audio", format="mp3"),
|
109 |
-
gr.
|
110 |
],
|
111 |
allow_flagging="never",
|
112 |
-
api_name=
|
|
|
113 |
)
|
114 |
|
115 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
116 |
-
|
117 |
if __name__ == "__main__":
|
118 |
-
demo.launch(show_api=
|
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
+
from typing import List, Literal, Tuple, Optional
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
|
|
|
|
15 |
from loguru import logger
|
16 |
from pydantic import BaseModel
|
17 |
from pypdf import PdfReader
|
|
|
21 |
from prompts import SYSTEM_PROMPT
|
22 |
from utils import generate_script, generate_audio
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
class DialogueItem(BaseModel):
|
25 |
"""A single dialogue item."""
|
26 |
|
|
|
32 |
"""The dialogue between the host and guest."""
|
33 |
|
34 |
scratchpad: str
|
35 |
+
name_of_guest: str
|
36 |
dialogue: List[DialogueItem]
|
37 |
|
38 |
|
39 |
+
def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
|
40 |
"""Generate the audio and transcript from the PDF."""
|
41 |
+
# Check if the file is a PDF
|
42 |
+
if not file.lower().endswith('.pdf'):
|
43 |
+
raise gr.Error("Please upload a PDF file.")
|
44 |
+
|
45 |
# Read the PDF file and extract text
|
46 |
+
try:
|
47 |
+
with Path(file).open("rb") as f:
|
48 |
+
reader = PdfReader(f)
|
49 |
+
text = "\n\n".join([page.extract_text() for page in reader.pages])
|
50 |
+
except Exception as e:
|
51 |
+
raise gr.Error(f"Error reading the PDF file: {str(e)}")
|
52 |
+
|
53 |
+
# Check if the PDF has more than ~100,000 characters
|
54 |
+
if len(text) > 100000:
|
55 |
+
raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
|
56 |
+
|
57 |
+
# Modify the system prompt based on the chosen tone and length
|
58 |
+
modified_system_prompt = SYSTEM_PROMPT
|
59 |
+
if tone:
|
60 |
+
modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
|
61 |
+
if length:
|
62 |
+
length_instructions = {
|
63 |
+
"Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
|
64 |
+
"Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
|
65 |
+
}
|
66 |
+
modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
|
67 |
|
68 |
# Call the LLM
|
69 |
+
llm_output = generate_script(modified_system_prompt, text, Dialogue)
|
70 |
logger.info(f"Generated dialogue: {llm_output}")
|
71 |
|
72 |
# Process the dialogue
|
73 |
audio_segments = []
|
74 |
+
transcript = "" # start with an empty transcript
|
75 |
total_characters = 0
|
76 |
|
77 |
for line in llm_output.dialogue:
|
78 |
logger.info(f"Generating audio for {line.speaker}: {line.text}")
|
79 |
+
if line.speaker == "Host (Jane)":
|
80 |
+
speaker = f"**Jane**: {line.text}"
|
81 |
+
else:
|
82 |
+
speaker = f"**{llm_output.name_of_guest}**: {line.text}"
|
83 |
+
transcript += speaker + "\n\n"
|
84 |
total_characters += len(line.text)
|
85 |
|
86 |
# Get audio file path
|
|
|
114 |
|
115 |
|
116 |
demo = gr.Interface(
|
117 |
+
title="Open NotebookLM",
|
118 |
+
description="Convert your PDFs into podcasts with open-source AI models (Llama 3.1 405B and MeloTTS). \n \n Note: Only the text content of the PDF will be processed. Images and tables are not included. The PDF should be no more than 100,000 characters due to the context length of Llama 3.1 405B.",
|
119 |
fn=generate_podcast,
|
120 |
inputs=[
|
121 |
gr.File(
|
122 |
label="PDF",
|
123 |
+
file_types=[".pdf", "file/*"],
|
124 |
+
),
|
125 |
+
gr.Radio(
|
126 |
+
choices=["Fun", "Formal"],
|
127 |
+
label="Tone of the podcast",
|
128 |
+
value="casual"
|
129 |
+
),
|
130 |
+
gr.Radio(
|
131 |
+
choices=["Short (1-2 min)", "Medium (3-5 min)"],
|
132 |
+
label="Length of the podcast",
|
133 |
+
value="Medium (3-5 min)"
|
134 |
),
|
135 |
],
|
136 |
outputs=[
|
137 |
gr.Audio(label="Audio", format="mp3"),
|
138 |
+
gr.Markdown(label="Transcript"),
|
139 |
],
|
140 |
allow_flagging="never",
|
141 |
+
api_name="generate_podcast", # Add this line
|
142 |
+
theme=gr.themes.Soft()
|
143 |
)
|
144 |
|
|
|
|
|
145 |
if __name__ == "__main__":
|
146 |
+
demo.queue(default_concurrency_limit=3).launch(show_api=True) # Add queue with concurrency of 3 and show API
|
prompts.py
CHANGED
@@ -27,7 +27,7 @@ Your focus is on extracting the most interesting and insightful content for a po
|
|
27 |
- Ensure complex topics are explained clearly and simply.
|
28 |
- Focus on maintaining an engaging and lively tone that would captivate listeners.
|
29 |
- Rules:
|
30 |
-
> The host
|
31 |
> The host should ask the guest questions.
|
32 |
> The host should summarize the key insights at the end.
|
33 |
> Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
|
|
|
27 |
- Ensure complex topics are explained clearly and simply.
|
28 |
- Focus on maintaining an engaging and lively tone that would captivate listeners.
|
29 |
- Rules:
|
30 |
+
> The host ALWAYS goes first and is interviewing the guest. The guest is the one who explains the topic.
|
31 |
> The host should ask the guest questions.
|
32 |
> The host should summarize the key insights at the end.
|
33 |
> Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
|
utils.py
CHANGED
@@ -23,19 +23,19 @@ client = OpenAI(
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
-
def generate_script(system_prompt: str,
|
27 |
"""Get the dialogue from the LLM."""
|
28 |
# Load as python object
|
29 |
try:
|
30 |
-
response = call_llm(system_prompt,
|
31 |
-
dialogue =
|
32 |
response.choices[0].message.content
|
33 |
)
|
34 |
except ValidationError as e:
|
35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
36 |
-
system_prompt_with_error = f"{system_prompt}\n\
|
37 |
-
response = call_llm(system_prompt_with_error,
|
38 |
-
dialogue =
|
39 |
response.choices[0].message.content
|
40 |
)
|
41 |
return dialogue
|
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
+
def generate_script(system_prompt: str, input_text: str, output_model):
|
27 |
"""Get the dialogue from the LLM."""
|
28 |
# Load as python object
|
29 |
try:
|
30 |
+
response = call_llm(system_prompt, input_text, output_model)
|
31 |
+
dialogue = output_model.model_validate_json(
|
32 |
response.choices[0].message.content
|
33 |
)
|
34 |
except ValidationError as e:
|
35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
36 |
+
system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
|
37 |
+
response = call_llm(system_prompt_with_error, input_text, output_model)
|
38 |
+
dialogue = output_model.model_validate_json(
|
39 |
response.choices[0].message.content
|
40 |
)
|
41 |
return dialogue
|