Spaces:
Running
on
T4
Running
on
T4
Update app.py
#2
by
mrfakename
- opened
- README.md +12 -1
- main.py → app.py +52 -24
- prompts.py +1 -1
- utils.py +6 -6
README.md
CHANGED
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Open NotebookLM
|
2 |
|
3 |
## Overview
|
@@ -38,7 +49,7 @@ To set up the project, follow these steps:
|
|
38 |
|
39 |
2. **Run the application:**
|
40 |
```bash
|
41 |
-
python
|
42 |
```
|
43 |
This will launch a Gradio interface in your web browser.
|
44 |
|
|
|
1 |
+
---
|
2 |
+
title: Open NotebookLM
|
3 |
+
emoji: 🎙️
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.44.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
# Open NotebookLM
|
13 |
|
14 |
## Overview
|
|
|
49 |
|
50 |
2. **Run the application:**
|
51 |
```bash
|
52 |
+
python app.py
|
53 |
```
|
54 |
This will launch a Gradio interface in your web browser.
|
55 |
|
main.py → app.py
RENAMED
@@ -8,12 +8,10 @@ import os
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
-
from typing import List, Literal, Tuple
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
15 |
-
from fastapi import FastAPI
|
16 |
-
from fastapi.staticfiles import StaticFiles
|
17 |
from loguru import logger
|
18 |
from pydantic import BaseModel
|
19 |
from pypdf import PdfReader
|
@@ -23,11 +21,6 @@ from pydub import AudioSegment
|
|
23 |
from prompts import SYSTEM_PROMPT
|
24 |
from utils import generate_script, generate_audio
|
25 |
|
26 |
-
app = FastAPI()
|
27 |
-
|
28 |
-
app.mount("/static", StaticFiles(directory="static"), name="static")
|
29 |
-
|
30 |
-
|
31 |
class DialogueItem(BaseModel):
|
32 |
"""A single dialogue item."""
|
33 |
|
@@ -39,30 +32,55 @@ class Dialogue(BaseModel):
|
|
39 |
"""The dialogue between the host and guest."""
|
40 |
|
41 |
scratchpad: str
|
42 |
-
|
43 |
dialogue: List[DialogueItem]
|
44 |
|
45 |
|
46 |
-
def generate_podcast(file: str) -> Tuple[str, str]:
|
47 |
"""Generate the audio and transcript from the PDF."""
|
|
|
|
|
|
|
|
|
48 |
# Read the PDF file and extract text
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Call the LLM
|
54 |
-
llm_output = generate_script(
|
55 |
logger.info(f"Generated dialogue: {llm_output}")
|
56 |
|
57 |
# Process the dialogue
|
58 |
audio_segments = []
|
59 |
-
transcript = ""
|
60 |
total_characters = 0
|
61 |
|
62 |
for line in llm_output.dialogue:
|
63 |
logger.info(f"Generating audio for {line.speaker}: {line.text}")
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
total_characters += len(line.text)
|
67 |
|
68 |
# Get audio file path
|
@@ -96,23 +114,33 @@ def generate_podcast(file: str) -> Tuple[str, str]:
|
|
96 |
|
97 |
|
98 |
demo = gr.Interface(
|
99 |
-
title="
|
100 |
-
description="Convert your PDFs into podcasts with open-source AI models.",
|
101 |
fn=generate_podcast,
|
102 |
inputs=[
|
103 |
gr.File(
|
104 |
label="PDF",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
),
|
106 |
],
|
107 |
outputs=[
|
108 |
gr.Audio(label="Audio", format="mp3"),
|
109 |
-
gr.
|
110 |
],
|
111 |
allow_flagging="never",
|
112 |
-
api_name=
|
|
|
113 |
)
|
114 |
|
115 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
116 |
-
|
117 |
if __name__ == "__main__":
|
118 |
-
demo.launch(show_api=
|
|
|
8 |
import time
|
9 |
from pathlib import Path
|
10 |
from tempfile import NamedTemporaryFile
|
11 |
+
from typing import List, Literal, Tuple, Optional
|
12 |
|
13 |
# Third-party imports
|
14 |
import gradio as gr
|
|
|
|
|
15 |
from loguru import logger
|
16 |
from pydantic import BaseModel
|
17 |
from pypdf import PdfReader
|
|
|
21 |
from prompts import SYSTEM_PROMPT
|
22 |
from utils import generate_script, generate_audio
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
class DialogueItem(BaseModel):
|
25 |
"""A single dialogue item."""
|
26 |
|
|
|
32 |
"""The dialogue between the host and guest."""
|
33 |
|
34 |
scratchpad: str
|
35 |
+
name_of_guest: str
|
36 |
dialogue: List[DialogueItem]
|
37 |
|
38 |
|
39 |
+
def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
|
40 |
"""Generate the audio and transcript from the PDF."""
|
41 |
+
# Check if the file is a PDF
|
42 |
+
if not file.lower().endswith('.pdf'):
|
43 |
+
raise gr.Error("Please upload a PDF file.")
|
44 |
+
|
45 |
# Read the PDF file and extract text
|
46 |
+
try:
|
47 |
+
with Path(file).open("rb") as f:
|
48 |
+
reader = PdfReader(f)
|
49 |
+
text = "\n\n".join([page.extract_text() for page in reader.pages])
|
50 |
+
except Exception as e:
|
51 |
+
raise gr.Error(f"Error reading the PDF file: {str(e)}")
|
52 |
+
|
53 |
+
# Check if the PDF has more than ~100,000 characters
|
54 |
+
if len(text) > 100000:
|
55 |
+
raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
|
56 |
+
|
57 |
+
# Modify the system prompt based on the chosen tone and length
|
58 |
+
modified_system_prompt = SYSTEM_PROMPT
|
59 |
+
if tone:
|
60 |
+
modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
|
61 |
+
if length:
|
62 |
+
length_instructions = {
|
63 |
+
"Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
|
64 |
+
"Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
|
65 |
+
}
|
66 |
+
modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
|
67 |
|
68 |
# Call the LLM
|
69 |
+
llm_output = generate_script(modified_system_prompt, text, Dialogue)
|
70 |
logger.info(f"Generated dialogue: {llm_output}")
|
71 |
|
72 |
# Process the dialogue
|
73 |
audio_segments = []
|
74 |
+
transcript = "" # start with an empty transcript
|
75 |
total_characters = 0
|
76 |
|
77 |
for line in llm_output.dialogue:
|
78 |
logger.info(f"Generating audio for {line.speaker}: {line.text}")
|
79 |
+
if line.speaker == "Host (Jane)":
|
80 |
+
speaker = f"**Jane**: {line.text}"
|
81 |
+
else:
|
82 |
+
speaker = f"**{llm_output.name_of_guest}**: {line.text}"
|
83 |
+
transcript += speaker + "\n\n"
|
84 |
total_characters += len(line.text)
|
85 |
|
86 |
# Get audio file path
|
|
|
114 |
|
115 |
|
116 |
demo = gr.Interface(
|
117 |
+
title="Open NotebookLM",
|
118 |
+
description="Convert your PDFs into podcasts with open-source AI models (Llama 3.1 405B and MeloTTS). \n \n Note: Only the text content of the PDF will be processed. Images and tables are not included. The PDF should be no more than 100,000 characters due to the context length of Llama 3.1 405B.",
|
119 |
fn=generate_podcast,
|
120 |
inputs=[
|
121 |
gr.File(
|
122 |
label="PDF",
|
123 |
+
file_types=[".pdf", "file/*"],
|
124 |
+
),
|
125 |
+
gr.Radio(
|
126 |
+
choices=["Fun", "Formal"],
|
127 |
+
label="Tone of the podcast",
|
128 |
+
value="casual"
|
129 |
+
),
|
130 |
+
gr.Radio(
|
131 |
+
choices=["Short (1-2 min)", "Medium (3-5 min)"],
|
132 |
+
label="Length of the podcast",
|
133 |
+
value="Medium (3-5 min)"
|
134 |
),
|
135 |
],
|
136 |
outputs=[
|
137 |
gr.Audio(label="Audio", format="mp3"),
|
138 |
+
gr.Markdown(label="Transcript"),
|
139 |
],
|
140 |
allow_flagging="never",
|
141 |
+
api_name="generate_podcast", # Add this line
|
142 |
+
theme=gr.themes.Soft()
|
143 |
)
|
144 |
|
|
|
|
|
145 |
if __name__ == "__main__":
|
146 |
+
demo.queue(default_concurrency_limit=3).launch(show_api=True) # Add queue with concurrency of 3 and show API
|
prompts.py
CHANGED
@@ -27,7 +27,7 @@ Your focus is on extracting the most interesting and insightful content for a po
|
|
27 |
- Ensure complex topics are explained clearly and simply.
|
28 |
- Focus on maintaining an engaging and lively tone that would captivate listeners.
|
29 |
- Rules:
|
30 |
-
> The host
|
31 |
> The host should ask the guest questions.
|
32 |
> The host should summarize the key insights at the end.
|
33 |
> Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
|
|
|
27 |
- Ensure complex topics are explained clearly and simply.
|
28 |
- Focus on maintaining an engaging and lively tone that would captivate listeners.
|
29 |
- Rules:
|
30 |
+
> The host ALWAYS goes first and is interviewing the guest. The guest is the one who explains the topic.
|
31 |
> The host should ask the guest questions.
|
32 |
> The host should summarize the key insights at the end.
|
33 |
> Include common verbal fillers like "uhms" and "errs" in the host and guests response. This is so the script is realistic.
|
utils.py
CHANGED
@@ -23,19 +23,19 @@ client = OpenAI(
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
-
def generate_script(system_prompt: str,
|
27 |
"""Get the dialogue from the LLM."""
|
28 |
# Load as python object
|
29 |
try:
|
30 |
-
response = call_llm(system_prompt,
|
31 |
-
dialogue =
|
32 |
response.choices[0].message.content
|
33 |
)
|
34 |
except ValidationError as e:
|
35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
36 |
-
system_prompt_with_error = f"{system_prompt}\n\
|
37 |
-
response = call_llm(system_prompt_with_error,
|
38 |
-
dialogue =
|
39 |
response.choices[0].message.content
|
40 |
)
|
41 |
return dialogue
|
|
|
23 |
hf_client = Client("mrfakename/MeloTTS")
|
24 |
|
25 |
|
26 |
+
def generate_script(system_prompt: str, input_text: str, output_model):
|
27 |
"""Get the dialogue from the LLM."""
|
28 |
# Load as python object
|
29 |
try:
|
30 |
+
response = call_llm(system_prompt, input_text, output_model)
|
31 |
+
dialogue = output_model.model_validate_json(
|
32 |
response.choices[0].message.content
|
33 |
)
|
34 |
except ValidationError as e:
|
35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
36 |
+
system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
|
37 |
+
response = call_llm(system_prompt_with_error, input_text, output_model)
|
38 |
+
dialogue = output_model.model_validate_json(
|
39 |
response.choices[0].message.content
|
40 |
)
|
41 |
return dialogue
|