knowsuchagency committed on
Commit
1a8723f
·
1 Parent(s): 7a115e0

chore: Refactor PDF file reading in generate_audio function

Browse files
Files changed (1) hide show
  1. main.py +22 -5
main.py CHANGED
@@ -12,6 +12,10 @@ from promptic import llm
12
  from pydantic import BaseModel, ValidationError
13
  from pypdf import PdfReader
14
  from tenacity import retry, retry_if_exception_type
 
 
 
 
15
 
16
  sentry_sdk.init(os.getenv("SENTRY_DSN"))
17
 
@@ -61,7 +65,7 @@ def generate_dialogue(text: str) -> Dialogue:
61
  Now that you have brainstormed ideas and created a rough outline, it's time to write the actual podcast dialogue. Aim for a natural, conversational flow between the host and any guest speakers. Incorporate the best ideas from your brainstorming session and make sure to explain any complex topics in an easy-to-understand way.
62
 
63
  <podcast_dialogue>
64
- Write your engaging, informative podcast dialogue here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience. Rather than adding variable brackets like `[Host Name]` or `[Guest Name]`, use made-up names for the host and any guest speakers to create a more engaging and immersive experience for listeners as your output will be used to generate audio.
65
  </podcast_dialogue>
66
  """
67
 
@@ -102,7 +106,6 @@ def generate_audio(file: str, openai_api_key: str = None) -> bytes:
102
  futures = []
103
  for line in llm_output.dialogue:
104
  transcript_line = f"{line.speaker}: {line.text}"
105
- logger.info(transcript_line)
106
  future = executor.submit(get_mp3, line.text, line.voice, openai_api_key)
107
  futures.append((future, transcript_line))
108
  characters += len(line.text)
@@ -114,7 +117,23 @@ def generate_audio(file: str, openai_api_key: str = None) -> bytes:
114
 
115
  logger.info(f"Generated {characters} characters of audio")
116
 
117
- return audio, transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  description = """
@@ -122,8 +141,6 @@ description = """
122
  <strong>Convert any PDF into a podcast episode! Experience research papers, websites, and more in a whole new way.</strong>
123
  <br>
124
  <a href="https://github.com/knowsuchagency/pdf-to-podcast">knowsuchagency/pdf-to-podcast</a>
125
- <br>
126
- <em>Note: audio doesn't work in Safari</em>
127
  </p>
128
  """
129
 
 
12
  from pydantic import BaseModel, ValidationError
13
  from pypdf import PdfReader
14
  from tenacity import retry, retry_if_exception_type
15
+ from tempfile import NamedTemporaryFile
16
+ import glob
17
+ import os
18
+ import time
19
 
20
  sentry_sdk.init(os.getenv("SENTRY_DSN"))
21
 
 
65
  Now that you have brainstormed ideas and created a rough outline, it's time to write the actual podcast dialogue. Aim for a natural, conversational flow between the host and any guest speakers. Incorporate the best ideas from your brainstorming session and make sure to explain any complex topics in an easy-to-understand way.
66
 
67
  <podcast_dialogue>
68
+ Write your engaging, informative podcast dialogue here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience. Don't include variable brackets like `[Host Name]` or `[Guest Name]`. Use made-up names for the hosts and guests to create a more engaging and immersive experience for listeners. Design your output to be read aloud -- it will be directly converted into audio.
69
  </podcast_dialogue>
70
  """
71
 
 
106
  futures = []
107
  for line in llm_output.dialogue:
108
  transcript_line = f"{line.speaker}: {line.text}"
 
109
  future = executor.submit(get_mp3, line.text, line.voice, openai_api_key)
110
  futures.append((future, transcript_line))
111
  characters += len(line.text)
 
117
 
118
  logger.info(f"Generated {characters} characters of audio")
119
 
120
+ temporary_directory = "./gradio_cached_examples/tmp/"
121
+ os.makedirs(temporary_directory, exist_ok=True)
122
+
123
+ temporary_file = NamedTemporaryFile(
124
+ dir=temporary_directory,
125
+ delete=False,
126
+ suffix=".mp3",
127
+ )
128
+ temporary_file.write(audio)
129
+ temporary_file.close()
130
+
131
+ # Delete any files in the temp directory that end with .mp3 and are over a day old
132
+ for file in glob.glob(f"{temporary_directory}*.mp3"):
133
+ if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
134
+ os.remove(file)
135
+
136
+ return temporary_file.name, transcript
137
 
138
 
139
  description = """
 
141
  <strong>Convert any PDF into a podcast episode! Experience research papers, websites, and more in a whole new way.</strong>
142
  <br>
143
  <a href="https://github.com/knowsuchagency/pdf-to-podcast">knowsuchagency/pdf-to-podcast</a>
 
 
144
  </p>
145
  """
146