Spaces:
Runtime error
Runtime error
File size: 4,936 Bytes
d7529f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import gradio as gr
import PyPDF2
import nltk
from nltk.tokenize import sent_tokenize
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
import os
from pydub import AudioSegment
from concurrent.futures import ThreadPoolExecutor
from TTS.api import TTS
# Download necessary NLTK data: the 'punkt' sentence-tokenizer models used by
# sent_tokenize. quiet=True keeps the download from printing progress output.
# NOTE(review): some newer NLTK releases look for 'punkt_tab' instead — confirm
# against the installed NLTK version.
nltk.download('punkt', quiet=True)
# Initialize the shared TTS model once at import time; text_to_speech() uses
# this module-level instance for every synthesis call.
# NOTE(review): confirm that the installed TTS package's TTS() constructor
# accepts a `use_onnx` keyword — if it does not, this line raises at import.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", use_onnx=True)
# Default speaker and language handed to process_pdf() by gradio_interface().
# Both values are hand-picked placeholders and must match IDs that the loaded
# model actually supports.
default_speaker = "en_speaker_1" # Replace with a valid speaker ID from the printed list
default_language = "en" # Replace with a valid language code from the printed list
def extract_text_from_pdf(pdf_path):
    """Read every page of the PDF at ``pdf_path`` and return its text.

    Returns the page texts concatenated into one string, or ``None`` when
    the file cannot be opened or parsed (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as pdf_stream:
            document = PyPDF2.PdfReader(pdf_stream)
            # Collect per-page text first, then concatenate in one pass.
            page_texts = [page.extract_text() for page in document.pages]
            return ''.join(page_texts)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None
def summarize_text(text, summary_length):
    """Produce an extractive LSA summary of ``text``.

    ``summary_length`` is handed to the sumy summarizer as the amount of
    content to select; the selected sentences are returned as one string,
    joined by single spaces.
    """
    parsed = PlaintextParser.from_string(text, Tokenizer("english"))
    lsa = LsaSummarizer()
    selected_sentences = lsa(parsed.document, summary_length)
    return ' '.join(map(str, selected_sentences))
def split_into_chapters(text, num_chapters):
    """Split ``text`` into at most ``num_chapters`` chapters of whole sentences.

    The text is sentence-tokenized and grouped into consecutive runs of
    ``len(sentences) // num_chapters`` sentences. Any surplus groups are
    folded into the final chapter, so the last chapter may be longer than
    the others.

    Args:
        text: The text to split.
        num_chapters: Desired number of chapters. Non-integer values are
            truncated and values below 1 are treated as 1.

    Returns:
        A list of chapter strings. When the text has no more sentences than
        ``num_chapters``, each sentence is returned as its own chapter.
    """
    sentences = sent_tokenize(text)
    # A float count would break range() below and a count < 1 would divide by
    # zero or index past the start of the list, so normalise it up front.
    num_chapters = max(1, int(num_chapters))
    if len(sentences) <= num_chapters:
        return sentences

    # len(sentences) > num_chapters here, so this is always at least 1.
    sentences_per_chapter = len(sentences) // num_chapters
    chapters = [
        ' '.join(sentences[start:start + sentences_per_chapter])
        for start in range(0, len(sentences), sentences_per_chapter)
    ]
    if len(chapters) > num_chapters:
        # Fold every surplus group into the last chapter with a single join
        # instead of repeatedly concatenating onto chapters[-2].
        merged_tail = ' '.join(chapters[num_chapters - 1:])
        chapters = chapters[:num_chapters - 1] + [merged_tail]
    return chapters
def text_to_speech(text, output_path, speaker, language):
    """Synthesize ``text`` with the module-level ``tts`` model.

    The audio is written to ``output_path`` using the given ``speaker`` and
    ``language``; ``output_path`` is returned for convenience.
    """
    synthesis_options = {
        "text": text,
        "file_path": output_path,
        "speaker": speaker,
        "language": language,
    }
    tts.tts_to_file(**synthesis_options)
    return output_path
def adjust_audio_speed(input_path, output_path, target_duration):
    """Fit the audio at ``input_path`` into ``target_duration`` milliseconds.

    Audio longer than the target is sped up by ``current / target``; audio
    that already fits is exported unchanged. The result is written to
    ``output_path`` as MP3.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the (possibly sped-up) MP3 is written to.
        target_duration: Maximum desired length in milliseconds.

    Returns:
        ``output_path`` when a file was written there, or ``input_path``
        when the adjustment was skipped (empty audio, non-positive target)
        or failed, in which case nothing is written to ``output_path``.
    """
    # from_file lets ffmpeg probe the real container instead of forcing the
    # MP3 demuxer. NOTE(review): the TTS backend may write WAV data even when
    # the path ends in .mp3 — confirm against the installed TTS package.
    audio = AudioSegment.from_file(input_path)
    current_duration = len(audio)  # pydub segment lengths are in milliseconds
    if current_duration == 0:
        print(f"Warning: Audio file {input_path} has zero duration. Skipping speed adjustment.")
        return input_path
    if target_duration <= 0:
        # Avoids a ZeroDivisionError / negative speed factor below.
        print(f"Warning: Invalid target duration {target_duration}. Skipping speed adjustment.")
        return input_path

    speed_factor = current_duration / target_duration
    try:
        if speed_factor > 1.0:
            adjusted_audio = audio.speedup(playback_speed=speed_factor)
        else:
            # speedup() works by dropping chunks and cannot slow audio down;
            # a clip that already fits the target is exported as-is.
            adjusted_audio = audio
        adjusted_audio.export(output_path, format="mp3")
        return output_path
    except Exception as e:
        print(f"Error adjusting audio speed: {e}")
        return input_path
def process_chapter(chapter, i, speaker, language):
    """Turn one chapter of text into an audio file named ``chapter_<i+1>.mp3``.

    The chapter is synthesized to a temporary file, then passed through
    ``adjust_audio_speed`` with a 3-minute target. If the speed adjustment
    is skipped or fails, the unadjusted audio is kept as the chapter file.

    Args:
        chapter: Chapter text to synthesize.
        i: Zero-based chapter index (file names use ``i + 1``).
        speaker: Speaker ID forwarded to ``text_to_speech``.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        The path of the chapter audio file, or ``None`` when the chapter is
        empty or any step raised (the error is printed, not raised).
    """
    temp_path = None
    try:
        if not chapter.strip():
            print(f"Warning: Chapter {i+1} is empty. Skipping.")
            return None
        temp_path = f"temp_chapter_{i+1}.mp3"
        output_path = f"chapter_{i+1}.mp3"
        text_to_speech(chapter, temp_path, speaker, language)
        # Adjust speed to fit into 3 minutes
        adjusted_path = adjust_audio_speed(temp_path, output_path, 3 * 60 * 1000)
        if adjusted_path != output_path:
            # The adjustment handed back the input path, meaning nothing was
            # written to output_path; promote the raw synthesis instead of
            # returning a path to a file that does not exist.
            os.replace(temp_path, output_path)
        return output_path
    except Exception as e:
        print(f"Error processing chapter {i+1}: {e}")
        return None
    finally:
        # Clean up the temporary file on every exit path, including errors.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: Could not remove {temp_path}: {cleanup_error}")
def process_pdf(pdf_path, num_chapters, speaker, language):
    """Convert a PDF into a list of summarised chapter audio files.

    Pipeline: extract text -> summarise -> split into chapters -> synthesize
    each chapter concurrently.

    Args:
        pdf_path: Path to the PDF file.
        num_chapters: Number of chapters to split the summary into.
        speaker: Speaker ID forwarded to ``process_chapter``.
        language: Language code forwarded to ``process_chapter``.

    Returns:
        Paths of the chapter audio files that were produced, in chapter
        order. Chapters that failed are omitted; an empty list is returned
        when no text could be extracted.
    """
    full_text = extract_text_from_pdf(pdf_path)
    if not full_text or not full_text.strip():
        print("Error: Extracted text is empty or None")
        return []

    # Clean text to remove unwanted characters
    full_text = full_text.replace('\t', ' ')

    # Word budget for the whole summary. Presumably 15 minutes of speech at
    # ~150 words per minute (the UI advertises a 15-minute summary).
    target_words = 15 * 150
    word_count = len(full_text.split())  # > 0: the text is non-blank here
    sentence_count = len(sent_tokenize(full_text))
    # The summarizer is given a sentence count, so convert the word budget
    # into sentences using this text's own sentences-per-word ratio. Dividing
    # the budget by the word count alone collapses to a single sentence for
    # any text longer than the budget.
    summary_length = max(
        1, min(sentence_count, target_words * sentence_count // word_count)
    )

    summary = summarize_text(full_text, summary_length)
    chapters = split_into_chapters(summary, num_chapters)

    with ThreadPoolExecutor() as executor:
        pending = [
            executor.submit(process_chapter, chapter, index, speaker, language)
            for index, chapter in enumerate(chapters)
        ]
        # Collect in submission order so results stay in chapter order.
        chapter_audios = [future.result() for future in pending]
    return [audio for audio in chapter_audios if audio is not None]
def gradio_interface(pdf_file, num_chapters):
    """Gradio callback: turn an uploaded PDF into up to 10 chapter audio paths.

    Args:
        pdf_file: The uploaded file as delivered by ``gr.File`` — either a
            path string or an object exposing the path as ``.name`` — or
            ``None`` when nothing was uploaded.
        num_chapters: Number of chapters requested via the slider.

    Returns:
        A list of exactly 10 entries, one per audio output of the interface:
        chapter audio paths first, padded with ``None``.
    """
    # Must equal the number of gr.Audio outputs declared on the interface.
    max_outputs = 10
    if pdf_file is None:
        return [None] * max_outputs

    # Depending on the Gradio version/configuration the File component hands
    # over a plain path string or a temp-file wrapper with a .name attribute.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    chapter_audios = process_pdf(pdf_path, num_chapters, default_speaker, default_language)
    # Never return more values than there are output components.
    chapter_audios = chapter_audios[:max_outputs]
    return chapter_audios + [None] * (max_outputs - len(chapter_audios))
# Web UI definition: one PDF upload and a chapter-count slider in, ten audio
# players out (gradio_interface pads unused players with None).
iface = gr.Interface(
    title="PDF Book to Audiobook Summary",
    description="Upload a PDF book to get a 15-minute audiobook summary split into chapters.",
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF Book"),
        gr.Slider(label="Number of Chapters", minimum=1, maximum=10, value=5, step=1),
    ],
    outputs=[gr.Audio(label=f"Chapter {slot}") for slot in range(1, 11)],
)

# Only start the server when run as a script; share=True is passed to launch().
if __name__ == "__main__":
    iface.launch(share=True)
|