"""Gradio app that converts an uploaded PDF, EPUB, or text file to speech."""

import os

import ebooklib
import gradio as gr
from bs4 import BeautifulSoup
from ebooklib import epub
from PyPDF2 import PdfReader
# AutoModelForSpeechSeq2Seq / AutoProcessor are kept from the original import
# list; nothing below uses them.
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Only this many leading characters of the document are synthesized, to keep
# the input within what the model can handle in one call.
MAX_INPUT_CHARS = 5000

# VITS checkpoint used for synthesis. A Whisper checkpoint (speech-to-text)
# cannot drive a text-to-speech pipeline, so a real TTS model is loaded here.
# NOTE(review): this checkpoint is English-only -- confirm the intended language.
TTS_MODEL_ID = "facebook/mms-tts-eng"

# Load the VITS model from Hugging Face
text_to_speech = pipeline("text-to-speech", model=TTS_MODEL_ID)


def extract_pdf_text(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Anything ``PdfReader`` accepts (a path or a binary stream).

    Returns:
        The text of all pages joined together; pages that yield no text
        contribute nothing.
    """
    reader = PdfReader(file)
    parts = []
    for page in reader.pages:
        if not page:
            continue
        # Extract once per page; extraction is the expensive step.
        page_text = page.extract_text()
        if page_text:
            parts.append(page_text)
    return "".join(parts)


def extract_epub_text(file):
    """Return the visible text of every document item in an EPUB.

    Args:
        file: Path to the EPUB file, as accepted by ``epub.read_epub``.

    Returns:
        The text of all document items, with HTML markup stripped, joined
        together in the order the book yields them.
    """
    book = epub.read_epub(file)
    parts = []
    # The item-type constants live on the ``ebooklib`` package itself, not on
    # the ``ebooklib.epub`` submodule.
    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        soup = BeautifulSoup(item.content, "html.parser")
        parts.append(soup.get_text())
    return "".join(parts)


def _upload_path(file):
    """Return the filesystem path behind a Gradio upload, or None.

    Depending on the Gradio version, the File component hands the callback
    either a path string or a temp-file wrapper exposing ``.name``.
    """
    if file is None:
        return None
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return getattr(file, "name", None)


def convert_to_audio(file, file_type):
    """Extract text from the uploaded file and synthesize it as speech.

    Args:
        file: The upload from ``gr.File`` (path string or temp-file wrapper).
        file_type: ``"PDF"`` or ``"EPUB"``; any other value is treated as a
            UTF-8 text file.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, the form a Gradio audio
        output accepts.

    Raises:
        gr.Error: If no file was uploaded, the text file is not valid UTF-8,
            or the file contains no text.
    """
    path = _upload_path(file)
    if path is None:
        raise gr.Error("Please upload a file.")

    if file_type == "PDF":
        text = extract_pdf_text(path)
    elif file_type == "EPUB":
        text = extract_epub_text(path)
    else:
        try:
            with open(path, encoding="utf-8") as handle:
                text = handle.read()
        except UnicodeDecodeError as err:
            raise gr.Error("The text file is not valid UTF-8.") from err

    if not text.strip():
        # Raise instead of returning a string: the output is an audio
        # component, which would try to interpret a string as a file path.
        raise gr.Error("No text found in the file.")

    # Convert text to speech, limiting input to avoid model constraints.
    speech = text_to_speech(text[:MAX_INPUT_CHARS])
    # The pipeline may return the waveform with a leading batch axis; drop
    # singleton axes so Gradio receives a plain sample array.
    waveform = speech["audio"].squeeze()
    return speech["sampling_rate"], waveform


# Gradio interface
demo = gr.Interface(
    fn=convert_to_audio,
    inputs=[
        gr.File(label="Upload PDF, EPUB, or Text File"),
        gr.Radio(["PDF", "EPUB", "TXT"], label="File Type"),
    ],
    outputs="audio",
    title="Unlimited Text-to-Speech Converter",
    description="Upload PDF, EPUB, or text files — convert them into audio with no limits!",
)

if __name__ == "__main__":
    demo.launch()