# File size: 2,561 Bytes
# 698f92d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import pdfminer.high_level
import transformers
from transformers import pipeline
from pydub import AudioSegment
import tempfile

# Shared error reporter used by the processing steps
def handle_error(message):
    """Report a failure on stdout and package it as an error result.

    Args:
        message: Human-readable description of what went wrong.

    Returns:
        A dict whose ``"audio"`` entry is ``None`` and whose ``"error"``
        entry is ``message``.
    """
    failure = dict(audio=None, error=message)
    print(f"Error: {message}")
    return failure

# PDF -> plain text
def extract_text(pdf_path):
    """Read the PDF at ``pdf_path`` and return its text content.

    Args:
        pdf_path: Location of the PDF; it is opened in binary mode.

    Returns:
        The value produced by ``pdfminer.high_level.extract_text`` on
        success. If anything raises while opening or parsing the file, the
        error dict built by ``handle_error`` is returned instead, so callers
        must check which of the two they received.
    """
    try:
        with open(pdf_path, "rb") as pdf_stream:
            extracted = pdfminer.high_level.extract_text(pdf_stream)
    except Exception as exc:
        return handle_error(f"Failed to extract text: {exc}")
    return extracted

# Fixed-width text splitter
def chunk_text(text, chunk_size=250):
    """Cut ``text`` into consecutive slices of at most ``chunk_size`` items.

    The split is purely positional (it does not look for word or sentence
    boundaries); only the final slice may be shorter than ``chunk_size``.

    Args:
        text: Sequence to slice, normally the extracted PDF text.
        chunk_size: Width of each slice.

    Returns:
        A list of slices in their original order; empty when ``text`` is
        empty.
    """
    offsets = range(0, len(text), chunk_size)
    return [text[start:start + chunk_size] for start in offsets]

# Helper: turn one pipeline waveform into a pydub segment
def _waveform_to_segment(waveform, sampling_rate):
    """Wrap a float waveform in a 16-bit PCM ``AudioSegment``.

    Args:
        waveform: Array-like of float samples, expected in ``[-1.0, 1.0]``.
        sampling_rate: Samples per second of ``waveform``.

    Returns:
        An ``AudioSegment`` holding the same audio as signed 16-bit PCM.
    """
    import numpy as np  # local import: numpy is not imported at file level

    samples = np.atleast_1d(np.squeeze(np.asarray(waveform, dtype=np.float32)))
    if samples.ndim > 1:
        # assumes a (channels, samples) layout — TODO confirm for the chosen model
        channels = samples.shape[0]
        samples = samples.T.reshape(-1)  # interleave the channels frame by frame
    else:
        channels = 1
    # Clip before scaling so out-of-range samples saturate instead of wrapping.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    return AudioSegment(
        data=pcm.tobytes(),
        sample_width=2,
        frame_rate=int(sampling_rate),
        channels=channels,
    )


# Function to perform text-to-speech on every chunk
def convert_to_speech(text_chunks, language="en", speaker="0"):
    """Synthesize speech for each text chunk.

    Args:
        text_chunks: Iterable of strings to speak, in order.
        language: Kept for backward compatibility; currently not forwarded.
        speaker: Kept for backward compatibility; currently not forwarded.

    Returns:
        A list with one ``AudioSegment`` per non-blank chunk on success, or
        the error dict from ``handle_error`` if anything raises.

    NOTE(review): the text-to-speech pipeline takes the text positionally and
    does not accept free-form ``lang``/``speaker`` keywords, so the previous
    call failed before producing audio. Once a concrete model is chosen, map
    ``language``/``speaker`` onto that model's own parameters.
    """
    try:
        model_name = "tts-es-es1"  # Replace with your chosen model
        tts_pipeline = pipeline("text-to-speech", model=model_name)
        audio_segments = []
        for chunk in text_chunks:
            if not chunk.strip():
                # Whitespace-only chunks carry nothing to speak.
                continue
            result = tts_pipeline(chunk)
            # The pipeline yields a raw waveform plus its sampling rate, not
            # an MP3 file, so build the segment from PCM data directly.
            audio_segments.append(
                _waveform_to_segment(result["audio"], result["sampling_rate"])
            )
        return audio_segments
    except Exception as e:
        return handle_error(f"Text-to-speech failed: {e}")

# Function to stitch the segments together and write them to a temp file
def save_audio(audio_segments, filename, format="mp3"):
    """Concatenate audio segments and export them to a temporary file.

    Args:
        audio_segments: Non-empty sequence of segments supporting ``+`` and
            ``export`` (pydub ``AudioSegment`` objects in this file).
        filename: Name hint; its base name without extension becomes the
            prefix of the temporary file. May be empty or ``None``.
        format: Export format, also used as the file suffix. (The name
            shadows the builtin but is kept for caller compatibility.)

    Returns:
        ``{"audio_path": path}`` on success, otherwise the error dict from
        ``handle_error``.
    """
    import os  # local import: os is not imported at file level

    if not audio_segments:
        return handle_error("Failed to save audio: no audio segments to save")
    try:
        combined_audio = audio_segments[0]
        for segment in audio_segments[1:]:
            combined_audio += segment

        stem = os.path.splitext(os.path.basename(str(filename)))[0] if filename else ""
        # delete=False keeps the file on disk after the handle is closed; the
        # handle is closed before exporting so the path can be reopened.
        with tempfile.NamedTemporaryFile(
            prefix=f"{stem}_" if stem else None,
            suffix=f".{format}",
            delete=False,
        ) as handle:
            audio_path = handle.name
        combined_audio.export(audio_path, format=format)
        return {"audio_path": audio_path}
    except Exception as e:
        return handle_error(f"Failed to save audio: {e}")

# Gradio callback: PDF in, spoken audio out
def pdf_to_speech(pdf_file):
    """Convert an uploaded PDF into a single audio file.

    Args:
        pdf_file: The upload received from Gradio: either a path string or
            an object exposing the path as ``.name``.

    Returns:
        A ``(audio_path, audio_path)`` tuple, one entry for each of the two
        outputs declared on the interface (player and download).

    Raises:
        gr.Error: If no file was given, the PDF has no text, or any
            processing step reports an error.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")
    # NOTE(review): upload objects presumably expose the path as `.name`,
    # while plain path strings pass through unchanged — confirm for the
    # installed Gradio version.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Extract text from PDF; the helper returns a dict only on failure.
    text = extract_text(pdf_path)
    if isinstance(text, dict):
        raise gr.Error(text["error"])
    if not text or not text.strip():
        raise gr.Error("No extractable text found in the PDF.")

    # Chunk text and convert to speech; a dict again signals failure.
    audio_result = convert_to_speech(chunk_text(text))
    if isinstance(audio_result, dict):
        raise gr.Error(audio_result["error"])

    # Save and return audio
    audio_data = save_audio(audio_result, str(pdf_path))
    if audio_data.get("error"):
        raise gr.Error(audio_data["error"])
    audio_path = audio_data["audio_path"]
    return audio_path, audio_path

# Create Gradio interface: one PDF upload in, an audio player and a
# downloadable file out.
interface = gr.Interface(
    fn=pdf_to_speech,
    # gr.File is the upload component; file_types limits the picker to PDFs.
    inputs=gr.File(label="PDF", file_types=[".pdf"]),
    outputs=[gr.Audio(label="Play"), gr.File(label="Download")],
)

# Launch Gradio app
interface.launch()