WhisperSpeech-PDF-text-to-speech

Runtime error

File size: 2,150 Bytes

474f4cc
cc84c76
 
1197cf0
 
9f8e59c
6d916f4
 
9e1c381
 
2522fb3
 
 
 
 
1bfa7fa
4b5b0e4
ac5edc7
9f8e59c
 
 
29a0691
 
 
ad135b5
29a0691
de0a7fd
29a0691
 
 
 
589c7da
6d916f4
 
 
 
 
 
 
 
 
 
 
 
 
0b298e3
 
 
ac5edc7
cc84c76
0b298e3
e2bb816
b56acb6
0b298e3
29382d3
 
 
0acfb8a
29382d3

import streamlit as st
from gtts import gTTS 
from io import BytesIO
import torch
import torchaudio
from PyPDF2 import PdfReader
from whisperspeech.pipeline import Pipeline

st.image('OIG3 (4).jpeg', caption='Your host on this PDF-to-Speech adventure!')


@st.cache_resource(show_spinner=False)
def load_pipeline():
    """Build the WhisperSpeech TTS pipeline.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids rebuilding the model on each rerun.
    """
    return Pipeline(torch_compile=True)


# st.spinner (unlike st.write) is a context manager: it shows the message
# while the body runs and removes it once the pipeline is ready.
with st.spinner("⏳ Loading WhisperSpeech Pipeline ⌛"):
    # TTS pipeline
    pipe = load_pipeline()

# Number of pages the user wants read aloud. The range starts at 1 so the
# default selection still produces audio for the first page.
x = st.slider(
    'Select the number of pages you wish to transcribe',
    min_value=1,
    max_value=100,
    value=1,
)

uploaded_file = st.file_uploader("Choose a file", "pdf")
if uploaded_file is not None:
    # creating a pdf reader object
    reader = PdfReader(uploaded_file)
    # printing number of pages in pdf file
    X = len(reader.pages)
    print(X)

    # Read at most the requested number of pages and never index past the
    # last page of the document (reader.pages is zero-based).
    for i in range(min(x, X)):
        # getting a specific page from the pdf file
        page = reader.pages[i]
        # extracting text from page
        text = page.extract_text()
        print("Created text of page", i)

        # Pages without extractable text (blank or image-only) give the
        # TTS pipeline nothing to synthesize, so skip them.
        if not text or not text.strip():
            print("Skipped page", i, "because it has no extractable text.")
            continue

        # Generate audio for the current page using a unique filename
        page_audio_file = f"output_{i}.wav"
        pipe.generate_to_file(page_audio_file, text)

        # Display the generated audio using st.audio
        with open(page_audio_file, "rb") as audio_file:
            st.audio(audio_file)

        print("Read aloud", i + 1, "pages of", X, "total pages.")
    st.write("🎉 That's the whole PDF! Have an awesome day! 🎉")
    

# Free-text input: read aloud whatever the user types or pastes.
prompt = st.chat_input("Copy/Paste or type in text to have read aloud")
if prompt:
    st.write(prompt)
    with st.popover("✨ Open your text-to-speech from text input ✨"):
        # Synthesize the prompt itself. This section must not rely on the
        # page index or page text from the PDF section, which only exist
        # after a PDF has been uploaded and processed.
        prompt_audio_file = "output_prompt.wav"
        pipe.generate_to_file(prompt_audio_file, prompt)

        # Display the generated audio using st.audio
        with open(prompt_audio_file, "rb") as audio_file:
            st.audio(audio_file)