File size: 2,150 Bytes
474f4cc
cc84c76
 
1197cf0
 
9f8e59c
6d916f4
 
9e1c381
 
2522fb3
 
 
 
 
1bfa7fa
4b5b0e4
ac5edc7
9f8e59c
 
 
29a0691
 
 
ad135b5
29a0691
de0a7fd
29a0691
 
 
 
589c7da
6d916f4
 
 
 
 
 
 
 
 
 
 
 
 
0b298e3
 
 
ac5edc7
cc84c76
0b298e3
e2bb816
b56acb6
0b298e3
29382d3
 
 
0acfb8a
29382d3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
from gtts import gTTS 
from io import BytesIO
import torch
import torchaudio
from PyPDF2 import PdfReader
from whisperspeech.pipeline import Pipeline

# Host image shown at the top of the page.
st.image('OIG3 (4).jpeg', caption='Your host on this PDF-to-Speech adventure!')


@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the WhisperSpeech TTS pipeline once and reuse it across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the pipeline avoids rebuilding it each time.
    """
    return Pipeline(torch_compile=True)


# st.write is not a context manager; st.spinner is, and it displays the
# message for as long as the body is running.
with st.spinner("⏳ Loading WhisperSpeech Pipeline ⌛"):
    # TTS pipeline
    pipe = _load_pipeline()

# Number of pages to transcribe. The slider starts at 1 so that the default
# selection still reads one page.
x = st.slider(
    'Select the number of pages you wish to transcribe',
    min_value=1,
    max_value=100,
    value=1,
)

uploaded_file = st.file_uploader("Choose a file", "pdf")
if uploaded_file is not None:
    # Parse the uploaded PDF and log its page count to the console.
    reader = PdfReader(uploaded_file)
    X = len(reader.pages)
    print(X)

    # Valid page indices are 0 .. X - 1, so read at most X pages and never
    # more than the user asked for.
    pages_to_read = min(X, x)
    for i in range(pages_to_read):
        # Extract the text of the current page.
        text = reader.pages[i].extract_text()
        print("Created text of page", i)

        # Extraction can come back empty (e.g. image-only pages); there is
        # nothing to synthesize in that case, so skip the page.
        if not text or not text.strip():
            st.write(f"Page {i + 1} has no extractable text; skipping it.")
            continue

        # Generate audio for the current page using a unique filename.
        page_audio_file = f"output_{i}.wav"
        pipe.generate_to_file(page_audio_file, text)

        # Display the generated audio using st.audio.
        with open(page_audio_file, "rb") as audio_file:
            st.audio(audio_file)

        print("Read aloud", i + 1, "pages of", X, "total pages.")

    # Only claim the whole PDF was read when it actually was.
    if pages_to_read == X:
        st.write("🎉 That's the whole PDF! Have an awesome day! 🎉")
    else:
        st.write(f"🎉 Read {pages_to_read} of {X} pages. Have an awesome day! 🎉")
    

# Free-text input: whatever the user types or pastes is read aloud.
prompt = st.chat_input("Copy/Paste or type in text to have read aloud")
if prompt:
    st.write(prompt)
    with st.popover("✨ Open your text-to-speech from text input ✨"):
        # Synthesize the typed prompt itself. The filename is fixed and
        # independent of the PDF loop, so this works with or without an
        # uploaded PDF.
        prompt_audio_file = "output_prompt.wav"
        pipe.generate_to_file(prompt_audio_file, prompt)

        # Display the generated audio using st.audio.
        with open(prompt_audio_file, "rb") as audio_file:
            st.audio(audio_file)