pratikshahp committed on
Commit
795d45e
1 Parent(s): 4b57ca9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -19
app.py CHANGED
@@ -1,24 +1,29 @@
1
- import streamlit as st
2
- from espnet2.bin.tts_inference import Text2Speech
 
 
3
 
4
- # Load the Text2Speech model
5
- model = Text2Speech.from_pretrained("kan-bayashi/ljspeech_fastspeech2")
 
6
 
7
- def generate_audio(text):
8
- with st.spinner("Generating Speech..."):
9
- speech, *_ = model(text)
10
- return speech
11
 
12
- def main():
13
- st.title("Text to Speech with ESPnet2")
14
 
15
- text_input = st.text_area("Enter the text to generate speech:", "")
16
- if st.button("Generate Speech"):
17
- if text_input:
18
- audio = generate_audio(text_input)
19
- st.audio(audio, format="audio/wav")
20
- else:
21
- st.warning("Please enter some text.")
22
 
23
- if __name__ == "__main__":
24
- main()
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ from speechbrain.inference.vocoders import HIFIGAN
4
+ from speechbrain.tts import Tacotron2
5
 
6
# Load the pretrained LJSpeech models once at import time so every request
# reuses them: Tacotron2 turns text into a mel spectrogram and the HiFi-GAN
# vocoder turns that spectrogram into a waveform.
#
# The cache directories are relative to the working directory. The previous
# values ("/tmpdir_tacotron2", "/tmpdir_hifigan") pointed at the filesystem
# root, which an unprivileged process normally cannot write to.
tts_model = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tacotron2",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_hifigan",
)
9
 
10
+ # Function to generate speech
11
def generate_speech(text):
    """Synthesize speech for *text* in the form the Gradio Audio output expects.

    Args:
        text: The sentence(s) to speak.

    Returns:
        ``None`` when *text* is empty or only whitespace (Gradio then shows no
        audio); otherwise a ``(sample_rate, samples)`` tuple where ``samples``
        is a 1-D NumPy array holding the waveform.
    """
    # Both pretrained checkpoints are LJSpeech models, which produce 22.05 kHz
    # audio (the upstream example saves the output at this rate).
    sample_rate = 22050

    # Nothing to synthesize; avoid feeding an empty sequence to the model.
    if not text or not text.strip():
        return None

    # encode_text returns the mel spectrogram first, followed by extra values
    # (length, and an alignment in current SpeechBrain releases). Star
    # unpacking keeps this working regardless of how many extras there are.
    mel_output, *_ = tts_model.encode_text(text)

    # Decode the mel spectrogram to a waveform with the HiFi-GAN vocoder.
    waveform = hifi_gan.decode_batch(mel_output)

    # Gradio cannot play a torch tensor: drop the singleton batch/channel
    # dimensions and hand back a NumPy array together with its sample rate.
    samples = waveform.squeeze().detach().cpu().numpy()
    return sample_rate, samples
 
 
 
 
 
20
 
21
+ # Interface for Gradio
22
# Build the UI: one text box in, one audio player out, wired to generate_speech.
text_input = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to convert to speech...",
)
speech_output = gr.Audio(label="Output Speech")
iface = gr.Interface(fn=generate_speech, inputs=text_input, outputs=speech_output)

# Start serving the Gradio app.
iface.launch()