# File size: 866 Bytes
# e2edc99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
import torch
from nemo.collections.tts.models import FastPitchModel
from nemo.collections.tts.models import HifiGanModel

# 🔹 Load pretrained models from NeMo
# FastPitch is the spectrogram generator; HiFi-GAN is the vocoder that turns
# its mel spectrograms into a waveform. Both checkpoints are fetched by name.
fastpitch = FastPitchModel.from_pretrained("nvidia/tts_en_fastpitch")
hifigan = HifiGanModel.from_pretrained("nvidia/tts_hifigan")

# The app only ever runs inference, so switch both networks to eval mode
# (disables dropout and other training-only behavior).
fastpitch.eval()
hifigan.eval()

# 🔹 TTS function
def tts(text):
    """Synthesize speech for *text* with FastPitch and HiFi-GAN.

    Args:
        text: The sentence(s) to speak, as entered in the Gradio textbox.

    Returns:
        A ``(sample_rate, samples)`` tuple: the sample rate in Hz and the
        waveform as a 1-D NumPy array, the form ``gr.Audio`` accepts.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of a model traceback.
        raise gr.Error("Please enter some text to synthesize.")

    with torch.no_grad():
        # parse() only tokenizes the text; the mel spectrogram is produced
        # by generate_spectrogram().
        tokens = fastpitch.parse(text)
        # NeMo's type-checked model methods accept keyword arguments only.
        spectrogram = fastpitch.generate_spectrogram(tokens=tokens)
        audio = hifigan.convert_spectrogram_to_audio(spec=spectrogram)

    # The vocoder output carries a leading batch axis; Gradio would read a
    # 2-D array as (samples, channels), so drop the batch axis for mono audio.
    waveform = audio.squeeze(0).cpu().numpy()

    # 22050 Hz is the output rate documented for these pretrained checkpoints.
    return (22050, waveform)

# 🔹 Gradio UI
# One text box in, one audio player out, wired to the tts() callback.
text_input = gr.Textbox(label="Enter text")
speech_output = gr.Audio(label="Generated Speech")

iface = gr.Interface(
    fn=tts,
    inputs=text_input,
    outputs=speech_output,
    title="FastPitch + HiFiGAN (NeMo TTS)",
    description="Enter text and get speech synthesized using NVIDIA NeMo FastPitch and HiFiGAN.",
)

# Start the web server for the interface.
iface.launch()