tts / app.py
TriNguyenPO's picture
Update app.py
b6e1372 verified
raw
history blame
1.26 kB
import gradio as gr
import tensorflow as tf
import numpy as np
from tensorflow_tts.inference import TFAutoModel, AutoProcessor
# Load the pre-trained checkpoints once at import time so every request
# reuses the same objects. The text processor and the FastSpeech2 model are
# fetched from the same repo id; the MelGAN vocoder has its own.
_ACOUSTIC_REPO_ID = "tensorspeech/tts-fastspeech2-ljspeech-en"
_VOCODER_REPO_ID = "tensorspeech/tts-mb_melgan-ljspeech-en"

processor = AutoProcessor.from_pretrained(_ACOUSTIC_REPO_ID)
fastspeech2 = TFAutoModel.from_pretrained(_ACOUSTIC_REPO_ID)
melgan = TFAutoModel.from_pretrained(_VOCODER_REPO_ID)
# Sample rate (Hz) reported to Gradio together with the waveform.
_SAMPLE_RATE_HZ = 22050


# Define inference function
def tts_inference(text: str):
    """Synthesize speech for ``text`` and return it in Gradio's audio format.

    Args:
        text: The sentence to speak.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` is an int
        in Hz and ``waveform`` is a 1-D NumPy array. This is the order
        Gradio's "audio" output expects.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of running the models on nothing.
    if not text or not text.strip():
        raise ValueError("Input text must not be empty.")

    # Convert text to a sequence of symbol ids and add a batch dimension.
    input_ids = processor.text_to_sequence(text)
    batched_ids = tf.expand_dims(
        tf.convert_to_tensor(input_ids, dtype=tf.int32), 0
    )

    # Generate the mel spectrogram. FastSpeech2 inference also needs the
    # speed / f0 / energy ratio tensors; 1.0 leaves the prosody unchanged.
    # Per the TensorFlowTTS usage examples it returns five tensors
    # (mel_before, mel_after, durations, f0, energy); only the post-net
    # mel is fed to the vocoder.
    _, mel_after, _, _, _ = fastspeech2.inference(
        input_ids=batched_ids,
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
    )

    # Convert the mel spectrogram to a waveform; keep the first batch item
    # and the first channel, then move it to NumPy for Gradio.
    audio = melgan.inference(mel_after)[0, :, 0].numpy()

    # Gradio expects (sample_rate, data), not (data, sample_rate).
    return _SAMPLE_RATE_HZ, audio
# Build the Gradio UI: one text box in, one audio player out, backed by
# tts_inference, then start serving it.
# NOTE(review): the title/description mention Vietnamese, but the checkpoints
# loaded in this file have `ljspeech-en` repo ids -- confirm which is intended.
interface_options = dict(
    fn=tts_inference,
    inputs="text",
    outputs="audio",
    title="FastSpeech2_vi TTS",
    description="Enter Vietnamese text and generate speech using FastSpeech2",
)
iface = gr.Interface(**interface_options)
iface.launch()