# Spaces page status when this file was captured: Build error
import gradio as gr
import numpy as np
import tensorflow as tf
from tensorflow_tts.inference import TFAutoModel, AutoProcessor
# Load pre-trained models once at import time so every request reuses them.
# The text processor and the acoustic model come from the same repository;
# the vocoder (a "mb_melgan" checkpoint, per its repository id) has its own.
_ACOUSTIC_REPO = "tensorspeech/tts-fastspeech2-ljspeech-en"
_VOCODER_REPO = "tensorspeech/tts-mb_melgan-ljspeech-en"

processor = AutoProcessor.from_pretrained(_ACOUSTIC_REPO)
fastspeech2 = TFAutoModel.from_pretrained(_ACOUSTIC_REPO)
melgan = TFAutoModel.from_pretrained(_VOCODER_REPO)
# Sample rate (Hz) reported to Gradio together with the generated waveform.
_SAMPLE_RATE = 22050


# Define inference function
def tts_inference(text):
    """Synthesize speech for ``text`` with FastSpeech2 and the MelGAN vocoder.

    Args:
        text: The sentence to speak.

    Returns:
        ``None`` when ``text`` is empty or only whitespace (Gradio then shows
        no audio). Otherwise a ``(sample_rate, waveform)`` tuple, which is the
        order Gradio's audio output expects; ``waveform`` is the NumPy array
        taken from the first batch item and first channel of the vocoder
        output.
    """
    # Nothing to synthesize: skip the models instead of feeding them an
    # empty sequence.
    if not text or not text.strip():
        return None

    # Convert text to a sequence of symbol ids.
    input_ids = processor.text_to_sequence(text)

    # Generate mel spectrograms. FastSpeech2's inference call requires the
    # speed / pitch / energy ratios; 1.0 leaves the prosody unmodified.
    # It returns (mel_before, mel_after, durations, f0, energy); only the
    # post-net spectrogram (mel_after) is passed on to the vocoder.
    _, mel_after, *_ = fastspeech2.inference(
        input_ids=tf.expand_dims(
            tf.convert_to_tensor(input_ids, dtype=tf.int32), 0
        ),
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
    )

    # Convert the mel spectrogram to a waveform: first batch item, first channel.
    audio = melgan.inference(mel_after)[0, :, 0].numpy()

    # Gradio's "audio" output takes (sample_rate, data), in that order.
    return _SAMPLE_RATE, audio
# Create Gradio interface: one text box in, one audio player out.
# The title and description name English because the checkpoints loaded by
# this app are the "ljspeech-en" ones; telling users to enter Vietnamese
# text would be misleading.
iface = gr.Interface(
    fn=tts_inference,
    inputs="text",
    outputs="audio",
    title="FastSpeech2 TTS (English, LJSpeech)",
    description="Enter English text and generate speech using FastSpeech2",
)

# Start the web server (the app is launched when this file runs).
iface.launch()