# Spaces: Build error
import tempfile

import gradio as gr
import soundfile as sf
import tensorflow as tf
from tensorflow_tts.inference import TFAutoModel, AutoProcessor
# Load everything once at import time so each request only pays for inference.
# `lightspeech` produces the mel-spectrogram and `processor` supplies the
# text-to-ID conversion and the speaker map; both come from the same checkpoint.
lightspeech = TFAutoModel.from_pretrained(
    "bookbot/lightspeech-mfa-sw-v4"
)
processor = AutoProcessor.from_pretrained(
    "bookbot/lightspeech-mfa-sw-v4"
)
# `mb_melgan` turns the mel-spectrogram into an audio waveform.
mb_melgan = TFAutoModel.from_pretrained(
    "bookbot/mb-melgan-hifi-postnets-sw-v4"
)
def tts(text, speaker_name="sw-TZ-Victoria"):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    The text is converted to an ID sequence by ``processor``, turned into a
    mel-spectrogram by ``lightspeech`` and then into a waveform by
    ``mb_melgan``. The waveform is written as 16-bit PCM at 44100 Hz.

    Args:
        text: Input text to synthesize.
        speaker_name: Key looked up in ``processor.speakers_map`` to select
            the speaker ID passed to the model.

    Returns:
        Path of the WAV file that was written. Every call writes to its own
        temporary file, so overlapping requests cannot overwrite or pick up
        each other's audio (a single shared "output.wav" could).

    Raises:
        KeyError: If ``speaker_name`` is missing from
            ``processor.speakers_map`` (assuming it is a plain mapping).
    """
    # Process input text
    input_ids = processor.text_to_sequence(text)

    # The same neutral ratio (1.0) is used for speed, f0 and energy.
    unit_ratio = tf.convert_to_tensor([1.0], dtype=tf.float32)

    # Generate mel-spectrogram; expand_dims adds the leading batch axis.
    mel, _, _ = lightspeech.inference(
        input_ids=tf.expand_dims(
            tf.convert_to_tensor(input_ids, dtype=tf.int32), 0
        ),
        speaker_ids=tf.convert_to_tensor(
            [processor.speakers_map[speaker_name]], dtype=tf.int32
        ),
        speed_ratios=unit_ratio,
        f0_ratios=unit_ratio,
        energy_ratios=unit_ratio,
    )

    # Generate audio from mel-spectrogram.
    # NOTE(review): the [0, :, 0] index presumably selects the first batch
    # item and first channel of a (batch, samples, channels) output — confirm.
    audio = mb_melgan.inference(mel)[0, :, 0]

    # Reserve a unique file name per call. delete=False keeps the file on disk
    # after the handle is closed so Gradio can read it; the file is not
    # removed here.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Save to file
    sf.write(output_path, audio, 44100, "PCM_16")

    # Return the audio file for Gradio to play
    return output_path
# Wire the synthesis function into a simple web UI: one text box in,
# one audio player out.
iface = gr.Interface(
    fn=tts,
    inputs="text",
    outputs="audio",
)

# Start serving the UI.
iface.launch()