Spaces:

awkondoro
/

swahili_tts

Build error

swahili_tts / app.py

initial commit

3f312c5 3 months ago

1.36 kB

	import gradio as gr
	import tensorflow as tf
	from tensorflow_tts.inference import TFAutoModel, AutoProcessor
	import soundfile as sf

	# Hugging Face repository ids for the acoustic model (which also provides
	# the text processor) and for the vocoder.
	ACOUSTIC_MODEL_ID = "bookbot/lightspeech-mfa-sw-v4"
	VOCODER_MODEL_ID = "bookbot/mb-melgan-hifi-postnets-sw-v4"

	# Load everything once at import time so each request only runs inference.
	lightspeech = TFAutoModel.from_pretrained(ACOUSTIC_MODEL_ID)
	processor = AutoProcessor.from_pretrained(ACOUSTIC_MODEL_ID)
	mb_melgan = TFAutoModel.from_pretrained(VOCODER_MODEL_ID)


	def tts(text, speaker_name="sw-TZ-Victoria"):
	    """Synthesize speech for ``text`` and return the path of a WAV file.

	    The text is converted to input ids by ``processor``, turned into a
	    mel-spectrogram by ``lightspeech`` and then into a waveform by
	    ``mb_melgan``. Speed, f0 and energy ratios are all fixed at 1.0.

	    Args:
	        text: The text to synthesize.
	        speaker_name: Key into ``processor.speakers_map`` selecting the voice.

	    Returns:
	        Path (str) of a newly created 16-bit PCM WAV file containing the
	        audio. A unique file is created per call so that overlapping
	        requests never overwrite each other's output.

	    Raises:
	        KeyError: If ``speaker_name`` is not present in
	            ``processor.speakers_map``.
	    """
	    import os
	    import tempfile

	    # Process input text
	    input_ids = processor.text_to_sequence(text)

	    # Generate mel-spectrogram; every tensor gets a leading batch axis of 1.
	    mel, _, _ = lightspeech.inference(
	        input_ids=tf.expand_dims(
	            tf.convert_to_tensor(input_ids, dtype=tf.int32), 0
	        ),
	        speaker_ids=tf.convert_to_tensor(
	            [processor.speakers_map[speaker_name]], dtype=tf.int32
	        ),
	        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
	        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
	        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
	    )

	    # Generate audio from mel-spectrogram; keep the first batch item and
	    # the first channel.
	    audio = mb_melgan.inference(mel)[0, :, 0]

	    # Write to a unique temporary file instead of a shared "output.wav":
	    # a fixed name lets concurrent requests clobber one another's audio.
	    fd, output_path = tempfile.mkstemp(suffix=".wav")
	    os.close(fd)  # soundfile reopens the path itself
	    # NOTE(review): 44100 Hz is assumed to match the vocoder's output
	    # rate -- confirm against the model card.
	    sf.write(output_path, audio, 44100, "PCM_16")

	    # Return the audio file for Gradio to play
	    return output_path


	# Expose the synthesis function through a minimal web UI: one text box
	# in, one audio player out.
	iface = gr.Interface(
	    fn=tts,
	    inputs="text",
	    outputs="audio",
	)

	# Start serving the interface.
	iface.launch()