Spaces:

tykiww
/

TTS_Demo

Runtime error

App Files Files Community

TTS_Demo / app.py

tykiww

Update app.py

16b24b7 verified 12 months ago

raw

history blame contribute delete

2.82 kB

	###################################### imports ######################################
	import torch
	from TTS.api import TTS
	import gradio as gr
	import os
	import spaces
	import yaml


	###################################### utilities ######################################
	def get_config():
	    """Load the application configuration from a YAML file.

	    The file location is read from the ``CONFIG_PATH`` environment variable.

	    Returns:
	        Whatever ``yaml.safe_load`` produces for the file contents. The rest
	        of this file indexes it as ``config['inference'][...]``.

	    Raises:
	        KeyError: if ``CONFIG_PATH`` is not set.
	        OSError: if the file cannot be opened.
	    """
	    try:
	        config_path = os.environ["CONFIG_PATH"]
	    except KeyError:
	        # Same exception type as a plain lookup, but with an actionable message.
	        raise KeyError(
	            "CONFIG_PATH environment variable is not set; "
	            "it must point to the YAML configuration file"
	        ) from None

	    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
	    with open(config_path, "r", encoding="utf-8") as file:
	        config = yaml.safe_load(file)

	    return config


	def init_TTS(config):
	    """Build the TTS model named in the config and move it to the compute device.

	    Args:
	        config: mapping indexed as ``config['inference']['model']`` to obtain
	            the model identifier passed to ``TTS``.

	    Returns:
	        The ``TTS`` instance after ``.to(device)``, where the device is
	        ``"cuda"`` when ``torch.cuda.is_available()`` is true and ``"cpu"``
	        otherwise.
	    """
	    # Pick the device first, then look up the model name.
	    if torch.cuda.is_available():
	        target_device = "cuda"
	    else:
	        target_device = "cpu"

	    model_name = config['inference']['model']
	    return TTS(model_name).to(target_device)


	@spaces.GPU
	def generate_speech(voice_choice, markdown, microphone, text):
	    """Synthesize ``text`` to an audio file and return the file's path.

	    Reads the module-level ``config`` and ``tts`` objects, which are bound in
	    the ``__main__`` section of this file.

	    Args:
	        voice_choice: ``"Record"`` to use the ``microphone`` recording as the
	            speaker reference; any other value uses
	            ``config['inference']['speaker_wav']``.
	        markdown: unused; present only because the Markdown component is
	            listed among the interface inputs.
	        microphone: value of the audio input (the component is created with
	            ``type="filepath"``); may be empty when nothing was recorded.
	        text: the text to synthesize.

	    Returns:
	        ``config['inference']['file_path']``, the path the audio was written to.

	    Raises:
	        gr.Error: if ``text`` is blank, or if "Record" is selected but no
	            recording was provided.
	    """
	    # NOTE(review): @spaces.GPU presumably requests a GPU for the duration of
	    # the call on a Hugging Face ZeroGPU Space -- confirm against the docs.
	    inference_cfg = config['inference']

	    # Fail early with a readable message instead of an opaque model error.
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    if voice_choice == "Record":
	        if not microphone:
	            # Nothing was recorded, so there is no reference voice to clone.
	            raise gr.Error(
	                "No recording found. Record your voice first, "
	                "or switch to the predefined voice."
	            )
	        speaker = microphone
	    else:
	        speaker = inference_cfg['speaker_wav']

	    # NOTE(review): every call writes to the same configured path, so
	    # overlapping requests could overwrite each other's output -- confirm
	    # whether concurrent use is expected.
	    tts.tts_to_file(text=text,
	                    file_path=inference_cfg['file_path'],
	                    speaker_wav=speaker,
	                    language=inference_cfg['language'])
	    return inference_cfg['file_path']


	###################################### main ######################################
	def UI(config):
	    """Assemble the Gradio interface around ``generate_speech`` and launch it.

	    Args:
	        config: accepted for symmetry with the other entry points; it is not
	            read inside this function.

	    Returns:
	        ``0`` once ``launch()`` has returned.
	    """
	    # Input widgets, listed in the positional order of generate_speech's
	    # parameters: voice_choice, markdown, microphone, text.
	    input_widgets = [
	        gr.Radio(
	            label="Record or use a predefined voice.",
	            choices=["Record", "Predefined (Nancy)"],
	            value="Record",
	        ),
	        gr.Markdown("""If recording, speak loud and clearly. Recommended speaking track
	'*printing, in the only sense with which we are at present concerned,
	differs from, most if not all, the arts and crafts in the exhibition.*'"""),
	        gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio'),
	        gr.Textbox(label="Enter your text"),
	    ]

	    # Wire the widgets to the synthesis function; the result is shown as audio.
	    app = gr.Interface(
	        fn=generate_speech,
	        inputs=input_widgets,
	        outputs="audio",
	        title="Voice cloning and Synthesis with Coqui-XTTS",
	        description="Clone your voice and Synthesize speech using predefined target voice and language. It takes a 10-20 seconds to download the model, so wait to record until the app is Running on Zero to begin.",
	    )

	    # Start serving the interface.
	    app.launch()
	    return 0


	###################################### Execute ######################################
	if __name__ == "__main__":
	    # generate_speech reads `config` and `tts` as module-level globals, so
	    # both names must be bound here before the interface is launched.
	    config = get_config()
	    tts = init_TTS(config=config)

	    # Build the Gradio interface and launch it.
	    UI(config=config)