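# NOTE: page-header residue from the Hugging Face Spaces listing this file was
# captured from ("Spaces: Runtime error"); it is not part of the program.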
import os | |
import tempfile | |
import gradio as gr | |
from TTS.api import TTS | |
from TTS.utils.synthesizer import Synthesizer | |
from huggingface_hub import hf_hub_download | |
import json | |
import glob | |
# Define constants

# Models served by this demo. Each entry is
# [display name, checkpoint filename, config filename, Hub repo id]; an
# optional fifth element names a speakers file stored in the same repo.
MODEL_INFO = [
    ["Persian XTTS", "checkpoint_30000.pth", "config.json", "saillab/xtts_v2_fa_revision1"],
]

# Input text longer than this many characters is truncated before synthesis.
MAX_TXT_LEN = 400

# Hub access token read from the environment; None when the variable is unset.
TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')

# Local paths of the downloaded files, keyed by model display name.
model_files = {}
config_files = {}
speaker_files = {}

# Create a dictionary to store synthesizer objects for each model
synthesizers = {}
def update_config_speakers_file_recursive(config_dict, speakers_path):
    """Set every existing ``speakers_file`` key in a nested dict to *speakers_path*.

    The dictionary is modified in place and nothing is returned. A
    ``speakers_file`` key is only overwritten where it is already present; it
    is never added. Only values that are themselves dictionaries are descended
    into.

    Args:
        config_dict: The (possibly nested) configuration dictionary to update.
        speakers_path: The value to store under each ``speakers_file`` key.
    """
    if "speakers_file" in config_dict:
        config_dict["speakers_file"] = speakers_path
    for value in config_dict.values():
        if isinstance(value, dict):
            update_config_speakers_file_recursive(value, speakers_path)
def update_config_speakers_file(config_path, speakers_path):
    """Update the config.json file to point to the correct speakers.pth file.

    The JSON file at *config_path* is loaded, every existing ``speakers_file``
    key (at any dictionary nesting depth) is set to *speakers_path*, and the
    result is written back to the same path with 4-space indentation.

    Args:
        config_path: Path of the JSON config file; it is overwritten in place.
        speakers_path: Path to store under the ``speakers_file`` keys.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's locale
    # encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    update_config_speakers_file_recursive(config, speakers_path)

    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=4)
# Download models and initialize synthesizers
for info in MODEL_INFO:
    model_name, model_file, config_file, repo_name = info[:4]
    # An optional fifth entry names the speakers file for the model.
    speaker_file = info[4] if len(info) > 4 else None

    print(f"|> Downloading: {model_name}")

    # Download model and config files
    model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
    config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)

    # Arguments shared by every model; the speakers file is added only when
    # one is configured, so the Synthesizer default applies otherwise.
    synthesizer_kwargs = {
        "tts_checkpoint": model_files[model_name],
        "tts_config_path": config_files[model_name],
        "use_cuda": False,  # Assuming you don't want to use GPU, adjust if needed
    }

    if speaker_file:
        speaker_files[model_name] = hf_hub_download(repo_id=repo_name, filename=speaker_file, use_auth_token=TOKEN)
        # Rewrites the downloaded config in place so that its speakers_file
        # entries point at the downloaded speakers file.
        update_config_speakers_file(config_files[model_name], speaker_files[model_name])
        print(speaker_files[model_name])
        synthesizer_kwargs["tts_speakers_file"] = speaker_files[model_name]

    # Initialize synthesizer for the model. Built unconditionally so a model
    # can never end up registered with a missing or stale synthesizer.
    synthesizer = Synthesizer(**synthesizer_kwargs)
    synthesizers[model_name] = synthesizer
def synthesize(text: str, model_name: str, speaker_name=None) -> str:
    """Synthesize speech using the selected model.

    Text longer than ``MAX_TXT_LEN`` characters is truncated (and a notice is
    printed) before synthesis. An empty-string *speaker_name* is treated the
    same as ``None``.

    Args:
        text: The text to synthesize.
        model_name: Key of the model in ``synthesizers``.
        speaker_name: Speaker to use when the model was loaded with a speakers
            file; ignored otherwise.

    Returns:
        Path of a temporary ``.wav`` file containing the audio. The file is
        not deleted by this function.

    Raises:
        NameError: If no synthesizer is registered under *model_name*.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")

    # Use .get() so an unknown model reaches the explicit error below instead
    # of failing earlier with a KeyError.
    synthesizer = synthesizers.get(model_name)
    if synthesizer is None:
        raise NameError("Model not found")

    # Compare by value: identity checks against a string literal are not
    # reliable.
    if synthesizer.tts_speakers_file == "":
        wavs = synthesizer.tts(text)
    else:
        wavs = synthesizer.tts(text, speaker_name=None if speaker_name == "" else speaker_name)

    # delete=False: the file has to outlive this function so that the caller
    # can open it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
# Display names of all configured models, in MODEL_INFO order.
MODEL_NAMES = [info[0] for info in MODEL_INFO]


def update_options(model_name):
    """Return the speaker names available for the given model.

    The names are read from the speaker manager of the model's loaded TTS
    model. An empty list is returned when the model has no speaker manager or
    the manager lists no speakers.

    Args:
        model_name: Key of the model in ``synthesizers``.

    Returns:
        A list of speaker names, possibly empty.
    """
    synthesizer = synthesizers[model_name]
    # Decide from the loaded model itself rather than from the model's
    # position in MODEL_NAMES, which fails when only one model is configured.
    speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None)
    speakers = getattr(speaker_manager, "speaker_names", None)
    return list(speakers) if speakers else []


# The dropdown choices are computed once, when the interface is built, so
# offer the speakers of every configured model (first-seen order, duplicates
# removed).
SPEAKER_CHOICES = list(dict.fromkeys(
    speaker for name in MODEL_NAMES for speaker in update_options(name)
))

# Create Gradio interface
iface = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
        gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
        # `value=` matches the keyword used by the other components here.
        gr.Dropdown(label="Select Speaker", choices=SPEAKER_CHOICES, type="value", value=None),
    ],
    outputs=gr.Audio(label="Output", type='filepath'),
    # Example should include a speaker name for multispeaker models
    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0], ""]],
    title='Persian TTS Playground',
    description="""
### Persian text to speech model demo.
#### Pick a speaker for MultiSpeaker models. (for single speaker go for speaker-0)
""",
    article="",
    live=False,
)

iface.launch()