# VCTK_British_English_Males / prepare_model.py
# jvision's picture
# Fix links
# 5088543
import ast
import json
import os
import subprocess
import sys
def generate_html_output(data, repository_path, output_path='speakers.md'):
    """Append one HTML ``<p>`` entry per speaker to a Markdown file.

    Each entry describes the speaker (age, gender, accent, region) and embeds
    an ``<audio>`` player whose source is
    ``{repository_path}/samples/{speaker_id}.wav``.

    Args:
        data: Mapping of speaker ID to a dict with the keys ``'age'``,
            ``'gender'``, ``'accents'`` and ``'region'``.
        repository_path: Base URL or path that contains the ``samples``
            directory. It is used verbatim; no trailing slash is stripped.
        output_path: File the entries are appended to. Defaults to
            ``'speakers.md'`` in the current working directory.

    Raises:
        KeyError: If a speaker dict lacks one of the required keys.
        OSError: If ``output_path`` cannot be opened for appending.
    """
    # Gender codes that get a spelled-out label; any other value is written
    # to the output unchanged.
    gender_labels = {'F': 'female', 'M': 'male'}

    # Append mode: successive calls accumulate entries in the same file.
    with open(output_path, 'a', encoding='utf-8') as file:
        for speaker_id, speaker_info in data.items():
            out_path = f"{repository_path}/samples/{speaker_id}.wav"
            age = speaker_info['age']
            gender = speaker_info['gender']
            gender = gender_labels.get(gender, gender)
            accents = speaker_info['accents']
            region = speaker_info['region']
            file.write(f"<p>VCTK_{speaker_id}: {age} year old {gender}, {accents} accent ({region})<audio controls><source src=\"{out_path}\" type=\"audio/wav\"></audio> </p>\n")
# Speaker metadata table: (speaker ID, age, region). Every speaker in this
# set shares gender "M" and accent "English", so only the varying fields are
# listed here and the full records are assembled below.
_SPEAKER_ROWS = (
    ("p226", 22, "Surrey"),
    ("p227", 38, "Cumbria"),
    ("p232", 23, "Southern England"),
    ("p243", 22, "London"),
    ("p254", 21, "Surrey"),
    ("p256", 24, "Birmingham"),
    ("p258", 22, "Southern England"),
    ("p259", 23, "Nottingham"),
    ("p270", 21, "Yorkshire"),
    ("p273", 23, "Suffolk"),
    ("p274", 22, "Essex"),
    ("p278", 22, "Cheshire"),
    ("p279", 23, "Leicester"),
    ("p286", 23, "Newcastle"),
    ("p287", 23, "York"),
)
# Full per-speaker records, keyed by speaker ID in table order.
data = {
    speaker_id: {"age": age, "gender": "M", "accents": "English", "region": region}
    for speaker_id, age, region in _SPEAKER_ROWS
}
# Serialise the records as pretty-printed JSON (two-space indent)
json_data = json.dumps(data, indent=2)
# Persist the JSON text, overwriting any previous speakers-log.json
with open('speakers-log.json', 'w') as log_file:
    log_file.write(json_data)
# Ask the TTS CLI which speakers the checkpoint knows about. The command is
# passed as an argument list (no shell), so a failing `tts` raises
# CalledProcessError instead of being masked by a pipeline's exit status.
command = [
    "tts",
    "--model_path", "checkpoint_85000.pth",
    "--config_path", "config.json",
    "--list_speaker_idxs",
]
raw_output = subprocess.check_output(command, text=True)
# Keep only the payload: drop blank lines and lines whose first
# non-whitespace character is '|' or '>' (the same lines the former
# `grep -vE '^(\s*\||\s*>|\s*$)'` filter removed).
output = "\n".join(
    line
    for line in raw_output.splitlines()
    if line.strip() and not line.lstrip().startswith(("|", ">"))
)
# Parse the remaining text as a Python literal. literal_eval only accepts
# literals, so unexpected CLI output cannot execute code the way eval() could.
# Presumably this is a dict keyed by speaker name; only iteration over it is
# relied on below -- TODO confirm against the installed TTS version.
speaker_indices = ast.literal_eval(output.strip())
# Load the speaker metadata back from speakers-log.json
with open('speakers-log.json', 'r') as file:
    speaker_ids = json.load(file)
# Base URL under which the generated samples are published.
# NOTE(review): the repository name here ("VCTK_European_English_Males")
# differs from the "VCTK_British_English_Males" name in the file header --
# confirm that these links resolve.
repository_url = "https://huggingface.co/voices/VCTK_European_English_Males/resolve/main"
# Start from an empty speakers.md. The handle is closed immediately because
# generate_html_output() reopens the file in append mode for every speaker;
# keeping a second write handle open across those appends is fragile.
with open('speakers.md', 'w'):
    pass
# Make the samples directory if it doesn't exist
os.makedirs("samples", exist_ok=True)
for speaker_idx in speaker_indices:
    # Remove the 'VCTK_' prefix
    speaker_id = speaker_idx.replace('VCTK_', '')
    # Skip speakers the model knows but that have no metadata entry
    speaker_id_json = speaker_ids.get(speaker_id)
    if speaker_id_json is None:
        continue
    text = f"Hello, I am from {speaker_id_json['region']}. I hope that you will select my voice for your project. Thank you."
    out_path = f"samples/{speaker_id}.wav"
    # Argument list instead of a shell string: the sentence and paths reach
    # `tts` verbatim, with no shell quoting to get wrong.
    tts_command = [
        "tts",
        "--text", text,
        "--model_path", "checkpoint_85000.pth",
        "--language_idx", "en",
        "--config_path", "config.json",
        "--speaker_idx", f"VCTK_{speaker_id}",
        "--out_path", out_path,
    ]
    # Execute the TTS command
    result = subprocess.run(tts_command)
    if result.returncode != 0:
        # Do not advertise a sample that was never produced.
        print(
            f"warning: tts exited with status {result.returncode} for "
            f"VCTK_{speaker_id}; skipping its speakers.md entry",
            file=sys.stderr,
        )
        continue
    # Write the speaker information to the speakers.md file
    generate_html_output({speaker_id: speaker_id_json}, repository_url)