VCTK_European_English_Males / prepare_model.py
jvision's picture
update labels for speakers
9276756
raw
history blame contribute delete
No virus
3.53 kB
import ast
import json
import os
import subprocess
# Speaker metadata, one row per speaker, in the column order given by
# _SPEAKER_FIELDS (preceded by the speaker name used as the dictionary key).
_SPEAKER_FIELDS = ("age", "gender", "accents", "region")
_SPEAKER_ROWS = (
    ("VCTK_p237", 22, "M", "Scottish", "Fife"),
    ("VCTK_p241", 21, "M", "Scottish", "Perth"),
    ("VCTK_p245", 25, "M", "Irish", "Dublin"),
    ("VCTK_p246", 22, "M", "Scottish", "Selkirk"),
    ("VCTK_p247", 22, "M", "Scottish", "Argyll"),
    ("VCTK_p252", 22, "M", "Scottish", "Edinburgh"),
    ("VCTK_p255", 19, "M", "Scottish", "Galloway"),
    ("VCTK_p260", 21, "M", "Scottish", "Orkney"),
    ("VCTK_p263", 22, "M", "Scottish", "Aberdeen"),
    ("VCTK_p271", 19, "M", "Scottish", "Fife"),
    ("VCTK_p272", 23, "M", "Scottish", "Edinburgh"),
    ("VCTK_p275", 23, "M", "Scottish", "Midlothian"),
    ("VCTK_p281", 29, "M", "Scottish", "Edinburgh"),
    ("VCTK_p284", 20, "M", "Scottish", "Fife"),
    ("VCTK_p285", 21, "M", "Scottish", "Edinburgh"),
    ("VCTK_p292", 23, "M", "NorthernIrish", "Belfast"),
    ("VCTK_p298", 19, "M", "Irish", "Tipperary"),
    ("VCTK_p304", 22, "M", "NorthernIrish", "Belfast"),
    ("VCTK_p326", 26, "M", "Australian English", "Sydney"),
    ("VCTK_p364", 23, "M", "Irish", "Donegal"),
    ("VCTK_p374", 28, "M", "Australian English", "The Outback"),
)

# Mapping of speaker name -> {"age", "gender", "accents", "region"}; insertion
# order follows the table above.
data = {
    speaker: dict(zip(_SPEAKER_FIELDS, values))
    for speaker, *values in _SPEAKER_ROWS
}
# Serialise the speaker metadata as 2-space-indented JSON straight into
# speakers-log.json (the file is created or overwritten).
with open('speakers-log.json', 'w') as log_file:
    json.dump(data, log_file, indent=2)
# Ask the `tts` CLI to list the speakers known to the checkpoint. The output
# is piped through grep, which drops every line that is blank or whose first
# non-space character is "|" or ">" (presumably the CLI's log/banner lines),
# leaving only the speaker listing. The regex part is a raw string so the
# backslashes reach grep unchanged without relying on invalid Python escapes.
command = (
    "tts --model_path checkpoint_85000.pth --config_path config.json "
    r"--list_speaker_idxs | grep -vE '^(\s*\||\s*>|\s*$)'"
)
# NOTE: with a shell pipeline the exit status is grep's, so CalledProcessError
# is raised when grep fails or nothing survives the filter.
output = subprocess.check_output(command, shell=True, text=True)
# The filtered output is expected to be a single Python literal (it is not
# guaranteed to be valid JSON). ast.literal_eval parses literals only, so
# unexpected CLI output cannot execute code the way eval() would; it raises
# ValueError/SyntaxError if the output is not a plain literal.
speaker_indices = ast.literal_eval(output.strip())
# Read back the speaker metadata stored in speakers-log.json.
with open('speakers-log.json', 'r') as log_file:
    speaker_ids = json.load(log_file)

# Walk over every speaker reported by the model and look up its metadata.
for speaker_idx in speaker_indices:
    # The name is used as reported; stripping the 'VCTK_' prefix is disabled.
    speaker_id = speaker_idx
    # speaker_id = speaker_idx.replace('VCTK_', '')

    # Speakers without a metadata entry are skipped.
    if speaker_id not in speaker_ids:
        continue
    speaker_id_json = speaker_ids[speaker_id]

    # Sample generation is currently disabled; the original commands are kept
    # below for reference.
    # NOTE(review): if re-enabled while the prefix stripping above stays
    # disabled, --speaker_idx would receive "VCTK_VCTK_..." because speaker_id
    # already carries the 'VCTK_' prefix.
    # text = f"Hello, I am from {speaker_id_json['region']}. I hope that you will select my voice for your project. Thank you."
    # # make samples directory if it doesn't exist
    # if not os.path.exists("samples"):
    #     os.makedirs("samples")
    # out_path = f"samples/{speaker_id}.wav"
    # tts_command = f"tts --text \"{text}\" --model_path checkpoint_85000.pth --language_idx en --config_path config.json --speaker_idx \"VCTK_{speaker_id}\" --out_path {out_path}"
    # Execute the TTS command
    # os.system(tts_command)