|
import ast
import json
import os
import subprocess
|
|
|
def generate_html_output(data, repository_path):
    """Append one HTML ``<p>`` entry per speaker to ``speakers.md``.

    Each entry shows the speaker label, age, gender, accent and region,
    followed by an ``<audio>`` player whose source is
    ``{repository_path}/samples/{speaker_id}.wav``.

    The label always carries exactly one ``VCTK_`` prefix: ids that already
    start with ``VCTK_`` are used as-is, other ids get the prefix added.

    Args:
        data: Mapping of speaker id to a dict providing the keys ``age``,
            ``gender``, ``accents`` and ``region``.
        repository_path: Base URL or path under which the ``samples``
            directory lives.

    Raises:
        KeyError: If a speaker record lacks one of the required keys.
            Entries written before the failing record stay in the file.
    """
    # Single-letter gender codes and the words written to the page; any
    # other value is written through unchanged.
    gender_names = {'F': 'female', 'M': 'male'}

    # Explicit encoding so non-ASCII region/accent names do not depend on
    # the platform's default locale.
    with open('speakers.md', 'a', encoding='utf-8') as file:
        for speaker_id, speaker_info in data.items():
            out_path = f"{repository_path}/samples/{speaker_id}.wav"

            age = speaker_info['age']
            gender = speaker_info['gender']
            gender = gender_names.get(gender, gender)
            accents = speaker_info['accents']
            region = speaker_info['region']

            # Avoid labels such as "VCTK_VCTK_p225" when the id is already
            # prefixed.
            if str(speaker_id).startswith('VCTK_'):
                label = speaker_id
            else:
                label = f"VCTK_{speaker_id}"

            file.write(
                f"<p>{label}: {age} year old {gender}, {accents} accent ({region})"
                f"<audio controls><source src=\"{out_path}\" type=\"audio/wav\"></audio> </p>\n"
            )
|
|
|
|
|
|
|
# Speaker metadata keyed by speaker id.  Every record in this table has
# gender "F", accent "English" and an empty comment, so only the fields that
# vary (id, age, region) are listed per row.
_SPEAKER_ROWS = (
    ("VCTK_p225", 23, "Southern England"),
    ("VCTK_p228", 22, "Southern England"),
    ("VCTK_p229", 23, "Southern England"),
    ("VCTK_p230", 22, "Stockton-on-tees"),
    ("VCTK_p231", 23, "Southern England"),
    ("VCTK_p233", 23, "Staffordshire"),
    ("VCTK_p236", 23, "Manchester"),
    ("VCTK_p239", 22, "Southwest England"),
    ("VCTK_p240", 21, "Southern England"),
    ("VCTK_p244", 22, "Manchester"),
    ("VCTK_p250", 22, "Southeast England"),
    ("VCTK_p257", 24, "Southern England"),
    ("VCTK_p267", 23, "Yorkshire"),
    ("VCTK_p268", 23, "Southern England"),
    ("VCTK_p269", 20, "Newcastle"),
    ("VCTK_p276", 24, "Oxford"),
    ("VCTK_p277", 23, "Northeast England"),
    ("VCTK_p282", 23, "Newcastle"),
)

# Expand the rows into the full per-speaker records, preserving row order
# and the key order age / gender / accents / region / comments.
data = {
    speaker_key: {
        "age": speaker_age,
        "gender": "F",
        "accents": "English",
        "region": speaker_region,
        "comments": "",
    }
    for speaker_key, speaker_age, speaker_region in _SPEAKER_ROWS
}
|
|
|
|
|
# Render the speaker table as 2-space-indented JSON; the text stays
# available afterwards as ``json_data``.
json_data = json.dumps(data, indent=2)

# Overwrite speakers-log.json with that text.
with open('speakers-log.json', mode='w') as log_file:
    log_file.write(json_data)
|
|
|
|
|
# Ask the ``tts`` CLI to list the speakers of the checkpoint.  The command is
# an argument list run without a shell, so a failing ``tts`` invocation
# raises CalledProcessError instead of being hidden behind the exit status
# of a shell pipeline.
command = [
    "tts",
    "--model_path", "checkpoint_85000.pth",
    "--config_path", "config.json",
    "--list_speaker_idxs",
]
raw_output = subprocess.check_output(command, text=True)

# Same filtering the former `grep -vE` stage performed: drop blank lines and
# lines whose first non-blank character is "|" or ">".
output = "\n".join(
    line
    for line in raw_output.splitlines()
    if line.strip() and not line.lstrip().startswith(("|", ">"))
)

# The remaining text is parsed as a Python literal (presumably a dict or
# list of speaker names -- confirm against the tts version in use).
# ast.literal_eval replaces the previous eval() so that the tool's output
# can only ever be data, never executed code.
speaker_indices = ast.literal_eval(output.strip())
|
|
|
|
|
# Read the speaker metadata back from speakers-log.json.
with open('speakers-log.json', mode='r') as log_file:
    speaker_ids = json.load(log_file)

repository_url = "https://huggingface.co/voices/VCTK_European_English_Females/resolve/main"

# Write one HTML entry per listed speaker; speakers that have no metadata
# record are skipped.
# NOTE(review): the call is assumed to belong inside the loop -- confirm.
for speaker_name in speaker_indices:
    if speaker_name not in speaker_ids:
        continue

    generate_html_output({speaker_name: speaker_ids[speaker_name]}, repository_url)
|
|