VCTK_European_English_Males / prepare_model.py

first commit

277d850 over 1 year ago

4.41 kB

	import ast
	import html
	import json
	import os
	import re
	import subprocess
	import sys

	def generate_html_output(data, repository_path, output_path='speakers.md'):
	    """Append one HTML ``<p>`` entry per speaker to a Markdown file.

	    Each entry describes the speaker and embeds an ``<audio>`` player whose
	    source is ``{repository_path}/samples/{speaker_id}.wav``.

	    Args:
	        data: Mapping of speaker ID to a dict with the keys ``age``,
	            ``gender``, ``accents`` and ``region``.
	        repository_path: Base URL (or path) that contains the ``samples``
	            directory.
	        output_path: File to append the entries to. Defaults to
	            ``speakers.md`` in the current working directory.

	    Raises:
	        KeyError: If a speaker entry lacks one of the required keys. In
	            that case nothing is written for this call.
	    """
	    gender_names = {'F': 'female', 'M': 'male'}

	    def esc(value):
	        # Values end up inside HTML markup, so neutralise &, <, > and quotes.
	        return html.escape(str(value))

	    entries = []
	    for speaker_id, speaker_info in data.items():
	        out_path = f"{repository_path}/samples/{speaker_id}.wav"
	        age = speaker_info['age']
	        gender = speaker_info['gender']
	        # Codes other than 'F' / 'M' are written through unchanged.
	        gender = gender_names.get(gender, gender)
	        accents = speaker_info['accents']
	        region = speaker_info['region']

	        entries.append(
	            f"<p>VCTK_{esc(speaker_id)}: {esc(age)} year old {esc(gender)}, "
	            f"{esc(accents)} accent ({esc(region)})"
	            f"<audio controls><source src=\"{esc(out_path)}\" type=\"audio/wav\"></audio> </p>\n"
	        )

	    # Append mode: callers build the file up over several calls. The explicit
	    # encoding keeps non-ASCII region names from depending on the platform.
	    with open(output_path, 'a', encoding='utf-8') as file:
	        file.writelines(entries)


	# Speaker metadata as compact rows: (speaker ID, age, gender, accent, region)
	_SPEAKER_ROWS = (
	    ("p237", 22, "M", "Scottish", "Fife"),
	    ("p241", 21, "M", "Scottish", "Perth"),
	    ("p245", 25, "M", "Irish", "Dublin"),
	    ("p246", 22, "M", "Scottish", "Selkirk"),
	    ("p247", 22, "M", "Scottish", "Argyll"),
	    ("p252", 22, "M", "Scottish", "Edinburgh"),
	    ("p255", 19, "M", "Scottish", "Galloway"),
	    ("p260", 21, "M", "Scottish", "Orkney"),
	    ("p263", 22, "M", "Scottish", "Aberdeen"),
	    ("p271", 19, "M", "Scottish", "Fife"),
	    ("p272", 23, "M", "Scottish", "Edinburgh"),
	    ("p275", 23, "M", "Scottish", "Midlothian"),
	    ("p281", 29, "M", "Scottish", "Edinburgh"),
	    ("p284", 20, "M", "Scottish", "Fife"),
	    ("p285", 21, "M", "Scottish", "Edinburgh"),
	    ("p292", 23, "M", "NorthernIrish", "Belfast"),
	    ("p298", 19, "M", "Irish", "Tipperary"),
	    ("p304", 22, "M", "NorthernIrish", "Belfast"),
	    ("p326", 26, "M", "Australian English", "Sydney"),
	    ("p364", 23, "M", "Irish", "Donegal"),
	    ("p374", 28, "M", "Australian English", "The Outback"),
	)

	# Expand the rows into the {speaker_id: {field: value}} mapping used below
	data = {
	    row_id: {"age": row_age, "gender": row_gender, "accents": row_accent, "region": row_region}
	    for row_id, row_age, row_gender, row_accent, row_region in _SPEAKER_ROWS
	}

	# Render the mapping as pretty-printed JSON text
	json_data = json.dumps(data, indent=2)

	# Store the JSON text in speakers-log.json
	with open('speakers-log.json', 'w') as log_file:
	    log_file.write(json_data)

	# Ask the TTS CLI which speaker indices the checkpoint provides. The
	# command is passed as an argument list, so no shell is involved and no
	# quoting or escaping of pipe characters is needed.
	command = [
	    "tts",
	    "--model_path", "checkpoint_85000.pth",
	    "--config_path", "config.json",
	    "--list_speaker_idxs",
	]
	raw_output = subprocess.check_output(command, text=True)

	# Discard lines that start with " |" or " >" as well as blank lines, so
	# that only the printed speaker table remains.
	_log_line = re.compile(r"^(\s\||\s>|\s*$)")
	output = "\n".join(
	    line for line in raw_output.splitlines() if not _log_line.match(line)
	)

	# The remaining text is expected to be a Python literal (for example a
	# dict keyed by speaker name). literal_eval parses literals only, so the
	# command output can never execute code the way eval() could.
	try:
	    speaker_indices = ast.literal_eval(output)
	except (ValueError, SyntaxError) as err:
	    raise RuntimeError(
	        f"could not parse the speaker list printed by tts: {output!r}"
	    ) from err

	# Load the speaker metadata back from speakers-log.json
	with open('speakers-log.json', 'r') as file:
	    speaker_ids = json.load(file)

	# Start from an empty speakers.md. generate_html_output() appends to the
	# file through its own handle, so it is only truncated here and not held
	# open while the loop runs.
	with open('speakers.md', 'w'):
	    pass

	# Make sure the samples directory exists once, before any synthesis
	os.makedirs("samples", exist_ok=True)

	for speaker_idx in speaker_indices:
	    # Remove the 'VCTK_' prefix (only at the start of the name)
	    prefix = 'VCTK_'
	    if speaker_idx.startswith(prefix):
	        speaker_id = speaker_idx[len(prefix):]
	    else:
	        speaker_id = speaker_idx

	    # Skip speakers for which no metadata is available
	    speaker_id_json = speaker_ids.get(speaker_id)
	    if speaker_id_json is None:
	        continue

	    # Sentence spoken in the sample
	    text = f"Hello, I am from {speaker_id_json['region']}. I hope that you will select my voice for your project. Thank you."
	    out_path = f"samples/{speaker_id}.wav"

	    # Argument list instead of a shell string: the text is handed to tts
	    # verbatim, whatever quotes or shell metacharacters it contains.
	    tts_command = [
	        "tts",
	        "--text", text,
	        "--model_path", "checkpoint_85000.pth",
	        "--language_idx", "en",
	        "--config_path", "config.json",
	        "--speaker_idx", f"VCTK_{speaker_id}",
	        "--out_path", out_path,
	    ]

	    # Execute the TTS command; a failed synthesis is reported and the
	    # speaker is left out of speakers.md rather than listed with a
	    # missing audio file.
	    result = subprocess.run(tts_command)
	    if result.returncode != 0:
	        print(
	            f"warning: tts exited with status {result.returncode} for VCTK_{speaker_id}; skipping",
	            file=sys.stderr,
	        )
	        continue

	    # Write the speaker information to the speakers.md file
	    generate_html_output({speaker_id: speaker_id_json}, "https://huggingface.co/voices/VCTK_European_English_Males/resolve/main")