File size: 4,413 Bytes
277d850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import ast
import json
import os
import subprocess
import sys

def generate_html_output(data, repository_path, output_path='speakers.md'):
    """Append one HTML ``<p>`` entry per speaker to a Markdown file.

    Each entry names the speaker as ``VCTK_<speaker_id>``, gives the age,
    gender, accent and region, and embeds an ``<audio>`` player whose source
    is ``{repository_path}/samples/{speaker_id}.wav``.

    Args:
        data: Mapping of speaker ID to a dict providing the keys ``age``,
            ``gender``, ``accents`` and ``region``.
        repository_path: Prefix placed in front of ``/samples/<id>.wav`` to
            form the audio source; it is used verbatim.
        output_path: File the entries are appended to. Defaults to
            ``speakers.md`` in the current working directory.

    Raises:
        KeyError: If a speaker record lacks one of the required keys. All
            entries are rendered before the file is opened, so nothing is
            written in that case.
    """
    # 'F' and 'M' are spelled out; any other value is written unchanged.
    gender_names = {'F': 'female', 'M': 'male'}

    entries = []
    for speaker_id, speaker_info in data.items():
        out_path = f"{repository_path}/samples/{speaker_id}.wav"
        age = speaker_info['age']
        gender = gender_names.get(speaker_info['gender'], speaker_info['gender'])
        accents = speaker_info['accents']
        region = speaker_info['region']

        entries.append(
            f"<p>VCTK_{speaker_id}: {age} year old {gender}, {accents} accent ({region})"
            f"<audio controls><source src=\"{out_path}\" type=\"audio/wav\"></audio> </p>\n"
        )

    # Explicit encoding so the result does not depend on the platform default.
    with open(output_path, 'a', encoding='utf-8') as file:
        file.writelines(entries)


# Speaker metadata, one row per speaker:
# (speaker ID, age, gender code, accent, region).
_SPEAKER_ROWS = (
    ("p237", 22, "M", "Scottish", "Fife"),
    ("p241", 21, "M", "Scottish", "Perth"),
    ("p245", 25, "M", "Irish", "Dublin"),
    ("p246", 22, "M", "Scottish", "Selkirk"),
    ("p247", 22, "M", "Scottish", "Argyll"),
    ("p252", 22, "M", "Scottish", "Edinburgh"),
    ("p255", 19, "M", "Scottish", "Galloway"),
    ("p260", 21, "M", "Scottish", "Orkney"),
    ("p263", 22, "M", "Scottish", "Aberdeen"),
    ("p271", 19, "M", "Scottish", "Fife"),
    ("p272", 23, "M", "Scottish", "Edinburgh"),
    ("p275", 23, "M", "Scottish", "Midlothian"),
    ("p281", 29, "M", "Scottish", "Edinburgh"),
    ("p284", 20, "M", "Scottish", "Fife"),
    ("p285", 21, "M", "Scottish", "Edinburgh"),
    ("p292", 23, "M", "NorthernIrish", "Belfast"),
    ("p298", 19, "M", "Irish", "Tipperary"),
    ("p304", 22, "M", "NorthernIrish", "Belfast"),
    ("p326", 26, "M", "Australian English", "Sydney"),
    ("p364", 23, "M", "Irish", "Donegal"),
    ("p374", 28, "M", "Australian English", "The Outback"),
)

# Field names for the values of each row, in row order.
_SPEAKER_FIELDS = ("age", "gender", "accents", "region")

# Expand the rows into the {speaker_id: {field: value}} mapping.
data = {
    speaker_id: dict(zip(_SPEAKER_FIELDS, values))
    for speaker_id, *values in _SPEAKER_ROWS
}

# Serialise the mapping as indented JSON.
json_data = json.dumps(data, indent=2)

# Keep a copy of the speaker table on disk.
with open('speakers-log.json', 'w') as log_file:
    log_file.write(json_data)

# Ask the TTS CLI which speakers the checkpoint knows. The command is an
# argument list run without a shell, so a failing `tts` raises
# CalledProcessError instead of being hidden behind a pipeline's exit status.
command = [
    "tts",
    "--model_path", "checkpoint_85000.pth",
    "--config_path", "config.json",
    "--list_speaker_idxs",
]
raw_output = subprocess.check_output(command, text=True)

# Keep only the payload: drop blank lines and lines whose first non-blank
# character is '|' or '>'.
output = "\n".join(
    line
    for line in raw_output.splitlines()
    if line.strip() and not line.lstrip().startswith(("|", ">"))
)
if not output:
    raise SystemExit("tts --list_speaker_idxs produced no speaker listing")

# The listing is parsed as a Python literal (not JSON). literal_eval accepts
# only literals, so nothing in the tool's output can be executed.
# NOTE(review): presumably a dict keyed by speaker name - confirm against the
# TTS CLI; the loop below only iterates over it.
speaker_indices = ast.literal_eval(output.strip())

# Load the speaker metadata back from speakers-log.json
with open('speakers-log.json', 'r') as file:
    speaker_ids = json.load(file)

# Start from an empty speakers.md; generate_html_output() appends to that
# file by itself, so no handle needs to stay open during the loop.
with open('speakers.md', 'w'):
    pass

speaker_prefix = 'VCTK_'
for speaker_idx in speaker_indices:
    # Only names carrying the prefix can be synthesised below, because the
    # speaker is passed to the CLI as 'VCTK_<id>'.
    if not speaker_idx.startswith(speaker_prefix):
        continue
    speaker_id = speaker_idx[len(speaker_prefix):]

    # Skip speakers for which no metadata is available
    speaker_id_json = speaker_ids.get(speaker_id)
    if speaker_id_json is None:
        continue

    text = f"Hello, I am from {speaker_id_json['region']}. I hope that you will select my voice for your project. Thank you."

    # Make the samples directory if it doesn't exist
    os.makedirs("samples", exist_ok=True)
    out_path = f"samples/{speaker_id}.wav"

    # Argument list instead of a shell string: the spoken text needs no
    # quoting and cannot be interpreted by a shell.
    tts_command = [
        "tts",
        "--text", text,
        "--model_path", "checkpoint_85000.pth",
        "--language_idx", "en",
        "--config_path", "config.json",
        "--speaker_idx", f"VCTK_{speaker_id}",
        "--out_path", out_path,
    ]

    # Best effort: a failed synthesis is reported but does not stop the run.
    result = subprocess.run(tts_command)
    if result.returncode != 0:
        print(
            f"warning: tts exited with status {result.returncode} for VCTK_{speaker_id}",
            file=sys.stderr,
        )

    # Write the speaker information to the speakers.md file
    generate_html_output({speaker_id: speaker_id_json}, "https://huggingface.co/voices/VCTK_European_English_Males/resolve/main")