Updating speaker labels
Browse files- prepare_model.py +33 -52
- speakers-log.json +15 -15
prepare_model.py
CHANGED
@@ -2,39 +2,23 @@ import json
|
|
2 |
import os
|
3 |
import subprocess
|
4 |
|
5 |
-
def generate_html_output(data, repository_path):
|
6 |
-
with open('speakers.md', 'a') as file:
|
7 |
-
for speaker_id, speaker_info in data.items():
|
8 |
-
out_path = f"{repository_path}/samples/{speaker_id}.wav"
|
9 |
-
age = speaker_info['age']
|
10 |
-
gender = speaker_info['gender']
|
11 |
-
if gender == 'F':
|
12 |
-
gender = 'female'
|
13 |
-
elif gender == "M":
|
14 |
-
gender = 'male'
|
15 |
-
accents = speaker_info['accents']
|
16 |
-
region = speaker_info['region']
|
17 |
-
|
18 |
-
file.write(f"<p>VCTK_{speaker_id}: {age} year old {gender}, {accents} accent ({region})<audio controls><source src=\"{out_path}\" type=\"audio/wav\"></audio> </p>\n")
|
19 |
-
|
20 |
-
|
21 |
# Load the data from the provided dictionary
|
22 |
data = {
|
23 |
-
"
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
}
|
39 |
|
40 |
|
@@ -56,29 +40,26 @@ speaker_indices = eval(output)
|
|
56 |
with open('speakers-log.json', 'r') as file:
|
57 |
speaker_ids = json.load(file)
|
58 |
|
59 |
-
# Create the speakers.md file
|
60 |
-
with open('speakers.md', 'w') as file:
|
61 |
-
for speaker_idx in speaker_indices:
|
62 |
-
# Remove the 'VCTK_' prefix
|
63 |
-
speaker_id = speaker_idx.replace('VCTK_', '')
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
continue
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
|
82 |
|
83 |
-
|
84 |
-
|
|
|
2 |
import os
|
3 |
import subprocess
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# Load the data from the provided dictionary
|
6 |
data = {
|
7 |
+
"VCTK_p226": {"age": 22, "gender": "M", "accents": "English", "region": "Surrey"},
|
8 |
+
"VCTK_p227": {"age": 38, "gender": "M", "accents": "English", "region": "Cumbria"},
|
9 |
+
"VCTK_p232": {"age": 23, "gender": "M", "accents": "English", "region": "Southern England"},
|
10 |
+
"VCTK_p243": {"age": 22, "gender": "M", "accents": "English", "region": "London"},
|
11 |
+
"VCTK_p254": {"age": 21, "gender": "M", "accents": "English", "region": "Surrey"},
|
12 |
+
"VCTK_p256": {"age": 24, "gender": "M", "accents": "English", "region": "Birmingham"},
|
13 |
+
"VCTK_p258": {"age": 22, "gender": "M", "accents": "English", "region": "Southern England"},
|
14 |
+
"VCTK_p259": {"age": 23, "gender": "M", "accents": "English", "region": "Nottingham"},
|
15 |
+
"VCTK_p270": {"age": 21, "gender": "M", "accents": "English", "region": "Yorkshire"},
|
16 |
+
"VCTK_p273": {"age": 23, "gender": "M", "accents": "English", "region": "Suffolk"},
|
17 |
+
"VCTK_p274": {"age": 22, "gender": "M", "accents": "English", "region": "Essex"},
|
18 |
+
"VCTK_p278": {"age": 22, "gender": "M", "accents": "English", "region": "Cheshire"},
|
19 |
+
"VCTK_p279": {"age": 23, "gender": "M", "accents": "English", "region": "Leicester"},
|
20 |
+
"VCTK_p286": {"age": 23, "gender": "M", "accents": "English", "region": "Newcastle"},
|
21 |
+
"VCTK_p287": {"age": 23, "gender": "M", "accents": "English", "region": "York"}
|
22 |
}
|
23 |
|
24 |
|
|
|
40 |
with open('speakers-log.json', 'r') as file:
|
41 |
speaker_ids = json.load(file)
|
42 |
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
for speaker_idx in speaker_indices:
|
45 |
+
# # Remove the 'VCTK_' prefix
|
46 |
+
speaker_id = speaker_idx
|
47 |
+
# speaker_id = speaker_idx.replace('VCTK_', '')
|
|
|
48 |
|
49 |
+
# Lookup the speaker ID in the loaded speaker IDs
|
50 |
+
if speaker_id in speaker_ids:
|
51 |
+
speaker_id_json = speaker_ids[speaker_id]
|
52 |
+
else:
|
53 |
+
continue
|
54 |
|
55 |
+
# # Generate the TTS command to create the audio file
|
56 |
+
# text = f"Hello, I am from {speaker_id_json['region']}. I hope that you will select my voice for your project. Thank you."
|
57 |
+
# # make samples directory if it doesn't exist
|
58 |
+
# if not os.path.exists("samples"):
|
59 |
+
# os.makedirs("samples")
|
60 |
|
61 |
+
# out_path = f"samples/{speaker_id}.wav"
|
62 |
+
# tts_command = f"tts --text \"{text}\" --model_path checkpoint_85000.pth --language_idx en --config_path config.json --speaker_idx \"VCTK_{speaker_id}\" --out_path {out_path}"
|
63 |
|
64 |
+
# Execute the TTS command
|
65 |
+
# os.system(tts_command)
|
speakers-log.json
CHANGED
@@ -1,89 +1,89 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
"age": 22,
|
4 |
"gender": "M",
|
5 |
"accents": "English",
|
6 |
"region": "Surrey"
|
7 |
},
|
8 |
-
"
|
9 |
"age": 38,
|
10 |
"gender": "M",
|
11 |
"accents": "English",
|
12 |
"region": "Cumbria"
|
13 |
},
|
14 |
-
"
|
15 |
"age": 23,
|
16 |
"gender": "M",
|
17 |
"accents": "English",
|
18 |
"region": "Southern England"
|
19 |
},
|
20 |
-
"
|
21 |
"age": 22,
|
22 |
"gender": "M",
|
23 |
"accents": "English",
|
24 |
"region": "London"
|
25 |
},
|
26 |
-
"
|
27 |
"age": 21,
|
28 |
"gender": "M",
|
29 |
"accents": "English",
|
30 |
"region": "Surrey"
|
31 |
},
|
32 |
-
"
|
33 |
"age": 24,
|
34 |
"gender": "M",
|
35 |
"accents": "English",
|
36 |
"region": "Birmingham"
|
37 |
},
|
38 |
-
"
|
39 |
"age": 22,
|
40 |
"gender": "M",
|
41 |
"accents": "English",
|
42 |
"region": "Southern England"
|
43 |
},
|
44 |
-
"
|
45 |
"age": 23,
|
46 |
"gender": "M",
|
47 |
"accents": "English",
|
48 |
"region": "Nottingham"
|
49 |
},
|
50 |
-
"
|
51 |
"age": 21,
|
52 |
"gender": "M",
|
53 |
"accents": "English",
|
54 |
"region": "Yorkshire"
|
55 |
},
|
56 |
-
"
|
57 |
"age": 23,
|
58 |
"gender": "M",
|
59 |
"accents": "English",
|
60 |
"region": "Suffolk"
|
61 |
},
|
62 |
-
"
|
63 |
"age": 22,
|
64 |
"gender": "M",
|
65 |
"accents": "English",
|
66 |
"region": "Essex"
|
67 |
},
|
68 |
-
"
|
69 |
"age": 22,
|
70 |
"gender": "M",
|
71 |
"accents": "English",
|
72 |
"region": "Cheshire"
|
73 |
},
|
74 |
-
"
|
75 |
"age": 23,
|
76 |
"gender": "M",
|
77 |
"accents": "English",
|
78 |
"region": "Leicester"
|
79 |
},
|
80 |
-
"
|
81 |
"age": 23,
|
82 |
"gender": "M",
|
83 |
"accents": "English",
|
84 |
"region": "Newcastle"
|
85 |
},
|
86 |
-
"
|
87 |
"age": 23,
|
88 |
"gender": "M",
|
89 |
"accents": "English",
|
|
|
1 |
{
|
2 |
+
"VCTK_p226": {
|
3 |
"age": 22,
|
4 |
"gender": "M",
|
5 |
"accents": "English",
|
6 |
"region": "Surrey"
|
7 |
},
|
8 |
+
"VCTK_p227": {
|
9 |
"age": 38,
|
10 |
"gender": "M",
|
11 |
"accents": "English",
|
12 |
"region": "Cumbria"
|
13 |
},
|
14 |
+
"VCTK_p232": {
|
15 |
"age": 23,
|
16 |
"gender": "M",
|
17 |
"accents": "English",
|
18 |
"region": "Southern England"
|
19 |
},
|
20 |
+
"VCTK_p243": {
|
21 |
"age": 22,
|
22 |
"gender": "M",
|
23 |
"accents": "English",
|
24 |
"region": "London"
|
25 |
},
|
26 |
+
"VCTK_p254": {
|
27 |
"age": 21,
|
28 |
"gender": "M",
|
29 |
"accents": "English",
|
30 |
"region": "Surrey"
|
31 |
},
|
32 |
+
"VCTK_p256": {
|
33 |
"age": 24,
|
34 |
"gender": "M",
|
35 |
"accents": "English",
|
36 |
"region": "Birmingham"
|
37 |
},
|
38 |
+
"VCTK_p258": {
|
39 |
"age": 22,
|
40 |
"gender": "M",
|
41 |
"accents": "English",
|
42 |
"region": "Southern England"
|
43 |
},
|
44 |
+
"VCTK_p259": {
|
45 |
"age": 23,
|
46 |
"gender": "M",
|
47 |
"accents": "English",
|
48 |
"region": "Nottingham"
|
49 |
},
|
50 |
+
"VCTK_p270": {
|
51 |
"age": 21,
|
52 |
"gender": "M",
|
53 |
"accents": "English",
|
54 |
"region": "Yorkshire"
|
55 |
},
|
56 |
+
"VCTK_p273": {
|
57 |
"age": 23,
|
58 |
"gender": "M",
|
59 |
"accents": "English",
|
60 |
"region": "Suffolk"
|
61 |
},
|
62 |
+
"VCTK_p274": {
|
63 |
"age": 22,
|
64 |
"gender": "M",
|
65 |
"accents": "English",
|
66 |
"region": "Essex"
|
67 |
},
|
68 |
+
"VCTK_p278": {
|
69 |
"age": 22,
|
70 |
"gender": "M",
|
71 |
"accents": "English",
|
72 |
"region": "Cheshire"
|
73 |
},
|
74 |
+
"VCTK_p279": {
|
75 |
"age": 23,
|
76 |
"gender": "M",
|
77 |
"accents": "English",
|
78 |
"region": "Leicester"
|
79 |
},
|
80 |
+
"VCTK_p286": {
|
81 |
"age": 23,
|
82 |
"gender": "M",
|
83 |
"accents": "English",
|
84 |
"region": "Newcastle"
|
85 |
},
|
86 |
+
"VCTK_p287": {
|
87 |
"age": 23,
|
88 |
"gender": "M",
|
89 |
"accents": "English",
|