Hecheng0625 committed
Commit 7ee3434 • Parent: c968fc3

Upload 61 files

This view is limited to 50 files because it contains too many changes.
Files changed (50):
  1. .gitattributes +1 -0
  2. app.py +355 -0
  3. text/__init__.py +79 -0
  4. text/cleaners.py +98 -0
  5. text/cmudict.py +145 -0
  6. text/g2p.py +38 -0
  7. text/g2p_module.py +230 -0
  8. text/lexicon/librispeech-lexicon.txt +0 -0
  9. text/lexicon/pinyin-lexicon-r.txt +4120 -0
  10. text/numbers.py +77 -0
  11. text/pinyin.py +218 -0
  12. text/symbol_table.py +292 -0
  13. text/symbols.py +34 -0
  14. text/text_token_collation.py +123 -0
  15. utils/HyperParams/__init__.py +6 -0
  16. utils/HyperParams/hps.py +43 -0
  17. utils/__init__.py +0 -0
  18. utils/audio.py +74 -0
  19. utils/audio_slicer.py +476 -0
  20. utils/cut_by_vad.py +105 -0
  21. utils/data_utils.py +588 -0
  22. utils/distribution.py +270 -0
  23. utils/dsp.py +97 -0
  24. utils/duration.py +86 -0
  25. utils/f0.py +275 -0
  26. utils/hparam.py +659 -0
  27. utils/hubert.py +155 -0
  28. utils/io.py +182 -0
  29. utils/io_optim.py +123 -0
  30. utils/mel.py +280 -0
  31. utils/mert.py +139 -0
  32. utils/mfa_prepare.py +116 -0
  33. utils/model_summary.py +74 -0
  34. utils/prompt_preparer.py +68 -0
  35. utils/ssim.py +80 -0
  36. utils/stft.py +278 -0
  37. utils/symbol_table.py +317 -0
  38. utils/tokenizer.py +150 -0
  39. utils/topk_sampling.py +89 -0
  40. utils/trainer_utils.py +16 -0
  41. utils/util.py +687 -0
  42. utils/whisper_transcription.py +122 -0
  43. utils/world.py +92 -0
  44. visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf +3 -0
  45. visualization/SingVisio/webpage/Dockerfile +23 -0
  46. visualization/SingVisio/webpage/README.md +126 -0
  47. visualization/SingVisio/webpage/config/default.json +407 -0
  48. visualization/SingVisio/webpage/img/difference_bar.jpg +0 -0
  49. visualization/SingVisio/webpage/img/syllable.png +0 -0
  50. visualization/SingVisio/webpage/index.html +390 -0
.gitattributes CHANGED
@@ -37,3 +37,4 @@ imgs/vocoder/gan/MSSBCQTD.png filter=lfs diff=lfs merge=lfs -text
  models/codec/facodec/modules/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
  models/tts/maskgct/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
  models/tts/maskgct/wav/prompt.wav filter=lfs diff=lfs merge=lfs -text
+ visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf filter=lfs diff=lfs merge=lfs -text
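
These patterns route large binaries through Git LFS; the new rule covers the SingVisio system-introduction PDF shipped in this commit. Rules of this form are typically generated with `git lfs track "visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf"`, which appends the corresponding filter=lfs line to .gitattributes.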
app.py ADDED
@@ -0,0 +1,355 @@
+ import gradio as gr
+ import torch
+ import safetensors.torch  # import the torch submodule so safetensors.torch.load_model is available
+ from huggingface_hub import hf_hub_download
+ import soundfile as sf
+ import os  # needed by inference() below for os.makedirs
+
+ import numpy as np
+ import librosa
+ from models.codec.kmeans.repcodec_model import RepCodec
+ from models.tts.maskgct.maskgct_s2a import MaskGCT_S2A
+ from models.tts.maskgct.maskgct_t2s import MaskGCT_T2S
+ from models.codec.amphion_codec.codec import CodecEncoder, CodecDecoder
+ from transformers import Wav2Vec2BertModel
+ from utils.util import load_config
+ from models.tts.maskgct.g2p.g2p_generation import g2p, chn_eng_g2p
+
+ from transformers import SeamlessM4TFeatureExtractor
+
+ processor = SeamlessM4TFeatureExtractor.from_pretrained("facebook/w2v-bert-2.0")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ def g2p_(text, language):
+     if language in ["zh", "en"]:
+         return chn_eng_g2p(text)
+     else:
+         return g2p(text, sentence=None, language=language)
+
+
+ def build_t2s_model(cfg, device):
+     t2s_model = MaskGCT_T2S(cfg=cfg)
+     t2s_model.eval()
+     t2s_model.to(device)
+     return t2s_model
+
+
+ def build_s2a_model(cfg, device):
+     soundstorm_model = MaskGCT_S2A(cfg=cfg)
+     soundstorm_model.eval()
+     soundstorm_model.to(device)
+     return soundstorm_model
+
+
+ def build_semantic_model(device):
+     semantic_model = Wav2Vec2BertModel.from_pretrained("facebook/w2v-bert-2.0")
+     semantic_model.eval()
+     semantic_model.to(device)
+     stat_mean_var = torch.load("./models/tts/maskgct/ckpt/wav2vec2bert_stats.pt")
+     semantic_mean = stat_mean_var["mean"]
+     semantic_std = torch.sqrt(stat_mean_var["var"])
+     semantic_mean = semantic_mean.to(device)
+     semantic_std = semantic_std.to(device)
+     return semantic_model, semantic_mean, semantic_std
+
+
+ def build_semantic_codec(cfg, device):
+     semantic_codec = RepCodec(cfg=cfg)
+     semantic_codec.eval()
+     semantic_codec.to(device)
+     return semantic_codec
+
+
+ def build_acoustic_codec(cfg, device):
+     codec_encoder = CodecEncoder(cfg=cfg.encoder)
+     codec_decoder = CodecDecoder(cfg=cfg.decoder)
+     codec_encoder.eval()
+     codec_decoder.eval()
+     codec_encoder.to(device)
+     codec_decoder.to(device)
+     return codec_encoder, codec_decoder
+
+
+ @torch.no_grad()
+ def extract_features(speech, processor):
+     inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
+     input_features = inputs["input_features"][0]
+     attention_mask = inputs["attention_mask"][0]
+     return input_features, attention_mask
+
+
+ @torch.no_grad()
+ def extract_semantic_code(semantic_mean, semantic_std, input_features, attention_mask):
+     vq_emb = semantic_model(
+         input_features=input_features,
+         attention_mask=attention_mask,
+         output_hidden_states=True,
+     )
+     feat = vq_emb.hidden_states[17]  # (B, T, C)
+     feat = (feat - semantic_mean.to(feat)) / semantic_std.to(feat)
+
+     semantic_code, rec_feat = semantic_codec.quantize(feat)  # (B, T)
+     return semantic_code, rec_feat
+
+
+ @torch.no_grad()
+ def extract_acoustic_code(speech):
+     vq_emb = codec_encoder(speech.unsqueeze(1))
+     _, vq, _, _, _ = codec_decoder.quantizer(vq_emb)
+     acoustic_code = vq.permute(1, 2, 0)
+     return acoustic_code
+
+
+ @torch.no_grad()
+ def text2semantic(
+     device,
+     prompt_speech,
+     prompt_text,
+     prompt_language,
+     target_text,
+     target_language,
+     target_len=None,
+     n_timesteps=50,
+     cfg=2.5,
+     rescale_cfg=0.75,
+ ):
+     prompt_phone_id = g2p_(prompt_text, prompt_language)[1]
+
+     target_phone_id = g2p_(target_text, target_language)[1]
+
+     if target_len is None:
+         target_len = int(
+             (len(prompt_speech) * len(target_phone_id) / len(prompt_phone_id))
+             / 16000
+             * 50
+         )
+     else:
+         target_len = int(target_len * 50)
+
+     prompt_phone_id = torch.tensor(prompt_phone_id, dtype=torch.long).to(device)
+     target_phone_id = torch.tensor(target_phone_id, dtype=torch.long).to(device)
+
+     phone_id = torch.cat([prompt_phone_id, target_phone_id])
+
+     input_features, attention_mask = extract_features(prompt_speech, processor)
+     input_features = input_features.unsqueeze(0).to(device)
+     attention_mask = attention_mask.unsqueeze(0).to(device)
+     semantic_code, _ = extract_semantic_code(
+         semantic_mean, semantic_std, input_features, attention_mask
+     )
+
+     predict_semantic = t2s_model.reverse_diffusion(
+         semantic_code[:, :],
+         target_len,
+         phone_id.unsqueeze(0),
+         n_timesteps=n_timesteps,
+         cfg=cfg,
+         rescale_cfg=rescale_cfg,
+     )
+
+     combine_semantic_code = torch.cat([semantic_code[:, :], predict_semantic], dim=-1)
+     prompt_semantic_code = semantic_code
+
+     return combine_semantic_code, prompt_semantic_code
+
+
+ @torch.no_grad()
+ def semantic2acoustic(
+     device,
+     combine_semantic_code,
+     acoustic_code,
+     n_timesteps=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+     cfg=2.5,
+     rescale_cfg=0.75,
+ ):
+     semantic_code = combine_semantic_code
+
+     cond = s2a_model_1layer.cond_emb(semantic_code)
+     prompt = acoustic_code[:, :, :]
+     predict_1layer = s2a_model_1layer.reverse_diffusion(
+         cond=cond,
+         prompt=prompt,
+         temp=1.5,
+         filter_thres=0.98,
+         n_timesteps=n_timesteps[:1],
+         cfg=cfg,
+         rescale_cfg=rescale_cfg,
+     )
+
+     cond = s2a_model_full.cond_emb(semantic_code)
+     prompt = acoustic_code[:, :, :]
+     predict_full = s2a_model_full.reverse_diffusion(
+         cond=cond,
+         prompt=prompt,
+         temp=1.5,
+         filter_thres=0.98,
+         n_timesteps=n_timesteps,
+         cfg=cfg,
+         rescale_cfg=rescale_cfg,
+         gt_code=predict_1layer,
+     )
+
+     vq_emb = codec_decoder.vq2emb(predict_full.permute(2, 0, 1), n_quantizers=12)
+     recovered_audio = codec_decoder(vq_emb)
+     prompt_vq_emb = codec_decoder.vq2emb(prompt.permute(2, 0, 1), n_quantizers=12)
+     recovered_prompt_audio = codec_decoder(prompt_vq_emb)
+     recovered_prompt_audio = recovered_prompt_audio[0][0].cpu().numpy()
+     recovered_audio = recovered_audio[0][0].cpu().numpy()
+     combine_audio = np.concatenate([recovered_prompt_audio, recovered_audio])
+
+     return combine_audio, recovered_audio
+
+
+ # Load the model and checkpoints
+ def load_models():
+     cfg_path = "./models/tts/maskgct/config/maskgct.json"
+
+     cfg = load_config(cfg_path)
+     semantic_model, semantic_mean, semantic_std = build_semantic_model(device)
+     semantic_codec = build_semantic_codec(cfg.model.semantic_codec, device)
+     codec_encoder, codec_decoder = build_acoustic_codec(
+         cfg.model.acoustic_codec, device
+     )
+     t2s_model = build_t2s_model(cfg.model.t2s_model, device)
+     s2a_model_1layer = build_s2a_model(cfg.model.s2a_model.s2a_1layer, device)
+     s2a_model_full = build_s2a_model(cfg.model.s2a_model.s2a_full, device)
+
+     # Download checkpoints
+     semantic_code_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="semantic_codec/model.safetensors"
+     )
+     codec_encoder_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="acoustic_codec/model.safetensors"
+     )
+     codec_decoder_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="acoustic_codec/model_1.safetensors"
+     )
+     t2s_model_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="t2s_model/model.safetensors"
+     )
+     s2a_1layer_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="s2a_model/s2a_model_1layer/model.safetensors"
+     )
+     s2a_full_ckpt = hf_hub_download(
+         "amphion/MaskGCT", filename="s2a_model/s2a_model_full/model.safetensors"
+     )
+
+     safetensors.torch.load_model(semantic_codec, semantic_code_ckpt)
+     safetensors.torch.load_model(codec_encoder, codec_encoder_ckpt)
+     safetensors.torch.load_model(codec_decoder, codec_decoder_ckpt)
+     safetensors.torch.load_model(t2s_model, t2s_model_ckpt)
+     safetensors.torch.load_model(s2a_model_1layer, s2a_1layer_ckpt)
+     safetensors.torch.load_model(s2a_model_full, s2a_full_ckpt)
+
+     return (
+         semantic_model,
+         semantic_mean,
+         semantic_std,
+         semantic_codec,
+         codec_encoder,
+         codec_decoder,
+         t2s_model,
+         s2a_model_1layer,
+         s2a_model_full,
+     )
+
+
+ # The inference functions above reference these as module-level globals,
+ # so build and load everything once at startup.
+ (
+     semantic_model,
+     semantic_mean,
+     semantic_std,
+     semantic_codec,
+     codec_encoder,
+     codec_decoder,
+     t2s_model,
+     s2a_model_1layer,
+     s2a_model_full,
+ ) = load_models()
+
+
+ @torch.no_grad()
+ def maskgct_inference(
+     prompt_speech_path,
+     prompt_text,
+     target_text,
+     language="en",
+     target_language="en",
+     target_len=None,
+     n_timesteps=25,
+     cfg=2.5,
+     rescale_cfg=0.75,
+     n_timesteps_s2a=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+     cfg_s2a=2.5,
+     rescale_cfg_s2a=0.75,
+     device=torch.device("cuda:5"),  # callers below override this default
+ ):
+     speech_16k = librosa.load(prompt_speech_path, sr=16000)[0]
+     speech = librosa.load(prompt_speech_path, sr=24000)[0]
+
+     combine_semantic_code, _ = text2semantic(
+         device,
+         speech_16k,
+         prompt_text,
+         language,
+         target_text,
+         target_language,
+         target_len,
+         n_timesteps,
+         cfg,
+         rescale_cfg,
+     )
+     acoustic_code = extract_acoustic_code(torch.tensor(speech).unsqueeze(0).to(device))
+     _, recovered_audio = semantic2acoustic(
+         device,
+         combine_semantic_code,
+         acoustic_code,
+         n_timesteps=n_timesteps_s2a,
+         cfg=cfg_s2a,
+         rescale_cfg=rescale_cfg_s2a,
+     )
+
+     return recovered_audio
+
+
+ @torch.no_grad()
+ def inference(
+     prompt_wav,
+     prompt_text,
+     target_text,
+     target_len,
+     n_timesteps,
+     language,
+     target_language,
+ ):
+     save_path = "./output/output.wav"
+     os.makedirs("./output", exist_ok=True)
+     recovered_audio = maskgct_inference(
+         prompt_wav,
+         prompt_text,
+         target_text,
+         language,
+         target_language,
+         target_len=target_len,
+         n_timesteps=int(n_timesteps),
+         device=device,
+     )
+     sf.write(save_path, recovered_audio, 24000)
+     return save_path
+
+
+ # Language list
+ language_list = ["en", "zh", "ja", "ko", "fr", "de"]
+
+ # Gradio interface
+ iface = gr.Interface(
+     fn=inference,
+     inputs=[
+         gr.Audio(label="Upload Prompt Wav", type="filepath"),
+         gr.Textbox(label="Prompt Text"),
+         gr.Textbox(label="Target Text"),
+         gr.Number(
+             label="Target Duration (in seconds)", value=None
+         ),  # Removed 'optional=True'
+         gr.Slider(
+             label="Number of Timesteps", minimum=15, maximum=100, value=25, step=1
+         ),
+         gr.Dropdown(label="Language", choices=language_list, value="en"),
+         gr.Dropdown(label="Target Language", choices=language_list, value="en"),
+     ],
+     outputs=gr.Audio(label="Generated Audio"),
+     title="MaskGCT TTS Demo",
+     description="Generate speech from text using the MaskGCT model.",
+ )
+
+ # Launch the interface
+ iface.launch(allowed_paths=["./output"])
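
For reference, the same pipeline can be driven without the Gradio UI. A minimal sketch, assuming the checkpoints above have downloaded and the models are loaded as in load_models(); the texts are placeholders, and the prompt wav is the one tracked in .gitattributes:

    # Hypothetical direct call into the pipeline defined in app.py
    # (texts are placeholders; models must already be loaded).
    audio = maskgct_inference(
        prompt_speech_path="./models/tts/maskgct/wav/prompt.wav",
        prompt_text="This is the transcript of the prompt audio.",
        target_text="MaskGCT generates speech in the prompt speaker's voice.",
        language="en",
        target_language="en",
        n_timesteps=25,
        device=device,
    )
    sf.write("./output/direct_demo.wav", audio, 24000)  # the codec decodes at 24 kHz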
text/__init__.py ADDED
@@ -0,0 +1,79 @@
+ # Copyright (c) 2023 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """ This code is modified from https://github.com/keithito/tacotron """
+ import re
+ from text import cleaners
+ from text.symbols import symbols
+
+
+ # Mappings from symbol to numeric ID and vice versa:
+ _symbol_to_id = {s: i for i, s in enumerate(symbols)}
+ _id_to_symbol = {i: s for i, s in enumerate(symbols)}
+
+ # Regular expression matching text enclosed in curly braces:
+ _curly_re = re.compile(r"(.*?)\{(.+?)\}(.*)")
+
+
+ def text_to_sequence(text, cleaner_names):
+     """Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
+
+     The text can optionally have ARPAbet sequences enclosed in curly braces embedded
+     in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
+
+     Args:
+         text: string to convert to a sequence
+         cleaner_names: names of the cleaner functions to run the text through
+
+     Returns:
+         List of integers corresponding to the symbols in the text
+     """
+     sequence = []
+
+     # Check for curly braces and treat their contents as ARPAbet:
+     while len(text):
+         m = _curly_re.match(text)
+
+         if not m:
+             sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
+             break
+         sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
+         sequence += _arpabet_to_sequence(m.group(2))
+         text = m.group(3)
+     return sequence
+
+
+ def sequence_to_text(sequence):
+     """Converts a sequence of IDs back to a string"""
+     result = ""
+     for symbol_id in sequence:
+         if symbol_id in _id_to_symbol:
+             s = _id_to_symbol[symbol_id]
+             # Enclose ARPAbet back in curly braces:
+             if len(s) > 1 and s[0] == "@":
+                 s = "{%s}" % s[1:]
+             result += s
+     return result.replace("}{", " ")
+
+
+ def _clean_text(text, cleaner_names):
+     for name in cleaner_names:
+         cleaner = getattr(cleaners, name)
+         if not cleaner:
+             raise Exception("Unknown cleaner: %s" % name)
+         text = cleaner(text)
+     return text
+
+
+ def _symbols_to_sequence(symbols):
+     return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
+
+
+ def _arpabet_to_sequence(text):
+     return _symbols_to_sequence(["@" + s for s in text.split()])
+
+
+ def _should_keep_symbol(s):
+     return s in _symbol_to_id and s != "_" and s != "~"
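
A quick usage sketch of the two entry points, reusing the ARPAbet example from the docstring (assumes the symbol table and cleaners shipped in this package):

    from text import text_to_sequence, sequence_to_text

    ids = text_to_sequence("Turn left on {HH AW1 S S T AH0 N} Street.", ["english_cleaners"])
    print(ids)                    # list of integer symbol IDs
    print(sequence_to_text(ids))  # round-trip; ARPAbet spans come back in braces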
text/cleaners.py ADDED
@@ -0,0 +1,98 @@
+ # Copyright (c) 2023 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """ This code is modified from https://github.com/keithito/tacotron """
+
+ """
+ Cleaners are transformations that run over the input text at both training and eval time.
+
+ Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
+ hyperparameter. Some cleaners are English-specific. You'll typically want to use:
+     1. "english_cleaners" for English text
+     2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
+        the Unidecode library (https://pypi.python.org/pypi/Unidecode)
+     3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
+        the symbols in symbols.py to match your data).
+ """
+
+
+ # Regular expression matching whitespace:
+ import re
+ from unidecode import unidecode
+ from .numbers import normalize_numbers
+
+ _whitespace_re = re.compile(r"\s+")
+
+ # List of (regular expression, replacement) pairs for abbreviations:
+ _abbreviations = [
+     (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+     for x in [
+         ("mrs", "misess"),
+         ("mr", "mister"),
+         ("dr", "doctor"),
+         ("st", "saint"),
+         ("co", "company"),
+         ("jr", "junior"),
+         ("maj", "major"),
+         ("gen", "general"),
+         ("drs", "doctors"),
+         ("rev", "reverend"),
+         ("lt", "lieutenant"),
+         ("hon", "honorable"),
+         ("sgt", "sergeant"),
+         ("capt", "captain"),
+         ("esq", "esquire"),
+         ("ltd", "limited"),
+         ("col", "colonel"),
+         ("ft", "fort"),
+     ]
+ ]
+
+
+ def expand_abbreviations(text):
+     for regex, replacement in _abbreviations:
+         text = re.sub(regex, replacement, text)
+     return text
+
+
+ def expand_numbers(text):
+     return normalize_numbers(text)
+
+
+ def lowercase(text):
+     return text.lower()
+
+
+ def collapse_whitespace(text):
+     return re.sub(_whitespace_re, " ", text)
+
+
+ def convert_to_ascii(text):
+     return unidecode(text)
+
+
+ def basic_cleaners(text):
+     """Basic pipeline that lowercases and collapses whitespace without transliteration."""
+     text = lowercase(text)
+     text = collapse_whitespace(text)
+     return text
+
+
+ def transliteration_cleaners(text):
+     """Pipeline for non-English text that transliterates to ASCII."""
+     text = convert_to_ascii(text)
+     text = lowercase(text)
+     text = collapse_whitespace(text)
+     return text
+
+
+ def english_cleaners(text):
+     """Pipeline for English text, including number and abbreviation expansion."""
+     text = convert_to_ascii(text)
+     text = lowercase(text)
+     text = expand_numbers(text)
+     text = expand_abbreviations(text)
+     text = collapse_whitespace(text)
+     return text
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """ This code is modified from https://github.com/keithito/tacotron """
7
+
8
+ import re
9
+
10
+
11
+ valid_symbols = [
12
+ "AA",
13
+ "AA0",
14
+ "AA1",
15
+ "AA2",
16
+ "AE",
17
+ "AE0",
18
+ "AE1",
19
+ "AE2",
20
+ "AH",
21
+ "AH0",
22
+ "AH1",
23
+ "AH2",
24
+ "AO",
25
+ "AO0",
26
+ "AO1",
27
+ "AO2",
28
+ "AW",
29
+ "AW0",
30
+ "AW1",
31
+ "AW2",
32
+ "AY",
33
+ "AY0",
34
+ "AY1",
35
+ "AY2",
36
+ "B",
37
+ "CH",
38
+ "D",
39
+ "DH",
40
+ "EH",
41
+ "EH0",
42
+ "EH1",
43
+ "EH2",
44
+ "ER",
45
+ "ER0",
46
+ "ER1",
47
+ "ER2",
48
+ "EY",
49
+ "EY0",
50
+ "EY1",
51
+ "EY2",
52
+ "F",
53
+ "G",
54
+ "HH",
55
+ "IH",
56
+ "IH0",
57
+ "IH1",
58
+ "IH2",
59
+ "IY",
60
+ "IY0",
61
+ "IY1",
62
+ "IY2",
63
+ "JH",
64
+ "K",
65
+ "L",
66
+ "M",
67
+ "N",
68
+ "NG",
69
+ "OW",
70
+ "OW0",
71
+ "OW1",
72
+ "OW2",
73
+ "OY",
74
+ "OY0",
75
+ "OY1",
76
+ "OY2",
77
+ "P",
78
+ "R",
79
+ "S",
80
+ "SH",
81
+ "T",
82
+ "TH",
83
+ "UH",
84
+ "UH0",
85
+ "UH1",
86
+ "UH2",
87
+ "UW",
88
+ "UW0",
89
+ "UW1",
90
+ "UW2",
91
+ "V",
92
+ "W",
93
+ "Y",
94
+ "Z",
95
+ "ZH",
96
+ ]
97
+
98
+ _valid_symbol_set = set(valid_symbols)
99
+
100
+
101
+ class CMUDict:
102
+ """Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict"""
103
+
104
+ def __init__(self, file_or_path, keep_ambiguous=True):
105
+ if isinstance(file_or_path, str):
106
+ with open(file_or_path, encoding="latin-1") as f:
107
+ entries = _parse_cmudict(f)
108
+ else:
109
+ entries = _parse_cmudict(file_or_path)
110
+ if not keep_ambiguous:
111
+ entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
112
+ self._entries = entries
113
+
114
+ def __len__(self):
115
+ return len(self._entries)
116
+
117
+ def lookup(self, word):
118
+ """Returns list of ARPAbet pronunciations of the given word."""
119
+ return self._entries.get(word.upper())
120
+
121
+
122
+ _alt_re = re.compile(r"\([0-9]+\)")
123
+
124
+
125
+ def _parse_cmudict(file):
126
+ cmudict = {}
127
+ for line in file:
128
+ if len(line) and (line[0] >= "A" and line[0] <= "Z" or line[0] == "'"):
129
+ parts = line.split(" ")
130
+ word = re.sub(_alt_re, "", parts[0])
131
+ pronunciation = _get_pronunciation(parts[1])
132
+ if pronunciation:
133
+ if word in cmudict:
134
+ cmudict[word].append(pronunciation)
135
+ else:
136
+ cmudict[word] = [pronunciation]
137
+ return cmudict
138
+
139
+
140
+ def _get_pronunciation(s):
141
+ parts = s.strip().split(" ")
142
+ for part in parts:
143
+ if part not in _valid_symbol_set:
144
+ return None
145
+ return " ".join(parts)
text/g2p.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+ from g2p_en import G2p
8
+ from string import punctuation
9
+
10
+
11
+ def read_lexicon(lex_path):
12
+ lexicon = {}
13
+ with open(lex_path) as f:
14
+ for line in f:
15
+ temp = re.split(r"\s+", line.strip("\n"))
16
+ word = temp[0]
17
+ phones = temp[1:]
18
+ if word.lower() not in lexicon:
19
+ lexicon[word.lower()] = phones
20
+ return lexicon
21
+
22
+
23
+ def preprocess_english(text, lexicon):
24
+ text = text.rstrip(punctuation)
25
+
26
+ g2p = G2p()
27
+ phones = []
28
+ words = re.split(r"([,;.\-\?\!\s+])", text)
29
+ for w in words:
30
+ if w.lower() in lexicon:
31
+ phones += lexicon[w.lower()]
32
+ else:
33
+ phones += list(filter(lambda p: p != " ", g2p(w)))
34
+ phones = "}{".join(phones)
35
+ phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
36
+ phones = phones.replace("}{", " ")
37
+
38
+ return phones
text/g2p_module.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ import re
8
+ from g2p_en import G2p
9
+ from string import punctuation
10
+ from typing import Any, Dict, List, Optional, Pattern, Union
11
+
12
+ from phonemizer.backend import EspeakBackend
13
+ from phonemizer.backend.espeak.language_switch import LanguageSwitch
14
+ from phonemizer.backend.espeak.words_mismatch import WordMismatch
15
+ from phonemizer.punctuation import Punctuation
16
+ from phonemizer.separator import Separator
17
+
18
+ try:
19
+ from pypinyin import Style, pinyin
20
+ from pypinyin.style._utils import get_finals, get_initials
21
+ except Exception:
22
+ pass
23
+
24
+
25
+ # This code is modified from
26
+ # https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/tokenizer.py
27
+
28
+
29
+ class PypinyinBackend:
30
+ """PypinyinBackend for Chinese. Most codes is referenced from espnet.
31
+ There are two types pinyin or initials_finals, one is
32
+ just like "ni1 hao3", the other is like "n i1 h ao3".
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ backend="initials_finals",
38
+ punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
39
+ ) -> None:
40
+ self.backend = backend
41
+ self.punctuation_marks = punctuation_marks
42
+
43
+ def phonemize(
44
+ self, text: List[str], separator: Separator, strip=True, njobs=1
45
+ ) -> List[str]:
46
+ assert isinstance(text, List)
47
+ phonemized = []
48
+ for _text in text:
49
+ _text = re.sub(" +", " ", _text.strip())
50
+ _text = _text.replace(" ", separator.word)
51
+ phones = []
52
+ if self.backend == "pypinyin":
53
+ for n, py in enumerate(
54
+ pinyin(_text, style=Style.TONE3, neutral_tone_with_five=True)
55
+ ):
56
+ if all([c in self.punctuation_marks for c in py[0]]):
57
+ if len(phones):
58
+ assert phones[-1] == separator.syllable
59
+ phones.pop(-1)
60
+
61
+ phones.extend(list(py[0]))
62
+ else:
63
+ phones.extend([py[0], separator.syllable])
64
+ elif self.backend == "pypinyin_initials_finals":
65
+ for n, py in enumerate(
66
+ pinyin(_text, style=Style.TONE3, neutral_tone_with_five=True)
67
+ ):
68
+ if all([c in self.punctuation_marks for c in py[0]]):
69
+ if len(phones):
70
+ assert phones[-1] == separator.syllable
71
+ phones.pop(-1)
72
+ phones.extend(list(py[0]))
73
+ else:
74
+ if py[0][-1].isalnum():
75
+ initial = get_initials(py[0], strict=False)
76
+ if py[0][-1].isdigit():
77
+ final = get_finals(py[0][:-1], strict=False) + py[0][-1]
78
+ else:
79
+ final = get_finals(py[0], strict=False)
80
+ phones.extend(
81
+ [
82
+ initial,
83
+ separator.phone,
84
+ final,
85
+ separator.syllable,
86
+ ]
87
+ )
88
+ else:
89
+ assert ValueError
90
+ else:
91
+ raise NotImplementedError
92
+ phonemized.append(
93
+ "".join(phones).rstrip(f"{separator.word}{separator.syllable}")
94
+ )
95
+ return phonemized
96
+
97
+
98
+ class G2PModule:
99
+ """Phonemize Text."""
100
+
101
+ # We support espeak to extract IPA (International Phonetic Alphabet), which supports 100 languages,
102
+ # https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md
103
+
104
+ def __init__(
105
+ self,
106
+ language="en-us",
107
+ backend="espeak",
108
+ separator=Separator(word="_", syllable="-", phone="|"),
109
+ preserve_punctuation=True,
110
+ punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
111
+ with_stress: bool = False,
112
+ tie: Union[bool, str] = False,
113
+ language_switch: LanguageSwitch = "keep-flags",
114
+ words_mismatch: WordMismatch = "ignore",
115
+ ) -> None:
116
+ self.separator = separator
117
+ self.backend = self._initialize_backend(
118
+ backend,
119
+ language,
120
+ punctuation_marks,
121
+ preserve_punctuation,
122
+ with_stress,
123
+ tie,
124
+ language_switch,
125
+ words_mismatch,
126
+ )
127
+
128
+ def _initialize_backend(
129
+ self,
130
+ backend,
131
+ language,
132
+ punctuation_marks,
133
+ preserve_punctuation,
134
+ with_stress,
135
+ tie,
136
+ language_switch,
137
+ words_mismatch,
138
+ ):
139
+ if backend == "espeak":
140
+ return EspeakBackend(
141
+ language,
142
+ punctuation_marks=punctuation_marks,
143
+ preserve_punctuation=preserve_punctuation,
144
+ with_stress=with_stress,
145
+ tie=tie,
146
+ language_switch=language_switch,
147
+ words_mismatch=words_mismatch,
148
+ )
149
+ elif backend in ["pypinyin", "pypinyin_initials_finals"]:
150
+ if language != "cmn":
151
+ raise ValueError(
152
+ f"{language} is not supported for pypinyin and pypinyin_initials_finals."
153
+ )
154
+ return PypinyinBackend(
155
+ backend=backend,
156
+ punctuation_marks=punctuation_marks + self.separator.word,
157
+ )
158
+ else:
159
+ raise NotImplementedError(f"{backend}")
160
+
161
+ def to_list(self, phonemized: str) -> List[str]:
162
+ fields = []
163
+ for word in phonemized.split(self.separator.word):
164
+ pp = re.findall(r"\w+|[^\w\s]", word, re.UNICODE)
165
+ fields.extend(
166
+ [p for p in pp if p != self.separator.phone] + [self.separator.word]
167
+ )
168
+ assert len("".join(fields[:-1])) == len(phonemized) - phonemized.count(
169
+ self.separator.phone
170
+ )
171
+ return fields[:-1]
172
+
173
+ def phonemization(self, text, strip=True) -> List[List[str]]:
174
+ if isinstance(text, str):
175
+ text = [text]
176
+
177
+ phonemized = self.backend.phonemize(
178
+ text, separator=self.separator, strip=strip, njobs=1
179
+ )
180
+ phonemes = [self.to_list(p) for p in phonemized]
181
+ return phonemes
182
+
183
+ def g2p_conversion(self, text: str) -> List[str]:
184
+ phonemes = self.phonemization([text.strip()])
185
+ return phonemes[0]
186
+
187
+
188
+ class LexiconModule:
189
+ def __init__(self, lex_path, language="en-us") -> None:
190
+ # todo: check lexicon derivation, merge with G2PModule?
191
+ lexicon = {}
192
+ with open(lex_path) as f:
193
+ for line in f:
194
+ temp = re.split(r"\s+", line.strip("\n"))
195
+ word = temp[0]
196
+ phones = temp[1:]
197
+ if word.lower() not in lexicon:
198
+ lexicon[word.lower()] = phones
199
+ self.lexicon = lexicon
200
+ self.language = language
201
+ self.lang2g2p = {"en-us": G2p()}
202
+
203
+ def g2p_conversion(self, text):
204
+ phone = None
205
+
206
+ # todo: preprocess with other languages
207
+ if self.language == "en-us":
208
+ phone = self.preprocess_english(text)
209
+ else:
210
+ print("No support to", self.language)
211
+ raise
212
+
213
+ return phone
214
+
215
+ def preprocess_english(self, text):
216
+ text = text.rstrip(punctuation)
217
+
218
+ g2p = self.lang2g2p["en-us"]
219
+ phones = []
220
+ words = re.split(r"([,;.\-\?\!\s+])", text)
221
+ for w in words:
222
+ if w.lower() in self.lexicon:
223
+ phones += self.lexicon[w.lower()]
224
+ else:
225
+ phones += list(filter(lambda p: p != " ", g2p(w)))
226
+ phones = "}{".join(phones)
227
+ phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
228
+ phones = phones.replace("}{", " ")
229
+
230
+ return phones
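
Usage sketch for the espeak-backed phonemizer (requires the phonemizer package and an espeak-ng install; output is a list of IPA symbols and separators):

    from text.g2p_module import G2PModule

    g2p_module = G2PModule(language="en-us", backend="espeak")
    print(g2p_module.g2p_conversion("Hello world"))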
text/lexicon/librispeech-lexicon.txt ADDED
The diff for this file is too large to render.
text/lexicon/pinyin-lexicon-r.txt ADDED
@@ -0,0 +1,4120 @@
+ a1 a1
+ a2 a2
+ a3 a3
+ a4 a4
+ a5 a5
+ ai1 ai1
+ ai2 ai2
+ ai3 ai3
+ ai4 ai4
+ ai5 ai5
+ an1 an1
+ an2 an2
+ an3 an3
+ an4 an4
+ an5 an5
+ ang1 ang1
+ ang2 ang2
+ ang3 ang3
+ ang4 ang4
+ ang5 ang5
+ ao1 ao1
+ ao2 ao2
+ ao3 ao3
+ ao4 ao4
+ ao5 ao5
+ ba1 b a1
+ ba2 b a2
+ ba3 b a3
+ ba4 b a4
+ ba5 b a5
+ bai1 b ai1
+ bai2 b ai2
+ bai3 b ai3
+ bai4 b ai4
+ bai5 b ai5
+ ban1 b an1
+ ban2 b an2
+ ban3 b an3
+ ban4 b an4
+ ban5 b an5
+ bang1 b ang1
+ bang2 b ang2
+ bang3 b ang3
+ bang4 b ang4
+ bang5 b ang5
+ bao1 b ao1
+ bao2 b ao2
+ bao3 b ao3
+ bao4 b ao4
+ bao5 b ao5
+ bei1 b ei1
+ bei2 b ei2
+ bei3 b ei3
+ bei4 b ei4
+ bei5 b ei5
+ ben1 b en1
+ ben2 b en2
+ ben3 b en3
+ ben4 b en4
+ ben5 b en5
+ beng1 b eng1
+ beng2 b eng2
+ beng3 b eng3
+ beng4 b eng4
+ beng5 b eng5
+ bi1 b i1
+ bi2 b i2
+ bi3 b i3
+ bi4 b i4
+ bi5 b i5
+ bian1 b ian1
+ bian2 b ian2
+ bian3 b ian3
+ bian4 b ian4
+ bian5 b ian5
+ biao1 b iao1
+ biao2 b iao2
+ biao3 b iao3
+ biao4 b iao4
+ biao5 b iao5
+ bie1 b ie1
+ bie2 b ie2
+ bie3 b ie3
+ bie4 b ie4
+ bie5 b ie5
+ bin1 b in1
+ bin2 b in2
+ bin3 b in3
+ bin4 b in4
+ bin5 b in5
+ bing1 b ing1
+ bing2 b ing2
+ bing3 b ing3
+ bing4 b ing4
+ bing5 b ing5
+ bo1 b o1
+ bo2 b o2
+ bo3 b o3
+ bo4 b o4
+ bo5 b o5
+ bu1 b u1
+ bu2 b u2
+ bu3 b u3
+ bu4 b u4
+ bu5 b u5
+ ca1 c a1
+ ca2 c a2
+ ca3 c a3
+ ca4 c a4
+ ca5 c a5
+ cai1 c ai1
+ cai2 c ai2
+ cai3 c ai3
+ cai4 c ai4
+ cai5 c ai5
+ can1 c an1
+ can2 c an2
+ can3 c an3
+ can4 c an4
+ can5 c an5
+ cang1 c ang1
+ cang2 c ang2
+ cang3 c ang3
+ cang4 c ang4
+ cang5 c ang5
+ cao1 c ao1
+ cao2 c ao2
+ cao3 c ao3
+ cao4 c ao4
+ cao5 c ao5
+ ce1 c e1
+ ce2 c e2
+ ce3 c e3
+ ce4 c e4
+ ce5 c e5
+ cen1 c en1
+ cen2 c en2
+ cen3 c en3
+ cen4 c en4
+ cen5 c en5
+ ceng1 c eng1
+ ceng2 c eng2
+ ceng3 c eng3
+ ceng4 c eng4
+ ceng5 c eng5
+ cha1 ch a1
+ cha2 ch a2
+ cha3 ch a3
+ cha4 ch a4
+ cha5 ch a5
+ chai1 ch ai1
+ chai2 ch ai2
+ chai3 ch ai3
+ chai4 ch ai4
+ chai5 ch ai5
+ chan1 ch an1
+ chan2 ch an2
+ chan3 ch an3
+ chan4 ch an4
+ chan5 ch an5
+ chang1 ch ang1
+ chang2 ch ang2
+ chang3 ch ang3
+ chang4 ch ang4
+ chang5 ch ang5
+ chao1 ch ao1
+ chao2 ch ao2
+ chao3 ch ao3
+ chao4 ch ao4
+ chao5 ch ao5
+ che1 ch e1
+ che2 ch e2
+ che3 ch e3
+ che4 ch e4
+ che5 ch e5
+ chen1 ch en1
+ chen2 ch en2
+ chen3 ch en3
+ chen4 ch en4
+ chen5 ch en5
+ cheng1 ch eng1
+ cheng2 ch eng2
+ cheng3 ch eng3
+ cheng4 ch eng4
+ cheng5 ch eng5
+ chi1 ch iii1
+ chi2 ch iii2
+ chi3 ch iii3
+ chi4 ch iii4
+ chi5 ch iii5
+ chong1 ch ong1
+ chong2 ch ong2
+ chong3 ch ong3
+ chong4 ch ong4
+ chong5 ch ong5
+ chou1 ch ou1
+ chou2 ch ou2
+ chou3 ch ou3
+ chou4 ch ou4
+ chou5 ch ou5
+ chu1 ch u1
+ chu2 ch u2
+ chu3 ch u3
+ chu4 ch u4
+ chu5 ch u5
+ chuai1 ch uai1
+ chuai2 ch uai2
+ chuai3 ch uai3
+ chuai4 ch uai4
+ chuai5 ch uai5
+ chuan1 ch uan1
+ chuan2 ch uan2
+ chuan3 ch uan3
+ chuan4 ch uan4
+ chuan5 ch uan5
+ chuang1 ch uang1
+ chuang2 ch uang2
+ chuang3 ch uang3
+ chuang4 ch uang4
+ chuang5 ch uang5
+ chui1 ch uei1
+ chui2 ch uei2
+ chui3 ch uei3
+ chui4 ch uei4
+ chui5 ch uei5
+ chun1 ch uen1
+ chun2 ch uen2
+ chun3 ch uen3
+ chun4 ch uen4
+ chun5 ch uen5
+ chuo1 ch uo1
+ chuo2 ch uo2
+ chuo3 ch uo3
+ chuo4 ch uo4
+ chuo5 ch uo5
+ ci1 c ii1
+ ci2 c ii2
+ ci3 c ii3
+ ci4 c ii4
+ ci5 c ii5
+ cong1 c ong1
+ cong2 c ong2
+ cong3 c ong3
+ cong4 c ong4
+ cong5 c ong5
+ cou1 c ou1
+ cou2 c ou2
+ cou3 c ou3
+ cou4 c ou4
+ cou5 c ou5
+ cu1 c u1
+ cu2 c u2
+ cu3 c u3
+ cu4 c u4
+ cu5 c u5
+ cuan1 c uan1
+ cuan2 c uan2
+ cuan3 c uan3
+ cuan4 c uan4
+ cuan5 c uan5
+ cui1 c uei1
+ cui2 c uei2
+ cui3 c uei3
+ cui4 c uei4
+ cui5 c uei5
+ cun1 c uen1
+ cun2 c uen2
+ cun3 c uen3
+ cun4 c uen4
+ cun5 c uen5
+ cuo1 c uo1
+ cuo2 c uo2
+ cuo3 c uo3
+ cuo4 c uo4
+ cuo5 c uo5
+ da1 d a1
+ da2 d a2
+ da3 d a3
+ da4 d a4
+ da5 d a5
+ dai1 d ai1
+ dai2 d ai2
+ dai3 d ai3
+ dai4 d ai4
+ dai5 d ai5
+ dan1 d an1
+ dan2 d an2
+ dan3 d an3
+ dan4 d an4
+ dan5 d an5
+ dang1 d ang1
+ dang2 d ang2
+ dang3 d ang3
+ dang4 d ang4
+ dang5 d ang5
+ dao1 d ao1
+ dao2 d ao2
+ dao3 d ao3
+ dao4 d ao4
+ dao5 d ao5
+ de1 d e1
+ de2 d e2
+ de3 d e3
+ de4 d e4
+ de5 d e5
+ dei1 d ei1
+ dei2 d ei2
+ dei3 d ei3
+ dei4 d ei4
+ dei5 d ei5
+ den1 d en1
+ den2 d en2
+ den3 d en3
+ den4 d en4
+ den5 d en5
+ deng1 d eng1
+ deng2 d eng2
+ deng3 d eng3
+ deng4 d eng4
+ deng5 d eng5
+ di1 d i1
+ di2 d i2
+ di3 d i3
+ di4 d i4
+ di5 d i5
+ dia1 d ia1
+ dia2 d ia2
+ dia3 d ia3
+ dia4 d ia4
+ dia5 d ia5
+ dian1 d ian1
+ dian2 d ian2
+ dian3 d ian3
+ dian4 d ian4
+ dian5 d ian5
+ diao1 d iao1
+ diao2 d iao2
+ diao3 d iao3
+ diao4 d iao4
+ diao5 d iao5
+ die1 d ie1
+ die2 d ie2
+ die3 d ie3
+ die4 d ie4
+ die5 d ie5
+ ding1 d ing1
+ ding2 d ing2
+ ding3 d ing3
+ ding4 d ing4
+ ding5 d ing5
+ diu1 d iou1
+ diu2 d iou2
+ diu3 d iou3
+ diu4 d iou4
+ diu5 d iou5
+ dong1 d ong1
+ dong2 d ong2
+ dong3 d ong3
+ dong4 d ong4
+ dong5 d ong5
+ dou1 d ou1
+ dou2 d ou2
+ dou3 d ou3
+ dou4 d ou4
+ dou5 d ou5
+ du1 d u1
+ du2 d u2
+ du3 d u3
+ du4 d u4
+ du5 d u5
+ duan1 d uan1
+ duan2 d uan2
+ duan3 d uan3
+ duan4 d uan4
+ duan5 d uan5
+ dui1 d uei1
+ dui2 d uei2
+ dui3 d uei3
+ dui4 d uei4
+ dui5 d uei5
+ dun1 d uen1
+ dun2 d uen2
+ dun3 d uen3
+ dun4 d uen4
+ dun5 d uen5
+ duo1 d uo1
+ duo2 d uo2
+ duo3 d uo3
+ duo4 d uo4
+ duo5 d uo5
+ e1 e1
+ e2 e2
+ e3 e3
+ e4 e4
+ e5 e5
+ ei1 ei1
+ ei2 ei2
+ ei3 ei3
+ ei4 ei4
+ ei5 ei5
+ en1 en1
+ en2 en2
+ en3 en3
+ en4 en4
+ en5 en5
+ eng1 eng1
+ eng2 eng2
+ eng3 eng3
+ eng4 eng4
+ eng5 eng5
+ r1 er1
+ r2 er2
+ r3 er3
+ r4 er4
+ r5 er5
+ er1 er1
+ er2 er2
+ er3 er3
+ er4 er4
+ er5 er5
+ fa1 f a1
+ fa2 f a2
+ fa3 f a3
+ fa4 f a4
+ fa5 f a5
+ fan1 f an1
+ fan2 f an2
+ fan3 f an3
+ fan4 f an4
+ fan5 f an5
+ fang1 f ang1
+ fang2 f ang2
+ fang3 f ang3
+ fang4 f ang4
+ fang5 f ang5
+ fei1 f ei1
+ fei2 f ei2
+ fei3 f ei3
+ fei4 f ei4
+ fei5 f ei5
+ fen1 f en1
+ fen2 f en2
+ fen3 f en3
+ fen4 f en4
+ fen5 f en5
+ feng1 f eng1
+ feng2 f eng2
+ feng3 f eng3
+ feng4 f eng4
+ feng5 f eng5
+ fo1 f o1
+ fo2 f o2
+ fo3 f o3
+ fo4 f o4
+ fo5 f o5
+ fou1 f ou1
+ fou2 f ou2
+ fou3 f ou3
+ fou4 f ou4
+ fou5 f ou5
+ fu1 f u1
+ fu2 f u2
+ fu3 f u3
+ fu4 f u4
+ fu5 f u5
+ ga1 g a1
+ ga2 g a2
+ ga3 g a3
+ ga4 g a4
+ ga5 g a5
+ gai1 g ai1
+ gai2 g ai2
+ gai3 g ai3
+ gai4 g ai4
+ gai5 g ai5
+ gan1 g an1
+ gan2 g an2
+ gan3 g an3
+ gan4 g an4
+ gan5 g an5
+ gang1 g ang1
+ gang2 g ang2
+ gang3 g ang3
+ gang4 g ang4
+ gang5 g ang5
+ gao1 g ao1
+ gao2 g ao2
+ gao3 g ao3
+ gao4 g ao4
+ gao5 g ao5
+ ge1 g e1
+ ge2 g e2
+ ge3 g e3
+ ge4 g e4
+ ge5 g e5
+ gei1 g ei1
+ gei2 g ei2
+ gei3 g ei3
+ gei4 g ei4
+ gei5 g ei5
+ gen1 g en1
+ gen2 g en2
+ gen3 g en3
+ gen4 g en4
+ gen5 g en5
+ geng1 g eng1
+ geng2 g eng2
+ geng3 g eng3
+ geng4 g eng4
+ geng5 g eng5
+ gong1 g ong1
+ gong2 g ong2
+ gong3 g ong3
+ gong4 g ong4
+ gong5 g ong5
+ gou1 g ou1
+ gou2 g ou2
+ gou3 g ou3
+ gou4 g ou4
+ gou5 g ou5
+ gu1 g u1
+ gu2 g u2
+ gu3 g u3
+ gu4 g u4
+ gu5 g u5
+ gua1 g ua1
+ gua2 g ua2
+ gua3 g ua3
+ gua4 g ua4
+ gua5 g ua5
+ guai1 g uai1
+ guai2 g uai2
+ guai3 g uai3
+ guai4 g uai4
+ guai5 g uai5
+ guan1 g uan1
+ guan2 g uan2
+ guan3 g uan3
+ guan4 g uan4
+ guan5 g uan5
+ guang1 g uang1
+ guang2 g uang2
+ guang3 g uang3
+ guang4 g uang4
+ guang5 g uang5
+ gui1 g uei1
+ gui2 g uei2
+ gui3 g uei3
+ gui4 g uei4
+ gui5 g uei5
+ gun1 g uen1
+ gun2 g uen2
+ gun3 g uen3
+ gun4 g uen4
+ gun5 g uen5
+ guo1 g uo1
+ guo2 g uo2
+ guo3 g uo3
+ guo4 g uo4
+ guo5 g uo5
+ ha1 h a1
+ ha2 h a2
+ ha3 h a3
+ ha4 h a4
+ ha5 h a5
+ hai1 h ai1
+ hai2 h ai2
+ hai3 h ai3
+ hai4 h ai4
+ hai5 h ai5
+ han1 h an1
+ han2 h an2
+ han3 h an3
+ han4 h an4
+ han5 h an5
+ hang1 h ang1
+ hang2 h ang2
+ hang3 h ang3
+ hang4 h ang4
+ hang5 h ang5
+ hao1 h ao1
+ hao2 h ao2
+ hao3 h ao3
+ hao4 h ao4
+ hao5 h ao5
+ he1 h e1
+ he2 h e2
+ he3 h e3
+ he4 h e4
+ he5 h e5
+ hei1 h ei1
+ hei2 h ei2
+ hei3 h ei3
+ hei4 h ei4
+ hei5 h ei5
+ hen1 h en1
+ hen2 h en2
+ hen3 h en3
+ hen4 h en4
+ hen5 h en5
+ heng1 h eng1
+ heng2 h eng2
+ heng3 h eng3
+ heng4 h eng4
+ heng5 h eng5
+ hong1 h ong1
+ hong2 h ong2
+ hong3 h ong3
+ hong4 h ong4
+ hong5 h ong5
+ hou1 h ou1
+ hou2 h ou2
+ hou3 h ou3
+ hou4 h ou4
+ hou5 h ou5
+ hu1 h u1
+ hu2 h u2
+ hu3 h u3
+ hu4 h u4
+ hu5 h u5
+ hua1 h ua1
+ hua2 h ua2
+ hua3 h ua3
+ hua4 h ua4
+ hua5 h ua5
+ huai1 h uai1
+ huai2 h uai2
+ huai3 h uai3
+ huai4 h uai4
+ huai5 h uai5
+ huan1 h uan1
+ huan2 h uan2
+ huan3 h uan3
+ huan4 h uan4
+ huan5 h uan5
+ huang1 h uang1
+ huang2 h uang2
+ huang3 h uang3
+ huang4 h uang4
+ huang5 h uang5
+ hui1 h uei1
+ hui2 h uei2
+ hui3 h uei3
+ hui4 h uei4
+ hui5 h uei5
+ hun1 h uen1
+ hun2 h uen2
+ hun3 h uen3
+ hun4 h uen4
+ hun5 h uen5
+ huo1 h uo1
+ huo2 h uo2
+ huo3 h uo3
+ huo4 h uo4
+ huo5 h uo5
+ ji1 j i1
+ ji2 j i2
+ ji3 j i3
+ ji4 j i4
+ ji5 j i5
+ jia1 j ia1
+ jia2 j ia2
+ jia3 j ia3
+ jia4 j ia4
+ jia5 j ia5
+ jian1 j ian1
+ jian2 j ian2
+ jian3 j ian3
+ jian4 j ian4
+ jian5 j ian5
+ jiang1 j iang1
+ jiang2 j iang2
+ jiang3 j iang3
+ jiang4 j iang4
+ jiang5 j iang5
+ jiao1 j iao1
+ jiao2 j iao2
+ jiao3 j iao3
+ jiao4 j iao4
+ jiao5 j iao5
+ jie1 j ie1
+ jie2 j ie2
+ jie3 j ie3
+ jie4 j ie4
+ jie5 j ie5
+ jin1 j in1
+ jin2 j in2
+ jin3 j in3
+ jin4 j in4
+ jin5 j in5
+ jing1 j ing1
+ jing2 j ing2
+ jing3 j ing3
+ jing4 j ing4
+ jing5 j ing5
+ jiong1 j iong1
+ jiong2 j iong2
+ jiong3 j iong3
+ jiong4 j iong4
+ jiong5 j iong5
+ jiu1 j iou1
+ jiu2 j iou2
+ jiu3 j iou3
+ jiu4 j iou4
+ jiu5 j iou5
+ ju1 j v1
+ ju2 j v2
+ ju3 j v3
+ ju4 j v4
+ ju5 j v5
+ juan1 j van1
+ juan2 j van2
+ juan3 j van3
+ juan4 j van4
+ juan5 j van5
+ jue1 j ve1
+ jue2 j ve2
+ jue3 j ve3
+ jue4 j ve4
+ jue5 j ve5
+ jun1 j vn1
+ jun2 j vn2
+ jun3 j vn3
+ jun4 j vn4
+ jun5 j vn5
+ ka1 k a1
+ ka2 k a2
+ ka3 k a3
+ ka4 k a4
+ ka5 k a5
+ kai1 k ai1
+ kai2 k ai2
+ kai3 k ai3
+ kai4 k ai4
+ kai5 k ai5
+ kan1 k an1
+ kan2 k an2
+ kan3 k an3
+ kan4 k an4
+ kan5 k an5
+ kang1 k ang1
+ kang2 k ang2
+ kang3 k ang3
+ kang4 k ang4
+ kang5 k ang5
+ kao1 k ao1
+ kao2 k ao2
+ kao3 k ao3
+ kao4 k ao4
+ kao5 k ao5
+ ke1 k e1
+ ke2 k e2
+ ke3 k e3
+ ke4 k e4
+ ke5 k e5
+ kei1 k ei1
+ kei2 k ei2
+ kei3 k ei3
+ kei4 k ei4
+ kei5 k ei5
+ ken1 k en1
+ ken2 k en2
+ ken3 k en3
+ ken4 k en4
+ ken5 k en5
+ keng1 k eng1
+ keng2 k eng2
+ keng3 k eng3
+ keng4 k eng4
+ keng5 k eng5
+ kong1 k ong1
+ kong2 k ong2
+ kong3 k ong3
+ kong4 k ong4
+ kong5 k ong5
+ kou1 k ou1
+ kou2 k ou2
+ kou3 k ou3
+ kou4 k ou4
+ kou5 k ou5
+ ku1 k u1
+ ku2 k u2
+ ku3 k u3
+ ku4 k u4
+ ku5 k u5
+ kua1 k ua1
+ kua2 k ua2
+ kua3 k ua3
+ kua4 k ua4
+ kua5 k ua5
+ kuai1 k uai1
+ kuai2 k uai2
+ kuai3 k uai3
+ kuai4 k uai4
+ kuai5 k uai5
+ kuan1 k uan1
+ kuan2 k uan2
+ kuan3 k uan3
+ kuan4 k uan4
+ kuan5 k uan5
+ kuang1 k uang1
+ kuang2 k uang2
+ kuang3 k uang3
+ kuang4 k uang4
+ kuang5 k uang5
+ kui1 k uei1
+ kui2 k uei2
+ kui3 k uei3
+ kui4 k uei4
+ kui5 k uei5
+ kun1 k uen1
+ kun2 k uen2
+ kun3 k uen3
+ kun4 k uen4
+ kun5 k uen5
+ kuo1 k uo1
+ kuo2 k uo2
+ kuo3 k uo3
+ kuo4 k uo4
+ kuo5 k uo5
+ la1 l a1
+ la2 l a2
+ la3 l a3
+ la4 l a4
+ la5 l a5
+ lai1 l ai1
+ lai2 l ai2
+ lai3 l ai3
+ lai4 l ai4
+ lai5 l ai5
+ lan1 l an1
+ lan2 l an2
+ lan3 l an3
+ lan4 l an4
+ lan5 l an5
+ lang1 l ang1
+ lang2 l ang2
+ lang3 l ang3
+ lang4 l ang4
+ lang5 l ang5
+ lao1 l ao1
+ lao2 l ao2
+ lao3 l ao3
+ lao4 l ao4
+ lao5 l ao5
+ le1 l e1
+ le2 l e2
+ le3 l e3
+ le4 l e4
+ le5 l e5
+ lei1 l ei1
+ lei2 l ei2
+ lei3 l ei3
+ lei4 l ei4
+ lei5 l ei5
+ leng1 l eng1
+ leng2 l eng2
+ leng3 l eng3
+ leng4 l eng4
+ leng5 l eng5
+ li1 l i1
+ li2 l i2
+ li3 l i3
+ li4 l i4
+ li5 l i5
+ lia1 l ia1
+ lia2 l ia2
+ lia3 l ia3
+ lia4 l ia4
+ lia5 l ia5
+ lian1 l ian1
+ lian2 l ian2
+ lian3 l ian3
+ lian4 l ian4
+ lian5 l ian5
+ liang1 l iang1
+ liang2 l iang2
+ liang3 l iang3
+ liang4 l iang4
+ liang5 l iang5
+ liao1 l iao1
+ liao2 l iao2
+ liao3 l iao3
+ liao4 l iao4
+ liao5 l iao5
+ lie1 l ie1
+ lie2 l ie2
+ lie3 l ie3
+ lie4 l ie4
+ lie5 l ie5
+ lin1 l in1
+ lin2 l in2
+ lin3 l in3
+ lin4 l in4
+ lin5 l in5
+ ling1 l ing1
+ ling2 l ing2
+ ling3 l ing3
+ ling4 l ing4
+ ling5 l ing5
+ liu1 l iou1
+ liu2 l iou2
+ liu3 l iou3
+ liu4 l iou4
+ liu5 l iou5
+ lo1 l o1
+ lo2 l o2
+ lo3 l o3
+ lo4 l o4
+ lo5 l o5
+ long1 l ong1
+ long2 l ong2
+ long3 l ong3
+ long4 l ong4
+ long5 l ong5
+ lou1 l ou1
+ lou2 l ou2
+ lou3 l ou3
+ lou4 l ou4
+ lou5 l ou5
+ lu1 l u1
+ lu2 l u2
+ lu3 l u3
+ lu4 l u4
+ lu5 l u5
+ luan1 l uan1
+ luan2 l uan2
+ luan3 l uan3
+ luan4 l uan4
+ luan5 l uan5
+ lue1 l ve1
+ lue2 l ve2
+ lue3 l ve3
+ lue4 l ve4
+ lue5 l ve5
+ lve1 l ve1
+ lve2 l ve2
+ lve3 l ve3
+ lve4 l ve4
+ lve5 l ve5
+ lun1 l uen1
+ lun2 l uen2
+ lun3 l uen3
+ lun4 l uen4
+ lun5 l uen5
+ luo1 l uo1
+ luo2 l uo2
+ luo3 l uo3
+ luo4 l uo4
+ luo5 l uo5
+ lv1 l v1
+ lv2 l v2
+ lv3 l v3
+ lv4 l v4
+ lv5 l v5
+ ma1 m a1
+ ma2 m a2
+ ma3 m a3
+ ma4 m a4
+ ma5 m a5
+ mai1 m ai1
+ mai2 m ai2
+ mai3 m ai3
+ mai4 m ai4
+ mai5 m ai5
+ man1 m an1
+ man2 m an2
+ man3 m an3
+ man4 m an4
+ man5 m an5
+ mang1 m ang1
+ mang2 m ang2
+ mang3 m ang3
+ mang4 m ang4
+ mang5 m ang5
+ mao1 m ao1
+ mao2 m ao2
+ mao3 m ao3
+ mao4 m ao4
+ mao5 m ao5
+ me1 m e1
+ me2 m e2
+ me3 m e3
+ me4 m e4
+ me5 m e5
+ mei1 m ei1
+ mei2 m ei2
+ mei3 m ei3
+ mei4 m ei4
+ mei5 m ei5
+ men1 m en1
+ men2 m en2
+ men3 m en3
+ men4 m en4
+ men5 m en5
+ meng1 m eng1
+ meng2 m eng2
+ meng3 m eng3
+ meng4 m eng4
+ meng5 m eng5
+ mi1 m i1
+ mi2 m i2
+ mi3 m i3
+ mi4 m i4
+ mi5 m i5
+ mian1 m ian1
+ mian2 m ian2
+ mian3 m ian3
+ mian4 m ian4
+ mian5 m ian5
+ miao1 m iao1
+ miao2 m iao2
+ miao3 m iao3
+ miao4 m iao4
+ miao5 m iao5
+ mie1 m ie1
+ mie2 m ie2
+ mie3 m ie3
+ mie4 m ie4
+ mie5 m ie5
+ min1 m in1
+ min2 m in2
+ min3 m in3
+ min4 m in4
+ min5 m in5
+ ming1 m ing1
+ ming2 m ing2
+ ming3 m ing3
+ ming4 m ing4
+ ming5 m ing5
+ miu1 m iou1
+ miu2 m iou2
+ miu3 m iou3
+ miu4 m iou4
+ miu5 m iou5
+ mo1 m o1
+ mo2 m o2
+ mo3 m o3
+ mo4 m o4
+ mo5 m o5
+ mou1 m ou1
+ mou2 m ou2
+ mou3 m ou3
+ mou4 m ou4
+ mou5 m ou5
+ mu1 m u1
+ mu2 m u2
+ mu3 m u3
+ mu4 m u4
+ mu5 m u5
+ na1 n a1
+ na2 n a2
+ na3 n a3
+ na4 n a4
+ na5 n a5
+ nai1 n ai1
+ nai2 n ai2
+ nai3 n ai3
+ nai4 n ai4
+ nai5 n ai5
+ nan1 n an1
+ nan2 n an2
+ nan3 n an3
+ nan4 n an4
+ nan5 n an5
+ nang1 n ang1
+ nang2 n ang2
+ nang3 n ang3
+ nang4 n ang4
+ nang5 n ang5
+ nao1 n ao1
+ nao2 n ao2
+ nao3 n ao3
+ nao4 n ao4
+ nao5 n ao5
+ ne1 n e1
+ ne2 n e2
+ ne3 n e3
+ ne4 n e4
+ ne5 n e5
+ nei1 n ei1
+ nei2 n ei2
+ nei3 n ei3
+ nei4 n ei4
+ nei5 n ei5
+ nen1 n en1
+ nen2 n en2
+ nen3 n en3
+ nen4 n en4
+ nen5 n en5
+ neng1 n eng1
+ neng2 n eng2
+ neng3 n eng3
+ neng4 n eng4
+ neng5 n eng5
+ ni1 n i1
+ ni2 n i2
+ ni3 n i3
+ ni4 n i4
+ ni5 n i5
+ nian1 n ian1
+ nian2 n ian2
+ nian3 n ian3
+ nian4 n ian4
+ nian5 n ian5
+ niang1 n iang1
+ niang2 n iang2
+ niang3 n iang3
+ niang4 n iang4
+ niang5 n iang5
+ niao1 n iao1
+ niao2 n iao2
+ niao3 n iao3
+ niao4 n iao4
+ niao5 n iao5
+ nie1 n ie1
+ nie2 n ie2
+ nie3 n ie3
+ nie4 n ie4
+ nie5 n ie5
+ nin1 n in1
+ nin2 n in2
+ nin3 n in3
+ nin4 n in4
+ nin5 n in5
+ ning1 n ing1
+ ning2 n ing2
+ ning3 n ing3
+ ning4 n ing4
+ ning5 n ing5
+ ning5 n ing5
1131
+ niu1 n iou1
1132
+ niu2 n iou2
1133
+ niu3 n iou3
1134
+ niu4 n iou4
1135
+ niu5 n iou5
1136
+ nong1 n ong1
1137
+ nong2 n ong2
1138
+ nong3 n ong3
1139
+ nong4 n ong4
1140
+ nong5 n ong5
1141
+ nou1 n ou1
1142
+ nou2 n ou2
1143
+ nou3 n ou3
1144
+ nou4 n ou4
1145
+ nou5 n ou5
1146
+ nu1 n u1
1147
+ nu2 n u2
1148
+ nu3 n u3
1149
+ nu4 n u4
1150
+ nu5 n u5
1151
+ nuan1 n uan1
1152
+ nuan2 n uan2
1153
+ nuan3 n uan3
1154
+ nuan4 n uan4
1155
+ nuan5 n uan5
1156
+ nue1 n ve1
1157
+ nue2 n ve2
1158
+ nue3 n ve3
1159
+ nue4 n ve4
1160
+ nue5 n ve5
1161
+ nve1 n ve1
1162
+ nve2 n ve2
1163
+ nve3 n ve3
1164
+ nve4 n ve4
1165
+ nve5 n ve5
1166
+ nuo1 n uo1
1167
+ nuo2 n uo2
1168
+ nuo3 n uo3
1169
+ nuo4 n uo4
1170
+ nuo5 n uo5
1171
+ nv1 n v1
1172
+ nv2 n v2
1173
+ nv3 n v3
1174
+ nv4 n v4
1175
+ nv5 n v5
1176
+ o1 o1
1177
+ o2 o2
1178
+ o3 o3
1179
+ o4 o4
1180
+ o5 o5
1181
+ ou1 ou1
1182
+ ou2 ou2
1183
+ ou3 ou3
1184
+ ou4 ou4
1185
+ ou5 ou5
1186
+ pa1 p a1
1187
+ pa2 p a2
1188
+ pa3 p a3
1189
+ pa4 p a4
1190
+ pa5 p a5
1191
+ pai1 p ai1
1192
+ pai2 p ai2
1193
+ pai3 p ai3
1194
+ pai4 p ai4
1195
+ pai5 p ai5
1196
+ pan1 p an1
1197
+ pan2 p an2
1198
+ pan3 p an3
1199
+ pan4 p an4
1200
+ pan5 p an5
1201
+ pang1 p ang1
1202
+ pang2 p ang2
1203
+ pang3 p ang3
1204
+ pang4 p ang4
1205
+ pang5 p ang5
1206
+ pao1 p ao1
1207
+ pao2 p ao2
1208
+ pao3 p ao3
1209
+ pao4 p ao4
1210
+ pao5 p ao5
1211
+ pei1 p ei1
1212
+ pei2 p ei2
1213
+ pei3 p ei3
1214
+ pei4 p ei4
1215
+ pei5 p ei5
1216
+ pen1 p en1
1217
+ pen2 p en2
1218
+ pen3 p en3
1219
+ pen4 p en4
1220
+ pen5 p en5
1221
+ peng1 p eng1
1222
+ peng2 p eng2
1223
+ peng3 p eng3
1224
+ peng4 p eng4
1225
+ peng5 p eng5
1226
+ pi1 p i1
1227
+ pi2 p i2
1228
+ pi3 p i3
1229
+ pi4 p i4
1230
+ pi5 p i5
1231
+ pian1 p ian1
1232
+ pian2 p ian2
1233
+ pian3 p ian3
1234
+ pian4 p ian4
1235
+ pian5 p ian5
1236
+ piao1 p iao1
1237
+ piao2 p iao2
1238
+ piao3 p iao3
1239
+ piao4 p iao4
1240
+ piao5 p iao5
1241
+ pie1 p ie1
1242
+ pie2 p ie2
1243
+ pie3 p ie3
1244
+ pie4 p ie4
1245
+ pie5 p ie5
1246
+ pin1 p in1
1247
+ pin2 p in2
1248
+ pin3 p in3
1249
+ pin4 p in4
1250
+ pin5 p in5
1251
+ ping1 p ing1
1252
+ ping2 p ing2
1253
+ ping3 p ing3
1254
+ ping4 p ing4
1255
+ ping5 p ing5
1256
+ po1 p o1
1257
+ po2 p o2
1258
+ po3 p o3
1259
+ po4 p o4
1260
+ po5 p o5
1261
+ pou1 p ou1
1262
+ pou2 p ou2
1263
+ pou3 p ou3
1264
+ pou4 p ou4
1265
+ pou5 p ou5
1266
+ pu1 p u1
1267
+ pu2 p u2
1268
+ pu3 p u3
1269
+ pu4 p u4
1270
+ pu5 p u5
1271
+ qi1 q i1
1272
+ qi2 q i2
1273
+ qi3 q i3
1274
+ qi4 q i4
1275
+ qi5 q i5
1276
+ qia1 q ia1
1277
+ qia2 q ia2
1278
+ qia3 q ia3
1279
+ qia4 q ia4
1280
+ qia5 q ia5
1281
+ qian1 q ian1
1282
+ qian2 q ian2
1283
+ qian3 q ian3
1284
+ qian4 q ian4
1285
+ qian5 q ian5
1286
+ qiang1 q iang1
1287
+ qiang2 q iang2
1288
+ qiang3 q iang3
1289
+ qiang4 q iang4
1290
+ qiang5 q iang5
1291
+ qiao1 q iao1
1292
+ qiao2 q iao2
1293
+ qiao3 q iao3
1294
+ qiao4 q iao4
1295
+ qiao5 q iao5
1296
+ qie1 q ie1
1297
+ qie2 q ie2
1298
+ qie3 q ie3
1299
+ qie4 q ie4
1300
+ qie5 q ie5
1301
+ qin1 q in1
1302
+ qin2 q in2
1303
+ qin3 q in3
1304
+ qin4 q in4
1305
+ qin5 q in5
1306
+ qing1 q ing1
1307
+ qing2 q ing2
1308
+ qing3 q ing3
1309
+ qing4 q ing4
1310
+ qing5 q ing5
1311
+ qiong1 q iong1
1312
+ qiong2 q iong2
1313
+ qiong3 q iong3
1314
+ qiong4 q iong4
1315
+ qiong5 q iong5
1316
+ qiu1 q iou1
1317
+ qiu2 q iou2
1318
+ qiu3 q iou3
1319
+ qiu4 q iou4
1320
+ qiu5 q iou5
1321
+ qu1 q v1
1322
+ qu2 q v2
1323
+ qu3 q v3
1324
+ qu4 q v4
1325
+ qu5 q v5
1326
+ quan1 q van1
1327
+ quan2 q van2
1328
+ quan3 q van3
1329
+ quan4 q van4
1330
+ quan5 q van5
1331
+ que1 q ve1
1332
+ que2 q ve2
1333
+ que3 q ve3
1334
+ que4 q ve4
1335
+ que5 q ve5
1336
+ qun1 q vn1
1337
+ qun2 q vn2
1338
+ qun3 q vn3
1339
+ qun4 q vn4
1340
+ qun5 q vn5
1341
+ ran1 r an1
1342
+ ran2 r an2
1343
+ ran3 r an3
1344
+ ran4 r an4
1345
+ ran5 r an5
1346
+ rang1 r ang1
1347
+ rang2 r ang2
1348
+ rang3 r ang3
1349
+ rang4 r ang4
1350
+ rang5 r ang5
1351
+ rao1 r ao1
1352
+ rao2 r ao2
1353
+ rao3 r ao3
1354
+ rao4 r ao4
1355
+ rao5 r ao5
1356
+ re1 r e1
1357
+ re2 r e2
1358
+ re3 r e3
1359
+ re4 r e4
1360
+ re5 r e5
1361
+ ren1 r en1
1362
+ ren2 r en2
1363
+ ren3 r en3
1364
+ ren4 r en4
1365
+ ren5 r en5
1366
+ reng1 r eng1
1367
+ reng2 r eng2
1368
+ reng3 r eng3
1369
+ reng4 r eng4
1370
+ reng5 r eng5
1371
+ ri1 r iii1
1372
+ ri2 r iii2
1373
+ ri3 r iii3
1374
+ ri4 r iii4
1375
+ ri5 r iii5
1376
+ rong1 r ong1
1377
+ rong2 r ong2
1378
+ rong3 r ong3
1379
+ rong4 r ong4
1380
+ rong5 r ong5
1381
+ rou1 r ou1
1382
+ rou2 r ou2
1383
+ rou3 r ou3
1384
+ rou4 r ou4
1385
+ rou5 r ou5
1386
+ ru1 r u1
1387
+ ru2 r u2
1388
+ ru3 r u3
1389
+ ru4 r u4
1390
+ ru5 r u5
1391
+ rua1 r ua1
1392
+ rua2 r ua2
1393
+ rua3 r ua3
1394
+ rua4 r ua4
1395
+ rua5 r ua5
1396
+ ruan1 r uan1
1397
+ ruan2 r uan2
1398
+ ruan3 r uan3
1399
+ ruan4 r uan4
1400
+ ruan5 r uan5
1401
+ rui1 r uei1
1402
+ rui2 r uei2
1403
+ rui3 r uei3
1404
+ rui4 r uei4
1405
+ rui5 r uei5
1406
+ run1 r uen1
1407
+ run2 r uen2
1408
+ run3 r uen3
1409
+ run4 r uen4
1410
+ run5 r uen5
1411
+ ruo1 r uo1
1412
+ ruo2 r uo2
1413
+ ruo3 r uo3
1414
+ ruo4 r uo4
1415
+ ruo5 r uo5
1416
+ sa1 s a1
1417
+ sa2 s a2
1418
+ sa3 s a3
1419
+ sa4 s a4
1420
+ sa5 s a5
1421
+ sai1 s ai1
1422
+ sai2 s ai2
1423
+ sai3 s ai3
1424
+ sai4 s ai4
1425
+ sai5 s ai5
1426
+ san1 s an1
1427
+ san2 s an2
1428
+ san3 s an3
1429
+ san4 s an4
1430
+ san5 s an5
1431
+ sang1 s ang1
1432
+ sang2 s ang2
1433
+ sang3 s ang3
1434
+ sang4 s ang4
1435
+ sang5 s ang5
1436
+ sao1 s ao1
1437
+ sao2 s ao2
1438
+ sao3 s ao3
1439
+ sao4 s ao4
1440
+ sao5 s ao5
1441
+ se1 s e1
1442
+ se2 s e2
1443
+ se3 s e3
1444
+ se4 s e4
1445
+ se5 s e5
1446
+ sen1 s en1
1447
+ sen2 s en2
1448
+ sen3 s en3
1449
+ sen4 s en4
1450
+ sen5 s en5
1451
+ seng1 s eng1
1452
+ seng2 s eng2
1453
+ seng3 s eng3
1454
+ seng4 s eng4
1455
+ seng5 s eng5
1456
+ sha1 sh a1
1457
+ sha2 sh a2
1458
+ sha3 sh a3
1459
+ sha4 sh a4
1460
+ sha5 sh a5
1461
+ shai1 sh ai1
1462
+ shai2 sh ai2
1463
+ shai3 sh ai3
1464
+ shai4 sh ai4
1465
+ shai5 sh ai5
1466
+ shan1 sh an1
1467
+ shan2 sh an2
1468
+ shan3 sh an3
1469
+ shan4 sh an4
1470
+ shan5 sh an5
1471
+ shang1 sh ang1
1472
+ shang2 sh ang2
1473
+ shang3 sh ang3
1474
+ shang4 sh ang4
1475
+ shang5 sh ang5
1476
+ shao1 sh ao1
1477
+ shao2 sh ao2
1478
+ shao3 sh ao3
1479
+ shao4 sh ao4
1480
+ shao5 sh ao5
1481
+ she1 sh e1
1482
+ she2 sh e2
1483
+ she3 sh e3
1484
+ she4 sh e4
1485
+ she5 sh e5
1486
+ shei1 sh ei1
1487
+ shei2 sh ei2
1488
+ shei3 sh ei3
1489
+ shei4 sh ei4
1490
+ shei5 sh ei5
1491
+ shen1 sh en1
1492
+ shen2 sh en2
1493
+ shen3 sh en3
1494
+ shen4 sh en4
1495
+ shen5 sh en5
1496
+ sheng1 sh eng1
1497
+ sheng2 sh eng2
1498
+ sheng3 sh eng3
1499
+ sheng4 sh eng4
1500
+ sheng5 sh eng5
1501
+ shi1 sh iii1
1502
+ shi2 sh iii2
1503
+ shi3 sh iii3
1504
+ shi4 sh iii4
1505
+ shi5 sh iii5
1506
+ shou1 sh ou1
1507
+ shou2 sh ou2
1508
+ shou3 sh ou3
1509
+ shou4 sh ou4
1510
+ shou5 sh ou5
1511
+ shu1 sh u1
1512
+ shu2 sh u2
1513
+ shu3 sh u3
1514
+ shu4 sh u4
1515
+ shu5 sh u5
1516
+ shua1 sh ua1
1517
+ shua2 sh ua2
1518
+ shua3 sh ua3
1519
+ shua4 sh ua4
1520
+ shua5 sh ua5
1521
+ shuai1 sh uai1
1522
+ shuai2 sh uai2
1523
+ shuai3 sh uai3
1524
+ shuai4 sh uai4
1525
+ shuai5 sh uai5
1526
+ shuan1 sh uan1
1527
+ shuan2 sh uan2
1528
+ shuan3 sh uan3
1529
+ shuan4 sh uan4
1530
+ shuan5 sh uan5
1531
+ shuang1 sh uang1
1532
+ shuang2 sh uang2
1533
+ shuang3 sh uang3
1534
+ shuang4 sh uang4
1535
+ shuang5 sh uang5
1536
+ shui1 sh uei1
1537
+ shui2 sh uei2
1538
+ shui3 sh uei3
1539
+ shui4 sh uei4
1540
+ shui5 sh uei5
1541
+ shun1 sh uen1
1542
+ shun2 sh uen2
1543
+ shun3 sh uen3
1544
+ shun4 sh uen4
1545
+ shun5 sh uen5
1546
+ shuo1 sh uo1
1547
+ shuo2 sh uo2
1548
+ shuo3 sh uo3
1549
+ shuo4 sh uo4
1550
+ shuo5 sh uo5
1551
+ si1 s ii1
1552
+ si2 s ii2
1553
+ si3 s ii3
1554
+ si4 s ii4
1555
+ si5 s ii5
1556
+ song1 s ong1
1557
+ song2 s ong2
1558
+ song3 s ong3
1559
+ song4 s ong4
1560
+ song5 s ong5
1561
+ sou1 s ou1
1562
+ sou2 s ou2
1563
+ sou3 s ou3
1564
+ sou4 s ou4
1565
+ sou5 s ou5
1566
+ su1 s u1
1567
+ su2 s u2
1568
+ su3 s u3
1569
+ su4 s u4
1570
+ su5 s u5
1571
+ suan1 s uan1
1572
+ suan2 s uan2
1573
+ suan3 s uan3
1574
+ suan4 s uan4
1575
+ suan5 s uan5
1576
+ sui1 s uei1
1577
+ sui2 s uei2
1578
+ sui3 s uei3
1579
+ sui4 s uei4
1580
+ sui5 s uei5
1581
+ sun1 s uen1
1582
+ sun2 s uen2
1583
+ sun3 s uen3
1584
+ sun4 s uen4
1585
+ sun5 s uen5
1586
+ suo1 s uo1
1587
+ suo2 s uo2
1588
+ suo3 s uo3
1589
+ suo4 s uo4
1590
+ suo5 s uo5
1591
+ ta1 t a1
1592
+ ta2 t a2
1593
+ ta3 t a3
1594
+ ta4 t a4
1595
+ ta5 t a5
1596
+ tai1 t ai1
1597
+ tai2 t ai2
1598
+ tai3 t ai3
1599
+ tai4 t ai4
1600
+ tai5 t ai5
1601
+ tan1 t an1
1602
+ tan2 t an2
1603
+ tan3 t an3
1604
+ tan4 t an4
1605
+ tan5 t an5
1606
+ tang1 t ang1
1607
+ tang2 t ang2
1608
+ tang3 t ang3
1609
+ tang4 t ang4
1610
+ tang5 t ang5
1611
+ tao1 t ao1
1612
+ tao2 t ao2
1613
+ tao3 t ao3
1614
+ tao4 t ao4
1615
+ tao5 t ao5
1616
+ te1 t e1
1617
+ te2 t e2
1618
+ te3 t e3
1619
+ te4 t e4
1620
+ te5 t e5
1621
+ tei1 t ei1
1622
+ tei2 t ei2
1623
+ tei3 t ei3
1624
+ tei4 t ei4
1625
+ tei5 t ei5
1626
+ teng1 t eng1
1627
+ teng2 t eng2
1628
+ teng3 t eng3
1629
+ teng4 t eng4
1630
+ teng5 t eng5
1631
+ ti1 t i1
1632
+ ti2 t i2
1633
+ ti3 t i3
1634
+ ti4 t i4
1635
+ ti5 t i5
1636
+ tian1 t ian1
1637
+ tian2 t ian2
1638
+ tian3 t ian3
1639
+ tian4 t ian4
1640
+ tian5 t ian5
1641
+ tiao1 t iao1
1642
+ tiao2 t iao2
1643
+ tiao3 t iao3
1644
+ tiao4 t iao4
1645
+ tiao5 t iao5
1646
+ tie1 t ie1
1647
+ tie2 t ie2
1648
+ tie3 t ie3
1649
+ tie4 t ie4
1650
+ tie5 t ie5
1651
+ ting1 t ing1
1652
+ ting2 t ing2
1653
+ ting3 t ing3
1654
+ ting4 t ing4
1655
+ ting5 t ing5
1656
+ tong1 t ong1
1657
+ tong2 t ong2
1658
+ tong3 t ong3
1659
+ tong4 t ong4
1660
+ tong5 t ong5
1661
+ tou1 t ou1
1662
+ tou2 t ou2
1663
+ tou3 t ou3
1664
+ tou4 t ou4
1665
+ tou5 t ou5
1666
+ tu1 t u1
1667
+ tu2 t u2
1668
+ tu3 t u3
1669
+ tu4 t u4
1670
+ tu5 t u5
1671
+ tuan1 t uan1
1672
+ tuan2 t uan2
1673
+ tuan3 t uan3
1674
+ tuan4 t uan4
1675
+ tuan5 t uan5
1676
+ tui1 t uei1
1677
+ tui2 t uei2
1678
+ tui3 t uei3
1679
+ tui4 t uei4
1680
+ tui5 t uei5
1681
+ tun1 t uen1
1682
+ tun2 t uen2
1683
+ tun3 t uen3
1684
+ tun4 t uen4
1685
+ tun5 t uen5
1686
+ tuo1 t uo1
1687
+ tuo2 t uo2
1688
+ tuo3 t uo3
1689
+ tuo4 t uo4
1690
+ tuo5 t uo5
1691
+ wa1 w ua1
1692
+ wa2 w ua2
1693
+ wa3 w ua3
1694
+ wa4 w ua4
1695
+ wa5 w ua5
1696
+ wai1 w uai1
1697
+ wai2 w uai2
1698
+ wai3 w uai3
1699
+ wai4 w uai4
1700
+ wai5 w uai5
1701
+ wan1 w uan1
1702
+ wan2 w uan2
1703
+ wan3 w uan3
1704
+ wan4 w uan4
1705
+ wan5 w uan5
1706
+ wang1 w uang1
1707
+ wang2 w uang2
1708
+ wang3 w uang3
1709
+ wang4 w uang4
1710
+ wang5 w uang5
1711
+ wei1 w uei1
1712
+ wei2 w uei2
1713
+ wei3 w uei3
1714
+ wei4 w uei4
1715
+ wei5 w uei5
1716
+ wen1 w uen1
1717
+ wen2 w uen2
1718
+ wen3 w uen3
1719
+ wen4 w uen4
1720
+ wen5 w uen5
1721
+ weng1 w uen1
1722
+ weng2 w uen2
1723
+ weng3 w uen3
1724
+ weng4 w uen4
1725
+ weng5 w uen5
1726
+ wo1 w uo1
1727
+ wo2 w uo2
1728
+ wo3 w uo3
1729
+ wo4 w uo4
1730
+ wo5 w uo5
1731
+ wu1 w u1
1732
+ wu2 w u2
1733
+ wu3 w u3
1734
+ wu4 w u4
1735
+ wu5 w u5
1736
+ xi1 x i1
1737
+ xi2 x i2
1738
+ xi3 x i3
1739
+ xi4 x i4
1740
+ xi5 x i5
1741
+ xia1 x ia1
1742
+ xia2 x ia2
1743
+ xia3 x ia3
1744
+ xia4 x ia4
1745
+ xia5 x ia5
1746
+ xian1 x ian1
1747
+ xian2 x ian2
1748
+ xian3 x ian3
1749
+ xian4 x ian4
1750
+ xian5 x ian5
1751
+ xiang1 x iang1
1752
+ xiang2 x iang2
1753
+ xiang3 x iang3
1754
+ xiang4 x iang4
1755
+ xiang5 x iang5
1756
+ xiao1 x iao1
1757
+ xiao2 x iao2
1758
+ xiao3 x iao3
1759
+ xiao4 x iao4
1760
+ xiao5 x iao5
1761
+ xie1 x ie1
1762
+ xie2 x ie2
1763
+ xie3 x ie3
1764
+ xie4 x ie4
1765
+ xie5 x ie5
1766
+ xin1 x in1
1767
+ xin2 x in2
1768
+ xin3 x in3
1769
+ xin4 x in4
1770
+ xin5 x in5
1771
+ xing1 x ing1
1772
+ xing2 x ing2
1773
+ xing3 x ing3
1774
+ xing4 x ing4
1775
+ xing5 x ing5
1776
+ xiong1 x iong1
1777
+ xiong2 x iong2
1778
+ xiong3 x iong3
1779
+ xiong4 x iong4
1780
+ xiong5 x iong5
1781
+ xiu1 x iou1
1782
+ xiu2 x iou2
1783
+ xiu3 x iou3
1784
+ xiu4 x iou4
1785
+ xiu5 x iou5
1786
+ xu1 x v1
1787
+ xu2 x v2
1788
+ xu3 x v3
1789
+ xu4 x v4
1790
+ xu5 x v5
1791
+ xuan1 x van1
1792
+ xuan2 x van2
1793
+ xuan3 x van3
1794
+ xuan4 x van4
1795
+ xuan5 x van5
1796
+ xue1 x ve1
1797
+ xue2 x ve2
1798
+ xue3 x ve3
1799
+ xue4 x ve4
1800
+ xue5 x ve5
1801
+ xun1 x vn1
1802
+ xun2 x vn2
1803
+ xun3 x vn3
1804
+ xun4 x vn4
1805
+ xun5 x vn5
1806
+ ya1 y ia1
1807
+ ya2 y ia2
1808
+ ya3 y ia3
1809
+ ya4 y ia4
1810
+ ya5 y ia5
1811
+ yan1 y ian1
1812
+ yan2 y ian2
1813
+ yan3 y ian3
1814
+ yan4 y ian4
1815
+ yan5 y ian5
1816
+ yang1 y iang1
1817
+ yang2 y iang2
1818
+ yang3 y iang3
1819
+ yang4 y iang4
1820
+ yang5 y iang5
1821
+ yao1 y iao1
1822
+ yao2 y iao2
1823
+ yao3 y iao3
1824
+ yao4 y iao4
1825
+ yao5 y iao5
1826
+ ye1 y ie1
1827
+ ye2 y ie2
1828
+ ye3 y ie3
1829
+ ye4 y ie4
1830
+ ye5 y ie5
1831
+ yi1 y i1
1832
+ yi2 y i2
1833
+ yi3 y i3
1834
+ yi4 y i4
1835
+ yi5 y i5
1836
+ yin1 y in1
1837
+ yin2 y in2
1838
+ yin3 y in3
1839
+ yin4 y in4
1840
+ yin5 y in5
1841
+ ying1 y ing1
1842
+ ying2 y ing2
1843
+ ying3 y ing3
1844
+ ying4 y ing4
1845
+ ying5 y ing5
1846
+ yo1 y iou1
1847
+ yo2 y iou2
1848
+ yo3 y iou3
1849
+ yo4 y iou4
1850
+ yo5 y iou5
1851
+ yong1 y iong1
1852
+ yong2 y iong2
1853
+ yong3 y iong3
1854
+ yong4 y iong4
1855
+ yong5 y iong5
1856
+ you1 y iou1
1857
+ you2 y iou2
1858
+ you3 y iou3
1859
+ you4 y iou4
1860
+ you5 y iou5
1861
+ yu1 y v1
1862
+ yu2 y v2
1863
+ yu3 y v3
1864
+ yu4 y v4
1865
+ yu5 y v5
1866
+ yuan1 y van1
1867
+ yuan2 y van2
1868
+ yuan3 y van3
1869
+ yuan4 y van4
1870
+ yuan5 y van5
1871
+ yue1 y ve1
1872
+ yue2 y ve2
1873
+ yue3 y ve3
1874
+ yue4 y ve4
1875
+ yue5 y ve5
1876
+ yun1 y vn1
1877
+ yun2 y vn2
1878
+ yun3 y vn3
1879
+ yun4 y vn4
1880
+ yun5 y vn5
1881
+ za1 z a1
1882
+ za2 z a2
1883
+ za3 z a3
1884
+ za4 z a4
1885
+ za5 z a5
1886
+ zai1 z ai1
1887
+ zai2 z ai2
1888
+ zai3 z ai3
1889
+ zai4 z ai4
1890
+ zai5 z ai5
1891
+ zan1 z an1
1892
+ zan2 z an2
1893
+ zan3 z an3
1894
+ zan4 z an4
1895
+ zan5 z an5
1896
+ zang1 z ang1
1897
+ zang2 z ang2
1898
+ zang3 z ang3
1899
+ zang4 z ang4
1900
+ zang5 z ang5
1901
+ zao1 z ao1
1902
+ zao2 z ao2
1903
+ zao3 z ao3
1904
+ zao4 z ao4
1905
+ zao5 z ao5
1906
+ ze1 z e1
1907
+ ze2 z e2
1908
+ ze3 z e3
1909
+ ze4 z e4
1910
+ ze5 z e5
1911
+ zei1 z ei1
1912
+ zei2 z ei2
1913
+ zei3 z ei3
1914
+ zei4 z ei4
1915
+ zei5 z ei5
1916
+ zen1 z en1
1917
+ zen2 z en2
1918
+ zen3 z en3
1919
+ zen4 z en4
1920
+ zen5 z en5
1921
+ zeng1 z eng1
1922
+ zeng2 z eng2
1923
+ zeng3 z eng3
1924
+ zeng4 z eng4
1925
+ zeng5 z eng5
1926
+ zha1 zh a1
1927
+ zha2 zh a2
1928
+ zha3 zh a3
1929
+ zha4 zh a4
1930
+ zha5 zh a5
1931
+ zhai1 zh ai1
1932
+ zhai2 zh ai2
1933
+ zhai3 zh ai3
1934
+ zhai4 zh ai4
1935
+ zhai5 zh ai5
1936
+ zhan1 zh an1
1937
+ zhan2 zh an2
1938
+ zhan3 zh an3
1939
+ zhan4 zh an4
1940
+ zhan5 zh an5
1941
+ zhang1 zh ang1
1942
+ zhang2 zh ang2
1943
+ zhang3 zh ang3
1944
+ zhang4 zh ang4
1945
+ zhang5 zh ang5
1946
+ zhao1 zh ao1
1947
+ zhao2 zh ao2
1948
+ zhao3 zh ao3
1949
+ zhao4 zh ao4
1950
+ zhao5 zh ao5
1951
+ zhe1 zh e1
1952
+ zhe2 zh e2
1953
+ zhe3 zh e3
1954
+ zhe4 zh e4
1955
+ zhe5 zh e5
1956
+ zhei1 zh ei1
1957
+ zhei2 zh ei2
1958
+ zhei3 zh ei3
1959
+ zhei4 zh ei4
1960
+ zhei5 zh ei5
1961
+ zhen1 zh en1
1962
+ zhen2 zh en2
1963
+ zhen3 zh en3
1964
+ zhen4 zh en4
1965
+ zhen5 zh en5
1966
+ zheng1 zh eng1
1967
+ zheng2 zh eng2
1968
+ zheng3 zh eng3
1969
+ zheng4 zh eng4
1970
+ zheng5 zh eng5
1971
+ zhi1 zh iii1
1972
+ zhi2 zh iii2
1973
+ zhi3 zh iii3
1974
+ zhi4 zh iii4
1975
+ zhi5 zh iii5
1976
+ zhong1 zh ong1
1977
+ zhong2 zh ong2
1978
+ zhong3 zh ong3
1979
+ zhong4 zh ong4
1980
+ zhong5 zh ong5
1981
+ zhou1 zh ou1
1982
+ zhou2 zh ou2
1983
+ zhou3 zh ou3
1984
+ zhou4 zh ou4
1985
+ zhou5 zh ou5
1986
+ zhu1 zh u1
1987
+ zhu2 zh u2
1988
+ zhu3 zh u3
1989
+ zhu4 zh u4
1990
+ zhu5 zh u5
1991
+ zhua1 zh ua1
1992
+ zhua2 zh ua2
1993
+ zhua3 zh ua3
1994
+ zhua4 zh ua4
1995
+ zhua5 zh ua5
1996
+ zhuai1 zh uai1
1997
+ zhuai2 zh uai2
1998
+ zhuai3 zh uai3
1999
+ zhuai4 zh uai4
2000
+ zhuai5 zh uai5
2001
+ zhuan1 zh uan1
2002
+ zhuan2 zh uan2
2003
+ zhuan3 zh uan3
2004
+ zhuan4 zh uan4
2005
+ zhuan5 zh uan5
2006
+ zhuang1 zh uang1
2007
+ zhuang2 zh uang2
2008
+ zhuang3 zh uang3
2009
+ zhuang4 zh uang4
2010
+ zhuang5 zh uang5
2011
+ zhui1 zh uei1
2012
+ zhui2 zh uei2
2013
+ zhui3 zh uei3
2014
+ zhui4 zh uei4
2015
+ zhui5 zh uei5
2016
+ zhun1 zh uen1
2017
+ zhun2 zh uen2
2018
+ zhun3 zh uen3
2019
+ zhun4 zh uen4
2020
+ zhun5 zh uen5
2021
+ zhuo1 zh uo1
2022
+ zhuo2 zh uo2
2023
+ zhuo3 zh uo3
2024
+ zhuo4 zh uo4
2025
+ zhuo5 zh uo5
2026
+ zi1 z ii1
2027
+ zi2 z ii2
2028
+ zi3 z ii3
2029
+ zi4 z ii4
2030
+ zi5 z ii5
2031
+ zong1 z ong1
2032
+ zong2 z ong2
2033
+ zong3 z ong3
2034
+ zong4 z ong4
2035
+ zong5 z ong5
2036
+ zou1 z ou1
2037
+ zou2 z ou2
2038
+ zou3 z ou3
2039
+ zou4 z ou4
2040
+ zou5 z ou5
2041
+ zu1 z u1
2042
+ zu2 z u2
2043
+ zu3 z u3
2044
+ zu4 z u4
2045
+ zu5 z u5
2046
+ zuan1 z uan1
2047
+ zuan2 z uan2
2048
+ zuan3 z uan3
2049
+ zuan4 z uan4
2050
+ zuan5 z uan5
2051
+ zui1 z uei1
2052
+ zui2 z uei2
2053
+ zui3 z uei3
2054
+ zui4 z uei4
2055
+ zui5 z uei5
2056
+ zun1 z uen1
2057
+ zun2 z uen2
2058
+ zun3 z uen3
2059
+ zun4 z uen4
2060
+ zun5 z uen5
2061
+ zuo1 z uo1
2062
+ zuo2 z uo2
2063
+ zuo3 z uo3
2064
+ zuo4 z uo4
2065
+ zuo5 z uo5
2066
+ ar1 a1 rr
2067
+ ar2 a2 rr
2068
+ ar3 a3 rr
2069
+ ar4 a4 rr
2070
+ ar5 a5 rr
2071
+ air1 ai1 rr
2072
+ air2 ai2 rr
2073
+ air3 ai3 rr
2074
+ air4 ai4 rr
2075
+ air5 ai5 rr
2076
+ anr1 an1 rr
2077
+ anr2 an2 rr
2078
+ anr3 an3 rr
2079
+ anr4 an4 rr
2080
+ anr5 an5 rr
2081
+ angr1 ang1 rr
2082
+ angr2 ang2 rr
2083
+ angr3 ang3 rr
2084
+ angr4 ang4 rr
2085
+ angr5 ang5 rr
2086
+ aor1 ao1 rr
2087
+ aor2 ao2 rr
2088
+ aor3 ao3 rr
2089
+ aor4 ao4 rr
2090
+ aor5 ao5 rr
2091
+ bar1 b a1 rr
2092
+ bar2 b a2 rr
2093
+ bar3 b a3 rr
2094
+ bar4 b a4 rr
2095
+ bar5 b a5 rr
2096
+ bair1 b ai1 rr
2097
+ bair2 b ai2 rr
2098
+ bair3 b ai3 rr
2099
+ bair4 b ai4 rr
2100
+ bair5 b ai5 rr
2101
+ banr1 b an1 rr
2102
+ banr2 b an2 rr
2103
+ banr3 b an3 rr
2104
+ banr4 b an4 rr
2105
+ banr5 b an5 rr
2106
+ bangr1 b ang1 rr
2107
+ bangr2 b ang2 rr
2108
+ bangr3 b ang3 rr
2109
+ bangr4 b ang4 rr
2110
+ bangr5 b ang5 rr
2111
+ baor1 b ao1 rr
2112
+ baor2 b ao2 rr
2113
+ baor3 b ao3 rr
2114
+ baor4 b ao4 rr
2115
+ baor5 b ao5 rr
2116
+ beir1 b ei1 rr
2117
+ beir2 b ei2 rr
2118
+ beir3 b ei3 rr
2119
+ beir4 b ei4 rr
2120
+ beir5 b ei5 rr
2121
+ benr1 b en1 rr
2122
+ benr2 b en2 rr
2123
+ benr3 b en3 rr
2124
+ benr4 b en4 rr
2125
+ benr5 b en5 rr
2126
+ bengr1 b eng1 rr
2127
+ bengr2 b eng2 rr
2128
+ bengr3 b eng3 rr
2129
+ bengr4 b eng4 rr
2130
+ bengr5 b eng5 rr
2131
+ bir1 b i1 rr
2132
+ bir2 b i2 rr
2133
+ bir3 b i3 rr
2134
+ bir4 b i4 rr
2135
+ bir5 b i5 rr
2136
+ bianr1 b ian1 rr
2137
+ bianr2 b ian2 rr
2138
+ bianr3 b ian3 rr
2139
+ bianr4 b ian4 rr
2140
+ bianr5 b ian5 rr
2141
+ biaor1 b iao1 rr
2142
+ biaor2 b iao2 rr
2143
+ biaor3 b iao3 rr
2144
+ biaor4 b iao4 rr
2145
+ biaor5 b iao5 rr
2146
+ bier1 b ie1 rr
2147
+ bier2 b ie2 rr
2148
+ bier3 b ie3 rr
2149
+ bier4 b ie4 rr
2150
+ bier5 b ie5 rr
2151
+ binr1 b in1 rr
2152
+ binr2 b in2 rr
2153
+ binr3 b in3 rr
2154
+ binr4 b in4 rr
2155
+ binr5 b in5 rr
2156
+ bingr1 b ing1 rr
2157
+ bingr2 b ing2 rr
2158
+ bingr3 b ing3 rr
2159
+ bingr4 b ing4 rr
2160
+ bingr5 b ing5 rr
2161
+ bor1 b o1 rr
2162
+ bor2 b o2 rr
2163
+ bor3 b o3 rr
2164
+ bor4 b o4 rr
2165
+ bor5 b o5 rr
2166
+ bur1 b u1 rr
2167
+ bur2 b u2 rr
2168
+ bur3 b u3 rr
2169
+ bur4 b u4 rr
2170
+ bur5 b u5 rr
2171
+ car1 c a1 rr
2172
+ car2 c a2 rr
2173
+ car3 c a3 rr
2174
+ car4 c a4 rr
2175
+ car5 c a5 rr
2176
+ cair1 c ai1 rr
2177
+ cair2 c ai2 rr
2178
+ cair3 c ai3 rr
2179
+ cair4 c ai4 rr
2180
+ cair5 c ai5 rr
2181
+ canr1 c an1 rr
2182
+ canr2 c an2 rr
2183
+ canr3 c an3 rr
2184
+ canr4 c an4 rr
2185
+ canr5 c an5 rr
2186
+ cangr1 c ang1 rr
2187
+ cangr2 c ang2 rr
2188
+ cangr3 c ang3 rr
2189
+ cangr4 c ang4 rr
2190
+ cangr5 c ang5 rr
2191
+ caor1 c ao1 rr
2192
+ caor2 c ao2 rr
2193
+ caor3 c ao3 rr
2194
+ caor4 c ao4 rr
2195
+ caor5 c ao5 rr
2196
+ cer1 c e1 rr
2197
+ cer2 c e2 rr
2198
+ cer3 c e3 rr
2199
+ cer4 c e4 rr
2200
+ cer5 c e5 rr
2201
+ cenr1 c en1 rr
2202
+ cenr2 c en2 rr
2203
+ cenr3 c en3 rr
2204
+ cenr4 c en4 rr
2205
+ cenr5 c en5 rr
2206
+ cengr1 c eng1 rr
2207
+ cengr2 c eng2 rr
2208
+ cengr3 c eng3 rr
2209
+ cengr4 c eng4 rr
2210
+ cengr5 c eng5 rr
2211
+ char1 ch a1 rr
2212
+ char2 ch a2 rr
2213
+ char3 ch a3 rr
2214
+ char4 ch a4 rr
2215
+ char5 ch a5 rr
2216
+ chair1 ch ai1 rr
2217
+ chair2 ch ai2 rr
2218
+ chair3 ch ai3 rr
2219
+ chair4 ch ai4 rr
2220
+ chair5 ch ai5 rr
2221
+ chanr1 ch an1 rr
2222
+ chanr2 ch an2 rr
2223
+ chanr3 ch an3 rr
2224
+ chanr4 ch an4 rr
2225
+ chanr5 ch an5 rr
2226
+ changr1 ch ang1 rr
2227
+ changr2 ch ang2 rr
2228
+ changr3 ch ang3 rr
2229
+ changr4 ch ang4 rr
2230
+ changr5 ch ang5 rr
2231
+ chaor1 ch ao1 rr
2232
+ chaor2 ch ao2 rr
2233
+ chaor3 ch ao3 rr
2234
+ chaor4 ch ao4 rr
2235
+ chaor5 ch ao5 rr
2236
+ cher1 ch e1 rr
2237
+ cher2 ch e2 rr
2238
+ cher3 ch e3 rr
2239
+ cher4 ch e4 rr
2240
+ cher5 ch e5 rr
2241
+ chenr1 ch en1 rr
2242
+ chenr2 ch en2 rr
2243
+ chenr3 ch en3 rr
2244
+ chenr4 ch en4 rr
2245
+ chenr5 ch en5 rr
2246
+ chengr1 ch eng1 rr
2247
+ chengr2 ch eng2 rr
2248
+ chengr3 ch eng3 rr
2249
+ chengr4 ch eng4 rr
2250
+ chengr5 ch eng5 rr
2251
+ chir1 ch iii1 rr
2252
+ chir2 ch iii2 rr
2253
+ chir3 ch iii3 rr
2254
+ chir4 ch iii4 rr
2255
+ chir5 ch iii5 rr
2256
+ chongr1 ch ong1 rr
2257
+ chongr2 ch ong2 rr
2258
+ chongr3 ch ong3 rr
2259
+ chongr4 ch ong4 rr
2260
+ chongr5 ch ong5 rr
2261
+ chour1 ch ou1 rr
2262
+ chour2 ch ou2 rr
2263
+ chour3 ch ou3 rr
2264
+ chour4 ch ou4 rr
2265
+ chour5 ch ou5 rr
2266
+ chur1 ch u1 rr
2267
+ chur2 ch u2 rr
2268
+ chur3 ch u3 rr
2269
+ chur4 ch u4 rr
2270
+ chur5 ch u5 rr
2271
+ chuair1 ch uai1 rr
2272
+ chuair2 ch uai2 rr
2273
+ chuair3 ch uai3 rr
2274
+ chuair4 ch uai4 rr
2275
+ chuair5 ch uai5 rr
2276
+ chuanr1 ch uan1 rr
2277
+ chuanr2 ch uan2 rr
2278
+ chuanr3 ch uan3 rr
2279
+ chuanr4 ch uan4 rr
2280
+ chuanr5 ch uan5 rr
2281
+ chuangr1 ch uang1 rr
2282
+ chuangr2 ch uang2 rr
2283
+ chuangr3 ch uang3 rr
2284
+ chuangr4 ch uang4 rr
2285
+ chuangr5 ch uang5 rr
2286
+ chuir1 ch uei1 rr
2287
+ chuir2 ch uei2 rr
2288
+ chuir3 ch uei3 rr
2289
+ chuir4 ch uei4 rr
2290
+ chuir5 ch uei5 rr
2291
+ chunr1 ch uen1 rr
2292
+ chunr2 ch uen2 rr
2293
+ chunr3 ch uen3 rr
2294
+ chunr4 ch uen4 rr
2295
+ chunr5 ch uen5 rr
2296
+ chuor1 ch uo1 rr
2297
+ chuor2 ch uo2 rr
2298
+ chuor3 ch uo3 rr
2299
+ chuor4 ch uo4 rr
2300
+ chuor5 ch uo5 rr
2301
+ cir1 c ii1 rr
2302
+ cir2 c ii2 rr
2303
+ cir3 c ii3 rr
2304
+ cir4 c ii4 rr
2305
+ cir5 c ii5 rr
2306
+ congr1 c ong1 rr
2307
+ congr2 c ong2 rr
2308
+ congr3 c ong3 rr
2309
+ congr4 c ong4 rr
2310
+ congr5 c ong5 rr
2311
+ cour1 c ou1 rr
2312
+ cour2 c ou2 rr
2313
+ cour3 c ou3 rr
2314
+ cour4 c ou4 rr
2315
+ cour5 c ou5 rr
2316
+ cur1 c u1 rr
2317
+ cur2 c u2 rr
2318
+ cur3 c u3 rr
2319
+ cur4 c u4 rr
2320
+ cur5 c u5 rr
2321
+ cuanr1 c uan1 rr
2322
+ cuanr2 c uan2 rr
2323
+ cuanr3 c uan3 rr
2324
+ cuanr4 c uan4 rr
2325
+ cuanr5 c uan5 rr
2326
+ cuir1 c uei1 rr
2327
+ cuir2 c uei2 rr
2328
+ cuir3 c uei3 rr
2329
+ cuir4 c uei4 rr
2330
+ cuir5 c uei5 rr
2331
+ cunr1 c uen1 rr
2332
+ cunr2 c uen2 rr
2333
+ cunr3 c uen3 rr
2334
+ cunr4 c uen4 rr
2335
+ cunr5 c uen5 rr
2336
+ cuor1 c uo1 rr
2337
+ cuor2 c uo2 rr
2338
+ cuor3 c uo3 rr
2339
+ cuor4 c uo4 rr
2340
+ cuor5 c uo5 rr
2341
+ dar1 d a1 rr
2342
+ dar2 d a2 rr
2343
+ dar3 d a3 rr
2344
+ dar4 d a4 rr
2345
+ dar5 d a5 rr
2346
+ dair1 d ai1 rr
2347
+ dair2 d ai2 rr
2348
+ dair3 d ai3 rr
2349
+ dair4 d ai4 rr
2350
+ dair5 d ai5 rr
2351
+ danr1 d an1 rr
2352
+ danr2 d an2 rr
2353
+ danr3 d an3 rr
2354
+ danr4 d an4 rr
2355
+ danr5 d an5 rr
2356
+ dangr1 d ang1 rr
2357
+ dangr2 d ang2 rr
2358
+ dangr3 d ang3 rr
2359
+ dangr4 d ang4 rr
2360
+ dangr5 d ang5 rr
2361
+ daor1 d ao1 rr
2362
+ daor2 d ao2 rr
2363
+ daor3 d ao3 rr
2364
+ daor4 d ao4 rr
2365
+ daor5 d ao5 rr
2366
+ der1 d e1 rr
2367
+ der2 d e2 rr
2368
+ der3 d e3 rr
2369
+ der4 d e4 rr
2370
+ der5 d e5 rr
2371
+ deir1 d ei1 rr
2372
+ deir2 d ei2 rr
2373
+ deir3 d ei3 rr
2374
+ deir4 d ei4 rr
2375
+ deir5 d ei5 rr
2376
+ denr1 d en1 rr
2377
+ denr2 d en2 rr
2378
+ denr3 d en3 rr
2379
+ denr4 d en4 rr
2380
+ denr5 d en5 rr
2381
+ dengr1 d eng1 rr
2382
+ dengr2 d eng2 rr
2383
+ dengr3 d eng3 rr
2384
+ dengr4 d eng4 rr
2385
+ dengr5 d eng5 rr
2386
+ dir1 d i1 rr
2387
+ dir2 d i2 rr
2388
+ dir3 d i3 rr
2389
+ dir4 d i4 rr
2390
+ dir5 d i5 rr
2391
+ diar1 d ia1 rr
2392
+ diar2 d ia2 rr
2393
+ diar3 d ia3 rr
2394
+ diar4 d ia4 rr
2395
+ diar5 d ia5 rr
2396
+ dianr1 d ian1 rr
2397
+ dianr2 d ian2 rr
2398
+ dianr3 d ian3 rr
2399
+ dianr4 d ian4 rr
2400
+ dianr5 d ian5 rr
2401
+ diaor1 d iao1 rr
2402
+ diaor2 d iao2 rr
2403
+ diaor3 d iao3 rr
2404
+ diaor4 d iao4 rr
2405
+ diaor5 d iao5 rr
2406
+ dier1 d ie1 rr
2407
+ dier2 d ie2 rr
2408
+ dier3 d ie3 rr
2409
+ dier4 d ie4 rr
2410
+ dier5 d ie5 rr
2411
+ dingr1 d ing1 rr
2412
+ dingr2 d ing2 rr
2413
+ dingr3 d ing3 rr
2414
+ dingr4 d ing4 rr
2415
+ dingr5 d ing5 rr
2416
+ diur1 d iou1 rr
2417
+ diur2 d iou2 rr
2418
+ diur3 d iou3 rr
2419
+ diur4 d iou4 rr
2420
+ diur5 d iou5 rr
2421
+ dongr1 d ong1 rr
2422
+ dongr2 d ong2 rr
2423
+ dongr3 d ong3 rr
2424
+ dongr4 d ong4 rr
2425
+ dongr5 d ong5 rr
2426
+ dour1 d ou1 rr
2427
+ dour2 d ou2 rr
2428
+ dour3 d ou3 rr
2429
+ dour4 d ou4 rr
2430
+ dour5 d ou5 rr
2431
+ dur1 d u1 rr
2432
+ dur2 d u2 rr
2433
+ dur3 d u3 rr
2434
+ dur4 d u4 rr
2435
+ dur5 d u5 rr
2436
+ duanr1 d uan1 rr
2437
+ duanr2 d uan2 rr
2438
+ duanr3 d uan3 rr
2439
+ duanr4 d uan4 rr
2440
+ duanr5 d uan5 rr
2441
+ duir1 d uei1 rr
2442
+ duir2 d uei2 rr
2443
+ duir3 d uei3 rr
2444
+ duir4 d uei4 rr
2445
+ duir5 d uei5 rr
2446
+ dunr1 d uen1 rr
2447
+ dunr2 d uen2 rr
2448
+ dunr3 d uen3 rr
2449
+ dunr4 d uen4 rr
2450
+ dunr5 d uen5 rr
2451
+ duor1 d uo1 rr
2452
+ duor2 d uo2 rr
2453
+ duor3 d uo3 rr
2454
+ duor4 d uo4 rr
2455
+ duor5 d uo5 rr
2456
+ er1 e1 rr
2457
+ er2 e2 rr
2458
+ er3 e3 rr
2459
+ er4 e4 rr
2460
+ er5 e5 rr
2461
+ eir1 ei1 rr
2462
+ eir2 ei2 rr
2463
+ eir3 ei3 rr
2464
+ eir4 ei4 rr
2465
+ eir5 ei5 rr
2466
+ enr1 en1 rr
2467
+ enr2 en2 rr
2468
+ enr3 en3 rr
2469
+ enr4 en4 rr
2470
+ enr5 en5 rr
2471
+ engr1 eng1 rr
2472
+ engr2 eng2 rr
2473
+ engr3 eng3 rr
2474
+ engr4 eng4 rr
2475
+ engr5 eng5 rr
2476
+ far1 f a1 rr
2477
+ far2 f a2 rr
2478
+ far3 f a3 rr
2479
+ far4 f a4 rr
2480
+ far5 f a5 rr
2481
+ fanr1 f an1 rr
2482
+ fanr2 f an2 rr
2483
+ fanr3 f an3 rr
2484
+ fanr4 f an4 rr
2485
+ fanr5 f an5 rr
2486
+ fangr1 f ang1 rr
2487
+ fangr2 f ang2 rr
2488
+ fangr3 f ang3 rr
2489
+ fangr4 f ang4 rr
2490
+ fangr5 f ang5 rr
2491
+ feir1 f ei1 rr
2492
+ feir2 f ei2 rr
2493
+ feir3 f ei3 rr
2494
+ feir4 f ei4 rr
2495
+ feir5 f ei5 rr
2496
+ fenr1 f en1 rr
2497
+ fenr2 f en2 rr
2498
+ fenr3 f en3 rr
2499
+ fenr4 f en4 rr
2500
+ fenr5 f en5 rr
2501
+ fengr1 f eng1 rr
2502
+ fengr2 f eng2 rr
2503
+ fengr3 f eng3 rr
2504
+ fengr4 f eng4 rr
2505
+ fengr5 f eng5 rr
2506
+ for1 f o1 rr
2507
+ for2 f o2 rr
2508
+ for3 f o3 rr
2509
+ for4 f o4 rr
2510
+ for5 f o5 rr
2511
+ four1 f ou1 rr
2512
+ four2 f ou2 rr
2513
+ four3 f ou3 rr
2514
+ four4 f ou4 rr
2515
+ four5 f ou5 rr
2516
+ fur1 f u1 rr
2517
+ fur2 f u2 rr
2518
+ fur3 f u3 rr
2519
+ fur4 f u4 rr
2520
+ fur5 f u5 rr
2521
+ gar1 g a1 rr
2522
+ gar2 g a2 rr
2523
+ gar3 g a3 rr
2524
+ gar4 g a4 rr
2525
+ gar5 g a5 rr
2526
+ gair1 g ai1 rr
2527
+ gair2 g ai2 rr
2528
+ gair3 g ai3 rr
2529
+ gair4 g ai4 rr
2530
+ gair5 g ai5 rr
2531
+ ganr1 g an1 rr
2532
+ ganr2 g an2 rr
2533
+ ganr3 g an3 rr
2534
+ ganr4 g an4 rr
2535
+ ganr5 g an5 rr
2536
+ gangr1 g ang1 rr
2537
+ gangr2 g ang2 rr
2538
+ gangr3 g ang3 rr
2539
+ gangr4 g ang4 rr
2540
+ gangr5 g ang5 rr
2541
+ gaor1 g ao1 rr
2542
+ gaor2 g ao2 rr
2543
+ gaor3 g ao3 rr
2544
+ gaor4 g ao4 rr
2545
+ gaor5 g ao5 rr
2546
+ ger1 g e1 rr
2547
+ ger2 g e2 rr
2548
+ ger3 g e3 rr
2549
+ ger4 g e4 rr
2550
+ ger5 g e5 rr
2551
+ geir1 g ei1 rr
2552
+ geir2 g ei2 rr
2553
+ geir3 g ei3 rr
2554
+ geir4 g ei4 rr
2555
+ geir5 g ei5 rr
2556
+ genr1 g en1 rr
2557
+ genr2 g en2 rr
2558
+ genr3 g en3 rr
2559
+ genr4 g en4 rr
2560
+ genr5 g en5 rr
2561
+ gengr1 g eng1 rr
2562
+ gengr2 g eng2 rr
2563
+ gengr3 g eng3 rr
2564
+ gengr4 g eng4 rr
2565
+ gengr5 g eng5 rr
2566
+ gongr1 g ong1 rr
2567
+ gongr2 g ong2 rr
2568
+ gongr3 g ong3 rr
2569
+ gongr4 g ong4 rr
2570
+ gongr5 g ong5 rr
2571
+ gour1 g ou1 rr
2572
+ gour2 g ou2 rr
2573
+ gour3 g ou3 rr
2574
+ gour4 g ou4 rr
2575
+ gour5 g ou5 rr
2576
+ gur1 g u1 rr
2577
+ gur2 g u2 rr
2578
+ gur3 g u3 rr
2579
+ gur4 g u4 rr
2580
+ gur5 g u5 rr
2581
+ guar1 g ua1 rr
2582
+ guar2 g ua2 rr
2583
+ guar3 g ua3 rr
2584
+ guar4 g ua4 rr
2585
+ guar5 g ua5 rr
2586
+ guair1 g uai1 rr
2587
+ guair2 g uai2 rr
2588
+ guair3 g uai3 rr
2589
+ guair4 g uai4 rr
2590
+ guair5 g uai5 rr
2591
+ guanr1 g uan1 rr
2592
+ guanr2 g uan2 rr
2593
+ guanr3 g uan3 rr
2594
+ guanr4 g uan4 rr
2595
+ guanr5 g uan5 rr
2596
+ guangr1 g uang1 rr
2597
+ guangr2 g uang2 rr
2598
+ guangr3 g uang3 rr
2599
+ guangr4 g uang4 rr
2600
+ guangr5 g uang5 rr
2601
+ guir1 g uei1 rr
2602
+ guir2 g uei2 rr
2603
+ guir3 g uei3 rr
2604
+ guir4 g uei4 rr
2605
+ guir5 g uei5 rr
2606
+ gunr1 g uen1 rr
2607
+ gunr2 g uen2 rr
2608
+ gunr3 g uen3 rr
2609
+ gunr4 g uen4 rr
2610
+ gunr5 g uen5 rr
2611
+ guor1 g uo1 rr
2612
+ guor2 g uo2 rr
2613
+ guor3 g uo3 rr
2614
+ guor4 g uo4 rr
2615
+ guor5 g uo5 rr
2616
+ har1 h a1 rr
2617
+ har2 h a2 rr
2618
+ har3 h a3 rr
2619
+ har4 h a4 rr
2620
+ har5 h a5 rr
2621
+ hair1 h ai1 rr
2622
+ hair2 h ai2 rr
2623
+ hair3 h ai3 rr
2624
+ hair4 h ai4 rr
2625
+ hair5 h ai5 rr
2626
+ hanr1 h an1 rr
2627
+ hanr2 h an2 rr
2628
+ hanr3 h an3 rr
2629
+ hanr4 h an4 rr
2630
+ hanr5 h an5 rr
2631
+ hangr1 h ang1 rr
2632
+ hangr2 h ang2 rr
2633
+ hangr3 h ang3 rr
2634
+ hangr4 h ang4 rr
2635
+ hangr5 h ang5 rr
2636
+ haor1 h ao1 rr
2637
+ haor2 h ao2 rr
2638
+ haor3 h ao3 rr
2639
+ haor4 h ao4 rr
2640
+ haor5 h ao5 rr
2641
+ her1 h e1 rr
2642
+ her2 h e2 rr
2643
+ her3 h e3 rr
2644
+ her4 h e4 rr
2645
+ her5 h e5 rr
2646
+ heir1 h ei1 rr
2647
+ heir2 h ei2 rr
2648
+ heir3 h ei3 rr
2649
+ heir4 h ei4 rr
2650
+ heir5 h ei5 rr
2651
+ henr1 h en1 rr
2652
+ henr2 h en2 rr
2653
+ henr3 h en3 rr
2654
+ henr4 h en4 rr
2655
+ henr5 h en5 rr
2656
+ hengr1 h eng1 rr
2657
+ hengr2 h eng2 rr
2658
+ hengr3 h eng3 rr
2659
+ hengr4 h eng4 rr
2660
+ hengr5 h eng5 rr
2661
+ hongr1 h ong1 rr
2662
+ hongr2 h ong2 rr
2663
+ hongr3 h ong3 rr
2664
+ hongr4 h ong4 rr
2665
+ hongr5 h ong5 rr
2666
+ hour1 h ou1 rr
2667
+ hour2 h ou2 rr
2668
+ hour3 h ou3 rr
2669
+ hour4 h ou4 rr
2670
+ hour5 h ou5 rr
2671
+ hur1 h u1 rr
2672
+ hur2 h u2 rr
2673
+ hur3 h u3 rr
2674
+ hur4 h u4 rr
2675
+ hur5 h u5 rr
2676
+ huar1 h ua1 rr
2677
+ huar2 h ua2 rr
2678
+ huar3 h ua3 rr
2679
+ huar4 h ua4 rr
2680
+ huar5 h ua5 rr
2681
+ huair1 h uai1 rr
2682
+ huair2 h uai2 rr
2683
+ huair3 h uai3 rr
2684
+ huair4 h uai4 rr
2685
+ huair5 h uai5 rr
2686
+ huanr1 h uan1 rr
2687
+ huanr2 h uan2 rr
2688
+ huanr3 h uan3 rr
2689
+ huanr4 h uan4 rr
2690
+ huanr5 h uan5 rr
2691
+ huangr1 h uang1 rr
2692
+ huangr2 h uang2 rr
2693
+ huangr3 h uang3 rr
2694
+ huangr4 h uang4 rr
2695
+ huangr5 h uang5 rr
2696
+ huir1 h uei1 rr
2697
+ huir2 h uei2 rr
2698
+ huir3 h uei3 rr
2699
+ huir4 h uei4 rr
2700
+ huir5 h uei5 rr
2701
+ hunr1 h uen1 rr
2702
+ hunr2 h uen2 rr
2703
+ hunr3 h uen3 rr
2704
+ hunr4 h uen4 rr
2705
+ hunr5 h uen5 rr
2706
+ huor1 h uo1 rr
2707
+ huor2 h uo2 rr
2708
+ huor3 h uo3 rr
2709
+ huor4 h uo4 rr
2710
+ huor5 h uo5 rr
2711
+ jir1 j i1 rr
2712
+ jir2 j i2 rr
2713
+ jir3 j i3 rr
2714
+ jir4 j i4 rr
2715
+ jir5 j i5 rr
2716
+ jiar1 j ia1 rr
2717
+ jiar2 j ia2 rr
2718
+ jiar3 j ia3 rr
2719
+ jiar4 j ia4 rr
2720
+ jiar5 j ia5 rr
2721
+ jianr1 j ian1 rr
2722
+ jianr2 j ian2 rr
2723
+ jianr3 j ian3 rr
2724
+ jianr4 j ian4 rr
2725
+ jianr5 j ian5 rr
2726
+ jiangr1 j iang1 rr
2727
+ jiangr2 j iang2 rr
2728
+ jiangr3 j iang3 rr
2729
+ jiangr4 j iang4 rr
2730
+ jiangr5 j iang5 rr
2731
+ jiaor1 j iao1 rr
2732
+ jiaor2 j iao2 rr
2733
+ jiaor3 j iao3 rr
2734
+ jiaor4 j iao4 rr
2735
+ jiaor5 j iao5 rr
2736
+ jier1 j ie1 rr
2737
+ jier2 j ie2 rr
2738
+ jier3 j ie3 rr
2739
+ jier4 j ie4 rr
2740
+ jier5 j ie5 rr
2741
+ jinr1 j in1 rr
2742
+ jinr2 j in2 rr
2743
+ jinr3 j in3 rr
2744
+ jinr4 j in4 rr
2745
+ jinr5 j in5 rr
2746
+ jingr1 j ing1 rr
2747
+ jingr2 j ing2 rr
2748
+ jingr3 j ing3 rr
2749
+ jingr4 j ing4 rr
2750
+ jingr5 j ing5 rr
2751
+ jiongr1 j iong1 rr
2752
+ jiongr2 j iong2 rr
2753
+ jiongr3 j iong3 rr
2754
+ jiongr4 j iong4 rr
2755
+ jiongr5 j iong5 rr
2756
+ jiur1 j iou1 rr
2757
+ jiur2 j iou2 rr
2758
+ jiur3 j iou3 rr
2759
+ jiur4 j iou4 rr
2760
+ jiur5 j iou5 rr
2761
+ jur1 j v1 rr
2762
+ jur2 j v2 rr
2763
+ jur3 j v3 rr
2764
+ jur4 j v4 rr
2765
+ jur5 j v5 rr
2766
+ juanr1 j van1 rr
2767
+ juanr2 j van2 rr
2768
+ juanr3 j van3 rr
2769
+ juanr4 j van4 rr
2770
+ juanr5 j van5 rr
2771
+ juer1 j ve1 rr
2772
+ juer2 j ve2 rr
2773
+ juer3 j ve3 rr
2774
+ juer4 j ve4 rr
2775
+ juer5 j ve5 rr
2776
+ junr1 j vn1 rr
2777
+ junr2 j vn2 rr
2778
+ junr3 j vn3 rr
2779
+ junr4 j vn4 rr
2780
+ junr5 j vn5 rr
2781
+ kar1 k a1 rr
2782
+ kar2 k a2 rr
2783
+ kar3 k a3 rr
2784
+ kar4 k a4 rr
2785
+ kar5 k a5 rr
2786
+ kair1 k ai1 rr
2787
+ kair2 k ai2 rr
2788
+ kair3 k ai3 rr
2789
+ kair4 k ai4 rr
2790
+ kair5 k ai5 rr
2791
+ kanr1 k an1 rr
2792
+ kanr2 k an2 rr
2793
+ kanr3 k an3 rr
2794
+ kanr4 k an4 rr
2795
+ kanr5 k an5 rr
2796
+ kangr1 k ang1 rr
2797
+ kangr2 k ang2 rr
2798
+ kangr3 k ang3 rr
2799
+ kangr4 k ang4 rr
2800
+ kangr5 k ang5 rr
2801
+ kaor1 k ao1 rr
2802
+ kaor2 k ao2 rr
2803
+ kaor3 k ao3 rr
2804
+ kaor4 k ao4 rr
2805
+ kaor5 k ao5 rr
2806
+ ker1 k e1 rr
2807
+ ker2 k e2 rr
2808
+ ker3 k e3 rr
2809
+ ker4 k e4 rr
2810
+ ker5 k e5 rr
2811
+ keir1 k ei1 rr
2812
+ keir2 k ei2 rr
2813
+ keir3 k ei3 rr
2814
+ keir4 k ei4 rr
2815
+ keir5 k ei5 rr
2816
+ kenr1 k en1 rr
2817
+ kenr2 k en2 rr
2818
+ kenr3 k en3 rr
2819
+ kenr4 k en4 rr
2820
+ kenr5 k en5 rr
2821
+ kengr1 k eng1 rr
2822
+ kengr2 k eng2 rr
2823
+ kengr3 k eng3 rr
2824
+ kengr4 k eng4 rr
2825
+ kengr5 k eng5 rr
2826
+ kongr1 k ong1 rr
2827
+ kongr2 k ong2 rr
2828
+ kongr3 k ong3 rr
2829
+ kongr4 k ong4 rr
2830
+ kongr5 k ong5 rr
2831
+ kour1 k ou1 rr
2832
+ kour2 k ou2 rr
2833
+ kour3 k ou3 rr
2834
+ kour4 k ou4 rr
2835
+ kour5 k ou5 rr
2836
+ kur1 k u1 rr
2837
+ kur2 k u2 rr
2838
+ kur3 k u3 rr
2839
+ kur4 k u4 rr
2840
+ kur5 k u5 rr
2841
+ kuar1 k ua1 rr
2842
+ kuar2 k ua2 rr
2843
+ kuar3 k ua3 rr
2844
+ kuar4 k ua4 rr
2845
+ kuar5 k ua5 rr
2846
+ kuair1 k uai1 rr
2847
+ kuair2 k uai2 rr
2848
+ kuair3 k uai3 rr
2849
+ kuair4 k uai4 rr
2850
+ kuair5 k uai5 rr
2851
+ kuanr1 k uan1 rr
2852
+ kuanr2 k uan2 rr
2853
+ kuanr3 k uan3 rr
2854
+ kuanr4 k uan4 rr
2855
+ kuanr5 k uan5 rr
2856
+ kuangr1 k uang1 rr
2857
+ kuangr2 k uang2 rr
2858
+ kuangr3 k uang3 rr
2859
+ kuangr4 k uang4 rr
2860
+ kuangr5 k uang5 rr
2861
+ kuir1 k uei1 rr
2862
+ kuir2 k uei2 rr
2863
+ kuir3 k uei3 rr
2864
+ kuir4 k uei4 rr
2865
+ kuir5 k uei5 rr
2866
+ kunr1 k uen1 rr
2867
+ kunr2 k uen2 rr
2868
+ kunr3 k uen3 rr
2869
+ kunr4 k uen4 rr
2870
+ kunr5 k uen5 rr
2871
+ kuor1 k uo1 rr
2872
+ kuor2 k uo2 rr
2873
+ kuor3 k uo3 rr
2874
+ kuor4 k uo4 rr
2875
+ kuor5 k uo5 rr
2876
+ lar1 l a1 rr
2877
+ lar2 l a2 rr
2878
+ lar3 l a3 rr
2879
+ lar4 l a4 rr
2880
+ lar5 l a5 rr
2881
+ lair1 l ai1 rr
2882
+ lair2 l ai2 rr
2883
+ lair3 l ai3 rr
2884
+ lair4 l ai4 rr
2885
+ lair5 l ai5 rr
2886
+ lanr1 l an1 rr
2887
+ lanr2 l an2 rr
2888
+ lanr3 l an3 rr
2889
+ lanr4 l an4 rr
2890
+ lanr5 l an5 rr
2891
+ langr1 l ang1 rr
2892
+ langr2 l ang2 rr
2893
+ langr3 l ang3 rr
2894
+ langr4 l ang4 rr
2895
+ langr5 l ang5 rr
2896
+ laor1 l ao1 rr
2897
+ laor2 l ao2 rr
2898
+ laor3 l ao3 rr
2899
+ laor4 l ao4 rr
2900
+ laor5 l ao5 rr
2901
+ ler1 l e1 rr
2902
+ ler2 l e2 rr
2903
+ ler3 l e3 rr
2904
+ ler4 l e4 rr
2905
+ ler5 l e5 rr
2906
+ leir1 l ei1 rr
2907
+ leir2 l ei2 rr
2908
+ leir3 l ei3 rr
2909
+ leir4 l ei4 rr
2910
+ leir5 l ei5 rr
2911
+ lengr1 l eng1 rr
2912
+ lengr2 l eng2 rr
2913
+ lengr3 l eng3 rr
2914
+ lengr4 l eng4 rr
2915
+ lengr5 l eng5 rr
2916
+ lir1 l i1 rr
2917
+ lir2 l i2 rr
2918
+ lir3 l i3 rr
2919
+ lir4 l i4 rr
2920
+ lir5 l i5 rr
2921
+ liar1 l ia1 rr
2922
+ liar2 l ia2 rr
2923
+ liar3 l ia3 rr
2924
+ liar4 l ia4 rr
2925
+ liar5 l ia5 rr
2926
+ lianr1 l ian1 rr
2927
+ lianr2 l ian2 rr
2928
+ lianr3 l ian3 rr
2929
+ lianr4 l ian4 rr
2930
+ lianr5 l ian5 rr
2931
+ liangr1 l iang1 rr
2932
+ liangr2 l iang2 rr
2933
+ liangr3 l iang3 rr
2934
+ liangr4 l iang4 rr
2935
+ liangr5 l iang5 rr
2936
+ liaor1 l iao1 rr
2937
+ liaor2 l iao2 rr
2938
+ liaor3 l iao3 rr
2939
+ liaor4 l iao4 rr
2940
+ liaor5 l iao5 rr
2941
+ lier1 l ie1 rr
2942
+ lier2 l ie2 rr
2943
+ lier3 l ie3 rr
2944
+ lier4 l ie4 rr
2945
+ lier5 l ie5 rr
2946
+ linr1 l in1 rr
2947
+ linr2 l in2 rr
2948
+ linr3 l in3 rr
2949
+ linr4 l in4 rr
2950
+ linr5 l in5 rr
2951
+ lingr1 l ing1 rr
2952
+ lingr2 l ing2 rr
2953
+ lingr3 l ing3 rr
2954
+ lingr4 l ing4 rr
2955
+ lingr5 l ing5 rr
2956
+ liur1 l iou1 rr
2957
+ liur2 l iou2 rr
2958
+ liur3 l iou3 rr
2959
+ liur4 l iou4 rr
2960
+ liur5 l iou5 rr
2961
+ lor1 l o1 rr
2962
+ lor2 l o2 rr
2963
+ lor3 l o3 rr
2964
+ lor4 l o4 rr
2965
+ lor5 l o5 rr
2966
+ longr1 l ong1 rr
2967
+ longr2 l ong2 rr
2968
+ longr3 l ong3 rr
2969
+ longr4 l ong4 rr
2970
+ longr5 l ong5 rr
2971
+ lour1 l ou1 rr
2972
+ lour2 l ou2 rr
2973
+ lour3 l ou3 rr
2974
+ lour4 l ou4 rr
2975
+ lour5 l ou5 rr
2976
+ lur1 l u1 rr
2977
+ lur2 l u2 rr
2978
+ lur3 l u3 rr
2979
+ lur4 l u4 rr
2980
+ lur5 l u5 rr
2981
+ luanr1 l uan1 rr
2982
+ luanr2 l uan2 rr
2983
+ luanr3 l uan3 rr
2984
+ luanr4 l uan4 rr
2985
+ luanr5 l uan5 rr
2986
+ luer1 l ve1 rr
2987
+ luer2 l ve2 rr
2988
+ luer3 l ve3 rr
2989
+ luer4 l ve4 rr
2990
+ luer5 l ve5 rr
2991
+ lver1 l ve1 rr
2992
+ lver2 l ve2 rr
2993
+ lver3 l ve3 rr
2994
+ lver4 l ve4 rr
2995
+ lver5 l ve5 rr
2996
+ lunr1 l uen1 rr
2997
+ lunr2 l uen2 rr
2998
+ lunr3 l uen3 rr
2999
+ lunr4 l uen4 rr
3000
+ lunr5 l uen5 rr
3001
+ luor1 l uo1 rr
3002
+ luor2 l uo2 rr
3003
+ luor3 l uo3 rr
3004
+ luor4 l uo4 rr
3005
+ luor5 l uo5 rr
3006
+ lvr1 l v1 rr
3007
+ lvr2 l v2 rr
3008
+ lvr3 l v3 rr
3009
+ lvr4 l v4 rr
3010
+ lvr5 l v5 rr
3011
+ mar1 m a1 rr
3012
+ mar2 m a2 rr
3013
+ mar3 m a3 rr
3014
+ mar4 m a4 rr
3015
+ mar5 m a5 rr
3016
+ mair1 m ai1 rr
3017
+ mair2 m ai2 rr
3018
+ mair3 m ai3 rr
3019
+ mair4 m ai4 rr
3020
+ mair5 m ai5 rr
3021
+ manr1 m an1 rr
3022
+ manr2 m an2 rr
3023
+ manr3 m an3 rr
3024
+ manr4 m an4 rr
3025
+ manr5 m an5 rr
3026
+ mangr1 m ang1 rr
3027
+ mangr2 m ang2 rr
3028
+ mangr3 m ang3 rr
3029
+ mangr4 m ang4 rr
3030
+ mangr5 m ang5 rr
3031
+ maor1 m ao1 rr
3032
+ maor2 m ao2 rr
3033
+ maor3 m ao3 rr
3034
+ maor4 m ao4 rr
3035
+ maor5 m ao5 rr
3036
+ mer1 m e1 rr
3037
+ mer2 m e2 rr
3038
+ mer3 m e3 rr
3039
+ mer4 m e4 rr
3040
+ mer5 m e5 rr
3041
+ meir1 m ei1 rr
3042
+ meir2 m ei2 rr
3043
+ meir3 m ei3 rr
3044
+ meir4 m ei4 rr
3045
+ meir5 m ei5 rr
3046
+ menr1 m en1 rr
3047
+ menr2 m en2 rr
3048
+ menr3 m en3 rr
3049
+ menr4 m en4 rr
3050
+ menr5 m en5 rr
3051
+ mengr1 m eng1 rr
3052
+ mengr2 m eng2 rr
3053
+ mengr3 m eng3 rr
3054
+ mengr4 m eng4 rr
3055
+ mengr5 m eng5 rr
3056
+ mir1 m i1 rr
3057
+ mir2 m i2 rr
3058
+ mir3 m i3 rr
3059
+ mir4 m i4 rr
3060
+ mir5 m i5 rr
3061
+ mianr1 m ian1 rr
3062
+ mianr2 m ian2 rr
3063
+ mianr3 m ian3 rr
3064
+ mianr4 m ian4 rr
3065
+ mianr5 m ian5 rr
3066
+ miaor1 m iao1 rr
3067
+ miaor2 m iao2 rr
3068
+ miaor3 m iao3 rr
3069
+ miaor4 m iao4 rr
3070
+ miaor5 m iao5 rr
3071
+ mier1 m ie1 rr
3072
+ mier2 m ie2 rr
3073
+ mier3 m ie3 rr
3074
+ mier4 m ie4 rr
3075
+ mier5 m ie5 rr
3076
+ minr1 m in1 rr
3077
+ minr2 m in2 rr
3078
+ minr3 m in3 rr
3079
+ minr4 m in4 rr
3080
+ minr5 m in5 rr
3081
+ mingr1 m ing1 rr
3082
+ mingr2 m ing2 rr
3083
+ mingr3 m ing3 rr
3084
+ mingr4 m ing4 rr
3085
+ mingr5 m ing5 rr
3086
+ miur1 m iou1 rr
3087
+ miur2 m iou2 rr
3088
+ miur3 m iou3 rr
3089
+ miur4 m iou4 rr
3090
+ miur5 m iou5 rr
3091
+ mor1 m o1 rr
3092
+ mor2 m o2 rr
3093
+ mor3 m o3 rr
3094
+ mor4 m o4 rr
3095
+ mor5 m o5 rr
3096
+ mour1 m ou1 rr
3097
+ mour2 m ou2 rr
3098
+ mour3 m ou3 rr
3099
+ mour4 m ou4 rr
3100
+ mour5 m ou5 rr
3101
+ mur1 m u1 rr
3102
+ mur2 m u2 rr
3103
+ mur3 m u3 rr
3104
+ mur4 m u4 rr
3105
+ mur5 m u5 rr
3106
+ nar1 n a1 rr
3107
+ nar2 n a2 rr
3108
+ nar3 n a3 rr
3109
+ nar4 n a4 rr
3110
+ nar5 n a5 rr
3111
+ nair1 n ai1 rr
3112
+ nair2 n ai2 rr
3113
+ nair3 n ai3 rr
3114
+ nair4 n ai4 rr
3115
+ nair5 n ai5 rr
3116
+ nanr1 n an1 rr
3117
+ nanr2 n an2 rr
3118
+ nanr3 n an3 rr
3119
+ nanr4 n an4 rr
3120
+ nanr5 n an5 rr
3121
+ nangr1 n ang1 rr
3122
+ nangr2 n ang2 rr
3123
+ nangr3 n ang3 rr
3124
+ nangr4 n ang4 rr
3125
+ nangr5 n ang5 rr
3126
+ naor1 n ao1 rr
3127
+ naor2 n ao2 rr
3128
+ naor3 n ao3 rr
3129
+ naor4 n ao4 rr
3130
+ naor5 n ao5 rr
3131
+ ner1 n e1 rr
3132
+ ner2 n e2 rr
3133
+ ner3 n e3 rr
3134
+ ner4 n e4 rr
3135
+ ner5 n e5 rr
3136
+ neir1 n ei1 rr
3137
+ neir2 n ei2 rr
3138
+ neir3 n ei3 rr
3139
+ neir4 n ei4 rr
3140
+ neir5 n ei5 rr
3141
+ nenr1 n en1 rr
3142
+ nenr2 n en2 rr
3143
+ nenr3 n en3 rr
3144
+ nenr4 n en4 rr
3145
+ nenr5 n en5 rr
3146
+ nengr1 n eng1 rr
3147
+ nengr2 n eng2 rr
3148
+ nengr3 n eng3 rr
3149
+ nengr4 n eng4 rr
3150
+ nengr5 n eng5 rr
3151
+ nir1 n i1 rr
3152
+ nir2 n i2 rr
3153
+ nir3 n i3 rr
3154
+ nir4 n i4 rr
3155
+ nir5 n i5 rr
3156
+ nianr1 n ian1 rr
3157
+ nianr2 n ian2 rr
3158
+ nianr3 n ian3 rr
3159
+ nianr4 n ian4 rr
3160
+ nianr5 n ian5 rr
3161
+ niangr1 n iang1 rr
3162
+ niangr2 n iang2 rr
3163
+ niangr3 n iang3 rr
3164
+ niangr4 n iang4 rr
3165
+ niangr5 n iang5 rr
3166
+ niaor1 n iao1 rr
3167
+ niaor2 n iao2 rr
3168
+ niaor3 n iao3 rr
3169
+ niaor4 n iao4 rr
3170
+ niaor5 n iao5 rr
3171
+ nier1 n ie1 rr
3172
+ nier2 n ie2 rr
3173
+ nier3 n ie3 rr
3174
+ nier4 n ie4 rr
3175
+ nier5 n ie5 rr
3176
+ ninr1 n in1 rr
3177
+ ninr2 n in2 rr
3178
+ ninr3 n in3 rr
3179
+ ninr4 n in4 rr
3180
+ ninr5 n in5 rr
3181
+ ningr1 n ing1 rr
3182
+ ningr2 n ing2 rr
3183
+ ningr3 n ing3 rr
3184
+ ningr4 n ing4 rr
3185
+ ningr5 n ing5 rr
3186
+ niur1 n iou1 rr
3187
+ niur2 n iou2 rr
3188
+ niur3 n iou3 rr
3189
+ niur4 n iou4 rr
3190
+ niur5 n iou5 rr
3191
+ nongr1 n ong1 rr
3192
+ nongr2 n ong2 rr
3193
+ nongr3 n ong3 rr
3194
+ nongr4 n ong4 rr
3195
+ nongr5 n ong5 rr
3196
+ nour1 n ou1 rr
3197
+ nour2 n ou2 rr
3198
+ nour3 n ou3 rr
3199
+ nour4 n ou4 rr
3200
+ nour5 n ou5 rr
3201
+ nur1 n u1 rr
3202
+ nur2 n u2 rr
3203
+ nur3 n u3 rr
3204
+ nur4 n u4 rr
3205
+ nur5 n u5 rr
3206
+ nuanr1 n uan1 rr
3207
+ nuanr2 n uan2 rr
3208
+ nuanr3 n uan3 rr
3209
+ nuanr4 n uan4 rr
3210
+ nuanr5 n uan5 rr
3211
+ nuer1 n ve1 rr
3212
+ nuer2 n ve2 rr
3213
+ nuer3 n ve3 rr
3214
+ nuer4 n ve4 rr
3215
+ nuer5 n ve5 rr
3216
+ nver1 n ve1 rr
3217
+ nver2 n ve2 rr
3218
+ nver3 n ve3 rr
3219
+ nver4 n ve4 rr
3220
+ nver5 n ve5 rr
3221
+ nuor1 n uo1 rr
3222
+ nuor2 n uo2 rr
3223
+ nuor3 n uo3 rr
3224
+ nuor4 n uo4 rr
3225
+ nuor5 n uo5 rr
3226
+ nvr1 n v1 rr
3227
+ nvr2 n v2 rr
3228
+ nvr3 n v3 rr
3229
+ nvr4 n v4 rr
3230
+ nvr5 n v5 rr
3231
+ or1 o1 rr
3232
+ or2 o2 rr
3233
+ or3 o3 rr
3234
+ or4 o4 rr
3235
+ or5 o5 rr
3236
+ our1 ou1 rr
3237
+ our2 ou2 rr
3238
+ our3 ou3 rr
3239
+ our4 ou4 rr
3240
+ our5 ou5 rr
3241
+ par1 p a1 rr
3242
+ par2 p a2 rr
3243
+ par3 p a3 rr
3244
+ par4 p a4 rr
3245
+ par5 p a5 rr
3246
+ pair1 p ai1 rr
3247
+ pair2 p ai2 rr
3248
+ pair3 p ai3 rr
3249
+ pair4 p ai4 rr
3250
+ pair5 p ai5 rr
3251
+ panr1 p an1 rr
3252
+ panr2 p an2 rr
3253
+ panr3 p an3 rr
3254
+ panr4 p an4 rr
3255
+ panr5 p an5 rr
3256
+ pangr1 p ang1 rr
3257
+ pangr2 p ang2 rr
3258
+ pangr3 p ang3 rr
3259
+ pangr4 p ang4 rr
3260
+ pangr5 p ang5 rr
3261
+ paor1 p ao1 rr
3262
+ paor2 p ao2 rr
3263
+ paor3 p ao3 rr
3264
+ paor4 p ao4 rr
3265
+ paor5 p ao5 rr
3266
+ peir1 p ei1 rr
3267
+ peir2 p ei2 rr
3268
+ peir3 p ei3 rr
3269
+ peir4 p ei4 rr
3270
+ peir5 p ei5 rr
3271
+ penr1 p en1 rr
3272
+ penr2 p en2 rr
3273
+ penr3 p en3 rr
3274
+ penr4 p en4 rr
3275
+ penr5 p en5 rr
3276
+ pengr1 p eng1 rr
3277
+ pengr2 p eng2 rr
3278
+ pengr3 p eng3 rr
3279
+ pengr4 p eng4 rr
3280
+ pengr5 p eng5 rr
3281
+ pir1 p i1 rr
3282
+ pir2 p i2 rr
3283
+ pir3 p i3 rr
3284
+ pir4 p i4 rr
3285
+ pir5 p i5 rr
3286
+ pianr1 p ian1 rr
3287
+ pianr2 p ian2 rr
3288
+ pianr3 p ian3 rr
3289
+ pianr4 p ian4 rr
3290
+ pianr5 p ian5 rr
3291
+ piaor1 p iao1 rr
3292
+ piaor2 p iao2 rr
3293
+ piaor3 p iao3 rr
3294
+ piaor4 p iao4 rr
3295
+ piaor5 p iao5 rr
3296
+ pier1 p ie1 rr
3297
+ pier2 p ie2 rr
3298
+ pier3 p ie3 rr
3299
+ pier4 p ie4 rr
3300
+ pier5 p ie5 rr
3301
+ pinr1 p in1 rr
3302
+ pinr2 p in2 rr
3303
+ pinr3 p in3 rr
3304
+ pinr4 p in4 rr
3305
+ pinr5 p in5 rr
3306
+ pingr1 p ing1 rr
3307
+ pingr2 p ing2 rr
3308
+ pingr3 p ing3 rr
3309
+ pingr4 p ing4 rr
3310
+ pingr5 p ing5 rr
3311
+ por1 p o1 rr
3312
+ por2 p o2 rr
3313
+ por3 p o3 rr
3314
+ por4 p o4 rr
3315
+ por5 p o5 rr
3316
+ pour1 p ou1 rr
3317
+ pour2 p ou2 rr
3318
+ pour3 p ou3 rr
3319
+ pour4 p ou4 rr
3320
+ pour5 p ou5 rr
3321
+ pur1 p u1 rr
3322
+ pur2 p u2 rr
3323
+ pur3 p u3 rr
3324
+ pur4 p u4 rr
3325
+ pur5 p u5 rr
3326
+ qir1 q i1 rr
3327
+ qir2 q i2 rr
3328
+ qir3 q i3 rr
3329
+ qir4 q i4 rr
3330
+ qir5 q i5 rr
3331
+ qiar1 q ia1 rr
3332
+ qiar2 q ia2 rr
3333
+ qiar3 q ia3 rr
3334
+ qiar4 q ia4 rr
3335
+ qiar5 q ia5 rr
3336
+ qianr1 q ian1 rr
3337
+ qianr2 q ian2 rr
3338
+ qianr3 q ian3 rr
3339
+ qianr4 q ian4 rr
3340
+ qianr5 q ian5 rr
3341
+ qiangr1 q iang1 rr
3342
+ qiangr2 q iang2 rr
3343
+ qiangr3 q iang3 rr
3344
+ qiangr4 q iang4 rr
3345
+ qiangr5 q iang5 rr
3346
+ qiaor1 q iao1 rr
3347
+ qiaor2 q iao2 rr
3348
+ qiaor3 q iao3 rr
3349
+ qiaor4 q iao4 rr
3350
+ qiaor5 q iao5 rr
3351
+ qier1 q ie1 rr
3352
+ qier2 q ie2 rr
3353
+ qier3 q ie3 rr
3354
+ qier4 q ie4 rr
3355
+ qier5 q ie5 rr
3356
+ qinr1 q in1 rr
3357
+ qinr2 q in2 rr
3358
+ qinr3 q in3 rr
3359
+ qinr4 q in4 rr
3360
+ qinr5 q in5 rr
3361
+ qingr1 q ing1 rr
3362
+ qingr2 q ing2 rr
3363
+ qingr3 q ing3 rr
3364
+ qingr4 q ing4 rr
3365
+ qingr5 q ing5 rr
3366
+ qiongr1 q iong1 rr
3367
+ qiongr2 q iong2 rr
3368
+ qiongr3 q iong3 rr
3369
+ qiongr4 q iong4 rr
3370
+ qiongr5 q iong5 rr
3371
+ qiur1 q iou1 rr
3372
+ qiur2 q iou2 rr
3373
+ qiur3 q iou3 rr
3374
+ qiur4 q iou4 rr
3375
+ qiur5 q iou5 rr
3376
+ qur1 q v1 rr
3377
+ qur2 q v2 rr
3378
+ qur3 q v3 rr
3379
+ qur4 q v4 rr
3380
+ qur5 q v5 rr
3381
+ quanr1 q van1 rr
3382
+ quanr2 q van2 rr
3383
+ quanr3 q van3 rr
3384
+ quanr4 q van4 rr
3385
+ quanr5 q van5 rr
3386
+ quer1 q ve1 rr
3387
+ quer2 q ve2 rr
3388
+ quer3 q ve3 rr
3389
+ quer4 q ve4 rr
3390
+ quer5 q ve5 rr
3391
+ qunr1 q vn1 rr
3392
+ qunr2 q vn2 rr
3393
+ qunr3 q vn3 rr
3394
+ qunr4 q vn4 rr
3395
+ qunr5 q vn5 rr
3396
+ ranr1 r an1 rr
3397
+ ranr2 r an2 rr
3398
+ ranr3 r an3 rr
3399
+ ranr4 r an4 rr
3400
+ ranr5 r an5 rr
3401
+ rangr1 r ang1 rr
3402
+ rangr2 r ang2 rr
3403
+ rangr3 r ang3 rr
3404
+ rangr4 r ang4 rr
3405
+ rangr5 r ang5 rr
3406
+ raor1 r ao1 rr
3407
+ raor2 r ao2 rr
3408
+ raor3 r ao3 rr
3409
+ raor4 r ao4 rr
3410
+ raor5 r ao5 rr
3411
+ rer1 r e1 rr
3412
+ rer2 r e2 rr
3413
+ rer3 r e3 rr
3414
+ rer4 r e4 rr
3415
+ rer5 r e5 rr
3416
+ renr1 r en1 rr
3417
+ renr2 r en2 rr
3418
+ renr3 r en3 rr
3419
+ renr4 r en4 rr
3420
+ renr5 r en5 rr
3421
+ rengr1 r eng1 rr
3422
+ rengr2 r eng2 rr
3423
+ rengr3 r eng3 rr
3424
+ rengr4 r eng4 rr
3425
+ rengr5 r eng5 rr
3426
+ rir1 r iii1 rr
3427
+ rir2 r iii2 rr
3428
+ rir3 r iii3 rr
3429
+ rir4 r iii4 rr
3430
+ rir5 r iii5 rr
3431
+ rongr1 r ong1 rr
3432
+ rongr2 r ong2 rr
3433
+ rongr3 r ong3 rr
3434
+ rongr4 r ong4 rr
3435
+ rongr5 r ong5 rr
3436
+ rour1 r ou1 rr
3437
+ rour2 r ou2 rr
3438
+ rour3 r ou3 rr
3439
+ rour4 r ou4 rr
3440
+ rour5 r ou5 rr
3441
+ rur1 r u1 rr
3442
+ rur2 r u2 rr
3443
+ rur3 r u3 rr
3444
+ rur4 r u4 rr
3445
+ rur5 r u5 rr
3446
+ ruar1 r ua1 rr
3447
+ ruar2 r ua2 rr
3448
+ ruar3 r ua3 rr
3449
+ ruar4 r ua4 rr
3450
+ ruar5 r ua5 rr
3451
+ ruanr1 r uan1 rr
3452
+ ruanr2 r uan2 rr
3453
+ ruanr3 r uan3 rr
3454
+ ruanr4 r uan4 rr
3455
+ ruanr5 r uan5 rr
3456
+ ruir1 r uei1 rr
3457
+ ruir2 r uei2 rr
3458
+ ruir3 r uei3 rr
3459
+ ruir4 r uei4 rr
3460
+ ruir5 r uei5 rr
3461
+ runr1 r uen1 rr
3462
+ runr2 r uen2 rr
3463
+ runr3 r uen3 rr
3464
+ runr4 r uen4 rr
3465
+ runr5 r uen5 rr
3466
+ ruor1 r uo1 rr
3467
+ ruor2 r uo2 rr
3468
+ ruor3 r uo3 rr
3469
+ ruor4 r uo4 rr
3470
+ ruor5 r uo5 rr
3471
+ sar1 s a1 rr
3472
+ sar2 s a2 rr
3473
+ sar3 s a3 rr
3474
+ sar4 s a4 rr
3475
+ sar5 s a5 rr
3476
+ sair1 s ai1 rr
3477
+ sair2 s ai2 rr
3478
+ sair3 s ai3 rr
3479
+ sair4 s ai4 rr
3480
+ sair5 s ai5 rr
3481
+ sanr1 s an1 rr
3482
+ sanr2 s an2 rr
3483
+ sanr3 s an3 rr
3484
+ sanr4 s an4 rr
3485
+ sanr5 s an5 rr
3486
+ sangr1 s ang1 rr
3487
+ sangr2 s ang2 rr
3488
+ sangr3 s ang3 rr
3489
+ sangr4 s ang4 rr
3490
+ sangr5 s ang5 rr
3491
+ saor1 s ao1 rr
3492
+ saor2 s ao2 rr
3493
+ saor3 s ao3 rr
3494
+ saor4 s ao4 rr
3495
+ saor5 s ao5 rr
3496
+ ser1 s e1 rr
3497
+ ser2 s e2 rr
3498
+ ser3 s e3 rr
3499
+ ser4 s e4 rr
3500
+ ser5 s e5 rr
3501
+ senr1 s en1 rr
3502
+ senr2 s en2 rr
3503
+ senr3 s en3 rr
3504
+ senr4 s en4 rr
3505
+ senr5 s en5 rr
3506
+ sengr1 s eng1 rr
3507
+ sengr2 s eng2 rr
3508
+ sengr3 s eng3 rr
3509
+ sengr4 s eng4 rr
3510
+ sengr5 s eng5 rr
3511
+ shar1 sh a1 rr
3512
+ shar2 sh a2 rr
3513
+ shar3 sh a3 rr
3514
+ shar4 sh a4 rr
3515
+ shar5 sh a5 rr
3516
+ shair1 sh ai1 rr
3517
+ shair2 sh ai2 rr
3518
+ shair3 sh ai3 rr
3519
+ shair4 sh ai4 rr
3520
+ shair5 sh ai5 rr
3521
+ shanr1 sh an1 rr
3522
+ shanr2 sh an2 rr
3523
+ shanr3 sh an3 rr
3524
+ shanr4 sh an4 rr
3525
+ shanr5 sh an5 rr
3526
+ shangr1 sh ang1 rr
3527
+ shangr2 sh ang2 rr
3528
+ shangr3 sh ang3 rr
3529
+ shangr4 sh ang4 rr
3530
+ shangr5 sh ang5 rr
3531
+ shaor1 sh ao1 rr
3532
+ shaor2 sh ao2 rr
3533
+ shaor3 sh ao3 rr
3534
+ shaor4 sh ao4 rr
3535
+ shaor5 sh ao5 rr
3536
+ sher1 sh e1 rr
3537
+ sher2 sh e2 rr
3538
+ sher3 sh e3 rr
3539
+ sher4 sh e4 rr
3540
+ sher5 sh e5 rr
3541
+ sheir1 sh ei1 rr
3542
+ sheir2 sh ei2 rr
3543
+ sheir3 sh ei3 rr
3544
+ sheir4 sh ei4 rr
3545
+ sheir5 sh ei5 rr
3546
+ shenr1 sh en1 rr
3547
+ shenr2 sh en2 rr
3548
+ shenr3 sh en3 rr
3549
+ shenr4 sh en4 rr
3550
+ shenr5 sh en5 rr
3551
+ shengr1 sh eng1 rr
3552
+ shengr2 sh eng2 rr
3553
+ shengr3 sh eng3 rr
3554
+ shengr4 sh eng4 rr
3555
+ shengr5 sh eng5 rr
3556
+ shir1 sh iii1 rr
3557
+ shir2 sh iii2 rr
3558
+ shir3 sh iii3 rr
3559
+ shir4 sh iii4 rr
3560
+ shir5 sh iii5 rr
3561
+ shour1 sh ou1 rr
3562
+ shour2 sh ou2 rr
3563
+ shour3 sh ou3 rr
3564
+ shour4 sh ou4 rr
3565
+ shour5 sh ou5 rr
3566
+ shur1 sh u1 rr
3567
+ shur2 sh u2 rr
3568
+ shur3 sh u3 rr
3569
+ shur4 sh u4 rr
3570
+ shur5 sh u5 rr
3571
+ shuar1 sh ua1 rr
3572
+ shuar2 sh ua2 rr
3573
+ shuar3 sh ua3 rr
3574
+ shuar4 sh ua4 rr
3575
+ shuar5 sh ua5 rr
3576
+ shuair1 sh uai1 rr
3577
+ shuair2 sh uai2 rr
3578
+ shuair3 sh uai3 rr
3579
+ shuair4 sh uai4 rr
3580
+ shuair5 sh uai5 rr
3581
+ shuanr1 sh uan1 rr
3582
+ shuanr2 sh uan2 rr
3583
+ shuanr3 sh uan3 rr
3584
+ shuanr4 sh uan4 rr
3585
+ shuanr5 sh uan5 rr
3586
+ shuangr1 sh uang1 rr
3587
+ shuangr2 sh uang2 rr
3588
+ shuangr3 sh uang3 rr
3589
+ shuangr4 sh uang4 rr
3590
+ shuangr5 sh uang5 rr
3591
+ shuir1 sh uei1 rr
3592
+ shuir2 sh uei2 rr
3593
+ shuir3 sh uei3 rr
3594
+ shuir4 sh uei4 rr
3595
+ shuir5 sh uei5 rr
3596
+ shunr1 sh uen1 rr
3597
+ shunr2 sh uen2 rr
3598
+ shunr3 sh uen3 rr
3599
+ shunr4 sh uen4 rr
3600
+ shunr5 sh uen5 rr
3601
+ shuor1 sh uo1 rr
3602
+ shuor2 sh uo2 rr
3603
+ shuor3 sh uo3 rr
3604
+ shuor4 sh uo4 rr
3605
+ shuor5 sh uo5 rr
3606
+ sir1 s ii1 rr
3607
+ sir2 s ii2 rr
3608
+ sir3 s ii3 rr
3609
+ sir4 s ii4 rr
3610
+ sir5 s ii5 rr
3611
+ songr1 s ong1 rr
3612
+ songr2 s ong2 rr
3613
+ songr3 s ong3 rr
3614
+ songr4 s ong4 rr
3615
+ songr5 s ong5 rr
3616
+ sour1 s ou1 rr
3617
+ sour2 s ou2 rr
3618
+ sour3 s ou3 rr
3619
+ sour4 s ou4 rr
3620
+ sour5 s ou5 rr
3621
+ sur1 s u1 rr
3622
+ sur2 s u2 rr
3623
+ sur3 s u3 rr
3624
+ sur4 s u4 rr
3625
+ sur5 s u5 rr
3626
+ suanr1 s uan1 rr
3627
+ suanr2 s uan2 rr
3628
+ suanr3 s uan3 rr
3629
+ suanr4 s uan4 rr
3630
+ suanr5 s uan5 rr
3631
+ suir1 s uei1 rr
3632
+ suir2 s uei2 rr
3633
+ suir3 s uei3 rr
3634
+ suir4 s uei4 rr
3635
+ suir5 s uei5 rr
3636
+ sunr1 s uen1 rr
3637
+ sunr2 s uen2 rr
3638
+ sunr3 s uen3 rr
3639
+ sunr4 s uen4 rr
3640
+ sunr5 s uen5 rr
3641
+ suor1 s uo1 rr
3642
+ suor2 s uo2 rr
3643
+ suor3 s uo3 rr
3644
+ suor4 s uo4 rr
3645
+ suor5 s uo5 rr
3646
+ tar1 t a1 rr
3647
+ tar2 t a2 rr
3648
+ tar3 t a3 rr
3649
+ tar4 t a4 rr
3650
+ tar5 t a5 rr
3651
+ tair1 t ai1 rr
3652
+ tair2 t ai2 rr
3653
+ tair3 t ai3 rr
3654
+ tair4 t ai4 rr
3655
+ tair5 t ai5 rr
3656
+ tanr1 t an1 rr
3657
+ tanr2 t an2 rr
3658
+ tanr3 t an3 rr
3659
+ tanr4 t an4 rr
3660
+ tanr5 t an5 rr
3661
+ tangr1 t ang1 rr
3662
+ tangr2 t ang2 rr
3663
+ tangr3 t ang3 rr
3664
+ tangr4 t ang4 rr
3665
+ tangr5 t ang5 rr
3666
+ taor1 t ao1 rr
3667
+ taor2 t ao2 rr
3668
+ taor3 t ao3 rr
3669
+ taor4 t ao4 rr
3670
+ taor5 t ao5 rr
3671
+ ter1 t e1 rr
3672
+ ter2 t e2 rr
3673
+ ter3 t e3 rr
3674
+ ter4 t e4 rr
3675
+ ter5 t e5 rr
3676
+ teir1 t ei1 rr
3677
+ teir2 t ei2 rr
3678
+ teir3 t ei3 rr
3679
+ teir4 t ei4 rr
3680
+ teir5 t ei5 rr
3681
+ tengr1 t eng1 rr
3682
+ tengr2 t eng2 rr
3683
+ tengr3 t eng3 rr
3684
+ tengr4 t eng4 rr
3685
+ tengr5 t eng5 rr
3686
+ tir1 t i1 rr
3687
+ tir2 t i2 rr
3688
+ tir3 t i3 rr
3689
+ tir4 t i4 rr
3690
+ tir5 t i5 rr
3691
+ tianr1 t ian1 rr
3692
+ tianr2 t ian2 rr
3693
+ tianr3 t ian3 rr
3694
+ tianr4 t ian4 rr
3695
+ tianr5 t ian5 rr
3696
+ tiaor1 t iao1 rr
3697
+ tiaor2 t iao2 rr
3698
+ tiaor3 t iao3 rr
3699
+ tiaor4 t iao4 rr
3700
+ tiaor5 t iao5 rr
3701
+ tier1 t ie1 rr
3702
+ tier2 t ie2 rr
3703
+ tier3 t ie3 rr
3704
+ tier4 t ie4 rr
3705
+ tier5 t ie5 rr
3706
+ tingr1 t ing1 rr
3707
+ tingr2 t ing2 rr
3708
+ tingr3 t ing3 rr
3709
+ tingr4 t ing4 rr
3710
+ tingr5 t ing5 rr
3711
+ tongr1 t ong1 rr
3712
+ tongr2 t ong2 rr
3713
+ tongr3 t ong3 rr
3714
+ tongr4 t ong4 rr
3715
+ tongr5 t ong5 rr
3716
+ tour1 t ou1 rr
3717
+ tour2 t ou2 rr
3718
+ tour3 t ou3 rr
3719
+ tour4 t ou4 rr
3720
+ tour5 t ou5 rr
3721
+ tur1 t u1 rr
3722
+ tur2 t u2 rr
3723
+ tur3 t u3 rr
3724
+ tur4 t u4 rr
3725
+ tur5 t u5 rr
3726
+ tuanr1 t uan1 rr
3727
+ tuanr2 t uan2 rr
3728
+ tuanr3 t uan3 rr
3729
+ tuanr4 t uan4 rr
3730
+ tuanr5 t uan5 rr
3731
+ tuir1 t uei1 rr
3732
+ tuir2 t uei2 rr
3733
+ tuir3 t uei3 rr
3734
+ tuir4 t uei4 rr
3735
+ tuir5 t uei5 rr
3736
+ tunr1 t uen1 rr
3737
+ tunr2 t uen2 rr
3738
+ tunr3 t uen3 rr
3739
+ tunr4 t uen4 rr
3740
+ tunr5 t uen5 rr
3741
+ tuor1 t uo1 rr
3742
+ tuor2 t uo2 rr
3743
+ tuor3 t uo3 rr
3744
+ tuor4 t uo4 rr
3745
+ tuor5 t uo5 rr
3746
+ war1 w ua1 rr
3747
+ war2 w ua2 rr
3748
+ war3 w ua3 rr
3749
+ war4 w ua4 rr
3750
+ war5 w ua5 rr
3751
+ wair1 w uai1 rr
3752
+ wair2 w uai2 rr
3753
+ wair3 w uai3 rr
3754
+ wair4 w uai4 rr
3755
+ wair5 w uai5 rr
3756
+ wanr1 w uan1 rr
3757
+ wanr2 w uan2 rr
3758
+ wanr3 w uan3 rr
3759
+ wanr4 w uan4 rr
3760
+ wanr5 w uan5 rr
3761
+ wangr1 w uang1 rr
3762
+ wangr2 w uang2 rr
3763
+ wangr3 w uang3 rr
3764
+ wangr4 w uang4 rr
3765
+ wangr5 w uang5 rr
3766
+ weir1 w uei1 rr
3767
+ weir2 w uei2 rr
3768
+ weir3 w uei3 rr
3769
+ weir4 w uei4 rr
3770
+ weir5 w uei5 rr
3771
+ wenr1 w uen1 rr
3772
+ wenr2 w uen2 rr
3773
+ wenr3 w uen3 rr
3774
+ wenr4 w uen4 rr
3775
+ wenr5 w uen5 rr
3776
+ wengr1 w uen1 rr
3777
+ wengr2 w uen2 rr
3778
+ wengr3 w uen3 rr
3779
+ wengr4 w uen4 rr
3780
+ wengr5 w uen5 rr
3781
+ wor1 w uo1 rr
3782
+ wor2 w uo2 rr
3783
+ wor3 w uo3 rr
3784
+ wor4 w uo4 rr
3785
+ wor5 w uo5 rr
3786
+ wur1 w u1 rr
3787
+ wur2 w u2 rr
3788
+ wur3 w u3 rr
3789
+ wur4 w u4 rr
3790
+ wur5 w u5 rr
3791
+ xir1 x i1 rr
3792
+ xir2 x i2 rr
3793
+ xir3 x i3 rr
3794
+ xir4 x i4 rr
3795
+ xir5 x i5 rr
3796
+ xiar1 x ia1 rr
3797
+ xiar2 x ia2 rr
3798
+ xiar3 x ia3 rr
3799
+ xiar4 x ia4 rr
3800
+ xiar5 x ia5 rr
3801
+ xianr1 x ian1 rr
3802
+ xianr2 x ian2 rr
3803
+ xianr3 x ian3 rr
3804
+ xianr4 x ian4 rr
3805
+ xianr5 x ian5 rr
3806
+ xiangr1 x iang1 rr
3807
+ xiangr2 x iang2 rr
3808
+ xiangr3 x iang3 rr
3809
+ xiangr4 x iang4 rr
3810
+ xiangr5 x iang5 rr
3811
+ xiaor1 x iao1 rr
3812
+ xiaor2 x iao2 rr
3813
+ xiaor3 x iao3 rr
3814
+ xiaor4 x iao4 rr
3815
+ xiaor5 x iao5 rr
3816
+ xier1 x ie1 rr
3817
+ xier2 x ie2 rr
3818
+ xier3 x ie3 rr
3819
+ xier4 x ie4 rr
3820
+ xier5 x ie5 rr
3821
+ xinr1 x in1 rr
3822
+ xinr2 x in2 rr
3823
+ xinr3 x in3 rr
3824
+ xinr4 x in4 rr
3825
+ xinr5 x in5 rr
3826
+ xingr1 x ing1 rr
3827
+ xingr2 x ing2 rr
3828
+ xingr3 x ing3 rr
3829
+ xingr4 x ing4 rr
3830
+ xingr5 x ing5 rr
3831
+ xiongr1 x iong1 rr
3832
+ xiongr2 x iong2 rr
3833
+ xiongr3 x iong3 rr
3834
+ xiongr4 x iong4 rr
3835
+ xiongr5 x iong5 rr
3836
+ xiur1 x iou1 rr
3837
+ xiur2 x iou2 rr
3838
+ xiur3 x iou3 rr
3839
+ xiur4 x iou4 rr
3840
+ xiur5 x iou5 rr
3841
+ xur1 x v1 rr
3842
+ xur2 x v2 rr
3843
+ xur3 x v3 rr
3844
+ xur4 x v4 rr
3845
+ xur5 x v5 rr
3846
+ xuanr1 x van1 rr
3847
+ xuanr2 x van2 rr
3848
+ xuanr3 x van3 rr
3849
+ xuanr4 x van4 rr
3850
+ xuanr5 x van5 rr
3851
+ xuer1 x ve1 rr
3852
+ xuer2 x ve2 rr
3853
+ xuer3 x ve3 rr
3854
+ xuer4 x ve4 rr
3855
+ xuer5 x ve5 rr
3856
+ xunr1 x vn1 rr
3857
+ xunr2 x vn2 rr
3858
+ xunr3 x vn3 rr
3859
+ xunr4 x vn4 rr
3860
+ xunr5 x vn5 rr
3861
+ yar1 y ia1 rr
3862
+ yar2 y ia2 rr
3863
+ yar3 y ia3 rr
3864
+ yar4 y ia4 rr
3865
+ yar5 y ia5 rr
3866
+ yanr1 y ian1 rr
3867
+ yanr2 y ian2 rr
3868
+ yanr3 y ian3 rr
3869
+ yanr4 y ian4 rr
3870
+ yanr5 y ian5 rr
3871
+ yangr1 y iang1 rr
3872
+ yangr2 y iang2 rr
3873
+ yangr3 y iang3 rr
3874
+ yangr4 y iang4 rr
3875
+ yangr5 y iang5 rr
3876
+ yaor1 y iao1 rr
3877
+ yaor2 y iao2 rr
3878
+ yaor3 y iao3 rr
3879
+ yaor4 y iao4 rr
3880
+ yaor5 y iao5 rr
3881
+ yer1 y ie1 rr
3882
+ yer2 y ie2 rr
3883
+ yer3 y ie3 rr
3884
+ yer4 y ie4 rr
3885
+ yer5 y ie5 rr
3886
+ yir1 y i1 rr
3887
+ yir2 y i2 rr
3888
+ yir3 y i3 rr
3889
+ yir4 y i4 rr
3890
+ yir5 y i5 rr
3891
+ yinr1 y in1 rr
3892
+ yinr2 y in2 rr
3893
+ yinr3 y in3 rr
3894
+ yinr4 y in4 rr
3895
+ yinr5 y in5 rr
3896
+ yingr1 y ing1 rr
3897
+ yingr2 y ing2 rr
3898
+ yingr3 y ing3 rr
3899
+ yingr4 y ing4 rr
3900
+ yingr5 y ing5 rr
3901
+ yor1 y iou1 rr
3902
+ yor2 y iou2 rr
3903
+ yor3 y iou3 rr
3904
+ yor4 y iou4 rr
3905
+ yor5 y iou5 rr
3906
+ yongr1 y iong1 rr
3907
+ yongr2 y iong2 rr
3908
+ yongr3 y iong3 rr
3909
+ yongr4 y iong4 rr
3910
+ yongr5 y iong5 rr
3911
+ your1 y iou1 rr
3912
+ your2 y iou2 rr
3913
+ your3 y iou3 rr
3914
+ your4 y iou4 rr
3915
+ your5 y iou5 rr
3916
+ yur1 y v1 rr
3917
+ yur2 y v2 rr
3918
+ yur3 y v3 rr
3919
+ yur4 y v4 rr
3920
+ yur5 y v5 rr
3921
+ yuanr1 y van1 rr
3922
+ yuanr2 y van2 rr
3923
+ yuanr3 y van3 rr
3924
+ yuanr4 y van4 rr
3925
+ yuanr5 y van5 rr
3926
+ yuer1 y ve1 rr
3927
+ yuer2 y ve2 rr
3928
+ yuer3 y ve3 rr
3929
+ yuer4 y ve4 rr
3930
+ yuer5 y ve5 rr
3931
+ yunr1 y vn1 rr
3932
+ yunr2 y vn2 rr
3933
+ yunr3 y vn3 rr
3934
+ yunr4 y vn4 rr
3935
+ yunr5 y vn5 rr
3936
+ zar1 z a1 rr
3937
+ zar2 z a2 rr
3938
+ zar3 z a3 rr
3939
+ zar4 z a4 rr
3940
+ zar5 z a5 rr
3941
+ zair1 z ai1 rr
3942
+ zair2 z ai2 rr
3943
+ zair3 z ai3 rr
3944
+ zair4 z ai4 rr
3945
+ zair5 z ai5 rr
3946
+ zanr1 z an1 rr
3947
+ zanr2 z an2 rr
3948
+ zanr3 z an3 rr
3949
+ zanr4 z an4 rr
3950
+ zanr5 z an5 rr
3951
+ zangr1 z ang1 rr
3952
+ zangr2 z ang2 rr
3953
+ zangr3 z ang3 rr
3954
+ zangr4 z ang4 rr
3955
+ zangr5 z ang5 rr
3956
+ zaor1 z ao1 rr
3957
+ zaor2 z ao2 rr
3958
+ zaor3 z ao3 rr
3959
+ zaor4 z ao4 rr
3960
+ zaor5 z ao5 rr
3961
+ zer1 z e1 rr
3962
+ zer2 z e2 rr
3963
+ zer3 z e3 rr
3964
+ zer4 z e4 rr
3965
+ zer5 z e5 rr
3966
+ zeir1 z ei1 rr
3967
+ zeir2 z ei2 rr
3968
+ zeir3 z ei3 rr
3969
+ zeir4 z ei4 rr
3970
+ zeir5 z ei5 rr
3971
+ zenr1 z en1 rr
3972
+ zenr2 z en2 rr
3973
+ zenr3 z en3 rr
3974
+ zenr4 z en4 rr
3975
+ zenr5 z en5 rr
3976
+ zengr1 z eng1 rr
3977
+ zengr2 z eng2 rr
3978
+ zengr3 z eng3 rr
3979
+ zengr4 z eng4 rr
3980
+ zengr5 z eng5 rr
3981
+ zhar1 zh a1 rr
3982
+ zhar2 zh a2 rr
3983
+ zhar3 zh a3 rr
3984
+ zhar4 zh a4 rr
3985
+ zhar5 zh a5 rr
3986
+ zhair1 zh ai1 rr
3987
+ zhair2 zh ai2 rr
3988
+ zhair3 zh ai3 rr
3989
+ zhair4 zh ai4 rr
3990
+ zhair5 zh ai5 rr
3991
+ zhanr1 zh an1 rr
3992
+ zhanr2 zh an2 rr
3993
+ zhanr3 zh an3 rr
3994
+ zhanr4 zh an4 rr
3995
+ zhanr5 zh an5 rr
3996
+ zhangr1 zh ang1 rr
3997
+ zhangr2 zh ang2 rr
3998
+ zhangr3 zh ang3 rr
3999
+ zhangr4 zh ang4 rr
4000
+ zhangr5 zh ang5 rr
4001
+ zhaor1 zh ao1 rr
4002
+ zhaor2 zh ao2 rr
4003
+ zhaor3 zh ao3 rr
4004
+ zhaor4 zh ao4 rr
4005
+ zhaor5 zh ao5 rr
4006
+ zher1 zh e1 rr
4007
+ zher2 zh e2 rr
4008
+ zher3 zh e3 rr
4009
+ zher4 zh e4 rr
4010
+ zher5 zh e5 rr
4011
+ zheir1 zh ei1 rr
4012
+ zheir2 zh ei2 rr
4013
+ zheir3 zh ei3 rr
4014
+ zheir4 zh ei4 rr
4015
+ zheir5 zh ei5 rr
4016
+ zhenr1 zh en1 rr
4017
+ zhenr2 zh en2 rr
4018
+ zhenr3 zh en3 rr
4019
+ zhenr4 zh en4 rr
4020
+ zhenr5 zh en5 rr
4021
+ zhengr1 zh eng1 rr
4022
+ zhengr2 zh eng2 rr
4023
+ zhengr3 zh eng3 rr
4024
+ zhengr4 zh eng4 rr
4025
+ zhengr5 zh eng5 rr
4026
+ zhir1 zh iii1 rr
4027
+ zhir2 zh iii2 rr
4028
+ zhir3 zh iii3 rr
4029
+ zhir4 zh iii4 rr
4030
+ zhir5 zh iii5 rr
4031
+ zhongr1 zh ong1 rr
4032
+ zhongr2 zh ong2 rr
4033
+ zhongr3 zh ong3 rr
4034
+ zhongr4 zh ong4 rr
4035
+ zhongr5 zh ong5 rr
4036
+ zhour1 zh ou1 rr
4037
+ zhour2 zh ou2 rr
4038
+ zhour3 zh ou3 rr
4039
+ zhour4 zh ou4 rr
4040
+ zhour5 zh ou5 rr
4041
+ zhur1 zh u1 rr
4042
+ zhur2 zh u2 rr
4043
+ zhur3 zh u3 rr
4044
+ zhur4 zh u4 rr
4045
+ zhur5 zh u5 rr
4046
+ zhuar1 zh ua1 rr
4047
+ zhuar2 zh ua2 rr
4048
+ zhuar3 zh ua3 rr
4049
+ zhuar4 zh ua4 rr
4050
+ zhuar5 zh ua5 rr
4051
+ zhuair1 zh uai1 rr
4052
+ zhuair2 zh uai2 rr
4053
+ zhuair3 zh uai3 rr
4054
+ zhuair4 zh uai4 rr
4055
+ zhuair5 zh uai5 rr
4056
+ zhuanr1 zh uan1 rr
4057
+ zhuanr2 zh uan2 rr
4058
+ zhuanr3 zh uan3 rr
4059
+ zhuanr4 zh uan4 rr
4060
+ zhuanr5 zh uan5 rr
4061
+ zhuangr1 zh uang1 rr
4062
+ zhuangr2 zh uang2 rr
4063
+ zhuangr3 zh uang3 rr
4064
+ zhuangr4 zh uang4 rr
4065
+ zhuangr5 zh uang5 rr
4066
+ zhuir1 zh uei1 rr
4067
+ zhuir2 zh uei2 rr
4068
+ zhuir3 zh uei3 rr
4069
+ zhuir4 zh uei4 rr
4070
+ zhuir5 zh uei5 rr
4071
+ zhunr1 zh uen1 rr
4072
+ zhunr2 zh uen2 rr
4073
+ zhunr3 zh uen3 rr
4074
+ zhunr4 zh uen4 rr
4075
+ zhunr5 zh uen5 rr
4076
+ zhuor1 zh uo1 rr
4077
+ zhuor2 zh uo2 rr
4078
+ zhuor3 zh uo3 rr
4079
+ zhuor4 zh uo4 rr
4080
+ zhuor5 zh uo5 rr
4081
+ zir1 z ii1 rr
4082
+ zir2 z ii2 rr
4083
+ zir3 z ii3 rr
4084
+ zir4 z ii4 rr
4085
+ zir5 z ii5 rr
4086
+ zongr1 z ong1 rr
4087
+ zongr2 z ong2 rr
4088
+ zongr3 z ong3 rr
4089
+ zongr4 z ong4 rr
4090
+ zongr5 z ong5 rr
4091
+ zour1 z ou1 rr
4092
+ zour2 z ou2 rr
4093
+ zour3 z ou3 rr
4094
+ zour4 z ou4 rr
4095
+ zour5 z ou5 rr
4096
+ zur1 z u1 rr
4097
+ zur2 z u2 rr
4098
+ zur3 z u3 rr
4099
+ zur4 z u4 rr
4100
+ zur5 z u5 rr
4101
+ zuanr1 z uan1 rr
4102
+ zuanr2 z uan2 rr
4103
+ zuanr3 z uan3 rr
4104
+ zuanr4 z uan4 rr
4105
+ zuanr5 z uan5 rr
4106
+ zuir1 z uei1 rr
4107
+ zuir2 z uei2 rr
4108
+ zuir3 z uei3 rr
4109
+ zuir4 z uei4 rr
4110
+ zuir5 z uei5 rr
4111
+ zunr1 z uen1 rr
4112
+ zunr2 z uen2 rr
4113
+ zunr3 z uen3 rr
4114
+ zunr4 z uen4 rr
4115
+ zunr5 z uen5 rr
4116
+ zuor1 z uo1 rr
4117
+ zuor2 z uo2 rr
4118
+ zuor3 z uo3 rr
4119
+ zuor4 z uo4 rr
4120
+ zuor5 z uo5 rr
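
Every entry above follows the same lexicon convention: an erhua ("r"-suffixed) pinyin syllable with a tone digit (1-5), followed by its phone sequence of initial, tonal final, and the retroflex marker `rr`. A minimal parsing sketch (the helper name is hypothetical, not part of this upload):

```python
# Hypothetical helper: parse one pinyin-lexicon-r.txt line into
# (syllable, phones). Fields are whitespace-separated: the first is the
# pinyin syllable, the rest are its phones (initial, tonal final, "rr").
def parse_lexicon_line(line: str):
    fields = line.split()
    return fields[0], fields[1:]

syllable, phones = parse_lexicon_line("zuor5 z uo5 rr")
assert syllable == "zuor5"
assert phones == ["z", "uo5", "rr"]
```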
text/numbers.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """ This code is modified from https://github.com/keithito/tacotron """
7
+
8
+ import inflect
9
+ import re
10
+
11
+ _inflect = inflect.engine()
12
+ _comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
13
+ _decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
14
+ _pounds_re = re.compile(r"Β£([0-9\,]*[0-9]+)")
15
+ _dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
16
+ _ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
17
+ _number_re = re.compile(r"[0-9]+")
18
+
19
+
20
+ def _remove_commas(m):
21
+ return m.group(1).replace(",", "")
22
+
23
+
24
+ def _expand_decimal_point(m):
25
+ return m.group(1).replace(".", " point ")
26
+
27
+
28
+ def _expand_dollars(m):
29
+ match = m.group(1)
30
+ parts = match.split(".")
31
+ if len(parts) > 2:
32
+ return match + " dollars" # Unexpected format
33
+ dollars = int(parts[0]) if parts[0] else 0
34
+ cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
35
+ if dollars and cents:
36
+ dollar_unit = "dollar" if dollars == 1 else "dollars"
37
+ cent_unit = "cent" if cents == 1 else "cents"
38
+ return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
39
+ elif dollars:
40
+ dollar_unit = "dollar" if dollars == 1 else "dollars"
41
+ return "%s %s" % (dollars, dollar_unit)
42
+ elif cents:
43
+ cent_unit = "cent" if cents == 1 else "cents"
44
+ return "%s %s" % (cents, cent_unit)
45
+ else:
46
+ return "zero dollars"
47
+
48
+
49
+ def _expand_ordinal(m):
50
+ return _inflect.number_to_words(m.group(0))
51
+
52
+
53
+ def _expand_number(m):
54
+ num = int(m.group(0))
55
+ if num > 1000 and num < 3000:
56
+ if num == 2000:
57
+ return "two thousand"
58
+ elif num > 2000 and num < 2010:
59
+ return "two thousand " + _inflect.number_to_words(num % 100)
60
+ elif num % 100 == 0:
61
+ return _inflect.number_to_words(num // 100) + " hundred"
62
+ else:
63
+ return _inflect.number_to_words(
64
+ num, andword="", zero="oh", group=2
65
+ ).replace(", ", " ")
66
+ else:
67
+ return _inflect.number_to_words(num, andword="")
68
+
69
+
70
+ def normalize_numbers(text):
71
+ text = re.sub(_comma_number_re, _remove_commas, text)
72
+ text = re.sub(_pounds_re, r"\1 pounds", text)
73
+ text = re.sub(_dollars_re, _expand_dollars, text)
74
+ text = re.sub(_decimal_number_re, _expand_decimal_point, text)
75
+ text = re.sub(_ordinal_re, _expand_ordinal, text)
76
+ text = re.sub(_number_re, _expand_number, text)
77
+ return text
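
A quick sanity check of `normalize_numbers` as defined above (an illustrative sketch; it assumes the file is importable as `text.numbers` and that the `inflect` package is installed):

```python
from text.numbers import normalize_numbers

# Commas are stripped, currency is verbalized, then remaining digits
# are spelled out by inflect.
print(normalize_numbers("I paid $3.50 for 2,000 stickers in 1999."))
# -> "I paid three dollars, fifty cents for two thousand stickers in nineteen ninety-nine."
```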
text/pinyin.py ADDED
@@ -0,0 +1,218 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ initials = [
7
+ "b",
8
+ "c",
9
+ "ch",
10
+ "d",
11
+ "f",
12
+ "g",
13
+ "h",
14
+ "j",
15
+ "k",
16
+ "l",
17
+ "m",
18
+ "n",
19
+ "p",
20
+ "q",
21
+ "r",
22
+ "s",
23
+ "sh",
24
+ "t",
25
+ "w",
26
+ "x",
27
+ "y",
28
+ "z",
29
+ "zh",
30
+ ]
31
+ finals = [
32
+ "a1",
33
+ "a2",
34
+ "a3",
35
+ "a4",
36
+ "a5",
37
+ "ai1",
38
+ "ai2",
39
+ "ai3",
40
+ "ai4",
41
+ "ai5",
42
+ "an1",
43
+ "an2",
44
+ "an3",
45
+ "an4",
46
+ "an5",
47
+ "ang1",
48
+ "ang2",
49
+ "ang3",
50
+ "ang4",
51
+ "ang5",
52
+ "ao1",
53
+ "ao2",
54
+ "ao3",
55
+ "ao4",
56
+ "ao5",
57
+ "e1",
58
+ "e2",
59
+ "e3",
60
+ "e4",
61
+ "e5",
62
+ "ei1",
63
+ "ei2",
64
+ "ei3",
65
+ "ei4",
66
+ "ei5",
67
+ "en1",
68
+ "en2",
69
+ "en3",
70
+ "en4",
71
+ "en5",
72
+ "eng1",
73
+ "eng2",
74
+ "eng3",
75
+ "eng4",
76
+ "eng5",
77
+ "er1",
78
+ "er2",
79
+ "er3",
80
+ "er4",
81
+ "er5",
82
+ "i1",
83
+ "i2",
84
+ "i3",
85
+ "i4",
86
+ "i5",
87
+ "ia1",
88
+ "ia2",
89
+ "ia3",
90
+ "ia4",
91
+ "ia5",
92
+ "ian1",
93
+ "ian2",
94
+ "ian3",
95
+ "ian4",
96
+ "ian5",
97
+ "iang1",
98
+ "iang2",
99
+ "iang3",
100
+ "iang4",
101
+ "iang5",
102
+ "iao1",
103
+ "iao2",
104
+ "iao3",
105
+ "iao4",
106
+ "iao5",
107
+ "ie1",
108
+ "ie2",
109
+ "ie3",
110
+ "ie4",
111
+ "ie5",
112
+ "ii1",
113
+ "ii2",
114
+ "ii3",
115
+ "ii4",
116
+ "ii5",
117
+ "iii1",
118
+ "iii2",
119
+ "iii3",
120
+ "iii4",
121
+ "iii5",
122
+ "in1",
123
+ "in2",
124
+ "in3",
125
+ "in4",
126
+ "in5",
127
+ "ing1",
128
+ "ing2",
129
+ "ing3",
130
+ "ing4",
131
+ "ing5",
132
+ "iong1",
133
+ "iong2",
134
+ "iong3",
135
+ "iong4",
136
+ "iong5",
137
+ "iou1",
138
+ "iou2",
139
+ "iou3",
140
+ "iou4",
141
+ "iou5",
142
+ "o1",
143
+ "o2",
144
+ "o3",
145
+ "o4",
146
+ "o5",
147
+ "ong1",
148
+ "ong2",
149
+ "ong3",
150
+ "ong4",
151
+ "ong5",
152
+ "ou1",
153
+ "ou2",
154
+ "ou3",
155
+ "ou4",
156
+ "ou5",
157
+ "u1",
158
+ "u2",
159
+ "u3",
160
+ "u4",
161
+ "u5",
162
+ "ua1",
163
+ "ua2",
164
+ "ua3",
165
+ "ua4",
166
+ "ua5",
167
+ "uai1",
168
+ "uai2",
169
+ "uai3",
170
+ "uai4",
171
+ "uai5",
172
+ "uan1",
173
+ "uan2",
174
+ "uan3",
175
+ "uan4",
176
+ "uan5",
177
+ "uang1",
178
+ "uang2",
179
+ "uang3",
180
+ "uang4",
181
+ "uang5",
182
+ "uei1",
183
+ "uei2",
184
+ "uei3",
185
+ "uei4",
186
+ "uei5",
187
+ "uen1",
188
+ "uen2",
189
+ "uen3",
190
+ "uen4",
191
+ "uen5",
192
+ "uo1",
193
+ "uo2",
194
+ "uo3",
195
+ "uo4",
196
+ "uo5",
197
+ "v1",
198
+ "v2",
199
+ "v3",
200
+ "v4",
201
+ "v5",
202
+ "van1",
203
+ "van2",
204
+ "van3",
205
+ "van4",
206
+ "van5",
207
+ "ve1",
208
+ "ve2",
209
+ "ve3",
210
+ "ve4",
211
+ "ve5",
212
+ "vn1",
213
+ "vn2",
214
+ "vn3",
215
+ "vn4",
216
+ "vn5",
217
+ ]
218
+ valid_symbols = initials + finals + ["rr"]
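
Since every syllable in the lexicon above is spelled with these inventories, validating a lexicon entry is straightforward; a small sketch:

```python
from text.pinyin import valid_symbols

# Phones from the pinyin-lexicon-r.txt entry "shuangr1 sh uang1 rr".
phones = ["sh", "uang1", "rr"]
assert all(p in valid_symbols for p in phones)
```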
text/symbol_table.py ADDED
@@ -0,0 +1,292 @@
1
+ # Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang)
2
+ #
3
+ # See ../../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from dataclasses import dataclass
18
+ from dataclasses import field
19
+ from typing import Dict
20
+ from typing import Generic
21
+ from typing import List
22
+ from typing import Optional
23
+ from typing import TypeVar
24
+ from typing import Union
25
+
26
+ Symbol = TypeVar("Symbol")
27
+
28
+ # SymbolTable is copied from
29
+ # https://github.com/k2-fsa/k2/blob/master/k2/python/k2/symbol_table.py
30
+
31
+ """
32
+ SymbolTable: map symbol to id
33
+ """
34
+
35
+
36
+ @dataclass(repr=False)
37
+ class SymbolTable(Generic[Symbol]):
38
+ """SymbolTable that maps symbol IDs, found on the FSA arcs to
39
+ actual objects. These objects can be arbitrary Python objects
40
+ that can serve as keys in a dictionary (i.e. they need to be
41
+ hashable and immutable).
42
+
43
+ The SymbolTable can only be read from/written to disk if the
44
+ symbols are strings.
45
+ """
46
+
47
+ _id2sym: Dict[int, Symbol] = field(default_factory=dict)
48
+ """Map an integer to a symbol.
49
+ """
50
+
51
+ _sym2id: Dict[Symbol, int] = field(default_factory=dict)
52
+ """Map a symbol to an integer.
53
+ """
54
+
55
+ _next_available_id: int = 1
56
+ """A helper internal field that helps adding new symbols
57
+ to the table efficiently.
58
+ """
59
+
60
+ eps: Symbol = "<eps>"
61
+ """Null symbol, always mapped to index 0.
62
+ """
63
+
64
+ def __post_init__(self):
65
+ assert all(self._sym2id[sym] == idx for idx, sym in self._id2sym.items())
66
+ assert all(self._id2sym[idx] == sym for sym, idx in self._sym2id.items())
67
+ assert 0 not in self._id2sym or self._id2sym[0] == self.eps
68
+
69
+ self._next_available_id = max(self._id2sym, default=0) + 1
70
+ self._id2sym.setdefault(0, self.eps)
71
+ self._sym2id.setdefault(self.eps, 0)
72
+
73
+ @staticmethod
74
+ def from_str(s: str) -> "SymbolTable":
75
+ """Build a symbol table from a string.
76
+
77
+ The string consists of lines. Every line has two fields separated
78
+ by space(s), tab(s) or both. The first field is the symbol and the
79
+ second the integer id of the symbol.
80
+
81
+ Args:
82
+ s:
83
+ The input string with the format described above.
84
+ Returns:
85
+ An instance of :class:`SymbolTable`.
86
+ """
87
+ id2sym: Dict[int, str] = dict()
88
+ sym2id: Dict[str, int] = dict()
89
+
90
+ for line in s.split("\n"):
91
+ fields = line.split()
92
+ if len(fields) == 0:
93
+ continue # skip empty lines
94
+ assert (
95
+ len(fields) == 2
96
+ ), f"Expect a line with 2 fields. Given: {len(fields)}"
97
+ sym, idx = fields[0], int(fields[1])
98
+ assert sym not in sym2id, f"Duplicated symbol {sym}"
99
+ assert idx not in id2sym, f"Duplicated id {idx}"
100
+ id2sym[idx] = sym
101
+ sym2id[sym] = idx
102
+
103
+ eps = id2sym.get(0, "<eps>")
104
+
105
+ return SymbolTable(_id2sym=id2sym, _sym2id=sym2id, eps=eps)
106
+
107
+ @staticmethod
108
+ def from_file(filename: str) -> "SymbolTable":
109
+ """Build a symbol table from file.
110
+
111
+ Every line in the symbol table file has two fields separated by
112
+ space(s), tab(s) or both. The following is an example file:
113
+
114
+ .. code-block::
115
+
116
+ <eps> 0
117
+ a 1
118
+ b 2
119
+ c 3
120
+
121
+ Args:
122
+ filename:
123
+ Name of the symbol table file. Its format is documented above.
124
+
125
+ Returns:
126
+ An instance of :class:`SymbolTable`.
127
+
128
+ """
129
+ with open(filename, "r", encoding="utf-8") as f:
130
+ return SymbolTable.from_str(f.read().strip())
131
+
132
+ def to_str(self) -> str:
133
+ """
134
+ Returns:
135
+ Return a string representation of this object. You can pass
136
+ it to the method ``from_str`` to recreate an identical object.
137
+ """
138
+ s = ""
139
+ for idx, symbol in sorted(self._id2sym.items()):
140
+ s += f"{symbol} {idx}\n"
141
+ return s
142
+
143
+ def to_file(self, filename: str):
144
+ """Serialize the SymbolTable to a file.
145
+
146
+ Every line in the symbol table file has two fields separated by
147
+ space(s), tab(s) or both. The following is an example file:
148
+
149
+ .. code-block::
150
+
151
+ <eps> 0
152
+ a 1
153
+ b 2
154
+ c 3
155
+
156
+ Args:
157
+ filename:
158
+ Name of the symbol table file. Its format is documented above.
159
+ """
160
+ with open(filename, "w") as f:
161
+ for idx, symbol in sorted(self._id2sym.items()):
162
+ print(symbol, idx, file=f)
163
+
164
+ def add(self, symbol: Symbol, index: Optional[int] = None) -> int:
165
+ """Add a new symbol to the SymbolTable.
166
+
167
+ Args:
168
+ symbol:
169
+ The symbol to be added.
170
+ index:
171
+ Optional int id to which the symbol should be assigned.
172
+ If it is not available, a ValueError will be raised.
173
+
174
+ Returns:
175
+ The int id to which the symbol has been assigned.
176
+ """
177
+ # Already in the table? Return its ID.
178
+ if symbol in self._sym2id:
179
+ return self._sym2id[symbol]
180
+ # Specific ID not provided - use next available.
181
+ if index is None:
182
+ index = self._next_available_id
183
+ # Specific ID provided but not available.
184
+ if index in self._id2sym:
185
+ raise ValueError(
186
+ f"Cannot assign id '{index}' to '{symbol}' - "
187
+ f"already occupied by {self._id2sym[index]}"
188
+ )
189
+ self._sym2id[symbol] = index
190
+ self._id2sym[index] = symbol
191
+
192
+ # Update next available ID if needed
193
+ if self._next_available_id <= index:
194
+ self._next_available_id = index + 1
195
+
196
+ return index
197
+
198
+ def get(self, k: Union[int, Symbol]) -> Union[Symbol, int]:
199
+ """Get a symbol for an id or get an id for a symbol
200
+
201
+ Args:
202
+ k:
203
+ If it is an id, it tries to find the symbol corresponding
204
+ to the id; if it is a symbol, it tries to find the id
205
+ corresponding to the symbol.
206
+
207
+ Returns:
208
+ An id or a symbol depending on the given `k`.
209
+ """
210
+ if isinstance(k, int):
211
+ return self._id2sym[k]
212
+ else:
213
+ return self._sym2id[k]
214
+
215
+ def merge(self, other: "SymbolTable") -> "SymbolTable":
216
+ """Create a union of two SymbolTables.
217
+ Raises an AssertionError if the same IDs are occupied by
218
+ different symbols.
219
+
220
+ Args:
221
+ other:
222
+ A symbol table to merge with ``self``.
223
+
224
+ Returns:
225
+ A new symbol table.
226
+ """
227
+ self._check_compatible(other)
228
+ return SymbolTable(
229
+ _id2sym={**self._id2sym, **other._id2sym},
230
+ _sym2id={**self._sym2id, **other._sym2id},
231
+ eps=self.eps,
232
+ )
233
+
234
+ def _check_compatible(self, other: "SymbolTable") -> None:
235
+ # Epsilon compatibility
236
+ assert self.eps == other.eps, (
237
+ f"Mismatched epsilon symbol: " f"{self.eps} != {other.eps}"
238
+ )
239
+ # IDs compatibility
240
+ common_ids = set(self._id2sym).intersection(other._id2sym)
241
+ for idx in common_ids:
242
+ assert self[idx] == other[idx], (
243
+ f"ID conflict for id: {idx}, "
244
+ f'self[idx] = "{self[idx]}", '
245
+ f'other[idx] = "{other[idx]}"'
246
+ )
247
+ # Symbols compatibility
248
+ common_symbols = set(self._sym2id).intersection(other._sym2id)
249
+ for sym in common_symbols:
250
+ assert self[sym] == other[sym], (
251
+ f"ID conflict for id: {sym}, "
252
+ f'self[sym] = "{self[sym]}", '
253
+ f'other[sym] = "{other[sym]}"'
254
+ )
255
+
256
+ def __getitem__(self, item: Union[int, Symbol]) -> Union[Symbol, int]:
257
+ return self.get(item)
258
+
259
+ def __contains__(self, item: Union[int, Symbol]) -> bool:
260
+ if isinstance(item, int):
261
+ return item in self._id2sym
262
+ else:
263
+ return item in self._sym2id
264
+
265
+ def __len__(self) -> int:
266
+ return len(self._id2sym)
267
+
268
+ def __eq__(self, other: "SymbolTable") -> bool:
269
+ if len(self) != len(other):
270
+ return False
271
+
272
+ for s in self.symbols:
273
+ if self[s] != other[s]:
274
+ return False
275
+
276
+ return True
277
+
278
+ @property
279
+ def ids(self) -> List[int]:
280
+ """Returns a list of integer IDs corresponding to the symbols."""
281
+ ans = list(self._id2sym.keys())
282
+ ans.sort()
283
+ return ans
284
+
285
+ @property
286
+ def symbols(self) -> List[Symbol]:
287
+ """Returns a list of symbols (e.g., strings) corresponding to
288
+ the integer IDs.
289
+ """
290
+ ans = list(self._sym2id.keys())
291
+ ans.sort()
292
+ return ans
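
A brief usage sketch of the class above; `from_str`, `add` with automatic id assignment, and `merge` behave as documented:

```python
from text.symbol_table import SymbolTable

table = SymbolTable.from_str("<eps> 0\na 1\nb 2")
assert table["a"] == 1 and table[2] == "b"   # symbol <-> id lookup
assert "a" in table and 0 in table

assert table.add("c") == 3                   # next available id
merged = table.merge(SymbolTable.from_str("<eps> 0\na 1"))
assert merged.symbols == ["<eps>", "a", "b", "c"]
```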
text/symbols.py ADDED
@@ -0,0 +1,34 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """ This code is modified from https://github.com/keithito/tacotron """
7
+
8
+ """
9
+ Defines the set of symbols used in text input to the model.
10
+
11
+ The default is a set of ASCII characters that works well for English or
+ text that has been run through Unidecode. For other data, you can modify
+ _letters below. See TRAINING_DATA.md for details.
+ """
12
+
13
+ from text import cmudict, pinyin
14
+
15
+ _pad = "_"
16
+ _punctuation = "!'(),.:;? "
17
+ _special = "-"
18
+ _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
19
+ _silences = ["@sp", "@spn", "@sil"]
20
+
21
+ # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
22
+ _arpabet = ["@" + s for s in cmudict.valid_symbols]
23
+ _pinyin = ["@" + s for s in pinyin.valid_symbols]
24
+
25
+ # Export all symbols:
26
+ symbols = (
27
+ [_pad]
28
+ + list(_special)
29
+ + list(_punctuation)
30
+ + list(_letters)
31
+ + _arpabet
32
+ + _silences
33
+ # + _pinyin # for chinese
34
+ )
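
Downstream text front-ends typically derive the symbol-to-id mapping from this list; a minimal sketch (assuming the `cmudict` module in this upload provides the standard ARPAbet inventory with stress markers):

```python
from text.symbols import symbols

symbol_to_id = {s: i for i, s in enumerate(symbols)}
assert symbol_to_id["_"] == 0        # "_" is the padding symbol
assert "@AA1" in symbol_to_id        # ARPAbet symbols are "@"-prefixed
assert "@sp" in symbol_to_id         # silence tokens come last
```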
text/text_token_collation.py ADDED
@@ -0,0 +1,123 @@
1
+ # Copyright (c) 2023 Amphion.
2
+
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from pathlib import Path
7
+ from typing import List, Tuple
8
+ import os
9
+ import numpy as np
10
+ import torch
11
+ from text.symbol_table import SymbolTable
12
+ from text import text_to_sequence
13
+
14
+
15
+ """
16
+ TextToken: map text to id
17
+ """
18
+
19
+
20
+ # TextTokenCollator is modified from
21
+ # https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/collation.py
22
+ class TextTokenCollator:
23
+ def __init__(
24
+ self,
25
+ text_tokens: List[str],
26
+ add_eos: bool = True,
27
+ add_bos: bool = True,
28
+ pad_symbol: str = "<pad>",
29
+ bos_symbol: str = "<bos>",
30
+ eos_symbol: str = "<eos>",
31
+ ):
32
+ self.pad_symbol = pad_symbol
33
+ self.add_eos = add_eos
34
+ self.add_bos = add_bos
35
+ self.bos_symbol = bos_symbol
36
+ self.eos_symbol = eos_symbol
37
+
38
+ unique_tokens = [pad_symbol]
39
+ if add_bos:
40
+ unique_tokens.append(bos_symbol)
41
+ if add_eos:
42
+ unique_tokens.append(eos_symbol)
43
+ unique_tokens.extend(sorted(text_tokens))
44
+
45
+ self.token2idx = {token: idx for idx, token in enumerate(unique_tokens)}
46
+ self.idx2token = unique_tokens
47
+
48
+ def index(self, tokens_list: List[str]) -> Tuple[torch.Tensor, torch.Tensor]:
49
+ seqs, seq_lens = [], []
50
+ for tokens in tokens_list:
51
+ assert all(s in self.token2idx for s in tokens)
52
+ seq = (
53
+ ([self.bos_symbol] if self.add_bos else [])
54
+ + list(tokens)
55
+ + ([self.eos_symbol] if self.add_eos else [])
56
+ )
57
+ seqs.append(seq)
58
+ seq_lens.append(len(seq))
59
+
60
+ max_len = max(seq_lens)
61
+ for k, (seq, seq_len) in enumerate(zip(seqs, seq_lens)):
62
+ seq.extend([self.pad_symbol] * (max_len - seq_len))
63
+
64
+ tokens = torch.from_numpy(
65
+ np.array(
66
+ [[self.token2idx[token] for token in seq] for seq in seqs],
67
+ dtype=np.int64,
68
+ )
69
+ )
70
+ tokens_lens = torch.IntTensor(seq_lens)
71
+
72
+ return tokens, tokens_lens
73
+
74
+ def __call__(self, text):
75
+ tokens_seq = [p for p in text]
76
+ seq = (
77
+ ([self.bos_symbol] if self.add_bos else [])
78
+ + tokens_seq
79
+ + ([self.eos_symbol] if self.add_eos else [])
80
+ )
81
+
82
+ token_ids = [self.token2idx[token] for token in seq]
83
+ token_lens = len(tokens_seq) + self.add_eos + self.add_bos
84
+
85
+ return token_ids, token_lens
86
+
87
+
88
+ def get_text_token_collater(text_tokens_file: str) -> TextTokenCollator:
89
+ text_tokens_path = Path(text_tokens_file)
90
+ unique_tokens = SymbolTable.from_file(text_tokens_path)
91
+ collater = TextTokenCollator(unique_tokens.symbols, add_bos=True, add_eos=True)
92
+ token2idx = collater.token2idx
93
+ return collater, token2idx
94
+
95
+
96
+ class phoneIDCollation:
97
+ def __init__(self, cfg, dataset=None, symbols_dict_file=None) -> None:
98
+ if cfg.preprocess.phone_extractor != "lexicon":
99
+ ### get text token collator
100
+ if symbols_dict_file is None:
101
+ assert dataset is not None
102
+ symbols_dict_file = os.path.join(
103
+ cfg.preprocess.processed_dir, dataset, cfg.preprocess.symbols_dict
104
+ )
105
+ self.text_token_collator, token2idx = get_text_token_collater(
106
+ symbols_dict_file
107
+ )
108
+ # # unique_tokens = SymbolTable.from_file(symbols_dict_path)
109
+ # # text_tokenizer = TextToken(unique_tokens.symbols, add_bos=True, add_eos=True)
110
+
111
+ # # update phone symbols dict file with pad_symbol or optional tokens (add_bos and add_eos) in TextTokenCollator
112
+ # phone_symbol_dict = SymbolTable()
113
+ # for s in sorted(list(set(token2idx.keys()))):
114
+ # phone_symbol_dict.add(s)
115
+ # phone_symbol_dict.to_file(symbols_dict_file)
116
+
117
+ def get_phone_id_sequence(self, cfg, phones_seq):
118
+ if cfg.preprocess.phone_extractor == "lexicon":
119
+ phones_seq = " ".join(phones_seq)
120
+ sequence = text_to_sequence(phones_seq, cfg.preprocess.text_cleaners)
121
+ else:
122
+ sequence, seq_len = self.text_token_collator(phones_seq)
123
+ return sequence
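
A minimal sketch of the collator on a toy token inventory; ids 0-2 go to `<pad>`, `<bos>`, `<eos>`, and the remaining tokens are sorted:

```python
from text.text_token_collation import TextTokenCollator

collator = TextTokenCollator(["a", "b", "c"], add_bos=True, add_eos=True)

# Single sequence: <pad>=0, <bos>=1, <eos>=2, a=3, b=4, c=5
token_ids, token_len = collator(["a", "c"])
assert token_ids == [1, 3, 5, 2] and token_len == 4

# Batched: index() pads every sequence to the longest in the batch.
tokens, tokens_lens = collator.index([["a"], ["a", "b", "c"]])
assert tokens.shape == (2, 5) and tokens_lens.tolist() == [3, 5]
```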
utils/HyperParams/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from .hps import HyperParams
utils/HyperParams/hps.py ADDED
@@ -0,0 +1,43 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ class HyperParams:
8
+ """The class to store hyperparameters. The key is case-insensitive.
9
+
10
+ Args:
11
+ **kwargs: key-value pairs of hyperparameters; dict values are
+ converted to nested HyperParams instances.
13
+ """
14
+
15
+ def __init__(self, **kwargs):
16
+ for k, v in kwargs.items():
17
+ if isinstance(v, dict):
18
+ v = HyperParams(**v)
19
+ self[k] = v
20
+
21
+ def keys(self):
22
+ return self.__dict__.keys()
23
+
24
+ def items(self):
25
+ return self.__dict__.items()
26
+
27
+ def values(self):
28
+ return self.__dict__.values()
29
+
30
+ def __len__(self):
31
+ return len(self.__dict__)
32
+
33
+ def __getitem__(self, key):
34
+ return getattr(self, key)
35
+
36
+ def __setitem__(self, key, value):
37
+ return setattr(self, key, value)
38
+
39
+ def __contains__(self, key):
40
+ return key in self.__dict__
41
+
42
+ def __repr__(self):
43
+ return self.__dict__.__repr__()
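
A short usage sketch of the container above:

```python
from utils.HyperParams import HyperParams

hps = HyperParams(lr=1e-4, model={"hidden_size": 256})
assert hps.lr == 1e-4                     # attribute-style access
assert hps["model"].hidden_size == 256    # nested dict -> HyperParams
assert "model" in hps and len(hps) == 2
print(hps)                                # repr of the underlying dict
```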
utils/__init__.py ADDED
File without changes
utils/audio.py ADDED
@@ -0,0 +1,74 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ import numpy as np
8
+ from numpy import linalg as LA
9
+ import librosa
10
+ import soundfile as sf
11
+ import librosa.filters
12
+
13
+
14
+ def load_audio_torch(wave_file, fs):
15
+ """Load audio data into torch tensor
16
+
17
+ Args:
18
+ wave_file (str): path to wave file
19
+ fs (int): sample rate
20
+
21
+ Returns:
22
+ audio (tensor): audio data in tensor
23
+ fs (int): sample rate
24
+ """
25
+
26
+ audio, sample_rate = librosa.load(wave_file, sr=fs, mono=True)
27
+ # audio: (T,)
28
+ assert len(audio) > 2
29
+
30
+ # Check the audio dtype (for the soundfile loading backend): float, 8-bit, or 16-bit
31
+ if np.issubdtype(audio.dtype, np.integer):
32
+ max_mag = -np.iinfo(audio.dtype).min
33
+ else:
34
+ max_mag = max(np.amax(audio), -np.amin(audio))
35
+ max_mag = (
36
+ (2**31) + 1
37
+ if max_mag > (2**15)
38
+ else ((2**15) + 1 if max_mag > 1.01 else 1.0)
39
+ )
40
+
41
+ # Normalize the audio
42
+ audio = torch.FloatTensor(audio.astype(np.float32)) / max_mag
43
+
44
+ if (torch.isnan(audio) | torch.isinf(audio)).any():
45
+ return [], sample_rate or fs or 48000
46
+
47
+ # Resample the audio to our target samplerate
48
+ if fs is not None and fs != sample_rate:
49
+ audio = torch.from_numpy(
50
+ librosa.core.resample(audio.numpy(), orig_sr=sample_rate, target_sr=fs)
51
+ )
52
+ sample_rate = fs
53
+
54
+ return audio, fs
55
+
56
+
57
+ def _stft(y, cfg):
58
+ return librosa.stft(
59
+ y=y, n_fft=cfg.n_fft, hop_length=cfg.hop_size, win_length=cfg.win_size
60
+ )
61
+
62
+
63
+ def energy(wav, cfg):
64
+ D = _stft(wav, cfg)
65
+ magnitudes = np.abs(D).T # [T, F]
66
+ return LA.norm(magnitudes, axis=1)
67
+
68
+
69
+ def get_energy_from_tacotron(audio, _stft):
70
+ audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1)
71
+ audio = torch.autograd.Variable(audio, requires_grad=False)
72
+ mel, energy = _stft.mel_spectrogram(audio)
73
+ energy = torch.squeeze(energy, 0).numpy().astype(np.float32)
74
+ return mel, energy
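
A sketch of how these helpers fit together; the file path and the `cfg` stand-in (a hypothetical object with the `n_fft`/`hop_size`/`win_size` fields `_stft` reads) are illustrative assumptions:

```python
from utils.audio import load_audio_torch, energy

class Cfg:  # hypothetical config with the fields _stft() expects
    n_fft, hop_size, win_size = 1024, 256, 1024

audio, fs = load_audio_torch("example.wav", 16000)  # placeholder path
frame_energy = energy(audio.numpy(), Cfg)           # one value per frame
print(frame_energy.shape)
```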
utils/audio_slicer.py ADDED
@@ -0,0 +1,476 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import json
8
+ import numpy as np
9
+ from tqdm import tqdm
10
+ import torch
11
+ import torchaudio
12
+
13
+ from utils.io import save_audio
14
+ from utils.audio import load_audio_torch
15
+
16
+
17
+ # This function is obtained from librosa.
18
+ def get_rms(
19
+ y,
20
+ *,
21
+ frame_length=2048,
22
+ hop_length=512,
23
+ pad_mode="constant",
24
+ ):
25
+ padding = (int(frame_length // 2), int(frame_length // 2))
26
+ y = np.pad(y, padding, mode=pad_mode)
27
+
28
+ axis = -1
29
+ # put our new within-frame axis at the end for now
30
+ out_strides = y.strides + tuple([y.strides[axis]])
31
+ # Reduce the shape on the framing axis
32
+ x_shape_trimmed = list(y.shape)
33
+ x_shape_trimmed[axis] -= frame_length - 1
34
+ out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
35
+ xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
36
+ if axis < 0:
37
+ target_axis = axis - 1
38
+ else:
39
+ target_axis = axis + 1
40
+ xw = np.moveaxis(xw, -1, target_axis)
41
+ # Downsample along the target axis
42
+ slices = [slice(None)] * xw.ndim
43
+ slices[axis] = slice(0, None, hop_length)
44
+ x = xw[tuple(slices)]
45
+
46
+ # Calculate power
47
+ power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
48
+
49
+ return np.sqrt(power)
50
+
51
+
52
+ class Slicer:
53
+ """
54
+ Copied from: https://github.com/openvpi/audio-slicer/blob/main/slicer2.py
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ sr: int,
60
+ threshold: float = -40.0,
61
+ min_length: int = 5000,
62
+ min_interval: int = 300,
63
+ hop_size: int = 10,
64
+ max_sil_kept: int = 5000,
65
+ ):
66
+ if not min_length >= min_interval >= hop_size:
67
+ raise ValueError(
68
+ "The following condition must be satisfied: min_length >= min_interval >= hop_size"
69
+ )
70
+ if not max_sil_kept >= hop_size:
71
+ raise ValueError(
72
+ "The following condition must be satisfied: max_sil_kept >= hop_size"
73
+ )
74
+ min_interval = sr * min_interval / 1000
75
+ self.threshold = 10 ** (threshold / 20.0)
76
+ self.hop_size = round(sr * hop_size / 1000)
77
+ self.win_size = min(round(min_interval), 4 * self.hop_size)
78
+ self.min_length = round(sr * min_length / 1000 / self.hop_size)
79
+ self.min_interval = round(min_interval / self.hop_size)
80
+ self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)
81
+
82
+ def _apply_slice(self, waveform, begin, end):
83
+ begin = begin * self.hop_size
84
+ if len(waveform.shape) > 1:
85
+ end = min(waveform.shape[1], end * self.hop_size)
86
+ return waveform[:, begin:end], begin, end
87
+ else:
88
+ end = min(waveform.shape[0], end * self.hop_size)
89
+ return waveform[begin:end], begin, end
90
+
91
+ # @timeit
92
+ def slice(self, waveform, return_chunks_positions=False):
93
+ if len(waveform.shape) > 1:
94
+ # (#channel, wave_len) -> (wave_len)
95
+ samples = waveform.mean(axis=0)
96
+ else:
97
+ samples = waveform
98
+ if samples.shape[0] <= self.min_length:
99
+ # Keep the return shape consistent with the return_chunks_positions flag.
+ return ([waveform], [(0, samples.shape[0])]) if return_chunks_positions else [waveform]
100
+ rms_list = get_rms(
101
+ y=samples, frame_length=self.win_size, hop_length=self.hop_size
102
+ ).squeeze(0)
103
+ sil_tags = []
104
+ silence_start = None
105
+ clip_start = 0
106
+ for i, rms in enumerate(rms_list):
107
+ # Keep looping while frame is silent.
108
+ if rms < self.threshold:
109
+ # Record start of silent frames.
110
+ if silence_start is None:
111
+ silence_start = i
112
+ continue
113
+ # Keep looping while frame is not silent and silence start has not been recorded.
114
+ if silence_start is None:
115
+ continue
116
+ # Clear recorded silence start if interval is not enough or clip is too short
117
+ is_leading_silence = silence_start == 0 and i > self.max_sil_kept
118
+ need_slice_middle = (
119
+ i - silence_start >= self.min_interval
120
+ and i - clip_start >= self.min_length
121
+ )
122
+ if not is_leading_silence and not need_slice_middle:
123
+ silence_start = None
124
+ continue
125
+ # Need slicing. Record the range of silent frames to be removed.
126
+ if i - silence_start <= self.max_sil_kept:
127
+ pos = rms_list[silence_start : i + 1].argmin() + silence_start
128
+ if silence_start == 0:
129
+ sil_tags.append((0, pos))
130
+ else:
131
+ sil_tags.append((pos, pos))
132
+ clip_start = pos
133
+ elif i - silence_start <= self.max_sil_kept * 2:
134
+ pos = rms_list[
135
+ i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
136
+ ].argmin()
137
+ pos += i - self.max_sil_kept
138
+ pos_l = (
139
+ rms_list[
140
+ silence_start : silence_start + self.max_sil_kept + 1
141
+ ].argmin()
142
+ + silence_start
143
+ )
144
+ pos_r = (
145
+ rms_list[i - self.max_sil_kept : i + 1].argmin()
146
+ + i
147
+ - self.max_sil_kept
148
+ )
149
+ if silence_start == 0:
150
+ sil_tags.append((0, pos_r))
151
+ clip_start = pos_r
152
+ else:
153
+ sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
154
+ clip_start = max(pos_r, pos)
155
+ else:
156
+ pos_l = (
157
+ rms_list[
158
+ silence_start : silence_start + self.max_sil_kept + 1
159
+ ].argmin()
160
+ + silence_start
161
+ )
162
+ pos_r = (
163
+ rms_list[i - self.max_sil_kept : i + 1].argmin()
164
+ + i
165
+ - self.max_sil_kept
166
+ )
167
+ if silence_start == 0:
168
+ sil_tags.append((0, pos_r))
169
+ else:
170
+ sil_tags.append((pos_l, pos_r))
171
+ clip_start = pos_r
172
+ silence_start = None
173
+ # Deal with trailing silence.
174
+ total_frames = rms_list.shape[0]
175
+ if (
176
+ silence_start is not None
177
+ and total_frames - silence_start >= self.min_interval
178
+ ):
179
+ silence_end = min(total_frames, silence_start + self.max_sil_kept)
180
+ pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
181
+ sil_tags.append((pos, total_frames + 1))
182
+ # Apply and return slices.
183
+ if len(sil_tags) == 0:
184
+ # Keep the return shape consistent with the return_chunks_positions flag.
+ return ([waveform], [(0, samples.shape[0])]) if return_chunks_positions else [waveform]
185
+ else:
186
+ chunks = []
187
+ chunks_pos_of_waveform = []
188
+
189
+ if sil_tags[0][0] > 0:
190
+ chunk, begin, end = self._apply_slice(waveform, 0, sil_tags[0][0])
191
+ chunks.append(chunk)
192
+ chunks_pos_of_waveform.append((begin, end))
193
+
194
+ for i in range(len(sil_tags) - 1):
195
+ chunk, begin, end = self._apply_slice(
196
+ waveform, sil_tags[i][1], sil_tags[i + 1][0]
197
+ )
198
+ chunks.append(chunk)
199
+ chunks_pos_of_waveform.append((begin, end))
200
+
201
+ if sil_tags[-1][1] < total_frames:
202
+ chunk, begin, end = self._apply_slice(
203
+ waveform, sil_tags[-1][1], total_frames
204
+ )
205
+ chunks.append(chunk)
206
+ chunks_pos_of_waveform.append((begin, end))
207
+
208
+ return (
209
+ chunks
210
+ if not return_chunks_positions
211
+ else (
212
+ chunks,
213
+ chunks_pos_of_waveform,
214
+ )
215
+ )
216
+
217
+
218
+ def split_utterances_from_audio(
219
+ wav_file,
220
+ output_dir,
221
+ max_duration_of_utterance=10.0,
222
+ min_interval=300,
223
+ db_threshold=-40,
224
+ ):
225
+ """
226
+ Split a long audio file into utterances according to silence (VAD).
227
+
228
+ max_duration_of_utterance (second):
229
+ The maximum duration of every utterance (seconds)
230
+ min_interval (millisecond):
231
+ The smaller min_interval is, the more sliced audio clips this script is likely to generate.
232
+ """
233
+ print("File:", wav_file.split("/")[-1])
234
+ waveform, fs = torchaudio.load(wav_file)
235
+
236
+ slicer = Slicer(sr=fs, min_interval=min_interval, threshold=db_threshold)
237
+ chunks, positions = slicer.slice(waveform, return_chunks_positions=True)
238
+
239
+ durations = [(end - begin) / fs for begin, end in positions]
240
+ print(
241
+ "Slicer's min silence part is {}ms, min and max duration of sliced utterances is {}s and {}s".format(
242
+ min_interval, min(durations), max(durations)
243
+ )
244
+ )
245
+
246
+ res_chunks, res_positions = [], []
247
+ for i, chunk in enumerate(chunks):
248
+ if len(chunk.shape) == 1:
249
+ chunk = chunk[None, :]
250
+
251
+ begin, end = positions[i]
252
+ assert end - begin == chunk.shape[-1]
253
+
254
+ max_wav_len = max_duration_of_utterance * fs
255
+ if chunk.shape[-1] <= max_wav_len:
256
+ res_chunks.append(chunk)
257
+ res_positions.append(positions[i])
258
+ else:
259
+ # TODO: preserve overlap between segments and apply fade-in/fade-out
260
+
261
+ # Get segments number
262
+ number = 2
263
+ while chunk.shape[-1] // number >= max_wav_len:
264
+ number += 1
265
+ seg_len = chunk.shape[-1] // number
266
+
267
+ # Split
268
+ for num in range(number):
269
+ s = seg_len * num
270
+ t = min(s + seg_len, chunk.shape[-1])
271
+
272
+ seg_begin = begin + s
273
+ seg_end = begin + t
274
+
275
+ res_chunks.append(chunk[:, s:t])
276
+ res_positions.append((seg_begin, seg_end))
277
+
278
+ # Save utterances
279
+ os.makedirs(output_dir, exist_ok=True)
280
+ res = {"fs": int(fs)}
281
+ for i, chunk in enumerate(res_chunks):
282
+ filename = "{:04d}.wav".format(i)
283
+ res[filename] = [int(p) for p in res_positions[i]]
284
+ save_audio(os.path.join(output_dir, filename), chunk, fs)
285
+
286
+ # Save positions
287
+ with open(os.path.join(output_dir, "positions.json"), "w") as f:
288
+ json.dump(res, f, indent=4, ensure_ascii=False)
289
+ return res
290
+
291
+
292
+ def is_silence(
293
+ wavform,
294
+ fs,
295
+ threshold=-40.0,
296
+ min_interval=300,
297
+ hop_size=10,
298
+ min_length=5000,
299
+ ):
300
+ """
301
+ Detect whether the given wavform is a silence
302
+
303
+ wavform: (T, )
304
+ """
305
+ threshold = 10 ** (threshold / 20.0)
306
+
307
+ hop_size = round(fs * hop_size / 1000)
308
+ win_size = min(round(min_interval), 4 * hop_size)
309
+ min_length = round(fs * min_length / 1000 / hop_size)
310
+
311
+ if wavform.shape[0] <= min_length:
312
+ return True
313
+
314
+ # (#Frame,)
315
+ rms_array = get_rms(y=wavform, frame_length=win_size, hop_length=hop_size).squeeze(
316
+ 0
317
+ )
318
+ return (rms_array < threshold).all()
319
+
320
+
321
+ def split_audio(
322
+ wav_file, target_sr, output_dir, max_duration_of_segment=10.0, overlap_duration=1.0
323
+ ):
324
+ """
325
+ Split a long audio into segments.
326
+
327
+ target_sr:
328
+ The target sampling rate to save the segments.
329
+ max_duration_of_utterance (second):
330
+ The maximum duration of every utterance (second)
331
+ overlap_duration:
332
+ Each segment has "overlap duration" (second) overlap with its previous and next segment
333
+ """
334
+ # (#channel, T) -> (T,)
335
+ waveform, fs = torchaudio.load(wav_file)
336
+ waveform = torchaudio.functional.resample(
337
+ waveform, orig_freq=fs, new_freq=target_sr
338
+ )
339
+ waveform = torch.mean(waveform, dim=0)
340
+
341
+ # waveform, _ = load_audio_torch(wav_file, target_sr)
342
+ assert len(waveform.shape) == 1
343
+
344
+ assert overlap_duration < max_duration_of_segment
345
+ length = int(max_duration_of_segment * target_sr)
346
+ stride = int((max_duration_of_segment - overlap_duration) * target_sr)
347
+ chunks = []
348
+ for i in range(0, len(waveform), stride):
349
+ # (length,)
350
+ chunks.append(waveform[i : i + length])
351
+ if i + length >= len(waveform):
352
+ break
353
+
354
+ # Save segments
355
+ os.makedirs(output_dir, exist_ok=True)
356
+ results = []
357
+ for i, chunk in enumerate(chunks):
358
+ uid = "{:04d}".format(i)
359
+ filename = os.path.join(output_dir, "{}.wav".format(uid))
360
+ results.append(
361
+ {"Uid": uid, "Path": filename, "Duration": len(chunk) / target_sr}
362
+ )
363
+ save_audio(
364
+ filename,
365
+ chunk,
366
+ target_sr,
367
+ turn_up=not is_silence(chunk, target_sr),
368
+ add_silence=False,
369
+ )
370
+
371
+ return results
372
+
373
+
374
+ def merge_segments_torchaudio(wav_files, fs, output_path, overlap_duration=1.0):
375
+ """Merge the given wav_files (may have overlaps) into a long audio
376
+
377
+ fs:
378
+ The sampling rate of the wav files.
379
+ output_path:
380
+ The output path to save the merged audio.
381
+ overlap_duration (float, optional):
382
+ Each segment has "overlap duration" (second) overlap with its previous and next segment. Defaults to 1.0.
383
+ """
384
+
385
+ waveforms = []
386
+ for file in wav_files:
387
+ # (T,)
388
+ waveform, _ = load_audio_torch(file, fs)
389
+ waveforms.append(waveform)
390
+
391
+ if len(waveforms) == 1:
392
+ save_audio(output_path, waveforms[0], fs, add_silence=False, turn_up=False)
393
+ return
394
+
395
+ overlap_len = int(overlap_duration * fs)
396
+ fade_out = torchaudio.transforms.Fade(fade_out_len=overlap_len)
397
+ fade_in = torchaudio.transforms.Fade(fade_in_len=overlap_len)
398
+ fade_in_and_out = torchaudio.transforms.Fade(fade_in_len=overlap_len, fade_out_len=overlap_len)
399
+
400
+ segments_lens = [len(wav) for wav in waveforms]
401
+ merged_waveform_len = sum(segments_lens) - overlap_len * (len(waveforms) - 1)
402
+ merged_waveform = torch.zeros(merged_waveform_len)
403
+
404
+ start = 0
405
+ for index, wav in enumerate(
406
+ tqdm(waveforms, desc="Merge for {}".format(output_path))
407
+ ):
408
+ wav_len = len(wav)
409
+
410
+ if index == 0:
411
+ wav = fade_out(wav)
412
+ elif index == len(waveforms) - 1:
413
+ wav = fade_in(wav)
414
+ else:
415
+ wav = fade_in_and_out(wav)
416
+
417
+ merged_waveform[start : start + wav_len] = wav
418
+ start += wav_len - overlap_len
419
+
420
+ save_audio(output_path, merged_waveform, fs, add_silence=False, turn_up=True)
421
+
422
+
423
+ def merge_segments_encodec(wav_files, fs, output_path, overlap_duration=1.0):
424
+ """Merge the given wav_files (may have overlaps) into a long audio
425
+
426
+ fs:
427
+ The sampling rate of the wav files.
428
+ output_path:
429
+ The output path to save the merged audio.
430
+ overlap_duration (float, optional):
431
+ Each segment has "overlap duration" (second) overlap with its previous and next segment. Defaults to 1.0.
432
+ """
433
+
434
+ waveforms = []
435
+ for file in wav_files:
436
+ # (T,)
437
+ waveform, _ = load_audio_torch(file, fs)
438
+ waveforms.append(waveform)
439
+
440
+ if len(waveforms) == 1:
441
+ save_audio(output_path, waveforms[0], fs, add_silence=False, turn_up=False)
442
+ return
443
+
444
+ device = waveforms[0].device
445
+ dtype = waveforms[0].dtype
446
+ shape = waveforms[0].shape[:-1]
447
+
448
+ overlap_len = int(overlap_duration * fs)
449
+ segments_lens = [len(wav) for wav in waveforms]
450
+ merged_waveform_len = sum(segments_lens) - overlap_len * (len(waveforms) - 1)
451
+
452
+ sum_weight = torch.zeros(merged_waveform_len, device=device, dtype=dtype)
453
+ out = torch.zeros(*shape, merged_waveform_len, device=device, dtype=dtype)
454
+ offset = 0
455
+
456
+ for frame in waveforms:
457
+ frame_length = frame.size(-1)
458
+ t = torch.linspace(0, 1, frame_length + 2, device=device, dtype=torch.float32)[
459
+ 1:-1
460
+ ]
461
+ weight = 0.5 - (t - 0.5).abs()
462
+ weighted_frame = frame * weight
463
+
464
+ cur = out[..., offset : offset + frame_length]
465
+ cur += weighted_frame[..., : cur.size(-1)]
466
+ out[..., offset : offset + frame_length] = cur
467
+
468
+ cur = sum_weight[offset : offset + frame_length]
469
+ cur += weight[..., : cur.size(-1)]
470
+ sum_weight[offset : offset + frame_length] = cur
471
+
472
+ offset += frame_length - overlap_len
473
+
474
+ assert sum_weight.min() > 0
475
+ merged_waveform = out / sum_weight
476
+ save_audio(output_path, merged_waveform, fs, add_silence=False, turn_up=True)
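
A sketch of the Slicer on a long recording (the path is a placeholder); with `return_chunks_positions=True`, each chunk comes with its (begin, end) sample offsets in the original waveform:

```python
import torchaudio
from utils.audio_slicer import Slicer

waveform, fs = torchaudio.load("long_recording.wav")  # placeholder path
slicer = Slicer(sr=fs, threshold=-40.0, min_interval=300)
chunks, positions = slicer.slice(waveform, return_chunks_positions=True)
for chunk, (begin, end) in zip(chunks, positions):
    print(f"{begin / fs:6.2f}s - {end / fs:6.2f}s  shape={tuple(chunk.shape)}")
```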
utils/cut_by_vad.py ADDED
@@ -0,0 +1,105 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """ This code is modified from https://github.com/facebookresearch/libri-light/blob/main/data_preparation/cut_by_vad.py"""
7
+ import pathlib
8
+ import soundfile as sf
9
+ import numpy as np
10
+ import json
11
+ import multiprocessing
12
+ import tqdm
13
+
14
+
15
+ def save(seq, fname, index, extension):
16
+ """save audio sequences to file"""
17
+ output = np.hstack(seq)
18
+ file_name = fname.parent / (fname.stem + f"_{index:04}{extension}")
19
+ fname.parent.mkdir(exist_ok=True, parents=True)
20
+ sf.write(file_name, output, samplerate=16000)
21
+
22
+
23
+ def cut_sequence(path, vad, path_out, target_len_sec, out_extension):
24
+ """cut audio sequences based on VAD"""
25
+ data, samplerate = sf.read(path)
26
+
27
+ assert len(data.shape) == 1
28
+ assert samplerate == 16000
29
+
30
+ to_stitch = []
31
+ length_accumulated = 0.0
32
+
33
+ i = 0
34
+ # Iterate over VAD segments
35
+ for start, end in vad:
36
+ start_index = int(start * samplerate)
37
+ end_index = int(end * samplerate)
38
+ segment = data[start_index:end_index]  # avoid shadowing the built-in slice
39
+
40
+ # Save slices that exceed the target length or if there's already accumulated audio
41
+ if (
42
+ length_accumulated + (end - start) > target_len_sec
43
+ and length_accumulated > 0
44
+ ):
45
+ save(to_stitch, path_out, i, out_extension)
46
+ to_stitch = []
47
+ i += 1
48
+ length_accumulated = 0
49
+
50
+ # Add the current slice to the list to be stitched
51
+ to_stitch.append(segment)
52
+ length_accumulated += end - start
53
+
54
+ # Save any remaining slices
55
+ if to_stitch:
56
+ save(to_stitch, path_out, i, out_extension)
57
+
58
+
59
+ def cut_book(task):
60
+ """process each book in the dataset"""
61
+ path_book, root_out, target_len_sec, extension = task
62
+
63
+ speaker = pathlib.Path(path_book.parent.name)
64
+
65
+ for i, meta_file_path in enumerate(path_book.glob("*.json")):
66
+ with open(meta_file_path, "r") as f:
67
+ meta = json.loads(f.read())
68
+ book_id = meta["book_meta"]["id"]
69
+ vad = meta["voice_activity"]
70
+
71
+ sound_file = meta_file_path.parent / (meta_file_path.stem + ".flac")
72
+
73
+ path_out = root_out / speaker / book_id / (meta_file_path.stem)
74
+ cut_sequence(sound_file, vad, path_out, target_len_sec, extension)
75
+
76
+
77
+ def cut_segments(
78
+ input_dir, output_dir, target_len_sec=30, n_process=32, out_extension=".wav"
79
+ ):
80
+ """Main function to cut segments from audio files"""
81
+
82
+ pathlib.Path(output_dir).mkdir(exist_ok=True, parents=True)
83
+ list_dir = pathlib.Path(input_dir).glob("*/*")
84
+ list_dir = [x for x in list_dir if x.is_dir()]
85
+
86
+ print(f"{len(list_dir)} directories detected")
87
+ print(f"Launching {n_process} processes")
88
+
89
+ # Create tasks for multiprocessing
90
+ tasks = [
91
+ (path_book, output_dir, target_len_sec, out_extension) for path_book in list_dir
92
+ ]
93
+
94
+ # Process tasks in parallel using multiprocessing
95
+ with multiprocessing.Pool(processes=n_process) as pool:
96
+ for _ in tqdm.tqdm(pool.imap_unordered(cut_book, tasks), total=len(tasks)):
97
+ pass
98
+
99
+
100
+ if __name__ == "__main__":
101
+ input_dir = "/path/to/input_dir"
102
+ output_dir = "/path/to/output_dir"
103
+ target_len_sec = 10
104
+ n_process = 16
105
+ cut_segments(input_dir, output_dir, target_len_sec, n_process)
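
The JSON metadata consumed above follows the libri-light layout: `meta["voice_activity"]` is a list of `[start_sec, end_sec]` voiced spans for the sibling `.flac`. A toy call to `cut_sequence` (all paths are placeholders):

```python
from pathlib import Path
from utils.cut_by_vad import cut_sequence

vad = [(0.0, 4.2), (5.1, 12.8), (13.5, 20.0)]  # voiced spans in seconds
cut_sequence(
    "speaker/book/utt0001.flac",       # 16 kHz mono input (placeholder)
    vad,
    Path("out/speaker/book/utt0001"),  # stem for _0000.wav, _0001.wav, ...
    target_len_sec=10,
    out_extension=".wav",
)
```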
utils/data_utils.py ADDED
@@ -0,0 +1,588 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import json
7
+ import os
8
+
9
+ import numpy as np
10
+ from scipy.interpolate import interp1d
11
+ from tqdm import tqdm
12
+ from sklearn.preprocessing import StandardScaler
13
+
14
+
15
+ def intersperse(lst, item):
16
+ """
17
+ Insert an item between any two consecutive elements of the given list, as well as at the beginning and end of the list
18
+
19
+ Example:
20
+ >>> intersperse([1, 74, 5, 31], 0)
21
+ [0, 1, 0, 74, 0, 5, 0, 31, 0]
22
+ """
23
+ result = [item] * (len(lst) * 2 + 1)
24
+ result[1::2] = lst
25
+ return result
26
+
27
+
28
+ def load_content_feature_path(meta_data, processed_dir, feat_dir):
29
+ utt2feat_path = {}
30
+ for utt_info in meta_data:
31
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
32
+ feat_path = os.path.join(
33
+ processed_dir, utt_info["Dataset"], feat_dir, f'{utt_info["Uid"]}.npy'
34
+ )
35
+ utt2feat_path[utt] = feat_path
36
+
37
+ return utt2feat_path
38
+
39
+
40
+ def load_source_content_feature_path(meta_data, feat_dir):
41
+ utt2feat_path = {}
42
+ for utt in meta_data:
43
+ feat_path = os.path.join(feat_dir, f"{utt}.npy")
44
+ utt2feat_path[utt] = feat_path
45
+
46
+ return utt2feat_path
47
+
48
+
49
+ def get_spk_map(spk2id_path, utt2spk_path):
50
+ utt2spk = {}
51
+ with open(spk2id_path, "r") as spk2id_file:
52
+ spk2id = json.load(spk2id_file)
53
+ with open(utt2spk_path, encoding="utf-8") as f:
54
+ for line in f.readlines():
55
+ utt, spk = line.strip().split("\t")
56
+ utt2spk[utt] = spk
57
+ return spk2id, utt2spk
58
+
59
+
60
+ def get_target_f0_median(f0_dir):
61
+ total_f0 = []
62
+ for utt in os.listdir(f0_dir):
63
+ if not utt.endswith(".npy"):
64
+ continue
65
+ f0_feat_path = os.path.join(f0_dir, utt)
66
+ f0 = np.load(f0_feat_path)
67
+ total_f0 += f0.tolist()
68
+
69
+ total_f0 = np.array(total_f0)
70
+ voiced_position = np.where(total_f0 != 0)
71
+ return np.median(total_f0[voiced_position])
72
+
73
+
74
+ def get_conversion_f0_factor(source_f0, target_median, source_median=None):
75
+ """Align the median between source f0 and target f0
76
+
77
+ Note: the alignment is multiplicative, with factor = target_median / source_median
78
+
79
+ Reference: Frequency and pitch interval
80
+ http://blog.ccyg.studio/article/be12c2ee-d47c-4098-9782-ca76da3035e4/
81
+ """
82
+ if source_median is None:
83
+ voiced_position = np.where(source_f0 != 0)
84
+ source_median = np.median(source_f0[voiced_position])
85
+ factor = target_median / source_median
86
+ return source_median, factor
87
+
88
+
89
+ def transpose_key(frame_pitch, trans_key):
90
+ # Transpose by user's argument
91
+ print("Transpose key = {} ...\n".format(trans_key))
92
+
93
+ transed_pitch = frame_pitch * 2 ** (trans_key / 12)
94
+ return transed_pitch
95
+
96
+
97
+ def pitch_shift_to_target(frame_pitch, target_pitch_median, source_pitch_median=None):
98
+ # Loading F0 Base (median) and shift
99
+ source_pitch_median, factor = get_conversion_f0_factor(
100
+ frame_pitch, target_pitch_median, source_pitch_median
101
+ )
102
+ print(
103
+ "Auto transposing: source f0 median = {:.1f}, target f0 median = {:.1f}, factor = {:.2f}".format(
104
+ source_pitch_median, target_pitch_median, factor
105
+ )
106
+ )
107
+ transed_pitch = frame_pitch * factor
108
+ return transed_pitch
109
+
110
+
111
+ def load_frame_pitch(
112
+ meta_data,
113
+ processed_dir,
114
+ pitch_dir,
115
+ use_log_scale=False,
116
+ return_norm=False,
117
+ interoperate=False,
118
+ utt2spk=None,
119
+ ):
120
+ utt2pitch = {}
121
+ utt2uv = {}
122
+ if utt2spk is None:
123
+ pitch_scaler = StandardScaler()
124
+ for utt_info in meta_data:
125
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
126
+ pitch_path = os.path.join(
127
+ processed_dir, utt_info["Dataset"], pitch_dir, f'{utt_info["Uid"]}.npy'
128
+ )
129
+ pitch = np.load(pitch_path)
130
+ assert len(pitch) > 0
131
+ uv = pitch != 0
132
+ utt2uv[utt] = uv
133
+ if use_log_scale:
134
+ nonzero_idxes = np.where(pitch != 0)[0]
135
+ pitch[nonzero_idxes] = np.log(pitch[nonzero_idxes])
136
+ utt2pitch[utt] = pitch
137
+ pitch_scaler.partial_fit(pitch.reshape(-1, 1))
138
+
139
+ mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
140
+ if return_norm:
141
+ for utt_info in meta_data:
142
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
143
+ pitch = utt2pitch[utt]
144
+ normalized_pitch = (pitch - mean) / std
145
+ utt2pitch[utt] = normalized_pitch
146
+ pitch_statistic = {"mean": mean, "std": std}
147
+ else:
148
+ spk2utt = {}
149
+ pitch_statistic = []
150
+ for utt_info in meta_data:
151
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
152
+ if utt2spk[utt] not in spk2utt:
153
+ spk2utt[utt2spk[utt]] = []
154
+ spk2utt[utt2spk[utt]].append(utt)
155
+
156
+ for spk in spk2utt:
157
+ pitch_scaler = StandardScaler()
158
+ for utt in spk2utt[spk]:
159
+ dataset = utt.split("_")[0]
160
+ uid = "_".join(utt.split("_")[1:])
161
+ pitch_path = os.path.join(
162
+ processed_dir, dataset, pitch_dir, f"{uid}.npy"
163
+ )
164
+ pitch = np.load(pitch_path)
165
+ assert len(pitch) > 0
166
+ uv = pitch != 0
167
+ utt2uv[utt] = uv
168
+ if use_log_scale:
169
+ nonzero_idxes = np.where(pitch != 0)[0]
170
+ pitch[nonzero_idxes] = np.log(pitch[nonzero_idxes])
171
+ utt2pitch[utt] = pitch
172
+ pitch_scaler.partial_fit(pitch.reshape(-1, 1))
173
+
174
+ mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
175
+ if return_norm:
176
+ for utt in spk2utt[spk]:
177
+ pitch = utt2pitch[utt]
178
+ normalized_pitch = (pitch - mean) / std
179
+ utt2pitch[utt] = normalized_pitch
180
+ pitch_statistic.append({"spk": spk, "mean": mean, "std": std})
181
+
182
+ return utt2pitch, utt2uv, pitch_statistic
183
+
184
+
185
+ # discard
186
+ def load_phone_pitch(
187
+ meta_data,
188
+ processed_dir,
189
+ pitch_dir,
190
+ utt2dur,
191
+ use_log_scale=False,
192
+ return_norm=False,
193
+ interoperate=True,
194
+ utt2spk=None,
195
+ ):
196
+ print("Load Phone Pitch")
197
+ utt2pitch = {}
198
+ utt2uv = {}
199
+ if utt2spk is None:
200
+ pitch_scaler = StandardScaler()
201
+ for utt_info in tqdm(meta_data):
202
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
203
+ pitch_path = os.path.join(
204
+ processed_dir, utt_info["Dataset"], pitch_dir, f'{utt_info["Uid"]}.npy'
205
+ )
206
+ frame_pitch = np.load(pitch_path)
207
+ assert len(frame_pitch) > 0
208
+ uv = frame_pitch != 0
209
+ utt2uv[utt] = uv
210
+ phone_pitch = phone_average_pitch(frame_pitch, utt2dur[utt], interoperate)
211
+ if use_log_scale:
212
+ nonzero_idxes = np.where(phone_pitch != 0)[0]
213
+ phone_pitch[nonzero_idxes] = np.log(phone_pitch[nonzero_idxes])
214
+ utt2pitch[utt] = phone_pitch
215
+ pitch_scaler.partial_fit(remove_outlier(phone_pitch).reshape(-1, 1))
216
+
217
+ mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
218
+ max_value = np.finfo(np.float64).min
219
+ min_value = np.finfo(np.float64).max
220
+ if return_norm:
221
+ for utt_info in meta_data:
222
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
223
+ pitch = utt2pitch[utt]
224
+ normalized_pitch = (pitch - mean) / std
225
+ max_value = max(max_value, max(normalized_pitch))
226
+ min_value = min(min_value, min(normalized_pitch))
227
+ utt2pitch[utt] = normalized_pitch
228
+ phone_normalized_pitch_path = os.path.join(
229
+ processed_dir,
230
+ utt_info["Dataset"],
231
+ "phone_level_" + pitch_dir,
232
+ f'{utt_info["Uid"]}.npy',
233
+ )
234
+ pitch_statistic = {
235
+ "mean": mean,
236
+ "std": std,
237
+ "min_value": min_value,
238
+ "max_value": max_value,
239
+ }
240
+ else:
241
+ spk2utt = {}
242
+ pitch_statistic = []
243
+ for utt_info in tqdm(meta_data):
244
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
245
+ if utt2spk[utt] not in spk2utt:
246
+ spk2utt[utt2spk[utt]] = []
247
+ spk2utt[utt2spk[utt]].append(utt)
248
+
249
+ for spk in spk2utt:
250
+ pitch_scaler = StandardScaler()
251
+ for utt in spk2utt[spk]:
252
+ dataset = utt.split("_")[0]
253
+ uid = "_".join(utt.split("_")[1:])
254
+ pitch_path = os.path.join(
255
+ processed_dir, dataset, pitch_dir, f"{uid}.npy"
256
+ )
257
+ frame_pitch = np.load(pitch_path)
258
+ assert len(frame_pitch) > 0
259
+ uv = frame_pitch != 0
260
+ utt2uv[utt] = uv
261
+ phone_pitch = phone_average_pitch(
262
+ frame_pitch, utt2dur[utt], interoperate
263
+ )
264
+ if use_log_scale:
265
+ nonzero_idxes = np.where(phone_pitch != 0)[0]
266
+ phone_pitch[nonzero_idxes] = np.log(phone_pitch[nonzero_idxes])
267
+ utt2pitch[utt] = phone_pitch
268
+ pitch_scaler.partial_fit(remove_outlier(phone_pitch).reshape(-1, 1))
269
+
270
+ mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
271
+ max_value = np.finfo(np.float64).min
272
+ min_value = np.finfo(np.float64).max
273
+
274
+ if return_norm:
275
+ for utt in spk2utt[spk]:
276
+ pitch = utt2pitch[utt]
277
+ normalized_pitch = (pitch - mean) / std
278
+ max_value = max(max_value, max(normalized_pitch))
279
+ min_value = min(min_value, min(normalized_pitch))
280
+ utt2pitch[utt] = normalized_pitch
281
+ pitch_statistic.append(
282
+ {
283
+ "spk": spk,
284
+ "mean": mean,
285
+ "std": std,
286
+ "min_value": min_value,
287
+ "max_value": max_value,
288
+ }
289
+ )
290
+
291
+ return utt2pitch, utt2uv, pitch_statistic
292
+
293
+
294
+ def phone_average_pitch(pitch, dur, interoperate=False):  # "interoperate" (sic) toggles interpolation of unvoiced frames
295
+ pos = 0
296
+
297
+ if interoperate:
298
+ nonzero_ids = np.where(pitch != 0)[0]
299
+ interp_fn = interp1d(
300
+ nonzero_ids,
301
+ pitch[nonzero_ids],
302
+ fill_value=(pitch[nonzero_ids[0]], pitch[nonzero_ids[-1]]),
303
+ bounds_error=False,
304
+ )
305
+ pitch = interp_fn(np.arange(0, len(pitch)))
306
+ phone_pitch = np.zeros(len(dur))
307
+
308
+ for i, d in enumerate(dur):
309
+ d = int(d)
310
+ if d > 0 and pos < len(pitch):
311
+ phone_pitch[i] = np.mean(pitch[pos : pos + d])
312
+ else:
313
+ phone_pitch[i] = 0
314
+ pos += d
315
+ return phone_pitch
316
+
317
+
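As a quick sanity check of phone_average_pitch: given a frame-level pitch track and per-phone durations in frames, it returns one averaged value per phone (a toy example with made-up numbers):

import numpy as np

frame_pitch = np.array([100.0, 110.0, 200.0, 210.0, 220.0, 0.0])
durations = [2, 3, 1]                          # frames per phone
print(phone_average_pitch(frame_pitch, durations))
# [105. 210.   0.]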
318
+ def load_energy(
319
+ meta_data,
320
+ processed_dir,
321
+ energy_dir,
322
+ use_log_scale=False,
323
+ return_norm=False,
324
+ utt2spk=None,
325
+ ):
326
+ utt2energy = {}
327
+ if utt2spk is None:
328
+ for utt_info in meta_data:
329
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
330
+ energy_path = os.path.join(
331
+ processed_dir, utt_info["Dataset"], energy_dir, f'{utt_info["Uid"]}.npy'
332
+ )
333
+ if not os.path.exists(energy_path):
334
+ continue
335
+ energy = np.load(energy_path)
336
+ assert len(energy) > 0
337
+
338
+ if use_log_scale:
339
+ nonzero_idxes = np.where(energy != 0)[0]
340
+ energy[nonzero_idxes] = np.log(energy[nonzero_idxes])
341
+ utt2energy[utt] = energy
342
+
343
+ if return_norm:
344
+ with open(
345
+ os.path.join(
346
+ processed_dir, utt_info["Dataset"], energy_dir, "statistics.json"
347
+ )
348
+ ) as f:
349
+ stats = json.load(f)
350
+ mean, std = (
351
+ stats[utt_info["Dataset"] + "_" + utt_info["Singer"]][
352
+ "voiced_positions"
353
+ ]["mean"],
354
+ stats["LJSpeech_LJSpeech"]["voiced_positions"]["std"],
355
+ )
356
+ for utt in utt2energy.keys():
357
+ energy = utt2energy[utt]
358
+ normalized_energy = (energy - mean) / std
359
+ utt2energy[utt] = normalized_energy
360
+
361
+ energy_statistic = {"mean": mean, "std": std}
362
+ else:
363
+ spk2utt = {}
364
+ energy_statistic = []
365
+ for utt_info in meta_data:
366
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
367
+ if utt2spk[utt] not in spk2utt:
368
+ spk2utt[utt2spk[utt]] = []
369
+ spk2utt[utt2spk[utt]].append(utt)
370
+
371
+ for spk in spk2utt:
372
+ energy_scaler = StandardScaler()
373
+ for utt in spk2utt[spk]:
374
+ dataset = utt.split("_")[0]
375
+ uid = "_".join(utt.split("_")[1:])
376
+ energy_path = os.path.join(
377
+ processed_dir, dataset, energy_dir, f"{uid}.npy"
378
+ )
379
+ if not os.path.exists(energy_path):
380
+ continue
381
+ frame_energy = np.load(energy_path)
382
+ assert len(frame_energy) > 0
383
+
384
+ if use_log_scale:
385
+ nonzero_idxes = np.where(frame_energy != 0)[0]
386
+ frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
387
+ utt2energy[utt] = frame_energy
388
+ energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
389
+
390
+ mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
391
+ if return_norm:
392
+ for utt in spk2utt[spk]:
393
+ energy = utt2energy[utt]
394
+ normalized_energy = (energy - mean) / std
395
+ utt2energy[utt] = normalized_energy
396
+ energy_statistic.append({"spk": spk, "mean": mean, "std": std})
397
+
398
+ return utt2energy, energy_statistic
399
+
400
+
401
+ def load_frame_energy(
402
+ meta_data,
403
+ processed_dir,
404
+ energy_dir,
405
+ use_log_scale=False,
406
+ return_norm=False,
407
+ interoperate=False,
408
+ utt2spk=None,
409
+ ):
410
+ utt2energy = {}
411
+ if utt2spk is None:
412
+ energy_scaler = StandardScaler()
413
+ for utt_info in meta_data:
414
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
415
+ energy_path = os.path.join(
416
+ processed_dir, utt_info["Dataset"], energy_dir, f'{utt_info["Uid"]}.npy'
417
+ )
418
+ frame_energy = np.load(energy_path)
419
+ assert len(frame_energy) > 0
420
+
421
+ if use_log_scale:
422
+ nonzero_idxes = np.where(frame_energy != 0)[0]
423
+ frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
424
+ utt2energy[utt] = frame_energy
425
+ energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
426
+
427
+ mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
428
+ if return_norm:
429
+ for utt_info in meta_data:
430
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
431
+ energy = utt2energy[utt]
432
+ normalized_energy = (energy - mean) / std
433
+ utt2energy[utt] = normalized_energy
434
+ energy_statistic = {"mean": mean, "std": std}
435
+
436
+ else:
437
+ spk2utt = {}
438
+ energy_statistic = []
439
+ for utt_info in meta_data:
440
+ utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
441
+ if utt2spk[utt] not in spk2utt:
442
+ spk2utt[utt2spk[utt]] = []
443
+ spk2utt[utt2spk[utt]].append(utt)
444
+
445
+ for spk in spk2utt:
446
+ energy_scaler = StandardScaler()
447
+ for utt in spk2utt[spk]:
448
+ dataset = utt.split("_")[0]
449
+ uid = "_".join(utt.split("_")[1:])
450
+ energy_path = os.path.join(
451
+ processed_dir, dataset, energy_dir, f"{uid}.npy"
452
+ )
453
+ frame_energy = np.load(energy_path)
454
+ assert len(frame_energy) > 0
455
+
456
+ if use_log_scale:
457
+ nonzero_idxes = np.where(frame_energy != 0)[0]
458
+ frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
459
+ utt2energy[utt] = frame_energy
460
+ energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
461
+
462
+ mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
463
+ if return_norm:
464
+ for utt in spk2utt[spk]:
465
+ energy = utt2energy[utt]
466
+ normalized_energy = (energy - mean) / std
467
+ utt2energy[utt] = normalized_energy
468
+ energy_statistic.append({"spk": spk, "mean": mean, "std": std})
469
+
470
+ return utt2energy, energy_statistic
471
+
472
+
473
+ def align_length(feature, target_len, pad_value=0.0):
474
+ feature_len = feature.shape[-1]
475
+ dim = len(feature.shape)
476
+ # align 2-D data (e.g. a (n_mels, frame_len) mel)
477
+ if dim == 2:
478
+ if target_len > feature_len:
479
+ feature = np.pad(
480
+ feature,
481
+ ((0, 0), (0, target_len - feature_len)),
482
+ constant_values=pad_value,
483
+ )
484
+ else:
485
+ feature = feature[:, :target_len]
486
+ # align 1-D data (e.g. a (frame_len,) f0 track)
487
+ elif dim == 1:
488
+ if target_len > feature_len:
489
+ feature = np.pad(
490
+ feature, (0, target_len - feature_len), constant_values=pad_value
491
+ )
492
+ else:
493
+ feature = feature[:target_len]
494
+ else:
495
+ raise NotImplementedError
496
+ return feature
497
+
498
+
499
+ def align_whisper_feauture_length(  # (sic) "feauture": name kept to match existing call sites
500
+ feature, target_len, fast_mapping=True, source_hop=320, target_hop=256
501
+ ):
502
+ factor = np.gcd(source_hop, target_hop)
503
+ source_hop //= factor
504
+ target_hop //= factor
505
+ # print(
506
+ # "Mapping source's {} frames => target's {} frames".format(
507
+ # target_hop, source_hop
508
+ # )
509
+ # )
510
+
511
+ max_source_len = 1500
512
+ target_len = min(target_len, max_source_len * source_hop // target_hop)
513
+
514
+ width = feature.shape[-1]
515
+
516
+ if fast_mapping:
517
+ source_len = target_len * target_hop // source_hop + 1
518
+ feature = feature[:source_len]
519
+
520
+ else:
521
+ source_len = max_source_len
522
+
523
+ # const ~= target_len * target_hop
524
+ const = source_len * source_hop // target_hop * target_hop
525
+
526
+ # (source_len * source_hop, dim)
527
+ up_sampling_feats = np.repeat(feature, source_hop, axis=0)
528
+ # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
529
+ down_sampling_feats = np.average(
530
+ up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
531
+ )
532
+ assert len(down_sampling_feats) >= target_len
533
+
534
+ # (target_len, dim)
535
+ feat = down_sampling_feats[:target_len]
536
+
537
+ return feat
538
+
539
+
540
+ def align_content_feature_length(feature, target_len, source_hop=320, target_hop=256):
541
+ factor = np.gcd(source_hop, target_hop)
542
+ source_hop //= factor
543
+ target_hop //= factor
544
+ # print(
545
+ # "Mapping source's {} frames => target's {} frames".format(
546
+ # target_hop, source_hop
547
+ # )
548
+ # )
549
+
550
+ # (source_len, 256)
551
+ source_len, width = feature.shape
552
+
553
+ # const ~= target_len * target_hop
554
+ const = source_len * source_hop // target_hop * target_hop
555
+
556
+ # (source_len * source_hop, dim)
557
+ up_sampling_feats = np.repeat(feature, source_hop, axis=0)
558
+ # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
559
+ down_sampling_feats = np.average(
560
+ up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
561
+ )
562
+
563
+ err = abs(target_len - len(down_sampling_feats))
564
+ if err > 4:  # tolerate a small rounding mismatch; the 4-frame threshold is empirical
565
+ print("target_len:", target_len)
566
+ print("raw feature:", feature.shape)
567
+ print("up_sampling:", up_sampling_feats.shape)
568
+ print("down_sampling_feats:", down_sampling_feats.shape)
569
+ exit()
570
+ if len(down_sampling_feats) < target_len:
571
+ # (1, dim) -> (err, dim)
572
+ end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
573
+ down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)
574
+
575
+ # (target_len, dim)
576
+ feat = down_sampling_feats[:target_len]
577
+
578
+ return feat
579
+
580
+
581
+ def remove_outlier(values):
582
+ values = np.array(values)
583
+ p25 = np.percentile(values, 25)
584
+ p75 = np.percentile(values, 75)
585
+ lower = p25 - 1.5 * (p75 - p25)
586
+ upper = p75 + 1.5 * (p75 - p25)
587
+ normal_indices = np.logical_and(values > lower, values < upper)
588
+ return values[normal_indices]
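A few of the small helpers above can be sanity-checked in isolation; a minimal sketch (values chosen for illustration):

import numpy as np

print(intersperse([1, 74, 5, 31], 0))
# [0, 1, 0, 74, 0, 5, 0, 31, 0]

feat = np.ones((80, 100))                      # (n_mels, n_frames)
print(align_length(feat, 120).shape)           # zero-padded to (80, 120)
print(align_length(feat, 90).shape)            # trimmed to (80, 90)

print(remove_outlier([1, 2, 3, 2, 1, 100]))    # 100 lies outside 1.5 * IQR -> [1 2 3 2 1]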
utils/distribution.py ADDED
@@ -0,0 +1,270 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+ from torch.distributions import Normal
11
+
12
+
13
+ def log_sum_exp(x):
14
+ """numerically stable log_sum_exp implementation that prevents overflow"""
15
+ # TF ordering
16
+ axis = len(x.size()) - 1
17
+ m, _ = torch.max(x, dim=axis)
18
+ m2, _ = torch.max(x, dim=axis, keepdim=True)
19
+ return m + torch.log(torch.sum(torch.exp(x - m2), dim=axis))
20
+
21
+
22
+ def discretized_mix_logistic_loss(
23
+ y_hat, y, num_classes=256, log_scale_min=-7.0, reduce=True
24
+ ):
25
+ """Discretized mixture of logistic distributions loss
26
+
27
+ Note that it is assumed that input is scaled to [-1, 1].
28
+
29
+ Args:
30
+ y_hat (Tensor): Predicted output (B x C x T)
31
+ y (Tensor): Target (B x T x 1).
32
+ num_classes (int): Number of classes
33
+ log_scale_min (float): Log scale minimum value
34
+ reduce (bool): If True, the losses are averaged or summed for each
35
+ minibatch.
36
+
37
+ Returns
38
+ Tensor: loss
39
+ """
40
+ assert y_hat.dim() == 3
41
+ assert y_hat.size(1) % 3 == 0
42
+ nr_mix = y_hat.size(1) // 3
43
+
44
+ # (B x T x C)
45
+ y_hat = y_hat.transpose(1, 2)
46
+
47
+ # unpack parameters. (B, T, num_mixtures) x 3
48
+ logit_probs = y_hat[:, :, :nr_mix]
49
+ means = y_hat[:, :, nr_mix : 2 * nr_mix]
50
+ log_scales = torch.clamp(y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min)
51
+
52
+ # B x T x 1 -> B x T x num_mixtures
53
+ y = y.expand_as(means)
54
+
55
+ centered_y = y - means
56
+ inv_stdv = torch.exp(-log_scales)
57
+ plus_in = inv_stdv * (centered_y + 1.0 / (num_classes - 1))
58
+ cdf_plus = torch.sigmoid(plus_in)
59
+ min_in = inv_stdv * (centered_y - 1.0 / (num_classes - 1))
60
+ cdf_min = torch.sigmoid(min_in)
61
+
62
+ # log probability for edge case of 0 (before scaling)
63
+ # equivalent: torch.log(torch.sigmoid(plus_in))
64
+ log_cdf_plus = plus_in - F.softplus(plus_in)
65
+
66
+ # log probability for edge case of 255 (before scaling)
67
+ # equivalent: (1 - torch.sigmoid(min_in)).log()
68
+ log_one_minus_cdf_min = -F.softplus(min_in)
69
+
70
+ # probability for all other cases
71
+ cdf_delta = cdf_plus - cdf_min
72
+
73
+ mid_in = inv_stdv * centered_y
74
+ # log probability in the center of the bin, to be used in extreme cases
75
+ # (not actually used in our code)
76
+ log_pdf_mid = mid_in - log_scales - 2.0 * F.softplus(mid_in)
77
+
78
+ # tf equivalent
79
+ """
80
+ log_probs = tf.where(x < -0.999, log_cdf_plus,
81
+ tf.where(x > 0.999, log_one_minus_cdf_min,
82
+ tf.where(cdf_delta > 1e-5,
83
+ tf.log(tf.maximum(cdf_delta, 1e-12)),
84
+ log_pdf_mid - np.log(127.5))))
85
+ """
86
+ # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
87
+ # for num_classes=65536 case? 1e-7? not sure..
88
+ inner_inner_cond = (cdf_delta > 1e-5).float()
89
+
90
+ inner_inner_out = inner_inner_cond * torch.log(
91
+ torch.clamp(cdf_delta, min=1e-12)
92
+ ) + (1.0 - inner_inner_cond) * (log_pdf_mid - np.log((num_classes - 1) / 2))
93
+ inner_cond = (y > 0.999).float()
94
+ inner_out = (
95
+ inner_cond * log_one_minus_cdf_min + (1.0 - inner_cond) * inner_inner_out
96
+ )
97
+ cond = (y < -0.999).float()
98
+ log_probs = cond * log_cdf_plus + (1.0 - cond) * inner_out
99
+
100
+ log_probs = log_probs + F.log_softmax(logit_probs, -1)
101
+
102
+ if reduce:
103
+ return -torch.sum(log_sum_exp(log_probs))
104
+ else:
105
+ return -log_sum_exp(log_probs).unsqueeze(-1)
106
+
107
+
108
+ def to_one_hot(tensor, n, fill_with=1.0):
109
+ # we perform one hot encore with respect to the last axis
110
+ one_hot = torch.FloatTensor(tensor.size() + (n,)).zero_()
111
+ if tensor.is_cuda:
112
+ one_hot = one_hot.cuda()
113
+ one_hot.scatter_(len(tensor.size()), tensor.unsqueeze(-1), fill_with)
114
+ return one_hot
115
+
116
+
117
+ def sample_from_discretized_mix_logistic(y, log_scale_min=-7.0, clamp_log_scale=False):
118
+ """
119
+ Sample from discretized mixture of logistic distributions
120
+
121
+ Args:
122
+ y (Tensor): B x C x T
123
+ log_scale_min (float): Log scale minimum value
124
+
125
+ Returns:
126
+ Tensor: sample in range of [-1, 1].
127
+ """
128
+ assert y.size(1) % 3 == 0
129
+ nr_mix = y.size(1) // 3
130
+
131
+ # B x T x C
132
+ y = y.transpose(1, 2)
133
+ logit_probs = y[:, :, :nr_mix]
134
+
135
+ # sample mixture indicator from softmax
136
+ temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
137
+ temp = logit_probs.data - torch.log(-torch.log(temp))
138
+ _, argmax = temp.max(dim=-1)
139
+
140
+ # (B, T) -> (B, T, nr_mix)
141
+ one_hot = to_one_hot(argmax, nr_mix)
142
+ # select logistic parameters
143
+ means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * one_hot, dim=-1)
144
+ log_scales = torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * one_hot, dim=-1)
145
+ if clamp_log_scale:
146
+ log_scales = torch.clamp(log_scales, min=log_scale_min)
147
+ # sample from logistic & clip to interval
148
+ # we don't actually round to the nearest 8bit value when sampling
149
+ u = means.data.new(means.size()).uniform_(1e-5, 1.0 - 1e-5)
150
+ x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u))
151
+
152
+ x = torch.clamp(torch.clamp(x, min=-1.0), max=1.0)
153
+
154
+ return x
155
+
156
+
157
+ # A discretized version of the Gaussian loss could be defined analogously;
158
+ # here we use the continuous version, as in https://clarinet-demo.github.io/
159
+ def mix_gaussian_loss(y_hat, y, log_scale_min=-7.0, reduce=True):
160
+ """Mixture of continuous gaussian distributions loss
161
+
162
+ Note that it is assumed that input is scaled to [-1, 1].
163
+
164
+ Args:
165
+ y_hat (Tensor): Predicted output (B x C x T)
166
+ y (Tensor): Target (B x T x 1).
167
+ log_scale_min (float): Log scale minimum value
168
+ reduce (bool): If True, the losses are averaged or summed for each
169
+ minibatch.
170
+ Returns
171
+ Tensor: loss
172
+ """
173
+ assert y_hat.dim() == 3
174
+ C = y_hat.size(1)
175
+ if C == 2:
176
+ nr_mix = 1
177
+ else:
178
+ assert y_hat.size(1) % 3 == 0
179
+ nr_mix = y_hat.size(1) // 3
180
+
181
+ # (B x T x C)
182
+ y_hat = y_hat.transpose(1, 2)
183
+
184
+ # unpack parameters.
185
+ if C == 2:
186
+ # special case for C == 2, just for compatibility
187
+ logit_probs = None
188
+ means = y_hat[:, :, 0:1]
189
+ log_scales = torch.clamp(y_hat[:, :, 1:2], min=log_scale_min)
190
+ else:
191
+ # (B, T, num_mixtures) x 3
192
+ logit_probs = y_hat[:, :, :nr_mix]
193
+ means = y_hat[:, :, nr_mix : 2 * nr_mix]
194
+ log_scales = torch.clamp(
195
+ y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min
196
+ )
197
+
198
+ # B x T x 1 -> B x T x num_mixtures
199
+ y = y.expand_as(means)
200
+
201
+ centered_y = y - means
202
+ dist = Normal(loc=0.0, scale=torch.exp(log_scales))
203
+ # do we need to add a trick to avoid log(0)?
204
+ log_probs = dist.log_prob(centered_y)
205
+
206
+ if nr_mix > 1:
207
+ log_probs = log_probs + F.log_softmax(logit_probs, -1)
208
+
209
+ if reduce:
210
+ if nr_mix == 1:
211
+ return -torch.sum(log_probs)
212
+ else:
213
+ return -torch.sum(log_sum_exp(log_probs))
214
+ else:
215
+ if nr_mix == 1:
216
+ return -log_probs
217
+ else:
218
+ return -log_sum_exp(log_probs).unsqueeze(-1)
219
+
220
+
221
+ def sample_from_mix_gaussian(y, log_scale_min=-7.0):
222
+ """
223
+ Sample from (discretized) mixture of gaussian distributions
224
+ Args:
225
+ y (Tensor): B x C x T
226
+ log_scale_min (float): Log scale minimum value
227
+ Returns:
228
+ Tensor: sample in range of [-1, 1].
229
+ """
230
+ C = y.size(1)
231
+ if C == 2:
232
+ nr_mix = 1
233
+ else:
234
+ assert y.size(1) % 3 == 0
235
+ nr_mix = y.size(1) // 3
236
+
237
+ # B x T x C
238
+ y = y.transpose(1, 2)
239
+
240
+ if C == 2:
241
+ logit_probs = None
242
+ else:
243
+ logit_probs = y[:, :, :nr_mix]
244
+
245
+ if nr_mix > 1:
246
+ # sample mixture indicator from softmax
247
+ temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
248
+ temp = logit_probs.data - torch.log(-torch.log(temp))
249
+ _, argmax = temp.max(dim=-1)
250
+
251
+ # (B, T) -> (B, T, nr_mix)
252
+ one_hot = to_one_hot(argmax, nr_mix)
253
+
254
+ # Select means and log scales
255
+ means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * one_hot, dim=-1)
256
+ log_scales = torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * one_hot, dim=-1)
257
+ else:
258
+ if C == 2:
259
+ means, log_scales = y[:, :, 0], y[:, :, 1]
260
+ elif C == 3:
261
+ means, log_scales = y[:, :, 1], y[:, :, 2]
262
+ else:
263
+ assert False, "shouldn't happen"
264
+
265
+ scales = torch.exp(log_scales)
266
+ dist = Normal(loc=means, scale=scales)
267
+ x = dist.sample()
268
+
269
+ x = torch.clamp(x, min=-1.0, max=1.0)
270
+ return x
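Both the mixture losses and the samplers share the (B, C, T) layout, with C = 3 * num_mixtures packing [logit_probs | means | log_scales]. A smoke test with random tensors (shapes are arbitrary):

import torch

B, T, nr_mix = 2, 100, 10
y_hat = torch.randn(B, 3 * nr_mix, T)          # mixture parameters
y = torch.rand(B, T, 1) * 2 - 1                # targets scaled to [-1, 1]

loss = discretized_mix_logistic_loss(y_hat, y, num_classes=256)
sample = sample_from_discretized_mix_logistic(y_hat)   # (B, T), values in [-1, 1]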
utils/dsp.py ADDED
@@ -0,0 +1,97 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import numpy as np
7
+ import torch
8
+
9
+ # ZERO = 1e-12
10
+
11
+
12
+ def gaussian_normalize_mel_channel(mel, mu, sigma):
13
+ """
14
+ Shift to the standard normal distribution
15
+
16
+ Args:
17
+ mel: (n_mels, frame_len)
18
+ mu: (n_mels,), mean value
19
+ sigma: (n_mels,), sd value
20
+ Return:
21
+ Tensor like mel
22
+ """
23
+ mu = np.expand_dims(mu, -1)
24
+ sigma = np.expand_dims(sigma, -1)
25
+ return (mel - mu) / sigma
26
+
27
+
28
+ def de_gaussian_normalize_mel_channel(mel, mu, sigma):
29
+ """
30
+ Invert gaussian_normalize_mel_channel: map a standardized mel back to its original scale.
+
31
+ Args:
32
+ mel: (n_mels, frame_len)
33
+ mu: (n_mels,), mean value
34
+ sigma: (n_mels,), sd value
35
+ Return:
36
+ Tensor like mel
37
+ """
38
+ mu = np.expand_dims(mu, -1)
39
+ sigma = np.expand_dims(sigma, -1)
40
+ return sigma * mel + mu
41
+
42
+
43
+ def decompress(audio_compressed, bits):
44
+ mu = 2**bits - 1
45
+ audio = np.sign(audio_compressed) / mu * ((1 + mu) ** np.abs(audio_compressed) - 1)
46
+ return audio
47
+
48
+
49
+ def compress(audio, bits):
50
+ mu = 2**bits - 1
51
+ audio_compressed = np.sign(audio) * np.log(1 + mu * np.abs(audio)) / np.log(mu + 1)
52
+ return audio_compressed
53
+
54
+
55
+ def label_to_audio(quant, bits):
56
+ classes = 2**bits
57
+ audio = 2 * quant / (classes - 1.0) - 1.0
58
+ return audio
59
+
60
+
61
+ def audio_to_label(audio, bits):
62
+ """Normalized audio data tensor to digit array
63
+
64
+ Args:
65
+ audio (tensor): audio data
66
+ bits (int): data bits
67
+
68
+ Returns:
69
+ array<int>: digit array of audio data
70
+ """
71
+ classes = 2**bits
72
+ # initialize an increasing array with values from -1 to 1
73
+ bins = np.linspace(-1, 1, classes)
74
+ # change value in audio tensor to digits
75
+ quant = np.digitize(audio, bins) - 1
76
+ return quant
77
+
78
+
79
+ def label_to_onehot(x, bits):
80
+ """Converts a class vector (integers) to binary class matrix.
81
+ Args:
82
+ x: class vector to be converted into a matrix
83
+ (integers from 0 to num_classes).
84
+ bits: number of bits; the total number of classes is 2**bits.
85
+ Returns:
86
+ A binary matrix representation of the input. The classes axis
87
+ is placed last.
88
+ """
89
+ classes = 2**bits
90
+
91
+ result = torch.zeros((x.shape[0], classes), dtype=torch.float32)
92
+ for i in range(x.shape[0]):
93
+ result[i, x[i]] = 1
94
+
95
+ output_shape = x.shape + (classes,)
96
+ output = torch.reshape(result, output_shape)
97
+ return output
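compress/decompress form a mu-law companding pair with mu = 2**bits - 1, and audio_to_label/label_to_audio quantize to and from 2**bits classes; a round-trip sketch:

import numpy as np

audio = np.sin(np.linspace(0, 4 * np.pi, 16000))   # waveform in [-1, 1]

compressed = compress(audio, bits=8)
restored = decompress(compressed, bits=8)
print(np.max(np.abs(audio - restored)))            # ~0: companding is invertible

labels = audio_to_label(compressed, bits=8)        # ints in [0, 255]
approx = label_to_audio(labels, bits=8)            # back to [-1, 1], now quantized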
utils/duration.py ADDED
@@ -0,0 +1,86 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import numpy as np
7
+ import os
8
+ import tgt
9
+
10
+
11
+ def get_alignment(tier, cfg):
12
+ sample_rate = cfg["sample_rate"]
13
+ hop_size = cfg["hop_size"]
14
+
15
+ sil_phones = ["sil", "sp", "spn"]
16
+
17
+ phones = []
18
+ durations = []
19
+ start_time = 0
20
+ end_time = 0
21
+ end_idx = 0
22
+
23
+ for t in tier._objects:
24
+ s, e, p = t.start_time, t.end_time, t.text
25
+
26
+ # Trim leading silences
27
+ if phones == []:
28
+ if p in sil_phones:
29
+ continue
30
+ else:
31
+ start_time = s
32
+
33
+ if p not in sil_phones:
34
+ # For ordinary phones
35
+ phones.append(p)
36
+ end_time = e
37
+ end_idx = len(phones)
38
+ else:
39
+ # For silent phones
40
+ phones.append(p)
41
+
42
+ durations.append(
43
+ int(
44
+ np.round(e * sample_rate / hop_size)
45
+ - np.round(s * sample_rate / hop_size)
46
+ )
47
+ )
48
+
49
+ # Trim trailing silences
50
+ phones = phones[:end_idx]
51
+ durations = durations[:end_idx]
52
+
53
+ return phones, durations, start_time, end_time
54
+
55
+
56
+ def get_duration(utt, wav, cfg):
57
+ speaker = utt["Singer"]
58
+ basename = utt["Uid"]
59
+ dataset = utt["Dataset"]
60
+ sample_rate = cfg["sample_rate"]
61
+
62
+ # print(cfg.processed_dir, dataset, speaker, basename)
63
+ wav_path = os.path.join(
64
+ cfg.processed_dir, dataset, "raw_data", speaker, "{}.wav".format(basename)
65
+ )
66
+ text_path = os.path.join(
67
+ cfg.processed_dir, dataset, "raw_data", speaker, "{}.lab".format(basename)
68
+ )
69
+ tg_path = os.path.join(
70
+ cfg.processed_dir, dataset, "TextGrid", speaker, "{}.TextGrid".format(basename)
71
+ )
72
+
73
+ # Read raw text
74
+ with open(text_path, "r") as f:
75
+ raw_text = f.readline().strip("\n")
76
+
77
+ # Get alignments
78
+ textgrid = tgt.io.read_textgrid(tg_path)
79
+ phone, duration, start, end = get_alignment(
80
+ textgrid.get_tier_by_name("phones"), cfg
81
+ )
82
+ text = "{" + " ".join(phone) + "}"
83
+ if start >= end:
84
+ return None
85
+
86
+ return duration, text, int(sample_rate * start), int(sample_rate * end)
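get_alignment only touches tier._objects (each interval exposing start_time, end_time, text) and the cfg mapping, so its trimming and frame-rounding behavior can be illustrated with a stand-in tier (the intervals below are made up):

from types import SimpleNamespace

def iv(s, e, p):
    return SimpleNamespace(start_time=s, end_time=e, text=p)

tier = SimpleNamespace(_objects=[iv(0.0, 0.3, "sp"), iv(0.3, 0.45, "HH"),
                                 iv(0.45, 0.7, "AH"), iv(0.7, 1.0, "sil")])
cfg = {"sample_rate": 16000, "hop_size": 200}      # 80 frames per second

phones, durations, start, end = get_alignment(tier, cfg)
# phones    -> ['HH', 'AH']   (leading/trailing silences trimmed)
# durations -> [12, 20]       (frame counts)
# start,end -> 0.3, 0.7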
utils/f0.py ADDED
@@ -0,0 +1,275 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import librosa
7
+ import numpy as np
8
+ import torch
9
+ import parselmouth
10
+ import torchcrepe
11
+ import pyworld as pw
12
+
13
+
14
+ def f0_to_coarse(f0, pitch_bin, f0_min, f0_max):
15
+ """
16
+ Convert f0 (Hz) to pitch (mel scale), and then quantize the mel-scale pitch to the
17
+ range from [1, 2, 3, ..., pitch_bin-1]
18
+
19
+ Reference: https://en.wikipedia.org/wiki/Mel_scale
20
+
21
+ Args:
22
+ f0 (array or Tensor): Hz
23
+ pitch_bin (int): the vocabulary size
24
+ f0_min (int): the minimum f0 (Hz)
25
+ f0_max (int): the maximum f0 (Hz)
26
+
27
+ Returns:
28
+ quantized f0 (array or Tensor)
29
+ """
30
+ f0_mel_min = 1127 * np.log(1 + f0_min / 700)
31
+ f0_mel_max = 1127 * np.log(1 + f0_max / 700)
32
+
33
+ is_torch = isinstance(f0, torch.Tensor)
34
+ f0_mel = 1127 * (1 + f0 / 700).log() if is_torch else 1127 * np.log(1 + f0 / 700)
35
+ f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * (pitch_bin - 2) / (
36
+ f0_mel_max - f0_mel_min
37
+ ) + 1
38
+
39
+ f0_mel[f0_mel <= 1] = 1
40
+ f0_mel[f0_mel > pitch_bin - 1] = pitch_bin - 1
41
+ f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(np.int32)
42
+ assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
43
+ f0_coarse.max(),
44
+ f0_coarse.min(),
45
+ )
46
+ return f0_coarse
47
+
48
+
49
+ def interpolate(f0):
50
+ """Interpolate the unvoiced part. Thus the f0 can be passed to a subtractive synthesizer.
51
+ Args:
52
+ f0: A numpy array of shape (seq_len,)
53
+ Returns:
54
+ f0: Interpolated f0 of shape (seq_len,)
55
+ uv: Unvoiced part of shape (seq_len,)
56
+ """
57
+ uv = f0 == 0
58
+ if len(f0[~uv]) > 0:
59
+ # interpolate the unvoiced f0
60
+ f0[uv] = np.interp(np.where(uv)[0], np.where(~uv)[0], f0[~uv])
61
+ uv = uv.astype("float")
62
+ uv = np.min(np.array([uv[:-2], uv[1:-1], uv[2:]]), axis=0)
63
+ uv = np.pad(uv, (1, 1))
64
+ return f0, uv
65
+
66
+
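For example, zeros in the track are filled by linear interpolation between the neighboring voiced frames (edges take the nearest voiced value):

import numpy as np

f0 = np.array([0.0, 100.0, 0.0, 0.0, 130.0, 0.0])
f0_filled, uv = interpolate(f0.copy())
# f0_filled -> [100. 100. 110. 120. 130. 130.]
# uv is the unvoiced mask, eroded by one frame on each side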
67
+ def get_log_f0(f0):
68
+ f0[np.where(f0 == 0)] = 1
69
+ log_f0 = np.log(f0)
70
+ return log_f0
71
+
72
+
73
+ def get_f0_features_using_pyin(audio, cfg):
74
+ """Using pyin to extract the f0 feature.
75
+ Args:
76
+ audio
77
+ fs
78
+ win_length
79
+ hop_length
80
+ f0_min
81
+ f0_max
82
+ Returns:
83
+ f0: numpy array of shape (frame_len,)
84
+ """
85
+ f0, voiced_flag, voiced_probs = librosa.pyin(
86
+ y=audio,
87
+ fmin=cfg.f0_min,
88
+ fmax=cfg.f0_max,
89
+ sr=cfg.sample_rate,
90
+ win_length=cfg.win_size,
91
+ hop_length=cfg.hop_size,
92
+ )
93
+ # Set nan to 0
94
+ f0[~voiced_flag] = 0
95
+ return f0
96
+
97
+
98
+ def get_f0_features_using_parselmouth(audio, cfg, speed=1):
99
+ """Using parselmouth to extract the f0 feature.
100
+ Args:
101
+ audio
102
+ mel_len
103
+ hop_length
104
+ fs
105
+ f0_min
106
+ f0_max
107
+ speed(default=1)
108
+ Returns:
109
+ f0: numpy array of shape (frame_len,)
110
+ pitch_coarse: numpy array of shape (frame_len,)
111
+ """
112
+ hop_size = int(np.round(cfg.hop_size * speed))
113
+
114
+ # Calculate the time step for pitch extraction
115
+ time_step = hop_size / cfg.sample_rate * 1000
116
+
117
+ f0 = (
118
+ parselmouth.Sound(audio, cfg.sample_rate)
119
+ .to_pitch_ac(
120
+ time_step=time_step / 1000,
121
+ voicing_threshold=0.6,
122
+ pitch_floor=cfg.f0_min,
123
+ pitch_ceiling=cfg.f0_max,
124
+ )
125
+ .selected_array["frequency"]
126
+ )
127
+ return f0
128
+
129
+
130
+ def get_f0_features_using_dio(audio, cfg):
131
+ """Using dio to extract the f0 feature.
132
+ Args:
133
+ audio
134
+ mel_len
135
+ fs
136
+ hop_length
137
+ f0_min
138
+ f0_max
139
+ Returns:
140
+ f0: numpy array of shape (frame_len,)
141
+ """
142
+ # Get the raw f0
143
+ _f0, t = pw.dio(
144
+ audio.astype("double"),
145
+ cfg.sample_rate,
146
+ f0_floor=cfg.f0_min,
147
+ f0_ceil=cfg.f0_max,
148
+ channels_in_octave=2,
149
+ frame_period=(1000 * cfg.hop_size / cfg.sample_rate),
150
+ )
151
+ # Get the f0
152
+ f0 = pw.stonemask(audio.astype("double"), _f0, t, cfg.sample_rate)
153
+ return f0
154
+
155
+
156
+ def get_f0_features_using_harvest(audio, mel_len, fs, hop_length, f0_min, f0_max):
157
+ """Using harvest to extract the f0 feature.
158
+ Args:
159
+ audio
160
+ mel_len
161
+ fs
162
+ hop_length
163
+ f0_min
164
+ f0_max
165
+ Returns:
166
+ f0: numpy array of shape (frame_len,)
167
+ """
168
+ f0, _ = pw.harvest(
169
+ audio.astype("double"),
170
+ fs,
171
+ f0_floor=f0_min,
172
+ f0_ceil=f0_max,
173
+ frame_period=(1000 * hop_length / fs),
174
+ )
175
+ f0 = f0.astype("float")[:mel_len]
176
+ return f0
177
+
178
+
179
+ def get_f0_features_using_crepe(
180
+ audio, mel_len, fs, hop_length, hop_length_new, f0_min, f0_max, threshold=0.3
181
+ ):
182
+ """Using torchcrepe to extract the f0 feature.
183
+ Args:
184
+ audio
185
+ mel_len
186
+ fs
187
+ hop_length
188
+ hop_length_new
189
+ f0_min
190
+ f0_max
191
+ threshold(default=0.3)
192
+ Returns:
193
+ f0: numpy array of shape (frame_len,)
194
+ """
195
+ # Currently, crepe only supports 16khz audio
196
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
197
+ audio_16k = librosa.resample(audio, orig_sr=fs, target_sr=16000)
198
+ audio_16k_torch = torch.FloatTensor(audio_16k).unsqueeze(0).to(device)
199
+
200
+ # Get the raw pitch
201
+ f0, pd = torchcrepe.predict(
202
+ audio_16k_torch,
203
+ 16000,
204
+ hop_length_new,
205
+ f0_min,
206
+ f0_max,
207
+ pad=True,
208
+ model="full",
209
+ batch_size=1024,
210
+ device=device,
211
+ return_periodicity=True,
212
+ )
213
+
214
+ # Filter, de-silence, set up threshold for unvoiced part
215
+ pd = torchcrepe.filter.median(pd, 3)
216
+ pd = torchcrepe.threshold.Silence(-60.0)(pd, audio_16k_torch, 16000, hop_length_new)
217
+ f0 = torchcrepe.threshold.At(threshold)(f0, pd)
218
+ f0 = torchcrepe.filter.mean(f0, 3)
219
+
220
+ # Convert unvoiced part to 0hz
221
+ f0 = torch.where(torch.isnan(f0), torch.full_like(f0, 0), f0)
222
+
223
+ # Interpolate f0
224
+ nzindex = torch.nonzero(f0[0]).squeeze()
225
+ f0 = torch.index_select(f0[0], dim=0, index=nzindex).cpu().numpy()
226
+ time_org = 0.005 * nzindex.cpu().numpy()
227
+ time_frame = np.arange(mel_len) * hop_length / fs
228
+ f0 = np.interp(time_frame, time_org, f0, left=f0[0], right=f0[-1])
229
+ return f0
230
+
231
+
232
+ def get_f0(audio, cfg, use_interpolate=False, return_uv=False):
233
+ if cfg.pitch_extractor == "dio":
234
+ f0 = get_f0_features_using_dio(audio, cfg)
235
+ elif cfg.pitch_extractor == "pyin":
236
+ f0 = get_f0_features_using_pyin(audio, cfg)
237
+ elif cfg.pitch_extractor == "parselmouth":
238
+ f0 = get_f0_features_using_parselmouth(audio, cfg)
+ else:
+ raise ValueError("Unsupported pitch_extractor: {}".format(cfg.pitch_extractor))
239
+
240
+ if use_interpolate:
241
+ f0, uv = interpolate(f0)
242
+ else:
243
+ uv = f0 == 0
244
+
245
+ if return_uv:
246
+ return f0, uv
247
+
248
+ return f0
249
+
250
+
251
+ def get_cents(f0_hz):
252
+ """
253
+ F_{cent} = 1200 * log2 (F/440)
254
+
255
+ Reference:
256
+ APSIPA'17, Perceptual Evaluation of Singing Quality
257
+ """
258
+ voiced_f0 = f0_hz[f0_hz != 0]
259
+ return 1200 * np.log2(voiced_f0 / 440)
260
+
261
+
262
+ def get_pitch_derivatives(f0_hz):
263
+ """
264
+ f0_hz: (,T)
265
+ """
266
+ f0_cent = get_cents(f0_hz)
267
+ return f0_cent[1:] - f0_cent[:-1]
268
+
269
+
270
+ def get_pitch_sub_median(f0_hz):
271
+ """
272
+ f0_hz: (,T)
273
+ """
274
+ f0_cent = get_cents(f0_hz)
275
+ return f0_cent - np.median(f0_cent)
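get_cents places voiced f0 on a cent scale relative to A4 (440 Hz), so one octave spans 1200 cents, and f0_to_coarse buckets f0 into integer bins for embedding lookup; a quick check (parameter values are illustrative):

import numpy as np

f0 = np.array([0.0, 220.0, 440.0, 880.0])
print(get_cents(f0))                                   # [-1200.    0.  1200.]
print(f0_to_coarse(f0, pitch_bin=256, f0_min=50, f0_max=1100))
# integer bins in [1, 255]; unvoiced (0 Hz) frames map to bin 1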
utils/hparam.py ADDED
@@ -0,0 +1,659 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/training/python/training/hparam.py pylint: disable=line-too-long
7
+ """Hyperparameter values."""
8
+ from __future__ import absolute_import
9
+ from __future__ import division
10
+ from __future__ import print_function
11
+
12
+ import json
13
+ import numbers
14
+ import re
15
+ import six
16
+
17
+ # Define the regular expression for parsing a single clause of the input
18
+ # (delimited by commas). A legal clause looks like:
19
+ # <variable name>[<index>]? = <rhs>
20
+ # where <rhs> is either a single token or [] enclosed list of tokens.
21
+ # For example: "var[1] = a" or "x = [1,2,3]"
22
+ PARAM_RE = re.compile(
23
+ r"""
24
+ (?P<name>[a-zA-Z][\w\.]*) # variable name: "var" or "x"
25
+ (\[\s*(?P<index>\d+)\s*\])? # (optional) index: "1" or None
26
+ \s*=\s*
27
+ ((?P<val>[^,\[]*) # single value: "a" or None
28
+ |
29
+ \[(?P<vals>[^\]]*)\]) # list of values: None or "1,2,3"
30
+ ($|,\s*)""",
31
+ re.VERBOSE,
32
+ )
33
+
34
+
35
+ def _parse_fail(name, var_type, value, values):
36
+ """Helper function for raising a value error for bad assignment."""
37
+ raise ValueError(
38
+ "Could not parse hparam '%s' of type '%s' with value '%s' in %s"
39
+ % (name, var_type.__name__, value, values)
40
+ )
41
+
42
+
43
+ def _reuse_fail(name, values):
44
+ """Helper function for raising a value error for reuse of name."""
45
+ raise ValueError("Multiple assignments to variable '%s' in %s" % (name, values))
46
+
47
+
48
+ def _process_scalar_value(name, parse_fn, var_type, m_dict, values, results_dictionary):
49
+ """Update results_dictionary with a scalar value.
50
+
51
+ Used to update the results_dictionary to be returned by parse_values when
52
+ encountering a clause with a scalar RHS (e.g. "s=5" or "arr[0]=5".)
53
+
54
+ Mutates results_dictionary.
55
+
56
+ Args:
57
+ name: Name of variable in assignment ("s" or "arr").
58
+ parse_fn: Function for parsing the actual value.
59
+ var_type: Type of named variable.
60
+ m_dict: Dictionary constructed from regex parsing.
61
+ m_dict['val']: RHS value (scalar)
62
+ m_dict['index']: List index value (or None)
63
+ values: Full expression being parsed
64
+ results_dictionary: The dictionary being updated for return by the parsing
65
+ function.
66
+
67
+ Raises:
68
+ ValueError: If the name has already been used.
69
+ """
70
+ try:
71
+ parsed_value = parse_fn(m_dict["val"])
72
+ except ValueError:
73
+ _parse_fail(name, var_type, m_dict["val"], values)
74
+
75
+ # If no index is provided
76
+ if not m_dict["index"]:
77
+ if name in results_dictionary:
78
+ _reuse_fail(name, values)
79
+ results_dictionary[name] = parsed_value
80
+ else:
81
+ if name in results_dictionary:
82
+ # If the name has already been used as a scalar, it
83
+ # will be in this dictionary and map to a non-dictionary.
84
+ if not isinstance(results_dictionary.get(name), dict):
85
+ _reuse_fail(name, values)
86
+ else:
87
+ results_dictionary[name] = {}
88
+
89
+ index = int(m_dict["index"])
90
+ # Make sure the index position hasn't already been assigned a value.
91
+ if index in results_dictionary[name]:
92
+ _reuse_fail("{}[{}]".format(name, index), values)
93
+ results_dictionary[name][index] = parsed_value
94
+
95
+
96
+ def _process_list_value(name, parse_fn, var_type, m_dict, values, results_dictionary):
97
+ """Update results_dictionary from a list of values.
98
+
99
+ Used to update results_dictionary to be returned by parse_values when
100
+ encountering a clause with a list RHS (e.g. "arr=[1,2,3]".)
101
+
102
+ Mutates results_dictionary.
103
+
104
+ Args:
105
+ name: Name of variable in assignment ("arr").
106
+ parse_fn: Function for parsing individual values.
107
+ var_type: Type of named variable.
108
+ m_dict: Dictionary constructed from regex parsing.
109
+ m_dict['val']: RHS value (scalar)
110
+ values: Full expression being parsed
111
+ results_dictionary: The dictionary being updated for return by the parsing
112
+ function.
113
+
114
+ Raises:
115
+ ValueError: If the name has an index or the values cannot be parsed.
116
+ """
117
+ if m_dict["index"] is not None:
118
+ raise ValueError("Assignment of a list to a list index.")
119
+ elements = filter(None, re.split("[ ,]", m_dict["vals"]))
120
+ # Make sure the name hasn't already been assigned a value
121
+ if name in results_dictionary:
122
+ _reuse_fail(name, values)  # _reuse_fail raises ValueError itself
123
+ try:
124
+ results_dictionary[name] = [parse_fn(e) for e in elements]
125
+ except ValueError:
126
+ _parse_fail(name, var_type, m_dict["vals"], values)
127
+
128
+
129
+ def _cast_to_type_if_compatible(name, param_type, value):
130
+ """Cast hparam to the provided type, if compatible.
131
+
132
+ Args:
133
+ name: Name of the hparam to be cast.
134
+ param_type: The type of the hparam.
135
+ value: The value to be cast, if compatible.
136
+
137
+ Returns:
138
+ The result of casting `value` to `param_type`.
139
+
140
+ Raises:
141
+ ValueError: If the type of `value` is not compatible with param_type.
142
+ * If `param_type` is a string type, but `value` is not.
143
+ * If `param_type` is a boolean, but `value` is not, or vice versa.
144
+ * If `param_type` is an integer type, but `value` is not.
145
+ * If `param_type` is a float type, but `value` is not a numeric type.
146
+ """
147
+ fail_msg = "Could not cast hparam '%s' of type '%s' from value %r" % (
148
+ name,
149
+ param_type,
150
+ value,
151
+ )
152
+
153
+ # Some callers use None, for which we can't do any casting/checking. :(
154
+ if issubclass(param_type, type(None)):
155
+ return value
156
+
157
+ # Avoid converting a non-string type to a string.
158
+ if issubclass(param_type, (six.string_types, six.binary_type)) and not isinstance(
159
+ value, (six.string_types, six.binary_type)
160
+ ):
161
+ raise ValueError(fail_msg)
162
+
163
+ # Avoid converting a number or string type to a boolean or vice versa.
164
+ if issubclass(param_type, bool) != isinstance(value, bool):
165
+ raise ValueError(fail_msg)
166
+
167
+ # Avoid converting float to an integer (the reverse is fine).
168
+ if issubclass(param_type, numbers.Integral) and not isinstance(
169
+ value, numbers.Integral
170
+ ):
171
+ raise ValueError(fail_msg)
172
+
173
+ # Avoid converting a non-numeric type to a numeric type.
174
+ if issubclass(param_type, numbers.Number) and not isinstance(value, numbers.Number):
175
+ raise ValueError(fail_msg)
176
+
177
+ return param_type(value)
178
+
179
+
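The casting rules are asymmetric: an int may widen to float, but a float never narrows to int, and bool/str never mix with numerics. For instance:

_cast_to_type_if_compatible("lr", float, 1)       # 1.0  (int widens to float)
_cast_to_type_if_compatible("steps", int, 1.5)    # raises ValueError
_cast_to_type_if_compatible("use_gpu", bool, 1)   # raises ValueError (1 is not a bool)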
180
+ def parse_values(values, type_map, ignore_unknown=False):
181
+ """Parses hyperparameter values from a string into a python map.
182
+
183
+ `values` is a string containing comma-separated `name=value` pairs.
184
+ For each pair, the value of the hyperparameter named `name` is set to
185
+ `value`.
186
+
187
+ If a hyperparameter name appears multiple times in `values`, a ValueError
188
+ is raised (e.g. 'a=1,a=2', 'a[1]=1,a[1]=2').
189
+
190
+ If a hyperparameter name in both an index assignment and scalar assignment,
191
+ a ValueError is raised. (e.g. 'a=[1,2,3],a[0] = 1').
192
+
193
+ The hyperparameter name may contain '.' symbols, which will result in an
194
+ attribute name that is only accessible through the getattr and setattr
195
+ functions. (And must first be explicitly added through add_hparam.)
196
+
197
+ WARNING: Use of '.' in your variable names is allowed, but is not well
198
+ supported and not recommended.
199
+
200
+ The `value` in `name=value` must follow the syntax according to the
201
+ type of the parameter:
202
+
203
+ * Scalar integer: A Python-parsable integer value. E.g.: 1,
204
+ 100, -12.
205
+ * Scalar float: A Python-parsable floating point value. E.g.: 1.0,
206
+ -.54e89.
207
+ * Boolean: Either true or false.
208
+ * Scalar string: A non-empty sequence of characters, excluding comma,
209
+ spaces, and square brackets. E.g.: foo, bar_1.
210
+ * List: A comma separated list of scalar values of the parameter type
211
+ enclosed in square brackets. E.g.: [1,2,3], [1.0,1e-12], [high,low].
212
+
213
+ When index assignment is used, the corresponding type_map key should be the
214
+ list name. E.g. for "arr[1]=0" the type_map must have the key "arr" (not
215
+ "arr[1]").
216
+
217
+ Args:
218
+ values: String. Comma separated list of `name=value` pairs where
219
+ 'value' must follow the syntax described above.
220
+ type_map: A dictionary mapping hyperparameter names to types. Note every
221
+ parameter name in values must be a key in type_map. The values must
222
+ conform to the types indicated, where a value V is said to conform to a
223
+ type T if either V has type T, or V is a list of elements of type T.
224
+ Hence, for a multidimensional parameter 'x' taking float values,
225
+ 'x=[0.1,0.2]' will parse successfully if type_map['x'] = float.
226
+ ignore_unknown: Bool. Whether values that are missing a type in type_map
227
+ should be ignored. If set to True, a ValueError will not be raised for
228
+ unknown hyperparameter type.
229
+
230
+ Returns:
231
+ A python map mapping each name to either:
232
+ * A scalar value.
233
+ * A list of scalar values.
234
+ * A dictionary mapping index numbers to scalar values.
235
+ (e.g. "x=5,L=[1,2],arr[1]=3" results in {'x':5,'L':[1,2],'arr':{1:3}}")
236
+
237
+ Raises:
238
+ ValueError: If there is a problem with input.
239
+ * If `values` cannot be parsed.
240
+ * If a list is assigned to a list index (e.g. 'a[1] = [1,2,3]').
241
+ * If the same rvalue is assigned two different values (e.g. 'a=1,a=2',
242
+ 'a[1]=1,a[1]=2', or 'a=1,a=[1]')
243
+ """
244
+ results_dictionary = {}
245
+ pos = 0
246
+ while pos < len(values):
247
+ m = PARAM_RE.match(values, pos)
248
+ if not m:
249
+ raise ValueError("Malformed hyperparameter value: %s" % values[pos:])
250
+ # Check that there is a comma between parameters and move past it.
251
+ pos = m.end()
252
+ # Parse the values.
253
+ m_dict = m.groupdict()
254
+ name = m_dict["name"]
255
+ if name not in type_map:
256
+ if ignore_unknown:
257
+ continue
258
+ raise ValueError("Unknown hyperparameter type for %s" % name)
259
+ type_ = type_map[name]
260
+
261
+ # Set up correct parsing function (depending on whether type_ is a bool)
262
+ if type_ == bool:
263
+
264
+ def parse_bool(value):
265
+ if value in ["true", "True"]:
266
+ return True
267
+ elif value in ["false", "False"]:
268
+ return False
269
+ else:
270
+ try:
271
+ return bool(int(value))
272
+ except ValueError:
273
+ _parse_fail(name, type_, value, values)
274
+
275
+ parse = parse_bool
276
+ else:
277
+ parse = type_
278
+
279
+ # If a single value is provided
280
+ if m_dict["val"] is not None:
281
+ _process_scalar_value(
282
+ name, parse, type_, m_dict, values, results_dictionary
283
+ )
284
+
285
+ # If the assigned value is a list:
286
+ elif m_dict["vals"] is not None:
287
+ _process_list_value(name, parse, type_, m_dict, values, results_dictionary)
288
+
289
+ else: # Not assigned a list or value
290
+ _parse_fail(name, type_, "", values)
291
+
292
+ return results_dictionary
293
+
294
+
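Tying the pieces together, parse_values maps an override string to a plain dict, with type_map supplying the per-name parser; for example (names are made up):

type_map = {"lr": float, "layers": int, "dims": int, "arr": int}
print(parse_values("lr=0.3,layers=4,dims=[128,256],arr[1]=7", type_map))
# {'lr': 0.3, 'layers': 4, 'dims': [128, 256], 'arr': {1: 7}}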
295
+ class HParams(object):
296
+ """Class to hold a set of hyperparameters as name-value pairs.
297
+
298
+ A `HParams` object holds hyperparameters used to build and train a model,
299
+ such as the number of hidden units in a neural net layer or the learning rate
300
+ to use when training.
301
+
302
+ You first create a `HParams` object by specifying the names and values of the
303
+ hyperparameters.
304
+
305
+ To make them easily accessible the parameter names are added as direct
306
+ attributes of the class. A typical usage is as follows:
307
+
308
+ ```python
309
+ # Create a HParams object specifying names and values of the model
310
+ # hyperparameters:
311
+ hparams = HParams(learning_rate=0.1, num_hidden_units=100)
312
+
313
+ # The hyperparameters are available as attributes of the HParams object:
314
+ hparams.learning_rate ==> 0.1
315
+ hparams.num_hidden_units ==> 100
316
+ ```
317
+
318
+ Hyperparameters have type, which is inferred from the type of their value
319
+ passed at construction time. The currently supported types are: integer,
320
+ float, boolean, string, and list of integer, float, boolean, or string.
321
+
322
+ You can override hyperparameter values by calling the
323
+ [`parse()`](#HParams.parse) method, passing a string of comma separated
324
+ `name=value` pairs. This is intended to make it possible to override
325
+ any hyperparameter values from a single command-line flag to which
326
+ the user passes 'hyper-param=value' pairs. It avoids having to define
327
+ one flag for each hyperparameter.
328
+
329
+ The syntax expected for each value depends on the type of the parameter.
330
+ See `parse()` for a description of the syntax.
331
+
332
+ Example:
333
+
334
+ ```python
335
+ # Define a command line flag to pass name=value pairs.
336
+ # For example using argparse:
337
+ import argparse
338
+ parser = argparse.ArgumentParser(description='Train my model.')
339
+ parser.add_argument('--hparams', type=str,
340
+ help='Comma separated list of "name=value" pairs.')
341
+ args = parser.parse_args()
342
+ ...
343
+ def my_program():
344
+ # Create a HParams object specifying the names and values of the
345
+ # model hyperparameters:
346
+ hparams = tf.HParams(learning_rate=0.1, num_hidden_units=100,
347
+ activations=['relu', 'tanh'])
348
+
349
+ # Override hyperparameters values by parsing the command line
350
+ hparams.parse(args.hparams)
351
+
352
+ # If the user passed `--hparams=learning_rate=0.3` on the command line
353
+ # then 'hparams' has the following attributes:
354
+ hparams.learning_rate ==> 0.3
355
+ hparams.num_hidden_units ==> 100
356
+ hparams.activations ==> ['relu', 'tanh']
357
+
358
+ # If the hyperparameters are in json format use parse_json:
359
+ hparams.parse_json('{"learning_rate": 0.3, "activations": "relu"}')
360
+ ```
361
+ """
362
+
363
+ _HAS_DYNAMIC_ATTRIBUTES = True # Required for pytype checks.
364
+
365
+ def __init__(self, model_structure=None, **kwargs):
366
+ """Create an instance of `HParams` from keyword arguments.
367
+
368
+ The keyword arguments specify name-value pairs for the hyperparameters.
369
+ The parameter types are inferred from the type of the values passed.
370
+
371
+ The parameter names are added as attributes of the `HParams` object, so they
372
+ can be accessed directly with the dot notation `hparams._name_`.
373
+
374
+ Example:
375
+
376
+ ```python
377
+ # Define 3 hyperparameters: 'learning_rate' is a float parameter,
378
+ # 'num_hidden_units' an integer parameter, and 'activation' a string
379
+ # parameter.
380
+ hparams = HParams(
381
+ learning_rate=0.1, num_hidden_units=100, activation='relu')
382
+
383
+ hparams.activation ==> 'relu'
384
+ ```
385
+
386
+ Note that a few names are reserved and cannot be used as hyperparameter
387
+ names. If you use one of the reserved names, the constructor raises a
388
+ `ValueError`.
389
+
390
+ Args:
391
+ model_structure: An instance of ModelStructure, defining the feature
392
+ crosses to be used in the Trial.
393
+ **kwargs: Key-value pairs where the key is the hyperparameter name and
394
+ the value is the value for the parameter.
395
+
396
+ Raises:
397
+ ValueError: If both `hparam_def` and initialization values are provided,
398
+ or if one of the arguments is invalid.
399
+
400
+ """
401
+ # Register the hyperparameters and their type in _hparam_types.
402
+ # This simplifies the implementation of parse().
403
+ # _hparam_types maps the parameter name to a tuple (type, bool).
404
+ # The type value is the type of the parameter for scalar hyperparameters,
405
+ # or the type of the list elements for multidimensional hyperparameters.
406
+ # The bool value is True if the value is a list, False otherwise.
407
+ self._hparam_types = {}
408
+ self._model_structure = model_structure
409
+ for name, value in six.iteritems(kwargs):
410
+ self.add_hparam(name, value)
411
+
412
+ def add_hparam(self, name, value):
413
+ """Adds {name, value} pair to hyperparameters.
414
+
415
+ Args:
416
+ name: Name of the hyperparameter.
417
+ value: Value of the hyperparameter. Can be one of the following types:
418
+ int, float, string, int list, float list, or string list.
419
+
420
+ Raises:
421
+ ValueError: if one of the arguments is invalid.
422
+ """
423
+ # Keys in kwargs are unique, but 'name' could be the name of a pre-existing
424
+ # attribute of this object. In that case we refuse to use it as a
425
+ # hyperparameter name.
426
+ if getattr(self, name, None) is not None:
427
+ raise ValueError("Hyperparameter name is reserved: %s" % name)
428
+ if isinstance(value, (list, tuple)):
429
+ if not value:
430
+ raise ValueError(
431
+ "Multi-valued hyperparameters cannot be empty: %s" % name
432
+ )
433
+ self._hparam_types[name] = (type(value[0]), True)
434
+ else:
435
+ self._hparam_types[name] = (type(value), False)
436
+ setattr(self, name, value)
437
+
438
+ def set_hparam(self, name, value):
439
+ """Set the value of an existing hyperparameter.
440
+
441
+ This function verifies that the type of the value matches the type of the
442
+ existing hyperparameter.
443
+
444
+ Args:
445
+ name: Name of the hyperparameter.
446
+ value: New value of the hyperparameter.
447
+
448
+ Raises:
449
+ KeyError: If the hyperparameter doesn't exist.
450
+ ValueError: If there is a type mismatch.
451
+ """
452
+ param_type, is_list = self._hparam_types[name]
453
+ if isinstance(value, list):
454
+ if not is_list:
455
+ raise ValueError(
456
+ "Must not pass a list for single-valued parameter: %s" % name
457
+ )
458
+ setattr(
459
+ self,
460
+ name,
461
+ [_cast_to_type_if_compatible(name, param_type, v) for v in value],
462
+ )
463
+ else:
464
+ if is_list:
465
+ raise ValueError(
466
+ "Must pass a list for multi-valued parameter: %s." % name
467
+ )
468
+ setattr(self, name, _cast_to_type_if_compatible(name, param_type, value))
469
+
470
+ def del_hparam(self, name):
471
+ """Removes the hyperparameter with key 'name'.
472
+
473
+ Does nothing if it isn't present.
474
+
475
+ Args:
476
+ name: Name of the hyperparameter.
477
+ """
478
+ if hasattr(self, name):
479
+ delattr(self, name)
480
+ del self._hparam_types[name]
481
+
482
+ def parse(self, values):
483
+ """Override existing hyperparameter values, parsing new values from a string.
484
+
485
+ See parse_values for more detail on the allowed format for values.
486
+
487
+ Args:
488
+ values: String. Comma separated list of `name=value` pairs where 'value'
489
+ must follow the syntax described above.
490
+
491
+ Returns:
492
+ The `HParams` instance.
493
+
494
+ Raises:
495
+ ValueError: If `values` cannot be parsed or a hyperparameter in `values`
496
+ doesn't exist.
497
+ """
498
+ type_map = {}
499
+ for name, t in self._hparam_types.items():
500
+ param_type, _ = t
501
+ type_map[name] = param_type
502
+
503
+ values_map = parse_values(values, type_map)
504
+ return self.override_from_dict(values_map)
505
+
506
+ def override_from_dict(self, values_dict):
507
+ """Override existing hyperparameter values, parsing new values from a dictionary.
508
+
509
+ Args:
510
+ values_dict: Dictionary of name:value pairs.
511
+
512
+ Returns:
513
+ The `HParams` instance.
514
+
515
+ Raises:
516
+ KeyError: If a hyperparameter in `values_dict` doesn't exist.
517
+ ValueError: If `values_dict` cannot be parsed.
518
+ """
519
+ for name, value in values_dict.items():
520
+ self.set_hparam(name, value)
521
+ return self
522
+
523
+ def set_model_structure(self, model_structure):
524
+ self._model_structure = model_structure
525
+
526
+ def get_model_structure(self):
527
+ return self._model_structure
528
+
529
+ def to_json(self, indent=None, separators=None, sort_keys=False):
530
+ """Serializes the hyperparameters into JSON.
531
+
532
+ Args:
533
+ indent: If a non-negative integer, JSON array elements and object members
534
+ will be pretty-printed with that indent level. An indent level of 0, or
535
+ negative, will only insert newlines. `None` (the default) selects the
536
+ most compact representation.
537
+ separators: Optional `(item_separator, key_separator)` tuple. Default is
538
+ `(', ', ': ')`.
539
+ sort_keys: If `True`, the output dictionaries will be sorted by key.
540
+
541
+ Returns:
542
+ A JSON string.
543
+ """
544
+
545
+ def remove_callables(x):
546
+ """Omit callable elements from input with arbitrary nesting."""
547
+ if isinstance(x, dict):
548
+ return {
549
+ k: remove_callables(v)
550
+ for k, v in six.iteritems(x)
551
+ if not callable(v)
552
+ }
553
+ elif isinstance(x, list):
554
+ return [remove_callables(i) for i in x if not callable(i)]
555
+ return x
556
+
557
+ return json.dumps(
558
+ remove_callables(self.values()),
559
+ indent=indent,
560
+ separators=separators,
561
+ sort_keys=sort_keys,
562
+ )
563
+
564
+ def parse_json(self, values_json):
565
+ """Override existing hyperparameter values, parsing new values from a json object.
566
+
567
+ Args:
568
+ values_json: String containing a json object of name:value pairs.
569
+
570
+ Returns:
571
+ The `HParams` instance.
572
+
573
+ Raises:
574
+ KeyError: If a hyperparameter in `values_json` doesn't exist.
575
+ ValueError: If `values_json` cannot be parsed.
576
+ """
577
+ values_map = json.loads(values_json)
578
+ return self.override_from_dict(values_map)
579
+
580
+ def values(self):
581
+ """Return the hyperparameter values as a Python dictionary.
582
+
583
+ Returns:
584
+ A dictionary with hyperparameter names as keys. The values are the
585
+ hyperparameter values.
586
+ """
587
+ return {n: getattr(self, n) for n in self._hparam_types.keys()}
588
+
589
+ def get(self, key, default=None):
590
+ """Returns the value of `key` if it exists, else `default`."""
591
+ if key in self._hparam_types:
592
+ # Ensure that default is compatible with the parameter type.
593
+ if default is not None:
594
+ param_type, is_param_list = self._hparam_types[key]
595
+ type_str = "list<%s>" % param_type if is_param_list else str(param_type)
596
+ fail_msg = (
597
+ "Hparam '%s' of type '%s' is incompatible with "
598
+ "default=%s" % (key, type_str, default)
599
+ )
600
+
601
+ is_default_list = isinstance(default, list)
602
+ if is_param_list != is_default_list:
603
+ raise ValueError(fail_msg)
604
+
605
+ try:
606
+ if is_default_list:
607
+ for value in default:
608
+ _cast_to_type_if_compatible(key, param_type, value)
609
+ else:
610
+ _cast_to_type_if_compatible(key, param_type, default)
611
+ except ValueError as e:
612
+ raise ValueError("%s. %s" % (fail_msg, e))
613
+
614
+ return getattr(self, key)
615
+
616
+ return default
617
+
618
+ def __contains__(self, key):
619
+ return key in self._hparam_types
620
+
621
+ def __str__(self):
622
+ return str(sorted(self.values().items()))
623
+
624
+ def __repr__(self):
625
+ return "%s(%s)" % (type(self).__name__, self.__str__())
626
+
627
+ @staticmethod
628
+ def _get_kind_name(param_type, is_list):
629
+ """Returns the field name given parameter type and is_list.
630
+
631
+ Args:
632
+ param_type: Data type of the hparam.
633
+ is_list: Whether this is a list.
634
+
635
+ Returns:
636
+ A string representation of the field name.
637
+
638
+ Raises:
639
+ ValueError: If parameter type is not recognized.
640
+ """
641
+ if issubclass(param_type, bool):
642
+ # This check must happen before issubclass(param_type, six.integer_types),
643
+ # since Python considers bool to be a subclass of int.
644
+ typename = "bool"
645
+ elif issubclass(param_type, six.integer_types):
646
+ # Setting 'int' and 'long' types to be 'int64' to ensure the type is
647
+ # compatible with both Python2 and Python3.
648
+ typename = "int64"
649
+ elif issubclass(param_type, (six.string_types, six.binary_type)):
650
+ # Setting 'string' and 'bytes' types to be 'bytes' to ensure the type is
651
+ # compatible with both Python2 and Python3.
652
+ typename = "bytes"
653
+ elif issubclass(param_type, float):
654
+ typename = "float"
655
+ else:
656
+ raise ValueError("Unsupported parameter type: %s" % str(param_type))
657
+
658
+ suffix = "list" if is_list else "value"
659
+ return "_".join([typename, suffix])
utils/hubert.py ADDED
@@ -0,0 +1,155 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from https://github.com/svc-develop-team/so-vits-svc/blob/4.0/preprocess_hubert_f0.py
7
+
8
+ import os
9
+ import librosa
10
+ import torch
11
+ import numpy as np
12
+ from fairseq import checkpoint_utils
13
+ from tqdm import tqdm
14
+ import torch
15
+
16
+
17
+ def load_hubert_model(hps):
18
+ # Load model
19
+ ckpt_path = hps.hubert_file
20
+ print("Load Hubert Model...")
21
+
22
+ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
23
+ [ckpt_path],
24
+ suffix="",
25
+ )
26
+ model = models[0]
27
+ model.eval()
28
+
29
+ if torch.cuda.is_available():
30
+ model = model.cuda()
31
+
32
+ return model
33
+
34
+
35
+ def get_hubert_content(hmodel, wav_16k_tensor):
36
+ feats = wav_16k_tensor
37
+ if feats.dim() == 2: # double channels
38
+ feats = feats.mean(-1)
39
+ assert feats.dim() == 1, feats.dim()
40
+ feats = feats.view(1, -1)
41
+ padding_mask = torch.BoolTensor(feats.shape).fill_(False)
42
+ inputs = {
43
+ "source": feats.to(wav_16k_tensor.device),
44
+ "padding_mask": padding_mask.to(wav_16k_tensor.device),
45
+ "output_layer": 9, # layer 9
46
+ }
47
+ with torch.no_grad():
48
+ logits = hmodel.extract_features(**inputs)
49
+ feats = hmodel.final_proj(logits[0]).squeeze(0)
50
+
51
+ return feats
52
+
53
+
54
+ def content_vector_encoder(model, audio_path, default_sampling_rate=16000):
55
+ """
56
+ # content vector default sr: 16000
57
+ """
58
+
59
+ wav16k, sr = librosa.load(audio_path, sr=default_sampling_rate)
60
+ device = next(model.parameters()).device
61
+ wav16k = torch.from_numpy(wav16k).to(device)
62
+
63
+ # (1, 256, frame_len)
64
+ content_feature = get_hubert_content(model, wav_16k_tensor=wav16k)
65
+
66
+ return content_feature.cpu().detach().numpy()
67
+
68
+
69
+ def repeat_expand_2d(content, target_len):
70
+ """
71
+ content : [hubert_dim(256), src_len]
72
+ target: [hubert_dim(256), target_len]
73
+ """
74
+ src_len = content.shape[-1]
75
+ target = torch.zeros([content.shape[0], target_len], dtype=torch.float).to(
76
+ content.device
77
+ )
78
+ temp = torch.arange(src_len + 1) * target_len / src_len
79
+ current_pos = 0
80
+ for i in range(target_len):
81
+ if i < temp[current_pos + 1]:
82
+ target[:, i] = content[:, current_pos]
83
+ else:
84
+ current_pos += 1
85
+ target[:, i] = content[:, current_pos]
86
+
87
+ return target
88
+
89
+
90
+ def get_mapped_features(raw_content_features, mapping_features):
91
+ """
92
+ Content Vector: frameshift = 20ms, hop_size = 480 in 24k
93
+
94
+ Now it's only used for mapping to bigvgan's mels (sr = 24k, hop_size = 256, frameshift ~= 10.7 ms)
95
+ """
96
+ source_hop = 480
97
+ target_hop = 256
98
+
99
+ factor = np.gcd(source_hop, target_hop)
100
+ source_hop //= factor
101
+ target_hop //= factor
102
+ print(
103
+ "Mapping source's {} frames => target's {} frames".format(
104
+ target_hop, source_hop
105
+ )
106
+ )
107
+
108
+ results = []
109
+ for index, mapping_feat in enumerate(tqdm(mapping_features)):
110
+ # mappping_feat: (mels_frame_len, n_mels)
111
+ target_len = len(mapping_feat)
112
+
113
+ # (source_len, 256)
114
+ raw_feats = raw_content_features[index][0].cpu().numpy().T
115
+ source_len, width = raw_feats.shape
116
+
117
+ # const ~= target_len * target_hop
118
+ const = source_len * source_hop // target_hop * target_hop
119
+
120
+ # (source_len * source_hop, dim)
121
+ up_sampling_feats = np.repeat(raw_feats, source_hop, axis=0)
122
+ # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
123
+ down_sampling_feats = np.average(
124
+ up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
125
+ )
126
+
127
+ err = abs(target_len - len(down_sampling_feats))
128
+ if err > 3:
129
+ print("index:", index)
130
+ print("mels:", mapping_feat.shape)
131
+ print("raw content vector:", raw_feats.shape)
132
+ print("up_sampling:", up_sampling_feats.shape)
133
+ print("down_sampling_feats:", down_sampling_feats.shape)
134
+ exit()
135
+ if len(down_sampling_feats) < target_len:
136
+ # (1, dim) -> (err, dim)
137
+ end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
138
+ down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)
139
+
140
+ # (target_len, dim)
141
+ feats = down_sampling_feats[:target_len]
142
+ results.append(feats)
143
+
144
+ return results
145
+
146
+
147
+ def extract_hubert_features_of_dataset(datasets, model, out_dir):
148
+ for utt in tqdm(datasets):
149
+ uid = utt["Uid"]
150
+ audio_path = utt["Path"]
151
+
152
+ content_vector_feature = content_vector_encoder(model, audio_path) # (T, 256)
153
+
154
+ save_path = os.path.join(out_dir, uid + ".npy")
155
+ np.save(save_path, content_vector_feature)
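The hop-size mapping in `get_mapped_features` above boils down to repeat-then-average resampling; a small self-contained sketch of the same arithmetic on dummy data (shapes are illustrative):

```python
import numpy as np

# gcd(480, 256) = 16, so the hops reduce to 30 (source) and 16 (target).
source_hop, target_hop = 480, 256
factor = np.gcd(source_hop, target_hop)
source_hop, target_hop = source_hop // factor, target_hop // factor

raw_feats = np.random.randn(10, 256)           # (source_len, dim)
up = np.repeat(raw_feats, source_hop, axis=0)  # (source_len * 30, dim)
const = len(up) // target_hop * target_hop
down = np.average(up[:const].reshape(-1, target_hop, raw_feats.shape[1]), axis=1)
print(down.shape)  # (18, 256): ~ source_len * 30 / 16 frames at the target rate
```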
utils/io.py ADDED
@@ -0,0 +1,182 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import numpy as np
8
+ import torch
9
+ import torchaudio
10
+
11
+
12
+ def save_feature(process_dir, feature_dir, item, feature, overrides=True):
13
+ """Save features to path
14
+
15
+ Args:
16
+ process_dir (str): directory to store features
17
+ feature_dir (_type_): directory to store one type of features (mel, energy, ...)
18
+ item (str): uid
19
+ feature (tensor): feature tensor
20
+ overrides (bool, optional): whether to override existing files. Defaults to True.
21
+ """
22
+ process_dir = os.path.join(process_dir, feature_dir)
23
+ os.makedirs(process_dir, exist_ok=True)
24
+ out_path = os.path.join(process_dir, item + ".npy")
25
+
26
+ if os.path.exists(out_path):
27
+ if overrides:
28
+ np.save(out_path, feature)
29
+ else:
30
+ np.save(out_path, feature)
31
+
32
+
33
+ def save_txt(process_dir, feature_dir, item, feature, overrides=True):
34
+ process_dir = os.path.join(process_dir, feature_dir)
35
+ os.makedirs(process_dir, exist_ok=True)
36
+ out_path = os.path.join(process_dir, item + ".txt")
37
+
38
+ if os.path.exists(out_path):
39
+ if overrides:
40
+ f = open(out_path, "w")
41
+ f.writelines(feature)
42
+ f.close()
43
+ else:
44
+ f = open(out_path, "w")
45
+ f.writelines(feature)
46
+ f.close()
47
+
48
+
49
+ def save_audio(path, waveform, fs, add_silence=False, turn_up=False, volume_peak=0.9):
50
+ """Save audio to path with processing (turn up volume, add silence)
51
+ Args:
52
+ path (str): path to save audio
53
+ waveform (numpy array): waveform to save
54
+ fs (int): sampling rate
55
+ add_silence (bool, optional): whether to add silence to beginning and end. Defaults to False.
56
+ turn_up (bool, optional): whether to turn up volume. Defaults to False.
57
+ volume_peak (float, optional): volume peak. Defaults to 0.9.
58
+ """
59
+ if turn_up:
60
+ # continue to turn up to volume_peak
61
+ ratio = volume_peak / max(waveform.max(), abs(waveform.min()))
62
+ waveform = waveform * ratio
63
+
64
+ if add_silence:
65
+ silence_len = fs // 20
66
+ silence = np.zeros((silence_len,), dtype=waveform.dtype)
67
+ result = np.concatenate([silence, waveform, silence])
68
+ waveform = result
69
+
70
+ waveform = torch.as_tensor(waveform, dtype=torch.float32, device="cpu")
71
+ if len(waveform.size()) == 1:
72
+ waveform = waveform[None, :]
73
+ elif waveform.size(0) != 1:
74
+ # Stereo to mono
75
+ waveform = torch.mean(waveform, dim=0, keepdim=True)
76
+ torchaudio.save(path, waveform, fs, encoding="PCM_S", bits_per_sample=16)
77
+
78
+
79
+ def save_torch_audio(process_dir, feature_dir, item, wav_torch, fs, overrides=True):
80
+ """Save torch audio to path without processing
81
+ Args:
82
+ process_dir (str): directory to store features
83
+ feature_dir (_type_): directory to store one type of features (mel, energy, ...)
84
+ item (str): uid
85
+ wav_torch (tensor): feature tensor
86
+ fs (int): sampling rate
87
+ overrides (bool, optional): whether to override existing files. Defaults to True.
88
+ """
89
+ if wav_torch.dim() != 2:
90
+ wav_torch = wav_torch.unsqueeze(0)
91
+
92
+ process_dir = os.path.join(process_dir, feature_dir)
93
+ os.makedirs(process_dir, exist_ok=True)
94
+ out_path = os.path.join(process_dir, item + ".wav")
95
+
96
+ torchaudio.save(out_path, wav_torch, fs)
97
+
98
+
99
+ async def async_load_audio(path, sample_rate: int = 24000):
100
+ r"""
101
+ Args:
102
+ path: The source loading path.
103
+ sample_rate: The target sample rate, will automatically resample if necessary.
104
+
105
+ Returns:
106
+ waveform: The waveform object. Should be [1 x sequence_len].
107
+ """
108
+
109
+ async def use_torchaudio_load(path):
110
+ return torchaudio.load(path)
111
+
112
+ waveform, sr = await use_torchaudio_load(path)
113
+ waveform = torch.mean(waveform, dim=0, keepdim=True)
114
+
115
+ if sr != sample_rate:
116
+ waveform = torchaudio.functional.resample(waveform, sr, sample_rate)
117
+
118
+ if torch.any(torch.isnan(waveform)) or torch.any(torch.isinf(waveform)):
119
+ raise ValueError("NaN or Inf found in waveform.")
120
+ return waveform
121
+
122
+
123
+ async def async_save_audio(
124
+ path,
125
+ waveform,
126
+ sample_rate: int = 24000,
127
+ add_silence: bool = False,
128
+ volume_peak: float = 0.9,
129
+ ):
130
+ r"""
131
+ Args:
132
+ path: The target saving path.
133
+ waveform: The waveform object. Should be [n_channel x sequence_len].
134
+ sample_rate: Sample rate.
135
+ add_silence: If ``true``, concat 0.05s silence to beginning and end.
136
+ volume_peak: Turn up volume for larger number, vice versa.
137
+ """
138
+
139
+ async def use_torchaudio_save(path, waveform, sample_rate):
140
+ torchaudio.save(
141
+ path, waveform, sample_rate, encoding="PCM_S", bits_per_sample=16
142
+ )
143
+
144
+ waveform = torch.as_tensor(waveform, device="cpu", dtype=torch.float32)
145
+ shape = waveform.size()[:-1]
146
+
147
+ ratio = abs(volume_peak) / max(waveform.max(), abs(waveform.min()))
148
+ waveform = waveform * ratio
149
+
150
+ if add_silence:
151
+ silence_len = sample_rate // 20
152
+ silence = torch.zeros((*shape, silence_len), dtype=waveform.dtype)
153
+ waveform = torch.concatenate((silence, waveform, silence), dim=-1)
154
+
155
+ if waveform.dim() == 1:
156
+ waveform = waveform[None]
157
+
158
+ await use_torchaudio_save(path, waveform, sample_rate)
159
+
160
+
161
+ def load_mel_extrema(cfg, dataset_name, split):
162
+ dataset_dir = os.path.join(
163
+ cfg.OUTPUT_PATH,
164
+ "preprocess/{}_version".format(cfg.data.process_version),
165
+ dataset_name,
166
+ )
167
+
168
+ min_file = os.path.join(
169
+ dataset_dir,
170
+ "mel_min_max",
171
+ split.split("_")[-1],
172
+ "mel_min.npy",
173
+ )
174
+ max_file = os.path.join(
175
+ dataset_dir,
176
+ "mel_min_max",
177
+ split.split("_")[-1],
178
+ "mel_max.npy",
179
+ )
180
+ mel_min = np.load(min_file)
181
+ mel_max = np.load(max_file)
182
+ return mel_min, mel_max
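A hedged sketch of the async helpers above; the file paths are placeholders:

```python
import asyncio

# Hypothetical round trip through async_load_audio / async_save_audio.
async def roundtrip(in_path="in.wav", out_path="out.wav"):
    waveform = await async_load_audio(in_path, sample_rate=24000)  # [1, T]
    await async_save_audio(out_path, waveform, sample_rate=24000, add_silence=True)

asyncio.run(roundtrip())
```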
utils/io_optim.py ADDED
@@ -0,0 +1,123 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ import torchaudio
8
+ import json
9
+ import os
10
+ import numpy as np
11
+ import librosa
12
+ import whisper
13
+ from torch.nn.utils.rnn import pad_sequence
14
+
15
+
16
+ class TorchaudioDataset(torch.utils.data.Dataset):
17
+ def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
18
+ """
19
+ Args:
20
+ cfg: config
21
+ dataset: dataset name
22
+
23
+ """
24
+ assert isinstance(dataset, str)
25
+
26
+ self.sr = sr
27
+ self.cfg = cfg
28
+
29
+ if metadata is None:
30
+ self.train_metadata_path = os.path.join(
31
+ cfg.preprocess.processed_dir, dataset, cfg.preprocess.train_file
32
+ )
33
+ self.valid_metadata_path = os.path.join(
34
+ cfg.preprocess.processed_dir, dataset, cfg.preprocess.valid_file
35
+ )
36
+ self.metadata = self.get_metadata()
37
+ else:
38
+ self.metadata = metadata
39
+
40
+ if accelerator is not None:
41
+ self.device = accelerator.device
42
+ elif torch.cuda.is_available():
43
+ self.device = torch.device("cuda")
44
+ else:
45
+ self.device = torch.device("cpu")
46
+
47
+ def get_metadata(self):
48
+ metadata = []
49
+ with open(self.train_metadata_path, "r", encoding="utf-8") as t:
50
+ metadata.extend(json.load(t))
51
+ with open(self.valid_metadata_path, "r", encoding="utf-8") as v:
52
+ metadata.extend(json.load(v))
53
+ return metadata
54
+
55
+ def __len__(self):
56
+ return len(self.metadata)
57
+
58
+ def __getitem__(self, index):
59
+ utt_info = self.metadata[index]
60
+ wav_path = utt_info["Path"]
61
+
62
+ wav, sr = torchaudio.load(wav_path)
63
+
64
+ # resample
65
+ if sr != self.sr:
66
+ wav = torchaudio.functional.resample(wav, sr, self.sr)
67
+ # downmixing
68
+ if wav.shape[0] > 1:
69
+ wav = torch.mean(wav, dim=0, keepdim=True)
70
+ assert wav.shape[0] == 1
71
+ wav = wav.squeeze(0)
72
+ # record the length of wav without padding
73
+ length = wav.shape[0]
74
+ # wav: (T)
75
+ return utt_info, wav, length
76
+
77
+
78
+ class LibrosaDataset(TorchaudioDataset):
79
+ def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
80
+ super().__init__(cfg, dataset, sr, accelerator, metadata)
81
+
82
+ def __getitem__(self, index):
83
+ utt_info = self.metadata[index]
84
+ wav_path = utt_info["Path"]
85
+
86
+ wav, _ = librosa.load(wav_path, sr=self.sr)
87
+ # wav: (T)
88
+ wav = torch.from_numpy(wav)
89
+
90
+ # record the length of wav without padding
91
+ length = wav.shape[0]
92
+ return utt_info, wav, length
93
+
94
+
95
+ class FFmpegDataset(TorchaudioDataset):
96
+ def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
97
+ super().__init__(cfg, dataset, sr, accelerator, metadata)
98
+
99
+ def __getitem__(self, index):
100
+ utt_info = self.metadata[index]
101
+ wav_path = utt_info["Path"]
102
+
103
+ # wav: (T,)
104
+ wav = whisper.load_audio(wav_path, sr=16000) # sr = 16000
105
+ # convert to torch tensor
106
+ wav = torch.from_numpy(wav)
107
+ # record the length of wav without padding
108
+ length = wav.shape[0]
109
+
110
+ return utt_info, wav, length
111
+
112
+
113
+ def collate_batch(batch_list):
114
+ """
115
+ Args:
116
+ batch_list: list of (metadata, wav, length)
117
+ """
118
+ metadata = [item[0] for item in batch_list]
119
+ # wavs: (B, T)
120
+ wavs = pad_sequence([item[1] for item in batch_list], batch_first=True)
121
+ lens = [item[2] for item in batch_list]
122
+
123
+ return metadata, wavs, lens
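These dataset classes are meant to feed a standard `DataLoader` through `collate_batch`; a sketch, assuming a `cfg` whose `preprocess.*` fields match what `__init__` reads and a hypothetical dataset name:

```python
from torch.utils.data import DataLoader

# "my_dataset" and cfg are assumptions for illustration only.
dataset = TorchaudioDataset(cfg, "my_dataset", sr=24000)
loader = DataLoader(dataset, batch_size=8, collate_fn=collate_batch, num_workers=4)

for metadata, wavs, lens in loader:
    # wavs: (B, T_max) zero-padded; lens: per-item lengths before padding
    ...
```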
utils/mel.py ADDED
@@ -0,0 +1,280 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ from librosa.filters import mel as librosa_mel_fn
8
+
9
+
10
+ def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
11
+ # Min value: ln(1e-5) = -11.5129
12
+ return torch.log(torch.clamp(x, min=clip_val) * C)
13
+
14
+
15
+ def spectral_normalize_torch(magnitudes):
16
+ output = dynamic_range_compression_torch(magnitudes)
17
+ return output
18
+
19
+
20
+ def extract_linear_features(y, cfg, center=False):
21
+ if torch.min(y) < -1.0:
22
+ print("min value is ", torch.min(y))
23
+ if torch.max(y) > 1.0:
24
+ print("max value is ", torch.max(y))
25
+
26
+ global hann_window
27
+ hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)
28
+
29
+ y = torch.nn.functional.pad(
30
+ y.unsqueeze(1),
31
+ (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
32
+ mode="reflect",
33
+ )
34
+ y = y.squeeze(1)
35
+
36
+ # complex tensor as default, then use view_as_real for future pytorch compatibility
37
+ spec = torch.stft(
38
+ y,
39
+ cfg.n_fft,
40
+ hop_length=cfg.hop_size,
41
+ win_length=cfg.win_size,
42
+ window=hann_window[str(y.device)],
43
+ center=center,
44
+ pad_mode="reflect",
45
+ normalized=False,
46
+ onesided=True,
47
+ return_complex=True,
48
+ )
49
+ spec = torch.view_as_real(spec)
50
+ spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
51
+ spec = torch.squeeze(spec, 0)
52
+ return spec
53
+
54
+
55
+ def mel_spectrogram_torch(y, cfg, center=False):
56
+ """
57
+ TODO: to merge this funtion with the extract_mel_features below
58
+ """
59
+ if torch.min(y) < -1.0:
60
+ print("min value is ", torch.min(y))
61
+ if torch.max(y) > 1.0:
62
+ print("max value is ", torch.max(y))
63
+
64
+ global mel_basis, hann_window
65
+ if str(cfg.fmax) + "_" + str(y.device) not in mel_basis:
66
+ mel = librosa_mel_fn(
67
+ sr=cfg.sample_rate,
68
+ n_fft=cfg.n_fft,
69
+ n_mels=cfg.n_mel,
70
+ fmin=cfg.fmin,
71
+ fmax=cfg.fmax,
72
+ )
73
+ mel_basis[str(cfg.fmax) + "_" + str(y.device)] = (
74
+ torch.from_numpy(mel).float().to(y.device)
75
+ )
76
+ hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)
77
+
78
+ y = torch.nn.functional.pad(
79
+ y.unsqueeze(1),
80
+ (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
81
+ mode="reflect",
82
+ )
83
+ y = y.squeeze(1)
84
+
85
+ spec = torch.stft(
86
+ y,
87
+ cfg.n_fft,
88
+ hop_length=cfg.hop_size,
89
+ win_length=cfg.win_size,
90
+ window=hann_window[str(y.device)],
91
+ center=center,
92
+ pad_mode="reflect",
93
+ normalized=False,
94
+ onesided=True,
95
+ return_complex=True,
96
+ )
97
+
98
+ spec = torch.view_as_real(spec)
99
+ spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
100
+
101
+ spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
102
+ spec = spectral_normalize_torch(spec)
103
+
104
+ return spec
105
+
106
+
107
+ mel_basis = {}
108
+ hann_window = {}
109
+
110
+
111
+ def extract_mel_features(
112
+ y,
113
+ cfg,
114
+ center=False,
115
+ ):
116
+ """Extract mel features
117
+
118
+ Args:
119
+ y (tensor): audio data in tensor
120
+ cfg (dict): configuration in cfg.preprocess
121
+ center (bool, optional): In STFT, whether t-th frame is centered at time t*hop_length. Defaults to False.
122
+
123
+ Returns:
124
+ tensor: a tensor containing the mel feature calculated based on STFT result
125
+ """
126
+ if torch.min(y) < -1.0:
127
+ print("min value is ", torch.min(y))
128
+ if torch.max(y) > 1.0:
129
+ print("max value is ", torch.max(y))
130
+
131
+ global mel_basis, hann_window
132
+ if str(cfg.fmax) + "_" + str(y.device) not in mel_basis:
133
+ mel = librosa_mel_fn(
134
+ sr=cfg.sample_rate,
135
+ n_fft=cfg.n_fft,
136
+ n_mels=cfg.n_mel,
137
+ fmin=cfg.fmin,
138
+ fmax=cfg.fmax,
139
+ )
140
+ mel_basis[str(cfg.fmax) + "_" + str(y.device)] = (
141
+ torch.from_numpy(mel).float().to(y.device)
142
+ )
143
+ hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)
144
+
145
+ y = torch.nn.functional.pad(
146
+ y.unsqueeze(1),
147
+ (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
148
+ mode="reflect",
149
+ )
150
+ y = y.squeeze(1)
151
+
152
+ # complex tensor as default, then use view_as_real for future pytorch compatibility
153
+ spec = torch.stft(
154
+ y,
155
+ cfg.n_fft,
156
+ hop_length=cfg.hop_size,
157
+ win_length=cfg.win_size,
158
+ window=hann_window[str(y.device)],
159
+ center=center,
160
+ pad_mode="reflect",
161
+ normalized=False,
162
+ onesided=True,
163
+ return_complex=True,
164
+ )
165
+ spec = torch.view_as_real(spec)
166
+ spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
167
+
168
+ spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
169
+ spec = spectral_normalize_torch(spec)
170
+ return spec.squeeze(0)
171
+
172
+
173
+ def extract_mel_features_tts(
174
+ y,
175
+ cfg,
176
+ center=False,
177
+ taco=False,
178
+ _stft=None,
179
+ ):
180
+ """Extract mel features
181
+
182
+ Args:
183
+ y (tensor): audio data in tensor
184
+ cfg (dict): configuration in cfg.preprocess
185
+ center (bool, optional): In STFT, whether t-th frame is centered at time t*hop_length. Defaults to False.
186
+ taco: use tacotron mel
187
+
188
+ Returns:
189
+ tensor: a tensor containing the mel feature calculated based on STFT result
190
+ """
191
+ if not taco:
192
+ if torch.min(y) < -1.0:
193
+ print("min value is ", torch.min(y))
194
+ if torch.max(y) > 1.0:
195
+ print("max value is ", torch.max(y))
196
+
197
+ global mel_basis, hann_window
198
+ if str(cfg.fmax) + "_" + str(y.device) not in mel_basis:
199
+ mel = librosa_mel_fn(
200
+ sr=cfg.sample_rate,
201
+ n_fft=cfg.n_fft,
202
+ n_mels=cfg.n_mel,
203
+ fmin=cfg.fmin,
204
+ fmax=cfg.fmax,
205
+ )
206
+ mel_basis[str(cfg.fmax) + "_" + str(y.device)] = (
207
+ torch.from_numpy(mel).float().to(y.device)
208
+ )
209
+ hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)
210
+
211
+ y = torch.nn.functional.pad(
212
+ y.unsqueeze(1),
213
+ (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
214
+ mode="reflect",
215
+ )
216
+ y = y.squeeze(1)
217
+
218
+ # complex tensor as default, then use view_as_real for future pytorch compatibility
219
+ spec = torch.stft(
220
+ y,
221
+ cfg.n_fft,
222
+ hop_length=cfg.hop_size,
223
+ win_length=cfg.win_size,
224
+ window=hann_window[str(y.device)],
225
+ center=center,
226
+ pad_mode="reflect",
227
+ normalized=False,
228
+ onesided=True,
229
+ return_complex=True,
230
+ )
231
+ spec = torch.view_as_real(spec)
232
+ spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
233
+
234
+ spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
235
+ spec = spectral_normalize_torch(spec)
236
+ else:
237
+ audio = torch.clip(y, -1, 1)
238
+ audio = torch.autograd.Variable(audio, requires_grad=False)
239
+ spec, energy = _stft.mel_spectrogram(audio)
240
+
241
+ return spec.squeeze(0)
242
+
243
+
244
+ def amplitude_phase_spectrum(y, cfg):
245
+ hann_window = torch.hann_window(cfg.win_size).to(y.device)
246
+
247
+ y = torch.nn.functional.pad(
248
+ y.unsqueeze(1),
249
+ (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
250
+ mode="reflect",
251
+ )
252
+ y = y.squeeze(1)
253
+
254
+ stft_spec = torch.stft(
255
+ y,
256
+ cfg.n_fft,
257
+ hop_length=cfg.hop_size,
258
+ win_length=cfg.win_size,
259
+ window=hann_window,
260
+ center=False,
261
+ return_complex=True,
262
+ )
263
+
264
+ stft_spec = torch.view_as_real(stft_spec)
265
+ if stft_spec.size()[0] == 1:
266
+ stft_spec = stft_spec.squeeze(0)
267
+
268
+ if len(list(stft_spec.size())) == 4:
269
+ rea = stft_spec[:, :, :, 0] # [batch_size, n_fft//2+1, frames]
270
+ imag = stft_spec[:, :, :, 1] # [batch_size, n_fft//2+1, frames]
271
+ else:
272
+ rea = stft_spec[:, :, 0] # [n_fft//2+1, frames]
273
+ imag = stft_spec[:, :, 1] # [n_fft//2+1, frames]
274
+
275
+ log_amplitude = torch.log(
276
+ torch.abs(torch.sqrt(torch.pow(rea, 2) + torch.pow(imag, 2))) + 1e-5
277
+ ) # [n_fft//2+1, frames]
278
+ phase = torch.atan2(imag, rea) # [n_fft//2+1, frames]
279
+
280
+ return log_amplitude, phase, rea, imag
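A minimal sketch of calling `extract_mel_features`; the config values are typical 24 kHz settings, not pulled from this file:

```python
import torch
from types import SimpleNamespace

# Field names match what extract_mel_features reads; values are illustrative.
cfg = SimpleNamespace(
    sample_rate=24000, n_fft=1024, n_mel=100,
    fmin=0, fmax=12000, hop_size=256, win_size=1024,
)
y = torch.randn(1, 24000).clamp(-1.0, 1.0)  # one second of audio in [-1, 1]
mel = extract_mel_features(y, cfg)          # (n_mel, frames)
print(mel.shape)
```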
utils/mert.py ADDED
@@ -0,0 +1,139 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from https://huggingface.co/m-a-p/MERT-v1-330M
7
+
8
+ import torch
9
+ from tqdm import tqdm
10
+ import numpy as np
11
+
12
+ from transformers import Wav2Vec2FeatureExtractor
13
+ from transformers import AutoModel
14
+ import torchaudio
15
+ import torchaudio.transforms as T
16
+ from sklearn.preprocessing import StandardScaler
17
+
18
+
19
+ def mert_encoder(model, processor, audio_path, hps):
20
+ """
21
+ # mert default sr: 24000
22
+ """
23
+ with torch.no_grad():
24
+ resample_rate = processor.sampling_rate
25
+ device = next(model.parameters()).device
26
+
27
+ input_audio, sampling_rate = torchaudio.load(audio_path)
28
+ input_audio = input_audio.squeeze()
29
+
30
+ if sampling_rate != resample_rate:
31
+ resampler = T.Resample(sampling_rate, resample_rate)
32
+ input_audio = resampler(input_audio)
33
+
34
+ inputs = processor(
35
+ input_audio, sampling_rate=resample_rate, return_tensors="pt"
36
+ ).to(
37
+ device
38
+ ) # {input_values: tensor, attention_mask: tensor}
39
+
40
+ outputs = model(**inputs, output_hidden_states=True) # list: len is 25
41
+
42
+ # [25 layer, Time steps, 1024 feature_dim]
43
+ # all_layer_hidden_states = torch.stack(outputs.hidden_states).squeeze()
44
+ # mert_features.append(all_layer_hidden_states)
45
+
46
+ feature = outputs.hidden_states[
47
+ hps.mert_feature_layer
48
+ ].squeeze() # [1, frame len, 1024] -> [frame len, 1024]
49
+
50
+ return feature.cpu().detach().numpy()
51
+
52
+
53
+ def mert_features_normalization(raw_mert_features):
54
+ normalized_mert_features = list()
55
+
56
+ mert_features = np.array(raw_mert_features)
57
+ scaler = StandardScaler().fit(mert_features)
58
+ for raw_mert_feature in raw_mert_features:
59
+ normalized_mert_feature = scaler.transform(raw_mert_feature)
60
+ normalized_mert_features.append(normalized_mert_feature)
61
+ return normalized_mert_features
62
+
63
+
64
+ def get_mapped_mert_features(raw_mert_features, mapping_features, fast_mapping=True):
65
+ source_hop = 320
66
+ target_hop = 256
67
+
68
+ factor = np.gcd(source_hop, target_hop)
69
+ source_hop //= factor
70
+ target_hop //= factor
71
+ print(
72
+ "Mapping source's {} frames => target's {} frames".format(
73
+ target_hop, source_hop
74
+ )
75
+ )
76
+
77
+ mert_features = []
78
+ for index, mapping_feat in enumerate(tqdm(mapping_features)):
79
+ # mapping_feat: (mels_frame_len, n_mels)
80
+ target_len = mapping_feat.shape[0]
81
+
82
+ # (frame_len, 1024)
83
+ raw_feats = raw_mert_features[index].cpu().numpy()
84
+ source_len, width = raw_feats.shape
85
+
86
+ # const ~= target_len * target_hop
87
+ const = source_len * source_hop // target_hop * target_hop
88
+
89
+ # (source_len * source_hop, dim)
90
+ up_sampling_feats = np.repeat(raw_feats, source_hop, axis=0)
91
+ # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
92
+ down_sampling_feats = np.average(
93
+ up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
94
+ )
95
+
96
+ err = abs(target_len - len(down_sampling_feats))
97
+ if err > 3:
98
+ print("index:", index)
99
+ print("mels:", mapping_feat.shape)
100
+ print("raw mert vector:", raw_feats.shape)
101
+ print("up_sampling:", up_sampling_feats.shape)
102
+ print("const:", const)
103
+ print("down_sampling_feats:", down_sampling_feats.shape)
104
+ exit()
105
+ if len(down_sampling_feats) < target_len:
106
+ # (1, dim) -> (err, dim)
107
+ end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
108
+ down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)
109
+
110
+ # (target_len, dim)
111
+ feats = down_sampling_feats[:target_len]
112
+ mert_features.append(feats)
113
+
114
+ return mert_features
115
+
116
+
117
+ def load_mert_model(hps):
118
+ print("Loading MERT Model: ", hps.mert_model)
119
+
120
+ # Load model
121
+ model_name = hps.mert_model
122
+ model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
123
+
124
+ if torch.cuda.is_available():
125
+ model = model.cuda()
126
+
127
+ # model = model.eval()
128
+
129
+ preprocessor = Wav2Vec2FeatureExtractor.from_pretrained(
130
+ model_name, trust_remote_code=True
131
+ )
132
+ return model, preprocessor
133
+
134
+
135
+ # loading the corresponding preprocessor config
136
+ # def load_preprocessor (model_name="m-a-p/MERT-v1-330M"):
137
+ # print('load_preprocessor...')
138
+ # preprocessor = Wav2Vec2FeatureExtractor.from_pretrained(model_name,trust_remote_code=True)
139
+ # return preprocessor
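A usage sketch for the MERT helpers above, assuming an `hps` carrying the `mert_model` and `mert_feature_layer` fields the functions read (the checkpoint name comes from the comment at the top of the file; the wav path is a placeholder):

```python
from types import SimpleNamespace

hps = SimpleNamespace(mert_model="m-a-p/MERT-v1-330M", mert_feature_layer=-1)

model, processor = load_mert_model(hps)
feature = mert_encoder(model, processor, "example.wav", hps)  # (frame_len, 1024)
```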
utils/mfa_prepare.py ADDED
@@ -0,0 +1,116 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """ This code is modified from https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/performance.html"""
7
+
8
+ import os
9
+ import subprocess
10
+ from multiprocessing import Pool
11
+ from tqdm import tqdm
12
+ import torchaudio
13
+ from pathlib import Path
14
+
15
+
16
+ def remove_empty_dirs(path):
17
+ """remove empty directories in a given path"""
18
+ # Check if the given path is a directory
19
+ if not os.path.isdir(path):
20
+ print(f"{path} is not a directory")
21
+ return
22
+
23
+ # Walk through all directories and subdirectories
24
+ for root, dirs, _ in os.walk(path, topdown=False):
25
+ for dir in dirs:
26
+ dir_path = os.path.join(root, dir)
27
+ # Check if the directory is empty
28
+ if not os.listdir(dir_path):
29
+ os.rmdir(dir_path) # "Removed empty directory
30
+
31
+
32
+ def process_single_wav_file(task):
33
+ """process a single wav file"""
34
+ wav_file, output_dir = task
35
+ speaker_id, book_name, filename = Path(wav_file).parts[-3:]
36
+
37
+ output_book_dir = Path(output_dir, speaker_id)
38
+ output_book_dir.mkdir(parents=True, exist_ok=True)
39
+ new_filename = f"{speaker_id}_{book_name}_{filename}"
40
+
41
+ new_wav_file = Path(output_book_dir, new_filename)
42
+ command = [
43
+ "ffmpeg",
44
+ "-nostdin",
45
+ "-hide_banner",
46
+ "-loglevel",
47
+ "error",
48
+ "-nostats",
49
+ "-i",
50
+ wav_file,
51
+ "-acodec",
52
+ "pcm_s16le",
53
+ "-ar",
54
+ "16000",
55
+ new_wav_file,
56
+ ]
57
+ subprocess.check_call(
58
+ command
59
+ ) # Run the command to convert the file to 16kHz and 16-bit PCM
60
+ os.remove(wav_file)
61
+
62
+
63
+ def process_wav_files(wav_files, output_dir, n_process):
64
+ """process wav files in parallel"""
65
+ tasks = [(wav_file, output_dir) for wav_file in wav_files]
66
+ print(f"Processing {len(tasks)} files")
67
+ with Pool(processes=n_process) as pool:
68
+ for _ in tqdm(
69
+ pool.imap_unordered(process_single_wav_file, tasks), total=len(tasks)
70
+ ):
71
+ pass
72
+ print("Removing empty directories...")
73
+ remove_empty_dirs(output_dir)
74
+ print("Done!")
75
+
76
+
77
+ def get_wav_files(dataset_path):
78
+ """get all wav files in the dataset"""
79
+ wav_files = []
80
+ for speaker_id in os.listdir(dataset_path):
81
+ speaker_dir = os.path.join(dataset_path, speaker_id)
82
+ if not os.path.isdir(speaker_dir):
83
+ continue
84
+ for book_name in os.listdir(speaker_dir):
85
+ book_dir = os.path.join(speaker_dir, book_name)
86
+ if not os.path.isdir(book_dir):
87
+ continue
88
+ for file in os.listdir(book_dir):
89
+ if file.endswith(".wav"):
90
+ wav_files.append(os.path.join(book_dir, file))
91
+ print("Found {} wav files".format(len(wav_files)))
92
+ return wav_files
93
+
94
+
95
+ def filter_wav_files_by_length(wav_files, max_len_sec=15):
96
+ """filter wav files by length"""
97
+ print("original wav files: {}".format(len(wav_files)))
98
+ filtered_wav_files = []
99
+ for audio_file in wav_files:
100
+ metadata = torchaudio.info(str(audio_file))
101
+ audio_length = metadata.num_frames / metadata.sample_rate
102
+ if audio_length <= max_len_sec:
103
+ filtered_wav_files.append(audio_file)
104
+ else:
105
+ os.remove(audio_file)
106
+ print("filtered wav files: {}".format(len(filtered_wav_files)))
107
+ return filtered_wav_files
108
+
109
+
110
+ if __name__ == "__main__":
111
+ dataset_path = "/path/to/output/directory"
112
+ n_process = 16
113
+ max_len_sec = 15
114
+ wav_files = get_wav_files(dataset_path)
115
+ filtered_wav_files = filter_wav_files_by_length(wav_files, max_len_sec)
116
+ process_wav_files(filtered_wav_files, dataset_path, n_process)
utils/model_summary.py ADDED
@@ -0,0 +1,74 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import humanfriendly
7
+ import numpy as np
8
+ import torch
9
+
10
+
11
+ def get_human_readable_count(number: int) -> str:
12
+ """Return human_readable_count
13
+
14
+ Originated from:
15
+ https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/core/memory.py
16
+
17
+ Abbreviates an integer number with K, M, B, T for thousands, millions,
18
+ billions and trillions, respectively.
19
+ Examples:
20
+ >>> get_human_readable_count(123)
21
+ '123 '
22
+ >>> get_human_readable_count(1234) # (one thousand)
23
+ '1 K'
24
+ >>> get_human_readable_count(2e6) # (two million)
25
+ '2 M'
26
+ >>> get_human_readable_count(3e9) # (three billion)
27
+ '3 B'
28
+ >>> get_human_readable_count(4e12) # (four trillion)
29
+ '4 T'
30
+ >>> get_human_readable_count(5e15) # (more than trillion)
31
+ '5,000 T'
32
+ Args:
33
+ number: a positive integer number
34
+ Return:
35
+ A string formatted according to the pattern described above.
36
+ """
37
+ assert number >= 0
38
+ labels = [" ", "K", "M", "B", "T"]
39
+ num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
40
+ num_groups = int(np.ceil(num_digits / 3))
41
+ num_groups = min(num_groups, len(labels))
42
+ shift = -3 * (num_groups - 1)
43
+ number = number * (10**shift)
44
+ index = num_groups - 1
45
+ return f"{number:.2f} {labels[index]}"
46
+
47
+
48
+ def to_bytes(dtype) -> int:
49
+ return int(str(dtype)[-2:]) // 8
50
+
51
+
52
+ def model_summary(model: torch.nn.Module) -> str:
53
+ message = "Model structure:\n"
54
+ message += str(model)
55
+ tot_params = sum(p.numel() for p in model.parameters())
56
+ num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
57
+ percent_trainable = "{:.1f}".format(num_params * 100.0 / tot_params)
58
+ tot_params = get_human_readable_count(tot_params)
59
+ num_params = get_human_readable_count(num_params)
60
+ message += "\n\nModel summary:\n"
61
+ message += f" Class Name: {model.__class__.__name__}\n"
62
+ message += f" Total Number of model parameters: {tot_params}\n"
63
+ message += (
64
+ f" Number of trainable parameters: {num_params} ({percent_trainable}%)\n"
65
+ )
66
+ num_bytes = humanfriendly.format_size(
67
+ sum(
68
+ p.numel() * to_bytes(p.dtype) for p in model.parameters() if p.requires_grad
69
+ )
70
+ )
71
+ message += f" Size: {num_bytes}\n"
72
+ dtype = next(iter(model.parameters())).dtype
73
+ message += f" Type: {dtype}"
74
+ return message
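For example, summarizing a toy module with the helpers above:

```python
import torch

net = torch.nn.Sequential(
    torch.nn.Linear(80, 256), torch.nn.ReLU(), torch.nn.Linear(256, 80)
)
print(model_summary(net))
# Prints the structure plus class name, total/trainable parameter counts
# (e.g. "41.30 K"), size in bytes, and parameter dtype.
```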
utils/prompt_preparer.py ADDED
@@ -0,0 +1,68 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+
8
+
9
+ class PromptPreparer:
10
+ def prepare_prompts(self, y, y_lens, codes, nar_stage, y_prompts_codes):
11
+ if self.prefix_mode == 0:
12
+ y_emb, prefix_len = self._handle_prefix_mode_0(y, codes, nar_stage)
13
+ elif self.prefix_mode == 1:
14
+ y_emb, prefix_len = self._handle_prefix_mode_1(y, y_lens, codes, nar_stage)
15
+ elif self.prefix_mode in [2, 4]:
16
+ y_emb, prefix_len = self._handle_prefix_mode_2_4(
17
+ y, y_lens, codes, nar_stage, y_prompts_codes
18
+ )
19
+ else:
20
+ raise ValueError("Invalid prefix mode")
21
+
22
+ return y_emb, prefix_len
23
+
24
+ def _handle_prefix_mode_0(self, y, codes, nar_stage):
25
+ prefix_len = 0
26
+ y_emb = self.nar_audio_embeddings[0](y)
27
+ for j in range(1, nar_stage):
28
+ y_emb = y_emb + self.nar_audio_embeddings[j](codes[..., j])
29
+ return y_emb, 0
30
+
31
+ def _handle_prefix_mode_1(self, y, y_lens, codes, nar_stage):
32
+ int_low = (0.25 * y_lens.min()).type(torch.int64).item()
33
+ prefix_len = torch.randint(int_low, int_low * 2, size=()).item()
34
+ prefix_len = min(prefix_len, 225)
35
+
36
+ y_prompts = self.nar_audio_embeddings[0](y[:, :prefix_len])
37
+ y_emb = self.nar_audio_embeddings[0](y[:, prefix_len:])
38
+ for j in range(1, self.num_quantizers):
39
+ y_prompts += self.nar_audio_embeddings[j](codes[:, :prefix_len, j])
40
+ if j < nar_stage:
41
+ y_emb += self.nar_audio_embeddings[j](codes[:, prefix_len:, j])
42
+ y_emb = torch.concat([y_prompts, y_emb], axis=1)
43
+ return y_emb, prefix_len
44
+
45
+ def _handle_prefix_mode_2_4(self, y, y_lens, codes, nar_stage, y_prompts_codes):
46
+ if self.prefix_mode == 2:
47
+ prefix_len = min(225, int(0.25 * y_lens.min().item()))
48
+
49
+ y_prompts_codes = []
50
+ for b in range(codes.shape[0]):
51
+ start = self.rng.randint(0, y_lens[b].item() - prefix_len)
52
+ y_prompts_codes.append(
53
+ torch.clone(codes[b, start : start + prefix_len])
54
+ )
55
+ codes[b, start : start + prefix_len, nar_stage] = self.audio_token_num
56
+ y_prompts_codes = torch.stack(y_prompts_codes, dim=0)
57
+ else:
58
+ prefix_len = y_prompts_codes.shape[1]
59
+
60
+ y_prompts = self.nar_audio_embeddings[0](y_prompts_codes[..., 0])
61
+ y_emb = self.nar_audio_embeddings[0](y)
62
+ for j in range(1, self.num_quantizers):
63
+ y_prompts += self.nar_audio_embeddings[j](y_prompts_codes[..., j])
64
+ if j < nar_stage:
65
+ y_emb += self.nar_audio_embeddings[j](codes[..., j])
66
+ y_emb = torch.concat([y_prompts, y_emb], axis=1)
67
+
68
+ return y_emb, prefix_len
utils/ssim.py ADDED
@@ -0,0 +1,80 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from https://github.com/Po-Hsun-Su/pytorch-ssim
7
+
8
+ import torch
9
+ import torch.nn.functional as F
10
+ from torch.autograd import Variable
11
+ from math import exp
12
+
13
+
14
+ def gaussian(window_size, sigma):
15
+ gauss = torch.Tensor(
16
+ [
17
+ exp(-((x - window_size // 2) ** 2) / float(2 * sigma**2))
18
+ for x in range(window_size)
19
+ ]
20
+ )
21
+ return gauss / gauss.sum()
22
+
23
+
24
+ def create_window(window_size, channel):
25
+ _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
26
+ _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
27
+ window = Variable(
28
+ _2D_window.expand(channel, 1, window_size, window_size).contiguous()
29
+ )
30
+ return window
31
+
32
+
33
+ def _ssim(img1, img2, window, window_size, channel, size_average=True):
34
+ mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
35
+ mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
36
+
37
+ mu1_sq = mu1.pow(2)
38
+ mu2_sq = mu2.pow(2)
39
+ mu1_mu2 = mu1 * mu2
40
+
41
+ sigma1_sq = (
42
+ F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
43
+ )
44
+ sigma2_sq = (
45
+ F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
46
+ )
47
+ sigma12 = (
48
+ F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel)
49
+ - mu1_mu2
50
+ )
51
+
52
+ C1 = 0.01**2
53
+ C2 = 0.03**2
54
+
55
+ ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
56
+ (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
57
+ )
58
+
59
+ if size_average:
60
+ return ssim_map.mean()
61
+ else:
62
+ return ssim_map.mean(1)
63
+
64
+
65
+ class SSIM(torch.nn.Module):
66
+ def __init__(self, window_size=11, size_average=True):
67
+ super(SSIM, self).__init__()
68
+ self.window_size = window_size
69
+ self.size_average = size_average
70
+ self.channel = 1
71
+ self.window = create_window(window_size, self.channel)
72
+
73
+ def forward(self, fake, real, bias=6.0):
74
+ fake = fake[:, None, :, :] + bias # [B, 1, T, n_mels]
75
+ real = real[:, None, :, :] + bias # [B, 1, T, n_mels]
76
+ self.window = self.window.to(dtype=fake.dtype, device=fake.device)
77
+ loss = 1 - _ssim(
78
+ fake, real, self.window, self.window_size, self.channel, self.size_average
79
+ )
80
+ return loss
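A usage sketch for the SSIM loss on mel-shaped tensors `[B, T, n_mels]` (random data, for shape illustration only):

```python
import torch

fake = torch.randn(4, 100, 80)  # [B, T, n_mels]
real = torch.randn(4, 100, 80)
criterion = SSIM(window_size=11)
loss = criterion(fake, real)    # scalar: 1 - mean SSIM of the biased inputs
print(loss.item())
```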
utils/stft.py ADDED
@@ -0,0 +1,278 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ import torch.nn.functional as F
8
+ import numpy as np
9
+ from scipy.signal import get_window
10
+ from librosa.util import pad_center, tiny
11
+ from librosa.filters import mel as librosa_mel_fn
12
+
13
+ import torch
14
+ import numpy as np
15
+ import librosa.util as librosa_util
16
+ from scipy.signal import get_window
17
+
18
+
19
+ def window_sumsquare(
20
+ window,
21
+ n_frames,
22
+ hop_length,
23
+ win_length,
24
+ n_fft,
25
+ dtype=np.float32,
26
+ norm=None,
27
+ ):
28
+ """
29
+ # from librosa 0.6
30
+ Compute the sum-square envelope of a window function at a given hop length.
31
+
32
+ This is used to estimate modulation effects induced by windowing
33
+ observations in short-time fourier transforms.
34
+
35
+ Parameters
36
+ ----------
37
+ window : string, tuple, number, callable, or list-like
38
+ Window specification, as in `get_window`
39
+
40
+ n_frames : int > 0
41
+ The number of analysis frames
42
+
43
+ hop_length : int > 0
44
+ The number of samples to advance between frames
45
+
46
+ win_length : [optional]
47
+ The length of the window function. By default, this matches `n_fft`.
48
+
49
+ n_fft : int > 0
50
+ The length of each analysis frame.
51
+
52
+ dtype : np.dtype
53
+ The data type of the output
54
+
55
+ Returns
56
+ -------
57
+ wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
58
+ The sum-squared envelope of the window function
59
+ """
60
+ if win_length is None:
61
+ win_length = n_fft
62
+
63
+ n = n_fft + hop_length * (n_frames - 1)
64
+ x = np.zeros(n, dtype=dtype)
65
+
66
+ # Compute the squared window at the desired length
67
+ win_sq = get_window(window, win_length, fftbins=True)
68
+ win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
69
+ win_sq = librosa_util.pad_center(win_sq, n_fft)
70
+
71
+ # Fill the envelope
72
+ for i in range(n_frames):
73
+ sample = i * hop_length
74
+ x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
75
+ return x
76
+
77
+
78
+ def griffin_lim(magnitudes, stft_fn, n_iters=30):
79
+ """
80
+ PARAMS
81
+ ------
82
+ magnitudes: spectrogram magnitudes
83
+ stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
84
+ """
85
+
86
+ angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
87
+ angles = angles.astype(np.float32)
88
+ angles = torch.autograd.Variable(torch.from_numpy(angles))
89
+ signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
90
+
91
+ for i in range(n_iters):
92
+ _, angles = stft_fn.transform(signal)
93
+ signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
94
+ return signal
95
+
96
+
97
+ def dynamic_range_compression(x, C=1, clip_val=1e-5):
98
+ """
99
+ PARAMS
100
+ ------
101
+ C: compression factor
102
+ """
103
+ return torch.log(torch.clamp(x, min=clip_val) * C)
104
+
105
+
106
+ def dynamic_range_decompression(x, C=1):
107
+ """
108
+ PARAMS
109
+ ------
110
+ C: compression factor used to compress
111
+ """
112
+ return torch.exp(x) / C
113
+
114
+
115
+ class STFT(torch.nn.Module):
116
+ """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft"""
117
+
118
+ def __init__(self, filter_length, hop_length, win_length, window="hann"):
119
+ super(STFT, self).__init__()
120
+ self.filter_length = filter_length
121
+ self.hop_length = hop_length
122
+ self.win_length = win_length
123
+ self.window = window
124
+ self.forward_transform = None
125
+ scale = self.filter_length / self.hop_length
126
+ fourier_basis = np.fft.fft(np.eye(self.filter_length))
127
+
128
+ cutoff = int((self.filter_length / 2 + 1))
129
+ fourier_basis = np.vstack(
130
+ [np.real(fourier_basis[:cutoff, :]), np.imag(fourier_basis[:cutoff, :])]
131
+ )
132
+
133
+ forward_basis = torch.FloatTensor(fourier_basis[:, None, :])
134
+ inverse_basis = torch.FloatTensor(
135
+ np.linalg.pinv(scale * fourier_basis).T[:, None, :]
136
+ )
137
+
138
+ if window is not None:
139
+ assert filter_length >= win_length
140
+ # get window and zero center pad it to filter_length
141
+ fft_window = get_window(window, win_length, fftbins=True)
142
+ fft_window = pad_center(fft_window, size=filter_length)  # keyword arg for librosa >= 0.10 compatibility
143
+ fft_window = torch.from_numpy(fft_window).float()
144
+
145
+ # window the bases
146
+ forward_basis *= fft_window
147
+ inverse_basis *= fft_window
148
+
149
+ self.register_buffer("forward_basis", forward_basis.float())
150
+ self.register_buffer("inverse_basis", inverse_basis.float())
151
+
152
+ def transform(self, input_data):
153
+ num_batches = input_data.size(0)
154
+ num_samples = input_data.size(1)
155
+
156
+ self.num_samples = num_samples
157
+
158
+ # similar to librosa, reflect-pad the input
159
+ input_data = input_data.view(num_batches, 1, num_samples)
160
+ input_data = F.pad(
161
+ input_data.unsqueeze(1),
162
+ (int(self.filter_length / 2), int(self.filter_length / 2), 0, 0),
163
+ mode="reflect",
164
+ )
165
+ input_data = input_data.squeeze(1)
166
+
167
+ forward_transform = F.conv1d(
168
+ input_data,
169
+ self.forward_basis,  # registered buffer, so it already lives on the module's device
170
+ stride=self.hop_length,
171
+ padding=0,
172
+ )
173
+
174
+ cutoff = int((self.filter_length / 2) + 1)
175
+ real_part = forward_transform[:, :cutoff, :]
176
+ imag_part = forward_transform[:, cutoff:, :]
177
+
178
+ magnitude = torch.sqrt(real_part**2 + imag_part**2)
179
+ phase = torch.autograd.Variable(torch.atan2(imag_part.data, real_part.data))
180
+
181
+ return magnitude, phase
182
+
183
+ def inverse(self, magnitude, phase):
184
+ recombine_magnitude_phase = torch.cat(
185
+ [magnitude * torch.cos(phase), magnitude * torch.sin(phase)], dim=1
186
+ )
187
+
188
+ inverse_transform = F.conv_transpose1d(
189
+ recombine_magnitude_phase,
190
+ self.inverse_basis,  # registered buffer; no Variable wrapper needed
191
+ stride=self.hop_length,
192
+ padding=0,
193
+ )
194
+
195
+ if self.window is not None:
196
+ window_sum = window_sumsquare(
197
+ self.window,
198
+ magnitude.size(-1),
199
+ hop_length=self.hop_length,
200
+ win_length=self.win_length,
201
+ n_fft=self.filter_length,
202
+ dtype=np.float32,
203
+ )
204
+ # remove modulation effects
205
+ approx_nonzero_indices = torch.from_numpy(
206
+ np.where(window_sum > tiny(window_sum))[0]
207
+ )
208
+ window_sum = torch.autograd.Variable(
209
+ torch.from_numpy(window_sum), requires_grad=False
210
+ )
211
+ window_sum = window_sum.cuda() if magnitude.is_cuda else window_sum
212
+ inverse_transform[:, :, approx_nonzero_indices] /= window_sum[
213
+ approx_nonzero_indices
214
+ ]
215
+
216
+ # scale by hop ratio
217
+ inverse_transform *= float(self.filter_length) / self.hop_length
218
+
219
+ inverse_transform = inverse_transform[:, :, int(self.filter_length / 2) :]
220
+ inverse_transform = inverse_transform[:, :, : -int(self.filter_length / 2)]
221
+
222
+ return inverse_transform
223
+
224
+ def forward(self, input_data):
225
+ self.magnitude, self.phase = self.transform(input_data)
226
+ reconstruction = self.inverse(self.magnitude, self.phase)
227
+ return reconstruction
228
+
229
+
230
+ class TacotronSTFT(torch.nn.Module):
231
+ def __init__(
232
+ self,
233
+ filter_length,
234
+ hop_length,
235
+ win_length,
236
+ n_mel_channels,
237
+ sampling_rate,
238
+ mel_fmin,
239
+ mel_fmax,
240
+ ):
241
+ super(TacotronSTFT, self).__init__()
242
+ self.n_mel_channels = n_mel_channels
243
+ self.sampling_rate = sampling_rate
244
+ self.stft_fn = STFT(filter_length, hop_length, win_length)
245
+ mel_basis = librosa_mel_fn(
246
+ sr=sampling_rate, n_fft=filter_length, n_mels=n_mel_channels, fmin=mel_fmin, fmax=mel_fmax
247
+ )
248
+ mel_basis = torch.from_numpy(mel_basis).float()
249
+ self.register_buffer("mel_basis", mel_basis)
250
+
251
+ def spectral_normalize(self, magnitudes):
252
+ output = dynamic_range_compression(magnitudes)
253
+ return output
254
+
255
+ def spectral_de_normalize(self, magnitudes):
256
+ output = dynamic_range_decompression(magnitudes)
257
+ return output
258
+
259
+ def mel_spectrogram(self, y):
260
+ """Computes mel-spectrograms from a batch of waves
261
+ PARAMS
262
+ ------
263
+ y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1]
264
+
265
+ RETURNS
266
+ -------
267
+ mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
268
+ """
269
+ assert torch.min(y.data) >= -1
270
+ assert torch.max(y.data) <= 1
271
+
272
+ magnitudes, phases = self.stft_fn.transform(y)
273
+ magnitudes = magnitudes.data
274
+ mel_output = torch.matmul(self.mel_basis, magnitudes)
275
+ mel_output = self.spectral_normalize(mel_output)
276
+ energy = torch.norm(magnitudes, dim=1)
277
+
278
+ return mel_output, energy
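
For orientation, a minimal sketch of how the two classes above are typically driven; the frame parameters below are illustrative, not values fixed by this file:

```python
import torch

taco_stft = TacotronSTFT(
    filter_length=1024, hop_length=256, win_length=1024,
    n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0, mel_fmax=8000.0,
)

wav = torch.rand(1, 22050) * 2 - 1            # (B, T) waveform in [-1, 1]
mel, energy = taco_stft.mel_spectrogram(wav)  # mel: (B, 80, T'), energy: (B, T')

# Phase-less reconstruction from magnitudes via Griffin-Lim.
magnitudes, _ = taco_stft.stft_fn.transform(wav)
recon = griffin_lim(magnitudes, taco_stft.stft_fn, n_iters=30)
```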
utils/symbol_table.py ADDED
@@ -0,0 +1,317 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from
7
+ # https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/utils/symbol_table.py
8
+
9
+ from dataclasses import dataclass
10
+ from dataclasses import field
11
+ from typing import Dict
12
+ from typing import Generic
13
+ from typing import List
14
+ from typing import Optional
15
+ from typing import TypeVar
16
+ from typing import Union
17
+
18
+ Symbol = TypeVar("Symbol")
19
+
20
+
21
+ @dataclass(repr=False)
22
+ class SymbolTable(Generic[Symbol]):
23
+ """SymbolTable that maps symbol IDs, found on the FSA arcs to
24
+ actual objects. These objects can be arbitrary Python objects
25
+ that can serve as keys in a dictionary (i.e. they need to be
26
+ hashable and immutable).
27
+
28
+ The SymbolTable can only be written to/read from disk if the
29
+ symbols are strings.
30
+ """
31
+
32
+ _id2sym: Dict[int, Symbol] = field(default_factory=dict)
33
+ """Map an integer to a symbol.
34
+ """
35
+
36
+ _sym2id: Dict[Symbol, int] = field(default_factory=dict)
37
+ """Map a symbol to an integer.
38
+ """
39
+
40
+ _next_available_id: int = 1
41
+ """A helper internal field that helps adding new symbols
42
+ to the table efficiently.
43
+ """
44
+
45
+ eps: Symbol = "<eps>"
46
+ """Null symbol, always mapped to index 0.
47
+ """
48
+
49
+ def __post_init__(self):
50
+ assert all(self._sym2id[sym] == idx for idx, sym in self._id2sym.items())
51
+ assert all(self._id2sym[idx] == sym for sym, idx in self._sym2id.items())
52
+ assert 0 not in self._id2sym or self._id2sym[0] == self.eps
53
+
54
+ self._next_available_id = max(self._id2sym, default=0) + 1
55
+ self._id2sym.setdefault(0, self.eps)
56
+ self._sym2id.setdefault(self.eps, 0)
57
+
58
+ @staticmethod
59
+ def from_str(s: str) -> "SymbolTable":
60
+ """Build a symbol table from a string.
61
+
62
+ The string consists of lines. Every line has two fields separated
63
+ by space(s), tab(s) or both. The first field is the symbol and the
64
+ second the integer id of the symbol.
65
+
66
+ Args:
67
+ s:
68
+ The input string with the format described above.
69
+ Returns:
70
+ An instance of :class:`SymbolTable`.
71
+ """
72
+ id2sym: Dict[int, str] = dict()
73
+ sym2id: Dict[str, int] = dict()
74
+
75
+ for line in s.split("\n"):
76
+ fields = line.split()
77
+ if len(fields) == 0:
78
+ continue # skip empty lines
79
+ assert (
80
+ len(fields) == 2
81
+ ), f"Expect a line with 2 fields. Given: {len(fields)}"
82
+ sym, idx = fields[0], int(fields[1])
83
+ assert sym not in sym2id, f"Duplicated symbol {sym}"
84
+ assert idx not in id2sym, f"Duplicated id {idx}"
85
+ id2sym[idx] = sym
86
+ sym2id[sym] = idx
87
+
88
+ eps = id2sym.get(0, "<eps>")
89
+
90
+ return SymbolTable(_id2sym=id2sym, _sym2id=sym2id, eps=eps)
91
+
92
+ @staticmethod
93
+ def from_file(filename: str) -> "SymbolTable":
94
+ """Build a symbol table from file.
95
+
96
+ Every line in the symbol table file has two fields separated by
97
+ space(s), tab(s) or both. The following is an example file:
98
+
99
+ .. code-block::
100
+
101
+ <eps> 0
102
+ a 1
103
+ b 2
104
+ c 3
105
+
106
+ Args:
107
+ filename:
108
+ Name of the symbol table file. Its format is documented above.
109
+
110
+ Returns:
111
+ An instance of :class:`SymbolTable`.
112
+
113
+ """
114
+ with open(filename, "r", encoding="utf-8") as f:
115
+ return SymbolTable.from_str(f.read().strip())
116
+
117
+ def to_str(self) -> str:
118
+ """
119
+ Returns:
120
+ Return a string representation of this object. You can pass
121
+ it to the method ``from_str`` to recreate an identical object.
122
+ """
123
+ s = ""
124
+ for idx, symbol in sorted(self._id2sym.items()):
125
+ s += f"{symbol} {idx}\n"
126
+ return s
127
+
128
+ def to_file(self, filename: str):
129
+ """Serialize the SymbolTable to a file.
130
+
131
+ Every line in the symbol table file has two fields separated by
132
+ space(s), tab(s) or both. The following is an example file:
133
+
134
+ .. code-block::
135
+
136
+ <eps> 0
137
+ a 1
138
+ b 2
139
+ c 3
140
+
141
+ Args:
142
+ filename:
143
+ Name of the symbol table file. Its format is documented above.
144
+ """
145
+ with open(filename, "w") as f:
146
+ for idx, symbol in sorted(self._id2sym.items()):
147
+ print(symbol, idx, file=f)
148
+
149
+ def add(self, symbol: Symbol, index: Optional[int] = None) -> int:
150
+ """Add a new symbol to the SymbolTable.
151
+
152
+ Args:
153
+ symbol:
154
+ The symbol to be added.
155
+ index:
156
+ Optional int id to which the symbol should be assigned.
157
+ If it is not available, a ValueError will be raised.
158
+
159
+ Returns:
160
+ The int id to which the symbol has been assigned.
161
+ """
162
+ # Already in the table? Return its ID.
163
+ if symbol in self._sym2id:
164
+ return self._sym2id[symbol]
165
+ # Specific ID not provided - use next available.
166
+ if index is None:
167
+ index = self._next_available_id
168
+ # Specific ID provided but not available.
169
+ if index in self._id2sym:
170
+ raise ValueError(
171
+ f"Cannot assign id '{index}' to '{symbol}' - "
172
+ f"already occupied by {self._id2sym[index]}"
173
+ )
174
+ self._sym2id[symbol] = index
175
+ self._id2sym[index] = symbol
176
+
177
+ # Update next available ID if needed
178
+ if self._next_available_id <= index:
179
+ self._next_available_id = index + 1
180
+
181
+ return index
182
+
183
+ def get(self, k: Union[int, Symbol]) -> Union[Symbol, int]:
184
+ """Get a symbol for an id or get an id for a symbol
185
+
186
+ Args:
187
+ k:
188
+ If it is an id, it tries to find the symbol corresponding
189
+ to the id; if it is a symbol, it tries to find the id
190
+ corresponding to the symbol.
191
+
192
+ Returns:
193
+ An id or a symbol depending on the given `k`.
194
+ """
195
+ if isinstance(k, int):
196
+ return self._id2sym[k]
197
+ else:
198
+ return self._sym2id[k]
199
+
200
+ def merge(self, other: "SymbolTable") -> "SymbolTable":
201
+ """Create a union of two SymbolTables.
202
+ Raises an AssertionError if the same IDs are occupied by
203
+ different symbols.
204
+
205
+ Args:
206
+ other:
207
+ A symbol table to merge with ``self``.
208
+
209
+ Returns:
210
+ A new symbol table.
211
+ """
212
+ self._check_compatible(other)
213
+ return SymbolTable(
214
+ _id2sym={**self._id2sym, **other._id2sym},
215
+ _sym2id={**self._sym2id, **other._sym2id},
216
+ eps=self.eps,
217
+ )
218
+
219
+ def _check_compatible(self, other: "SymbolTable") -> None:
220
+ # Epsilon compatibility
221
+ assert self.eps == other.eps, (
222
+ f"Mismatched epsilon symbol: " f"{self.eps} != {other.eps}"
223
+ )
224
+ # IDs compatibility
225
+ common_ids = set(self._id2sym).intersection(other._id2sym)
226
+ for idx in common_ids:
227
+ assert self[idx] == other[idx], (
228
+ f"ID conflict for id: {idx}, "
229
+ f'self[idx] = "{self[idx]}", '
230
+ f'other[idx] = "{other[idx]}"'
231
+ )
232
+ # Symbols compatibility
233
+ common_symbols = set(self._sym2id).intersection(other._sym2id)
234
+ for sym in common_symbols:
235
+ assert self[sym] == other[sym], (
236
+ f"Symbol conflict for symbol: {sym}, "
237
+ f'self[sym] = "{self[sym]}", '
238
+ f'other[sym] = "{other[sym]}"'
239
+ )
240
+
241
+ def __getitem__(self, item: Union[int, Symbol]) -> Union[Symbol, int]:
242
+ return self.get(item)
243
+
244
+ def __contains__(self, item: Union[int, Symbol]) -> bool:
245
+ if isinstance(item, int):
246
+ return item in self._id2sym
247
+ else:
248
+ return item in self._sym2id
249
+
250
+ def __len__(self) -> int:
251
+ return len(self._id2sym)
252
+
253
+ def __eq__(self, other: "SymbolTable") -> bool:
254
+ if len(self) != len(other):
255
+ return False
256
+
257
+ for s in self.symbols:
258
+ if self[s] != other[s]:
259
+ return False
260
+
261
+ return True
262
+
263
+ @property
264
+ def ids(self) -> List[int]:
265
+ """Returns a list of integer IDs corresponding to the symbols."""
266
+ ans = list(self._id2sym.keys())
267
+ ans.sort()
268
+ return ans
269
+
270
+ @property
271
+ def symbols(self) -> List[Symbol]:
272
+ """Returns a list of symbols (e.g., strings) corresponding to
273
+ the integer IDs.
274
+ """
275
+ ans = list(self._sym2id.keys())
276
+ ans.sort()
277
+ return ans
278
+
279
+
280
+ class TextToken:
281
+ def __init__(
282
+ self,
283
+ text_tokens: List[str],
284
+ add_eos: bool = True,
285
+ add_bos: bool = True,
286
+ pad_symbol: str = "<pad>",
287
+ bos_symbol: str = "<bos>",
288
+ eos_symbol: str = "<eos>",
289
+ ):
290
+ self.pad_symbol = pad_symbol
291
+ self.add_eos = add_eos
292
+ self.add_bos = add_bos
293
+ self.bos_symbol = bos_symbol
294
+ self.eos_symbol = eos_symbol
295
+
296
+ unique_tokens = [pad_symbol]
297
+ if add_bos:
298
+ unique_tokens.append(bos_symbol)
299
+ if add_eos:
300
+ unique_tokens.append(eos_symbol)
301
+ unique_tokens.extend(sorted(text_tokens))
302
+
303
+ self.token2idx = {token: idx for idx, token in enumerate(unique_tokens)}
304
+ self.idx2token = unique_tokens
305
+
306
+ def get_token_id_seq(self, text):
307
+ tokens_seq = [p for p in text]
308
+ seq = (
309
+ ([self.bos_symbol] if self.add_bos else [])
310
+ + tokens_seq
311
+ + ([self.eos_symbol] if self.add_eos else [])
312
+ )
313
+
314
+ token_ids = [self.token2idx[token] for token in seq]
315
+ token_lens = len(tokens_seq) + self.add_eos + self.add_bos
316
+
317
+ return token_ids, token_lens
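
A short usage sketch of `SymbolTable`, with made-up symbols, showing the round trip between a table and its string form:

```python
table = SymbolTable.from_str("<eps> 0\na 1\nb 2")
assert table["a"] == 1 and table[2] == "b"   # symbol -> id and id -> symbol
idx = table.add("c")                         # auto-assigns the next free id (3)
assert "c" in table and len(table) == 4

restored = SymbolTable.from_str(table.to_str())
assert restored == table
```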
utils/tokenizer.py ADDED
@@ -0,0 +1,150 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # This code is modified from
7
+ # https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/tokenizer.py
8
+
9
+ import re
10
+ from typing import Any, Dict, List, Optional, Pattern, Union
11
+
12
+ import torch
13
+ import torchaudio
14
+ from encodec import EncodecModel
15
+ from encodec.utils import convert_audio
16
+
17
+
18
+ class AudioTokenizer:
19
+ """EnCodec audio tokenizer for encoding and decoding audio.
20
+
21
+ Attributes:
22
+ device: The device on which the codec model is loaded.
23
+ codec: The pretrained EnCodec model.
24
+ sample_rate: Sample rate of the model.
25
+ channels: Number of audio channels in the model.
26
+ """
27
+
28
+ def __init__(self, device: Any = None) -> None:
29
+ model = EncodecModel.encodec_model_24khz()
30
+ model.set_target_bandwidth(6.0)
31
+ remove_encodec_weight_norm(model)
32
+
33
+ if not device:
34
+ device = torch.device("cpu")
35
+ if torch.cuda.is_available():
36
+ device = torch.device("cuda:0")
37
+
38
+ self._device = device
39
+
40
+ self.codec = model.to(device)
41
+ self.sample_rate = model.sample_rate
42
+ self.channels = model.channels
43
+
44
+ @property
45
+ def device(self):
46
+ return self._device
47
+
48
+ def encode(self, wav: torch.Tensor) -> torch.Tensor:
49
+ """Encode the audio waveform.
50
+
51
+ Args:
52
+ wav: A tensor representing the audio waveform.
53
+
54
+ Returns:
55
+ A tensor representing the encoded audio.
56
+ """
57
+ return self.codec.encode(wav.to(self.device))
58
+
59
+ def decode(self, frames: torch.Tensor) -> torch.Tensor:
60
+ """Decode the encoded audio frames.
61
+
62
+ Args:
63
+ frames: A tensor representing the encoded audio frames.
64
+
65
+ Returns:
66
+ A tensor representing the decoded audio waveform.
67
+ """
68
+ return self.codec.decode(frames)
69
+
70
+
71
+ def tokenize_audio(tokenizer: AudioTokenizer, audio_path: str):
72
+ """
73
+ Tokenize the audio waveform using the given AudioTokenizer.
74
+
75
+ Args:
76
+ tokenizer: An instance of AudioTokenizer.
77
+ audio_path: Path to the audio file.
78
+
79
+ Returns:
80
+ A tensor of encoded frames from the audio.
81
+
82
+ Raises:
83
+ FileNotFoundError: If the audio file is not found.
84
+ RuntimeError: If there's an error processing the audio data.
85
+ """
86
+ try:
87
+ # Load and preprocess the audio waveform
88
+ wav, sr = torchaudio.load(audio_path)
89
+ wav = convert_audio(wav, sr, tokenizer.sample_rate, tokenizer.channels)
90
+ wav = wav.unsqueeze(0)
91
+
92
+ # Extract discrete codes from EnCodec
93
+ with torch.no_grad():
94
+ encoded_frames = tokenizer.encode(wav)
95
+ return encoded_frames
96
+
97
+ except FileNotFoundError:
98
+ raise FileNotFoundError(f"Audio file not found at {audio_path}")
99
+ except Exception as e:
100
+ raise RuntimeError(f"Error processing audio data: {e}")
101
+
102
+
103
+ def remove_encodec_weight_norm(model):
104
+ from encodec.modules import SConv1d
105
+ from encodec.modules.seanet import SConvTranspose1d, SEANetResnetBlock
106
+ from torch.nn.utils import remove_weight_norm
107
+
108
+ encoder = model.encoder.model
109
+ for key in encoder._modules:
110
+ if isinstance(encoder._modules[key], SEANetResnetBlock):
111
+ remove_weight_norm(encoder._modules[key].shortcut.conv.conv)
112
+ block_modules = encoder._modules[key].block._modules
113
+ for skey in block_modules:
114
+ if isinstance(block_modules[skey], SConv1d):
115
+ remove_weight_norm(block_modules[skey].conv.conv)
116
+ elif isinstance(encoder._modules[key], SConv1d):
117
+ remove_weight_norm(encoder._modules[key].conv.conv)
118
+
119
+ decoder = model.decoder.model
120
+ for key in decoder._modules:
121
+ if isinstance(decoder._modules[key], SEANetResnetBlock):
122
+ remove_weight_norm(decoder._modules[key].shortcut.conv.conv)
123
+ block_modules = decoder._modules[key].block._modules
124
+ for skey in block_modules:
125
+ if isinstance(block_modules[skey], SConv1d):
126
+ remove_weight_norm(block_modules[skey].conv.conv)
127
+ elif isinstance(decoder._modules[key], SConvTranspose1d):
128
+ remove_weight_norm(decoder._modules[key].convtr.convtr)
129
+ elif isinstance(decoder._modules[key], SConv1d):
130
+ remove_weight_norm(decoder._modules[key].conv.conv)
131
+
132
+
133
+ def extract_encodec_token(wav_path):
134
+ model = EncodecModel.encodec_model_24khz()
135
+ model.set_target_bandwidth(6.0)
136
+
137
+ wav, sr = torchaudio.load(wav_path)
138
+ wav = convert_audio(wav, sr, model.sample_rate, model.channels)
139
+ wav = wav.unsqueeze(0)
140
+ if torch.cuda.is_available():
141
+ model = model.cuda()
142
+ wav = wav.cuda()
143
+ with torch.no_grad():
144
+ encoded_frames = model.encode(wav)
145
+ codes_ = torch.cat(
146
+ [encoded[0] for encoded in encoded_frames], dim=-1
147
+ ) # [B, n_q, T]
148
+ codes = codes_.cpu().numpy()[0, :, :].T # [T, 8]
149
+
150
+ return codes
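
A sketch of the intended call pattern; the wav path here is hypothetical, and the `encodec` package must be installed:

```python
import torch

tokenizer = AudioTokenizer()                        # uses cuda:0 when available
frames = tokenize_audio(tokenizer, "prompt.wav")    # list of (codes, scale) frames
codes = torch.cat([f[0] for f in frames], dim=-1)   # (B, n_q, T) discrete indices
wav_hat = tokenizer.decode(frames)                  # reconstructed 24 kHz waveform

# One-shot helper that skips the tokenizer object:
codes_np = extract_encodec_token("prompt.wav")      # numpy array of shape (T, 8)
```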
utils/topk_sampling.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+
11
+ # This function is modified from https://github.com/microsoft/unilm/blob/master/xtune/src/transformers/modeling_utils.py
12
+ def top_k_top_p_filtering(
13
+ logits, top_k=0, top_p=1.0, filter_value=-float("Inf"), min_tokens_to_keep=1
14
+ ):
15
+ """
16
+ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering.
17
+
18
+ Args:
19
+ logits (torch.Tensor): Logits distribution with shape (batch size, vocabulary size).
20
+ top_k (int, optional): Keep only top k tokens with highest probability (top-k filtering).
21
+ Set to 0 to disable. Defaults to 0.
22
+ top_p (float, optional): Keep the top tokens with a cumulative probability >= top_p (nucleus filtering).
23
+ Must be between 0 and 1, inclusive. Defaults to 1.0.
24
+ filter_value (float, optional): The value to assign to filtered logits. Defaults to -float('Inf').
25
+ min_tokens_to_keep (int, optional): Ensure that at least this number of tokens are kept per batch example.
26
+ Defaults to 1.
27
+
28
+ Returns:
29
+ torch.Tensor: The filtered logits.
30
+
31
+ Notes:
32
+ Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
33
+ Make sure we keep at least min_tokens_to_keep per batch example in the output
34
+ From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
35
+ """
36
+ if top_k > 0:
37
+ # Apply top-k filtering
38
+ top_k = min(max(top_k, min_tokens_to_keep), logits.size(-1))
39
+ indices_to_remove = logits < torch.topk(logits, top_k).values[..., -1, None]
40
+ logits[indices_to_remove] = filter_value
41
+
42
+ if top_p < 1.0:
43
+ # Apply top-p filtering
44
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
45
+ cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
46
+
47
+ # Create a mask to remove tokens with cumulative probability above the top_p threshold
48
+ sorted_indices_to_remove = cumulative_probs > top_p
49
+ if min_tokens_to_keep > 1:
50
+ sorted_indices_to_remove[..., :min_tokens_to_keep] = 0
51
+ sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
52
+ sorted_indices_to_remove[..., 0] = 0
53
+
54
+ # Scatter sorted tensors back to original indexing
55
+ indices_to_remove = sorted_indices_to_remove.scatter(
56
+ 1, sorted_indices, sorted_indices_to_remove
57
+ )
58
+ logits[indices_to_remove] = filter_value
59
+
60
+ return logits
61
+
62
+
63
+ def topk_sampling(logits, top_k=50, top_p=1.0, temperature=1.0):
64
+ """
65
+ Perform top-k and top-p sampling on logits.
66
+
67
+ Args:
68
+ logits (torch.Tensor): The logits to sample from.
69
+ top_k (int, optional): The number of highest probability tokens to keep for top-k filtering.
70
+ Must be a positive integer. Defaults to 50.
71
+ top_p (float, optional): The cumulative probability threshold for nucleus sampling.
72
+ Must be between 0 and 1. Defaults to 1.0.
73
+ temperature (float, optional): The scaling factor to adjust the logits distribution.
74
+ Must be strictly positive. Defaults to 1.0.
75
+
76
+ Returns:
77
+ torch.Tensor: The sampled token.
78
+ """
79
+
80
+ # Adjust logits using temperature
81
+ if temperature != 1.0:
82
+ logits = logits / temperature
83
+
84
+ # Top-p/top-k filtering
85
+ logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)
86
+
87
+ # Sample from the filtered distribution
88
+ token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
89
+ return token
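
A minimal sketch of the sampler on dummy logits:

```python
import torch

logits = torch.randn(2, 1000)   # (batch, vocab_size)
token = topk_sampling(logits, top_k=50, top_p=0.9, temperature=0.8)
print(token.shape)              # torch.Size([2, 1]) -- one sampled id per row
```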
utils/trainer_utils.py ADDED
@@ -0,0 +1,16 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+
8
+
9
+ def check_nan(logger, loss, y_pred, y_gt):
10
+ if torch.any(torch.isnan(loss)):
11
+ logger.info("out has nan: {}".format(torch.any(torch.isnan(y_pred))))
12
+ logger.info("y_gt has nan: {}".format(torch.any(torch.isnan(y_gt))))
13
+ logger.info("out: {}".format(y_pred))
14
+ logger.info("y_gt: {}".format(y_gt))
15
+ logger.info("loss = {:.4f}\n".format(loss.item()))
16
+ exit()
utils/util.py ADDED
@@ -0,0 +1,687 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ import collections
8
+ import glob
9
+ import os
10
+ import random
11
+ import time
12
+ import argparse
13
+ from collections import OrderedDict
14
+
15
+ import json5
16
+ import numpy as np
17
+
18
+ from torch.nn import functional as F
19
+
20
+
21
+ try:
22
+ from ruamel.yaml import YAML as yaml
23
+ except ImportError:
24
+ from ruamel_yaml import YAML as yaml
25
+
26
+ import torch
27
+
28
+ from utils.hparam import HParams
29
+ import logging
30
+ from logging import handlers
31
+
32
+
33
+ def str2bool(v):
34
+ """Used in argparse.ArgumentParser.add_argument to indicate
35
+ that a type is a bool type and user can enter
36
+
37
+ - yes, true, t, y, 1, to represent True
38
+ - no, false, f, n, 0, to represent False
39
+
40
+ See https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse # noqa
41
+ """
42
+ if isinstance(v, bool):
43
+ return v
44
+ if v.lower() in ("yes", "true", "t", "y", "1"):
45
+ return True
46
+ elif v.lower() in ("no", "false", "f", "n", "0"):
47
+ return False
48
+ else:
49
+ raise argparse.ArgumentTypeError("Boolean value expected.")
50
+
51
+
52
+ def find_checkpoint_of_mapper(mapper_ckpt_dir):
53
+ mapper_ckpts = glob.glob(os.path.join(mapper_ckpt_dir, "ckpts/*.pt"))
54
+
55
+ # Select the max steps
56
+ mapper_ckpts.sort()
57
+ mapper_weights_file = mapper_ckpts[-1]
58
+ return mapper_weights_file
59
+
60
+
61
+ def pad_f0_to_tensors(f0s, batched=None):
62
+ # Initialize
63
+ tensors = []
64
+
65
+ if batched is None:
66
+ # Get the max frame for padding
67
+ size = -1
68
+ for f0 in f0s:
69
+ size = max(size, f0.shape[-1])
70
+
71
+ tensor = torch.zeros(len(f0s), size)
72
+
73
+ for i, f0 in enumerate(f0s):
74
+ tensor[i, : f0.shape[-1]] = f0[:]
75
+
76
+ tensors.append(tensor)
77
+ else:
78
+ start = 0
79
+ while start + batched - 1 < len(f0s):
80
+ end = start + batched - 1
81
+
82
+ # Get the max frame for padding
83
+ size = -1
84
+ for i in range(start, end + 1):
85
+ size = max(size, f0s[i].shape[-1])
86
+
87
+ tensor = torch.zeros(batched, size)
88
+
89
+ for i in range(start, end + 1):
90
+ tensor[i - start, : f0s[i].shape[-1]] = f0s[i][:]
91
+
92
+ tensors.append(tensor)
93
+
94
+ start = start + batched
95
+
96
+ if start != len(f0s):
97
+ end = len(f0s)
98
+
99
+ # Get the max frame for padding
100
+ size = -1
101
+ for i in range(start, end):
102
+ size = max(size, f0s[i].shape[-1])
103
+
104
+ tensor = torch.zeros(len(f0s) - start, size)
105
+
106
+ for i in range(start, end):
107
+ tensor[i - start, : f0s[i].shape[-1]] = f0s[i][:]
108
+
109
+ tensors.append(tensor)
110
+
111
+ return tensors
112
+
113
+
114
+ def pad_mels_to_tensors(mels, batched=None):
115
+ """
116
+ Args:
117
+ mels: A list of mel-specs
118
+ Returns:
119
+ tensors: A list of tensors containing the batched mel-specs
120
+ mel_frames: A list of tensors containing the frames of the original mel-specs
121
+ """
122
+ # Initialize
123
+ tensors = []
124
+ mel_frames = []
125
+
126
+ # Split mel-specs into batches to avoid cuda memory exceed
127
+ if batched is None:
128
+ # Get the max frame for padding
129
+ size = -1
130
+ for mel in mels:
131
+ size = max(size, mel.shape[-1])
132
+
133
+ tensor = torch.zeros(len(mels), mels[0].shape[0], size)
134
+ mel_frame = torch.zeros(len(mels), dtype=torch.int32)
135
+
136
+ for i, mel in enumerate(mels):
137
+ tensor[i, :, : mel.shape[-1]] = mel[:]
138
+ mel_frame[i] = mel.shape[-1]
139
+
140
+ tensors.append(tensor)
141
+ mel_frames.append(mel_frame)
142
+ else:
143
+ start = 0
144
+ while start + batched - 1 < len(mels):
145
+ end = start + batched - 1
146
+
147
+ # Get the max frame for padding
148
+ size = -1
149
+ for i in range(start, end + 1):
150
+ size = max(size, mels[i].shape[-1])
151
+
152
+ tensor = torch.zeros(batched, mels[0].shape[0], size)
153
+ mel_frame = torch.zeros(batched, dtype=torch.int32)
154
+
155
+ for i in range(start, end + 1):
156
+ tensor[i - start, :, : mels[i].shape[-1]] = mels[i][:]
157
+ mel_frame[i - start] = mels[i].shape[-1]
158
+
159
+ tensors.append(tensor)
160
+ mel_frames.append(mel_frame)
161
+
162
+ start = start + batched
163
+
164
+ if start != len(mels):
165
+ end = len(mels)
166
+
167
+ # Get the max frame for padding
168
+ size = -1
169
+ for i in range(start, end):
170
+ size = max(size, mels[i].shape[-1])
171
+
172
+ tensor = torch.zeros(len(mels) - start, mels[0].shape[0], size)
173
+ mel_frame = torch.zeros(len(mels) - start, dtype=torch.int32)
174
+
175
+ for i in range(start, end):
176
+ tensor[i - start, :, : mels[i].shape[-1]] = mels[i][:]
177
+ mel_frame[i - start] = mels[i].shape[-1]
178
+
179
+ tensors.append(tensor)
180
+ mel_frames.append(mel_frame)
181
+
182
+ return tensors, mel_frames
183
+
184
+
185
+ def load_model_config(args):
186
+ """Load model configurations (in args.json under checkpoint directory)
187
+
188
+ Args:
189
+ args (ArgumentParser): arguments to run bins/preprocess.py
190
+
191
+ Returns:
192
+ dict: dictionary that stores model configurations
193
+ """
194
+ if args.checkpoint_dir is None:
195
+ assert args.checkpoint_file is not None
196
+ checkpoint_dir = os.path.split(args.checkpoint_file)[0]
197
+ else:
198
+ checkpoint_dir = args.checkpoint_dir
199
+ config_path = os.path.join(checkpoint_dir, "args.json")
200
+ print("config_path: ", config_path)
201
+
202
+ config = load_config(config_path)
203
+ return config
204
+
205
+
206
+ def remove_and_create(dir):
207
+ if os.path.exists(dir):
208
+ os.system("rm -r {}".format(dir))
209
+ os.makedirs(dir, exist_ok=True)
210
+
211
+
212
+ def has_existed(path, warning=False):
213
+ if not warning:
214
+ return os.path.exists(path)
215
+
216
+ if os.path.exists(path):
217
+ answer = input(
218
+ "The path {} already exists. \nInput 'y' (or hit Enter) to skip it, or input 'n' to overwrite it [y/n]\n".format(
219
+ path
220
+ )
221
+ )
222
+ if not answer == "n":
223
+ return True
224
+
225
+ return False
226
+
227
+
228
+ def remove_older_ckpt(saved_model_name, checkpoint_dir, max_to_keep=5):
229
+ if os.path.exists(os.path.join(checkpoint_dir, "checkpoint")):
230
+ with open(os.path.join(checkpoint_dir, "checkpoint"), "r") as f:
231
+ ckpts = [x.strip() for x in f.readlines()]
232
+ else:
233
+ ckpts = []
234
+ ckpts.append(saved_model_name)
235
+ for item in ckpts[:-max_to_keep]:
236
+ if os.path.exists(os.path.join(checkpoint_dir, item)):
237
+ os.remove(os.path.join(checkpoint_dir, item))
238
+ with open(os.path.join(checkpoint_dir, "checkpoint"), "w") as f:
239
+ for item in ckpts[-max_to_keep:]:
240
+ f.write("{}\n".format(item))
241
+
242
+
243
+ def set_all_random_seed(seed: int):
244
+ random.seed(seed)
245
+ np.random.seed(seed)
246
+ torch.random.manual_seed(seed)
247
+
248
+
249
+ def save_checkpoint(
250
+ args,
251
+ generator,
252
+ g_optimizer,
253
+ step,
254
+ discriminator=None,
255
+ d_optimizer=None,
256
+ max_to_keep=5,
257
+ ):
258
+ saved_model_name = "model.ckpt-{}.pt".format(step)
259
+ checkpoint_path = os.path.join(args.checkpoint_dir, saved_model_name)
260
+
261
+ if discriminator and d_optimizer:
262
+ torch.save(
263
+ {
264
+ "generator": generator.state_dict(),
265
+ "discriminator": discriminator.state_dict(),
266
+ "g_optimizer": g_optimizer.state_dict(),
267
+ "d_optimizer": d_optimizer.state_dict(),
268
+ "global_step": step,
269
+ },
270
+ checkpoint_path,
271
+ )
272
+ else:
273
+ torch.save(
274
+ {
275
+ "generator": generator.state_dict(),
276
+ "g_optimizer": g_optimizer.state_dict(),
277
+ "global_step": step,
278
+ },
279
+ checkpoint_path,
280
+ )
281
+
282
+ print("Saved checkpoint: {}".format(checkpoint_path))
283
+
284
+ if os.path.exists(os.path.join(args.checkpoint_dir, "checkpoint")):
285
+ with open(os.path.join(args.checkpoint_dir, "checkpoint"), "r") as f:
286
+ ckpts = [x.strip() for x in f.readlines()]
287
+ else:
288
+ ckpts = []
289
+ ckpts.append(saved_model_name)
290
+ for item in ckpts[:-max_to_keep]:
291
+ if os.path.exists(os.path.join(args.checkpoint_dir, item)):
292
+ os.remove(os.path.join(args.checkpoint_dir, item))
293
+ with open(os.path.join(args.checkpoint_dir, "checkpoint"), "w") as f:
294
+ for item in ckpts[-max_to_keep:]:
295
+ f.write("{}\n".format(item))
296
+
297
+
298
+ def attempt_to_restore(
299
+ generator, g_optimizer, checkpoint_dir, discriminator=None, d_optimizer=None
300
+ ):
301
+ checkpoint_list = os.path.join(checkpoint_dir, "checkpoint")
302
+ if os.path.exists(checkpoint_list):
303
+ checkpoint_filename = open(checkpoint_list).readlines()[-1].strip()
304
+ checkpoint_path = os.path.join(checkpoint_dir, "{}".format(checkpoint_filename))
305
+ print("Restore from {}".format(checkpoint_path))
306
+ checkpoint = torch.load(checkpoint_path, map_location="cpu")
307
+ if generator:
308
+ if not list(generator.state_dict().keys())[0].startswith("module."):
309
+ raw_dict = checkpoint["generator"]
310
+ clean_dict = OrderedDict()
311
+ for k, v in raw_dict.items():
312
+ if k.startswith("module."):
313
+ clean_dict[k[7:]] = v
314
+ else:
315
+ clean_dict[k] = v
316
+ generator.load_state_dict(clean_dict)
317
+ else:
318
+ generator.load_state_dict(checkpoint["generator"])
319
+ if g_optimizer:
320
+ g_optimizer.load_state_dict(checkpoint["g_optimizer"])
321
+ global_step = 100000
322
+ if discriminator and "discriminator" in checkpoint.keys():
323
+ discriminator.load_state_dict(checkpoint["discriminator"])
324
+ global_step = checkpoint["global_step"]
325
+ print("restore discriminator")
326
+ if d_optimizer and "d_optimizer" in checkpoint.keys():
327
+ d_optimizer.load_state_dict(checkpoint["d_optimizer"])
328
+ print("restore d_optimizer...")
329
+ else:
330
+ global_step = 0
331
+ return global_step
332
+
333
+
334
+ class ExponentialMovingAverage(object):
335
+ def __init__(self, decay):
336
+ self.decay = decay
337
+ self.shadow = {}
338
+
339
+ def register(self, name, val):
340
+ self.shadow[name] = val.clone()
341
+
342
+ def update(self, name, x):
343
+ assert name in self.shadow
344
+ update_delta = self.shadow[name] - x
345
+ self.shadow[name] -= (1.0 - self.decay) * update_delta
346
+
347
+
348
+ def apply_moving_average(model, ema):
349
+ for name, param in model.named_parameters():
350
+ if name in ema.shadow:
351
+ ema.update(name, param.data)
352
+
353
+
354
+ def register_model_to_ema(model, ema):
355
+ for name, param in model.named_parameters():
356
+ if param.requires_grad:
357
+ ema.register(name, param.data)
358
+
359
+
360
+ class YParams(HParams):
361
+ def __init__(self, yaml_file):
362
+ if not os.path.exists(yaml_file):
363
+ raise IOError("yaml file: {} does not exist".format(yaml_file))
364
+ super().__init__()
365
+ self.d = collections.OrderedDict()
366
+ with open(yaml_file) as fp:
367
+ for _, v in yaml().load(fp).items():
368
+ for k1, v1 in v.items():
369
+ try:
370
+ if self.get(k1):
371
+ self.set_hparam(k1, v1)
372
+ else:
373
+ self.add_hparam(k1, v1)
374
+ self.d[k1] = v1
375
+ except Exception:
376
+ import traceback
377
+
378
+ print(traceback.format_exc())
379
+
380
+ # @property
381
+ def get_elements(self):
382
+ return self.d.items()
383
+
384
+
385
+ def override_config(base_config, new_config):
386
+ """Update new configurations in the original dict with the new dict
387
+
388
+ Args:
389
+ base_config (dict): original dict to be overridden
390
+ new_config (dict): dict with new configurations
391
+
392
+ Returns:
393
+ dict: updated configuration dict
394
+ """
395
+ for k, v in new_config.items():
396
+ if type(v) == dict:
397
+ if k not in base_config.keys():
398
+ base_config[k] = {}
399
+ base_config[k] = override_config(base_config[k], v)
400
+ else:
401
+ base_config[k] = v
402
+ return base_config
403
+
404
+
405
+ def get_lowercase_keys_config(cfg):
406
+ """Change all keys in cfg to lower case
407
+
408
+ Args:
409
+ cfg (dict): dictionary that stores configurations
410
+
411
+ Returns:
412
+ dict: dictionary that stores configurations
413
+ """
414
+ updated_cfg = dict()
415
+ for k, v in cfg.items():
416
+ if type(v) == dict:
417
+ v = get_lowercase_keys_config(v)
418
+ updated_cfg[k.lower()] = v
419
+ return updated_cfg
420
+
421
+
422
+ def _load_config(config_fn, lowercase=False):
423
+ """Load configurations into a dictionary
424
+
425
+ Args:
426
+ config_fn (str): path to configuration file
427
+ lowercase (bool, optional): whether changing keys to lower case. Defaults to False.
428
+
429
+ Returns:
430
+ dict: dictionary that stores configurations
431
+ """
432
+ with open(config_fn, "r") as f:
433
+ data = f.read()
434
+ config_ = json5.loads(data)
435
+ if "base_config" in config_:
436
+ # load configurations from new path
437
+ p_config_path = os.path.join(os.getenv("WORK_DIR"), config_["base_config"])
438
+ p_config_ = _load_config(p_config_path)
439
+ config_ = override_config(p_config_, config_)
440
+ if lowercase:
441
+ # change keys in config_ to lower case
442
+ config_ = get_lowercase_keys_config(config_)
443
+ return config_
444
+
445
+
446
+ def load_config(config_fn, lowercase=False):
447
+ """Load configurations into a dictionary
448
+
449
+ Args:
450
+ config_fn (str): path to configuration file
451
+ lowercase (bool, optional): _description_. Defaults to False.
452
+
453
+ Returns:
454
+ JsonHParams: an object that stores configurations
455
+ """
456
+ config_ = _load_config(config_fn, lowercase=lowercase)
457
+ # create an JsonHParams object with configuration dict
458
+ cfg = JsonHParams(**config_)
459
+ return cfg
460
+
461
+
462
+ def save_config(save_path, cfg):
463
+ """Save configurations into a json file
464
+
465
+ Args:
466
+ save_path (str): path to save configurations
467
+ cfg (dict): dictionary that stores configurations
468
+ """
469
+ with open(save_path, "w") as f:
470
+ json5.dump(
471
+ cfg, f, ensure_ascii=False, indent=4, quote_keys=True, sort_keys=True
472
+ )
473
+
474
+
475
+ class JsonHParams:
476
+ def __init__(self, **kwargs):
477
+ for k, v in kwargs.items():
478
+ if type(v) == dict:
479
+ v = JsonHParams(**v)
480
+ self[k] = v
481
+
482
+ def keys(self):
483
+ return self.__dict__.keys()
484
+
485
+ def items(self):
486
+ return self.__dict__.items()
487
+
488
+ def values(self):
489
+ return self.__dict__.values()
490
+
491
+ def __len__(self):
492
+ return len(self.__dict__)
493
+
494
+ def __getitem__(self, key):
495
+ return getattr(self, key)
496
+
497
+ def __setitem__(self, key, value):
498
+ return setattr(self, key, value)
499
+
500
+ def __contains__(self, key):
501
+ return key in self.__dict__
502
+
503
+ def __repr__(self):
504
+ return self.__dict__.__repr__()
505
+
506
+
507
+ class ValueWindow:
508
+ def __init__(self, window_size=100):
509
+ self._window_size = window_size
510
+ self._values = []
511
+
512
+ def append(self, x):
513
+ self._values = self._values[-(self._window_size - 1) :] + [x]
514
+
515
+ @property
516
+ def sum(self):
517
+ return sum(self._values)
518
+
519
+ @property
520
+ def count(self):
521
+ return len(self._values)
522
+
523
+ @property
524
+ def average(self):
525
+ return self.sum / max(1, self.count)
526
+
527
+ def reset(self):
528
+ self._values = []
529
+
530
+
531
+ class Logger(object):
532
+ def __init__(
533
+ self,
534
+ filename,
535
+ level="info",
536
+ when="D",
537
+ backCount=10,
538
+ fmt="%(asctime)s : %(message)s",
539
+ ):
540
+ self.level_relations = {
541
+ "debug": logging.DEBUG,
542
+ "info": logging.INFO,
543
+ "warning": logging.WARNING,
544
+ "error": logging.ERROR,
545
+ "crit": logging.CRITICAL,
546
+ }
547
+ if level == "debug":
548
+ fmt = "%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s"
549
+ self.logger = logging.getLogger(filename)
550
+ format_str = logging.Formatter(fmt)
551
+ self.logger.setLevel(self.level_relations.get(level))
552
+ sh = logging.StreamHandler()
553
+ sh.setFormatter(format_str)
554
+ th = handlers.TimedRotatingFileHandler(
555
+ filename=filename, when=when, backupCount=backCount, encoding="utf-8"
556
+ )
557
+ th.setFormatter(format_str)
558
+ self.logger.addHandler(sh)
559
+ self.logger.addHandler(th)
560
+ self.logger.info(
561
+ "==========================New Starting Here=============================="
562
+ )
563
+
564
+
565
+ def init_weights(m, mean=0.0, std=0.01):
566
+ classname = m.__class__.__name__
567
+ if classname.find("Conv") != -1:
568
+ m.weight.data.normal_(mean, std)
569
+
570
+
571
+ def get_padding(kernel_size, dilation=1):
572
+ return int((kernel_size * dilation - dilation) / 2)
573
+
574
+
575
+ def slice_segments(x, ids_str, segment_size=4):
576
+ ret = torch.zeros_like(x[:, :, :segment_size])
577
+ for i in range(x.size(0)):
578
+ idx_str = ids_str[i]
579
+ idx_end = idx_str + segment_size
580
+ ret[i] = x[i, :, idx_str:idx_end]
581
+ return ret
582
+
583
+
584
+ def rand_slice_segments(x, x_lengths=None, segment_size=4):
585
+ b, d, t = x.size()
586
+ if x_lengths is None:
587
+ x_lengths = t
588
+ ids_str_max = x_lengths - segment_size + 1
589
+ ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
590
+ ret = slice_segments(x, ids_str, segment_size)
591
+ return ret, ids_str
592
+
593
+
594
+ def subsequent_mask(length):
595
+ mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)
596
+ return mask
597
+
598
+
599
+ @torch.jit.script
600
+ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
601
+ n_channels_int = n_channels[0]
602
+ in_act = input_a + input_b
603
+ t_act = torch.tanh(in_act[:, :n_channels_int, :])
604
+ s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
605
+ acts = t_act * s_act
606
+ return acts
607
+
608
+
609
+ def convert_pad_shape(pad_shape):
610
+ l = pad_shape[::-1]
611
+ pad_shape = [item for sublist in l for item in sublist]
612
+ return pad_shape
613
+
614
+
615
+ def sequence_mask(length, max_length=None):
616
+ if max_length is None:
617
+ max_length = length.max()
618
+ x = torch.arange(max_length, dtype=length.dtype, device=length.device)
619
+ return x.unsqueeze(0) < length.unsqueeze(1)
620
+
621
+
622
+ def generate_path(duration, mask):
623
+ """
624
+ duration: [b, 1, t_x]
625
+ mask: [b, 1, t_y, t_x]
626
+ """
627
+ device = duration.device
628
+
629
+ b, _, t_y, t_x = mask.shape
630
+ cum_duration = torch.cumsum(duration, -1)
631
+
632
+ cum_duration_flat = cum_duration.view(b * t_x)
633
+ path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
634
+ path = path.view(b, t_x, t_y)
635
+ path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
636
+ path = path.unsqueeze(1).transpose(2, 3) * mask
637
+ return path
638
+
639
+
640
+ def clip_grad_value_(parameters, clip_value, norm_type=2):
641
+ if isinstance(parameters, torch.Tensor):
642
+ parameters = [parameters]
643
+ parameters = list(filter(lambda p: p.grad is not None, parameters))
644
+ norm_type = float(norm_type)
645
+ if clip_value is not None:
646
+ clip_value = float(clip_value)
647
+
648
+ total_norm = 0
649
+ for p in parameters:
650
+ param_norm = p.grad.data.norm(norm_type)
651
+ total_norm += param_norm.item() ** norm_type
652
+ if clip_value is not None:
653
+ p.grad.data.clamp_(min=-clip_value, max=clip_value)
654
+ total_norm = total_norm ** (1.0 / norm_type)
655
+ return total_norm
656
+
657
+
658
+ def get_current_time():
659
+ pass
660
+
661
+
662
+ def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor:
663
+ """
664
+ Args:
665
+ lengths:
666
+ A 1-D tensor containing sentence lengths.
667
+ max_len:
668
+ The length of masks.
669
+ Returns:
670
+ Return a 2-D bool tensor, where masked positions
671
+ are filled with `True` and non-masked positions are
672
+ filled with `False`.
673
+
674
+ >>> lengths = torch.tensor([1, 3, 2, 5])
675
+ >>> make_pad_mask(lengths)
676
+ tensor([[False, True, True, True, True],
677
+ [False, False, False, True, True],
678
+ [False, False, True, True, True],
679
+ [False, False, False, False, False]])
680
+ """
681
+ assert lengths.ndim == 1, lengths.ndim
682
+ max_len = max(max_len, lengths.max())
683
+ n = lengths.size(0)
684
+ seq_range = torch.arange(0, max_len, device=lengths.device)
685
+ expanded_lengths = seq_range.unsqueeze(0).expand(n, max_len)
686
+
687
+ return expanded_lengths >= lengths.unsqueeze(-1)
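
A small illustration of the two config helpers above, with made-up keys:

```python
# JsonHParams exposes nested dicts as attributes while staying dict-like.
cfg = JsonHParams(**{"model": {"hidden_size": 256}, "lr": 2e-4})
assert cfg.model.hidden_size == 256 and cfg["lr"] == 2e-4

# override_config merges a new dict into a base dict, recursing into sub-dicts.
base = {"train": {"lr": 1e-4, "steps": 100}}
merged = override_config(base, {"train": {"lr": 3e-4}})
assert merged["train"] == {"lr": 3e-4, "steps": 100}
```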
utils/whisper_transcription.py ADDED
@@ -0,0 +1,122 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import pathlib
8
+ import string
9
+ import time
10
+ from multiprocessing import Pool, Value, Lock
11
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
12
+ import torch
13
+ import whisper
14
+
15
+ processed_files_count = Value("i", 0) # count of processed files
16
+ lock = Lock() # lock for the count
17
+
18
+
19
+ def preprocess_text(text):
20
+ """Preprocess text after ASR"""
21
+ return text.lower().translate(str.maketrans("", "", string.punctuation))
22
+
23
+
24
+ def transcribe_audio(model, processor, audio_file, device):
25
+ """Transcribe audio file"""
26
+ audio = whisper.load_audio(audio_file) # load from path
27
+ audio = whisper.pad_or_trim(audio) # default 30 seconds
28
+ inputs = whisper.log_mel_spectrogram(audio).to(
29
+ device=device
30
+ ) # convert to spectrogram
31
+ inputs = inputs.unsqueeze(0).to(dtype=model.dtype)  # add batch dimension; match the model's precision (fp16 on GPU, fp32 on CPU)
32
+
33
+ outputs = model.generate(
34
+ inputs=inputs, max_new_tokens=128
35
+ ) # generate transcription
36
+ transcription = processor.batch_decode(outputs, skip_special_tokens=True)[
37
+ 0
38
+ ] # decode
39
+ transcription_processed = preprocess_text(transcription) # preprocess
40
+ return transcription_processed
41
+
42
+
43
+ def write_transcription(audio_file, transcription):
44
+ """Write transcription to txt file"""
45
+ txt_file = audio_file.with_suffix(".txt")
46
+ with open(txt_file, "w") as file:
47
+ file.write(transcription)
48
+
49
+
50
+ def init_whisper(model_id, device):
51
+ """Initialize whisper model and processor"""
52
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
53
+ print(f"Loading model {model_id}") # model_id = "distil-whisper/distil-large-v2"
54
+ distil_model = AutoModelForSpeechSeq2Seq.from_pretrained(
55
+ model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=False
56
+ )
57
+ distil_model = distil_model.to(device)
58
+ processor = AutoProcessor.from_pretrained(model_id)
59
+ return distil_model, processor
60
+
61
+
62
+ def asr_wav_files(file_list, gpu_id, total_files, model_id):
63
+ """Transcribe wav files in a list"""
64
+ device = f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu"
65
+ whisper_model, processor = init_whisper(model_id, device)
66
+ print(f"Processing on {device} starts")
67
+ start_time = time.time()
68
+ for audio_file in file_list:
69
+ try:
70
+ transcription = transcribe_audio(
71
+ whisper_model, processor, audio_file, device
72
+ )
73
+ write_transcription(audio_file, transcription)
74
+ with lock:
75
+ processed_files_count.value += 1
76
+ if processed_files_count.value % 5 == 0:
77
+ current_time = time.time()
78
+ avg_time_per_file = (current_time - start_time) / (
79
+ processed_files_count.value
80
+ )
81
+ remaining_files = total_files - processed_files_count.value
82
+ estimated_time_remaining = avg_time_per_file * remaining_files
83
+ remaining_time_formatted = time.strftime(
84
+ "%H:%M:%S", time.gmtime(estimated_time_remaining)
85
+ )
86
+ print(
87
+ f"Processed {processed_files_count.value}/{total_files} files, time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Estimated time remaining: {remaining_time_formatted}"
88
+ )
89
+ except Exception as e:
90
+ print(f"Error processing file {audio_file}: {e}")
91
+
92
+
93
+ def asr_main(input_dir, num_gpus, model_id):
94
+ """Transcribe wav files in a directory"""
95
+ num_processes = min(num_gpus, os.cpu_count())
96
+ print(f"Using {num_processes} GPUs for transcription")
97
+ wav_files = list(pathlib.Path(input_dir).rglob("*.wav"))
98
+ total_files = len(wav_files)
99
+ print(f"Found {total_files} wav files in {input_dir}")
100
+ files_per_process = len(wav_files) // num_processes
101
+ print(f"Processing {files_per_process} files per process")
102
+ with Pool(num_processes) as p:
103
+ p.starmap(
104
+ asr_wav_files,
105
+ [
106
+ (
107
+ wav_files[i * files_per_process :] if i == num_processes - 1 else wav_files[i * files_per_process : (i + 1) * files_per_process],  # last worker also takes the remainder
108
+ i % num_gpus,
109
+ total_files,
110
+ model_id,
111
+ )
112
+ for i in range(num_processes)
113
+ ],
114
+ )
115
+ print("Done!")
116
+
117
+
118
+ if __name__ == "__main__":
119
+ input_dir = "/path/to/output/directory"
120
+ num_gpus = 2
121
+ model_id = "distil-whisper/distil-large-v2"
122
+ asr_main(input_dir, num_gpus, model_id)
utils/world.py ADDED
@@ -0,0 +1,92 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ # 1. Extract WORLD features including F0, AP, SP
7
+ # 2. Transform between SP and MCEP
8
+ import torchaudio
9
+ import pyworld as pw
10
+ import numpy as np
11
+ import torch
12
+ import diffsptk
13
+ import os
14
+ from tqdm import tqdm
15
+ import pickle
16
+
17
+
18
+
19
+ def get_mcep_params(fs):
20
+ """Hyperparameters of transformation between SP and MCEP
21
+
22
+ Reference:
23
+ https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world_v2/copy_synthesis.sh
24
+
25
+ """
26
+ if fs in [44100, 48000]:
27
+ fft_size = 2048
28
+ alpha = 0.77
29
+ if fs in [16000]:
30
+ fft_size = 1024
31
+ alpha = 0.58
32
+ return fft_size, alpha
33
+
34
+
35
+ def extract_world_features(waveform, fs, frameshift=10):
36
+ # waveform: (1, seq)
37
+ # x: (seq,)
38
+ x = np.array(waveform, dtype=np.double)
39
+
40
+ _f0, t = pw.dio(x, fs, frame_period=frameshift) # raw pitch extractor
41
+ f0 = pw.stonemask(x, _f0, t, fs) # pitch refinement
42
+ sp = pw.cheaptrick(x, f0, t, fs) # extract smoothed spectrogram
43
+ ap = pw.d4c(x, f0, t, fs) # extract aperiodicity
44
+
45
+ return f0, sp, ap, fs
46
+
47
+
48
+ def sp2mcep(x, mcsize, fs):
49
+ fft_size, alpha = get_mcep_params(fs)
50
+ x = torch.as_tensor(x, dtype=torch.float)
51
+
52
+ tmp = diffsptk.ScalarOperation("SquareRoot")(x)
53
+ tmp = diffsptk.ScalarOperation("Multiplication", 32768.0)(tmp)
54
+ mgc = diffsptk.MelCepstralAnalysis(
55
+ cep_order=mcsize - 1, fft_length=fft_size, alpha=alpha, n_iter=1
56
+ )(tmp)
57
+ return mgc.numpy()
58
+
59
+
60
+ def mcep2sp(x, mcsize, fs):
61
+ fft_size, alpha = get_mcep_params(fs)
62
+ x = torch.as_tensor(x, dtype=torch.float)
63
+
64
+ tmp = diffsptk.MelGeneralizedCepstrumToSpectrum(
65
+ alpha=alpha,
66
+ cep_order=mcsize - 1,
67
+ fft_length=fft_size,
68
+ )(x)
69
+ tmp = diffsptk.ScalarOperation("Division", 32768.0)(tmp)
70
+ sp = diffsptk.ScalarOperation("Power", 2)(tmp)
71
+ return sp.double().numpy()
72
+
73
+
74
+ def f0_statistics(f0_features, path):
75
+ print("\nF0 statistics...")
76
+
77
+ total_f0 = []
78
+ for f0 in tqdm(f0_features):
79
+ total_f0 += [f for f in f0 if f != 0]
80
+
81
+ mean = sum(total_f0) / len(total_f0)
82
+ print("Min = {}, Max = {}, Mean = {}".format(min(total_f0), max(total_f0), mean))
83
+
84
+ with open(path, "wb") as f:
85
+ pickle.dump([mean, total_f0], f)
86
+
87
+
88
+ def world_synthesis(f0, sp, ap, fs, frameshift):
89
+ y = pw.synthesize(
90
+ f0, sp, ap, fs, frame_period=frameshift
91
+ ) # synthesize an utterance using the parameters
92
+ return y
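
A minimal analysis-synthesis round trip through the helpers above; the input is synthetic noise, purely for illustration:

```python
import numpy as np

fs = 16000
waveform = np.random.uniform(-0.3, 0.3, fs)   # 1 second of noise at 16 kHz

f0, sp, ap, fs = extract_world_features(waveform, fs, frameshift=10)
mcep = sp2mcep(sp, mcsize=40, fs=fs)          # spectral envelope -> 40-dim MCEP
sp_hat = mcep2sp(mcep, mcsize=40, fs=fs)      # and back to a spectral envelope
y = world_synthesis(f0, sp_hat, ap, fs, frameshift=10)
```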
visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dd205eace26d91a558e70662a61f017e3ca78e89d98cf45a72ee0911c6a64d2
3
+ size 4592895
visualization/SingVisio/webpage/Dockerfile ADDED
@@ -0,0 +1,23 @@
1
+ # Copyright (c) 2023 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ FROM python:3.10
7
+
8
+ WORKDIR /app
9
+
10
+ COPY resources ./resources
11
+ COPY img ./img
12
+ COPY index.html ./index.html
13
+ COPY server.py ./server.py
14
+ COPY config ./config
15
+
16
+ RUN pip install numpy scikit-learn flask flask_cors gunicorn -i https://pypi.tuna.tsinghua.edu.cn/simple
17
+
18
+ EXPOSE 8000
19
+
20
+ ENTRYPOINT ["gunicorn", "-w", "8", "-b", "0.0.0.0:8000", "server:app"]
21
+
22
+ # docker build -t singvisio .
23
+ # docker run -v $(pwd)/data:/app/data -p 8000:8000 singvisio
visualization/SingVisio/webpage/README.md ADDED
@@ -0,0 +1,126 @@
1
+ ## SingVisio Webpage
2
+
3
+ This is the source code for the SingVisio webpage. This README introduces the project and provides an installation guide. For an introduction to SingVisio itself, please see this [README.md](../../../egs/visualization/SingVisio/README.md) file.
4
+
5
+ ### Tech Stack
6
+
7
+ - [Tailwind CSS](https://tailwindcss.com/)
8
+ - [Flowbite](https://flowbite.com/)
9
+ - [D3.js](https://d3js.org/)
10
+ - [Driver.js](https://driverjs.com/)
11
+
12
+ ### Structure
13
+
14
+ - `index.html`: The entry point file.
15
+ - `config`: Contains JSON configuration files loaded by `index.html`.
16
+ - `img`: Image files.
17
+ - `resources`: Contains CSS styles and JavaScript files.
18
+ - `init.js`: Loads the configuration and initializes variables.
19
+ - `function.js`: Houses the functions used in this project.
20
+ - `event.js`: Binds webpage mouse and keyboard events to functions.
21
+ - `Dockerfile`: For building a Docker image if deployment is needed.
22
+
23
+ ### Configuration
24
+
25
+ Before installation, you need to configure the data path in the `config/default.json` file.
26
+
27
+ Note that the shipped configuration pertains to our pre-processed data. If you want to visualize your own data, follow the guide below to set up the system properly.
28
+
29
+ 1. **Update the Data Configuration** in the `config/default.json` file.
30
+
31
+ SingVisio reads the configuration from this JSON file and renders the webpage accordingly. Be aware that any error in the JSON file may prevent the system from loading (a validation sketch follows this section).
32
+
33
+ ```json
34
+ {
35
+ "pathData": {
36
+ "<mode_name>": { // supports multiple modes
37
+ "users": ["basic", "advanced"], // mode choice: "basic" or "advanced"
38
+ "multi": ["<id>"], // song_id, sourcesinger_id, or target_id. Set to false to disable. Enables multiple choices for the configured checkbox.
39
+ "curve": true, // set to true if the metric curve is needed
40
+ "referenceMap": { // configures reference paths when multiple choices are enabled.
41
+ "<sourcesinger_id>": [ // e.g., m4singer_Tenor-6
42
+ "<path_to_wav>", // e.g., Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0002
43
+ ]
44
+ },
45
+ "data": [
46
+ { // supports multiple datasets
47
+ "dataset": "<dataset_name>",
48
+ "basePath": "<path_to_the_processed_data>",
49
+ "pathMap": {
50
+ "<sourcesinger_id>": {
51
+ "songs": [
52
+ "<song_id>" // set song ID; supports multiple IDs
53
+ ],
54
+ "targets": [
55
+ "<target_id>" // set target singer ID; supports multiple IDs
56
+ ]
57
+ }
58
+ }
59
+ }
60
+ ]
61
+ }
62
+ },
63
+ "mapToName": {
64
+ "<map_from>": "<map_to>"
65
+ },
66
+ "mapToSong": {
67
+ "<map_from>": "<map_to>"
68
+ },
69
+ "mapToSpace": {
70
+ "<map_from>": "<map_to>"
71
+ },
72
+ "picTypes": [
73
+ "<pic_type>" // supports multiple types
74
+ ],
75
+ "evaluation_data": [
76
+ { // supports multiple data sets
77
+ "target": "<target_id>",
78
+ "sourcesinger": "<sourcesinger_id>",
79
+ "song": "<song_id>",
80
+ "best": [
81
+ "<best_metric>" // activated when clicking the respective metric
82
+ ]
83
+ },
84
+ ],
85
+ "colorList": [
86
+ "<color_hex_code>" // supports multiple colors
87
+ ],
88
+ "histogramData": [
89
+ { // displayed in the top left graph
90
+ "type": "high", // "high" or "low"; "high" means the higher, the better
91
+ "name": "<metric_name>",
92
+ "value": <metric_value>
93
+ }
94
+ ]
95
+ }
96
+ ```
97
+
98
+ 2. **Change the Data Source Path**
99
+
100
+ The total size of our pre-processed data is approximately 60-70 GB. We host it on an online server; the server path (`baseLink`) can be modified on line 15 of the `index.html` file.
101
+
102
+ If you prefer to host the data on your local computer, you can set the `baseLink` value to an empty string as shown below. This will direct the server to read data from your local `data` folder.
103
+
104
+ ```html
105
+ <script>
106
+ const baseLink = ''; // do not end with '/'
107
+ </script>
108
+ ```
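Two practical notes on the configuration above. First, the annotated block in step 1 is a schema illustration; real JSON must not contain `//` comments, so strip them before saving. Since a malformed file can prevent the page from loading, a quick structural check before deploying can help; a minimal sketch (the key names mirror the shipped `config/default.json`):

```python
# Structural sanity check for config/default.json (illustrative, not shipped code).
import json

with open("config/default.json", encoding="utf-8") as f:
    cfg = json.load(f)  # raises json.JSONDecodeError on any syntax error

for key in ("pathData", "picTypes", "colorList", "histogramData"):
    assert key in cfg, f"missing top-level key: {key}"

for mode, spec in cfg["pathData"].items():
    for entry in spec.get("data", []):
        assert "basePath" in entry and "pathMap" in entry, f"incomplete entry in {mode}"

print("config/default.json looks structurally OK")
```

Second, when `baseLink` is empty, the server resolves audio and image requests against the local `data` folder, so its layout must match the `basePath` values in the config (e.g., `data/gd_svcc` in the shipped `config/default.json`).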
109
+
110
+ ### Installation
111
+
112
+ This project does not require a build process. There are multiple ways to run it, but here we introduce the simplest method:
113
+
114
+ 1. Install Python 3.10 and the required packages:
115
+ ```bash
116
+ pip install numpy scikit-learn flask flask_cors gunicorn
117
+ ```
118
+
119
+ 2. Run the following commands to start the HTTP server:
120
+
121
+ ```bash
122
+ cd webpage
123
+ gunicorn -w 8 -b 0.0.0.0:8080 server:app
124
+ ```
125
+
126
+ 3. After starting the HTTP web server, open the following link in your browser: [http://localhost:8080/](http://localhost:8080/)
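Alternatively, the provided `Dockerfile` packages the same server. A sketch, assuming your pre-processed data lives in `webpage/data` (the commands mirror the comments at the end of the Dockerfile; note the container listens on port 8000 rather than 8080):

```bash
cd webpage
docker build -t singvisio .
docker run -v $(pwd)/data:/app/data -p 8000:8000 singvisio
# then open http://localhost:8000/ in your browser
```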
visualization/SingVisio/webpage/config/default.json ADDED
@@ -0,0 +1,407 @@
1
+ {
2
+ "pathData": {
3
+ "Step Comparison": {
4
+ "users": ["basic", "advanced"],
5
+ "multi": false,
6
+ "data": [
7
+ {
8
+ "dataset": "SVCC",
9
+ "basePath": "data/gd_svcc",
10
+ "pathMap": {
11
+ "SF1": {
12
+ "songs": [
13
+ "30001",
14
+ "30002",
15
+ "30003"
16
+ ],
17
+ "targets": [
18
+ "svcc_IDF1",
19
+ "svcc_IDM1",
20
+ "svcc_CDF1",
21
+ "svcc_CDM1"
22
+ ]
23
+ },
24
+ "SM1": {
25
+ "songs": [
26
+ "30001",
27
+ "30002",
28
+ "30003"
29
+ ],
30
+ "targets": [
31
+ "svcc_IDF1",
32
+ "svcc_IDM1",
33
+ "svcc_CDF1",
34
+ "svcc_CDM1"
35
+ ]
36
+ }
37
+ }
38
+ },
39
+ {
40
+ "dataset": "M4Singer",
41
+ "basePath": "data/gd_m4sg",
42
+ "pathMap": {
43
+ "Alto-1": {
44
+ "songs": [
45
+ "ηΎŽι”™_0014"
46
+ ],
47
+ "targets": [
48
+ "opencpop"
49
+ ]
50
+ },
51
+ "Bass-1": {
52
+ "songs": [
53
+ "十年_0008"
54
+ ],
55
+ "targets": [
56
+ "opencpop"
57
+ ]
58
+ },
59
+ "Soprano-2": {
60
+ "songs": [
61
+ "εŒζ‘Œηš„δ½ _0018"
62
+ ],
63
+ "targets": [
64
+ "opencpop"
65
+ ]
66
+ },
67
+ "Tenor-5": {
68
+ "songs": [
69
+ "ηˆ±η¬‘ηš„ηœΌη›_0010"
70
+ ],
71
+ "targets": [
72
+ "opencpop"
73
+ ]
74
+ }
75
+ }
76
+ }
77
+ ]
78
+ },
79
+ "Metric Comparison": {
80
+ "users": ["basic", "advanced"],
81
+ "multi": false,
82
+ "curve": true,
83
+ "data": [
84
+ {
85
+ "dataset": "SVCC",
86
+ "basePath": "data/ev_best",
87
+ "pathMap": {
88
+ "SM1": {
89
+ "songs": [
90
+ "30009"
91
+ ],
92
+ "targets": [
93
+ "svcc_IDM1"
94
+ ]
95
+ },
96
+ "SF1": {
97
+ "songs": [
98
+ "30005",
99
+ "30006",
100
+ "30009",
101
+ "30016",
102
+ "30022",
103
+ "30019"
104
+ ],
105
+ "targets": [
106
+ "svcc_IDF1"
107
+ ]
108
+ }
109
+ }
110
+ }
111
+ ]
112
+ },
113
+ "Source Singer Comparison": {
114
+ "users": ["advanced"],
115
+ "multi": [
116
+ "sourcesinger_id"
117
+ ],
118
+ "referenceMap": {
119
+ "m4singer_Alto-7": [
120
+ "Alto-7_ε―‚ε―žζ²™ζ΄²ε†·_0000",
121
+ "Alto-7_ε―‚ε―žζ²™ζ΄²ε†·_0011"
122
+ ],
123
+ "m4singer_Bass-1": [
124
+ "Bass-1_ε―‚ε―žζ²™ζ΄²ε†·_0002",
125
+ "Bass-1_ε―‚ε―žζ²™ζ΄²ε†·_0021"
126
+ ],
127
+ "m4singer_Tenor-6": [
128
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0002",
129
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0020"
130
+ ],
131
+ "m4singer_Tenor-7": [
132
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0002",
133
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0013",
134
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0023"
135
+ ]
136
+ },
137
+ "indexMode": "number",
138
+ "data": [
139
+ {
140
+ "dataset": "M4Singer",
141
+ "basePath": "data/dc_dss",
142
+ "pathMap": {
143
+ "Alto-7": {
144
+ "songs": [
145
+ "ε―‚ε―žζ²™ζ΄²ε†·_0000",
146
+ "ε―‚ε―žζ²™ζ΄²ε†·_0011"
147
+ ],
148
+ "targets": [
149
+ "m4singer_Tenor-7",
150
+ "m4singer_Alto-7"
151
+ ]
152
+ },
153
+ "Bass-1": {
154
+ "songs": [
155
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002",
156
+ "ε―‚ε―žζ²™ζ΄²ε†·_0021"
157
+ ],
158
+ "targets": [
159
+ "m4singer_Tenor-7",
160
+ "m4singer_Bass-1"
161
+ ]
162
+ },
163
+ "Tenor-6": {
164
+ "songs": [
165
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002",
166
+ "ε―‚ε―žζ²™ζ΄²ε†·_0020"
167
+ ],
168
+ "targets": [
169
+ "m4singer_Tenor-7",
170
+ "m4singer_Tenor-6"
171
+ ]
172
+ },
173
+ "Tenor-7": {
174
+ "songs": [
175
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002",
176
+ "ε―‚ε―žζ²™ζ΄²ε†·_0013"
177
+ ],
178
+ "targets": [
179
+ "m4singer_Alto-7",
180
+ "m4singer_Bass-1",
181
+ "m4singer_Tenor-6"
182
+ ]
183
+ }
184
+ }
185
+ }
186
+ ]
187
+ },
188
+ "Song Comparison": {
189
+ "users": ["advanced"],
190
+ "multi": [
191
+ "song_id"
192
+ ],
193
+ "referenceMap": {
194
+ "m4singer_Tenor-6": [
195
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0002",
196
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0020"
197
+ ],
198
+ "m4singer_Tenor-7": [
199
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0002",
200
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0013"
201
+ ]
202
+ },
203
+ "data": [
204
+ {
205
+ "dataset": "M4Singer",
206
+ "basePath": "data/dc_dss",
207
+ "pathMap": {
208
+ "Tenor-6": {
209
+ "songs": [
210
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002",
211
+ "ε―‚ε―žζ²™ζ΄²ε†·_0020"
212
+ ],
213
+ "targets": [
214
+ "m4singer_Tenor-7",
215
+ "m4singer_Tenor-6"
216
+ ]
217
+ }
218
+ }
219
+ }
220
+ ]
221
+ },
222
+ "Target Singer Comparison": {
223
+ "users": ["advanced"],
224
+ "multi": [
225
+ "song_id",
226
+ "target_id"
227
+ ],
228
+ "referenceMap": {
229
+ "m4singer_Alto-7": [
230
+ "Alto-7_ε―‚ε―žζ²™ζ΄²ε†·_0000",
231
+ "Alto-7_ε―‚ε―žζ²™ζ΄²ε†·_0011"
232
+ ],
233
+ "m4singer_Bass-1": [
234
+ "Bass-1_ε―‚ε―žζ²™ζ΄²ε†·_0002",
235
+ "Bass-1_ε―‚ε―žζ²™ζ΄²ε†·_0021"
236
+ ],
237
+ "m4singer_Tenor-7": [
238
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0002",
239
+ "Tenor-7_ε―‚ε―žζ²™ζ΄²ε†·_0013"
240
+ ],
241
+ "m4singer_Tenor-6": [
242
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0002",
243
+ "Tenor-6_ε―‚ε―žζ²™ζ΄²ε†·_0020"
244
+ ]
245
+ },
246
+ "data": [
247
+ {
248
+ "dataset": "M4Singer",
249
+ "basePath": "data/dc_ssd",
250
+ "pathMap": {
251
+ "Tenor-6": {
252
+ "songs": [
253
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002",
254
+ "ε―‚ε―žζ²™ζ΄²ε†·_0020"
255
+ ],
256
+ "targets": [
257
+ "m4singer_Alto-7",
258
+ "m4singer_Bass-1",
259
+ "m4singer_Tenor-7",
260
+ "m4singer_Tenor-6"
261
+ ]
262
+ }
263
+ }
264
+ }
265
+ ]
266
+ }
267
+ },
268
+ "mapToName": {
269
+ "SF1": "Singer 1",
270
+ "SM1": "Singer 2",
271
+ "CDF1": "Singer 3",
272
+ "CDM1": "Singer 4",
273
+ "IDF1": "Singer 5",
274
+ "IDM1": "Singer 6",
275
+ "svcc_CDF1": "Singer 3",
276
+ "svcc_CDM1": "Singer 4",
277
+ "svcc_IDF1": "Singer 5",
278
+ "svcc_IDM1": "Singer 6",
279
+ "Alto-1": "Singer 7",
280
+ "m4singer_Alto-1": "Singer 7",
281
+ "Alto-7": "Singer 8",
282
+ "m4singer_Alto-7": "Singer 8",
283
+ "Bass-1": "Singer 9",
284
+ "m4singer_Bass-1": "Singer 9",
285
+ "Soprano-2": "Singer 10",
286
+ "m4singer_Soprano-2": "Singer 10",
287
+ "Tenor-5": "Singer 11",
288
+ "m4singer_Tenor-5": "Singer 11",
289
+ "Tenor-6": "Singer 12",
290
+ "m4singer_Tenor-6": "Singer 12",
291
+ "Tenor-7": "Singer 13",
292
+ "m4singer_Tenor-7": "Singer 13",
293
+ "opencpop": "Singer 14"
294
+ },
295
+ "mapToSong": {
296
+ "30001": "Song 1",
297
+ "30002": "Song 2",
298
+ "30003": "Song 3",
299
+ "10001": "Song 4",
300
+ "10030": "Song 5",
301
+ "10120": "Song 6",
302
+ "10140": "Song 7",
303
+ "ηΎŽι”™_0014": "Song 8",
304
+ "十年_0008": "Song 9",
305
+ "εŒζ‘Œηš„δ½ _0018": "Song 10",
306
+ "ηˆ±η¬‘ηš„ηœΌη›_0010": "Song 11",
307
+ "ε―‚ε―žζ²™ζ΄²ε†·_0000": "Song 12",
308
+ "ε―‚ε―žζ²™ζ΄²ε†·_0002": "Song 12",
309
+ "ε―‚ε―žζ²™ζ΄²ε†·_0011": "Song 13",
310
+ "ε―‚ε―žζ²™ζ΄²ε†·_0013": "Song 13",
311
+ "ε―‚ε―žζ²™ζ΄²ε†·_0020": "Song 13",
312
+ "ε―‚ε―žζ²™ζ΄²ε†·_0021": "Song 14",
313
+ "30005": "Song 15",
314
+ "30006": "Song 16",
315
+ "30009": "Song 17",
316
+ "30016": "Song 18",
317
+ "30022": "Song 19",
318
+ "30019": "Song 20"
319
+ },
320
+ "mapToSpace": {
321
+ "encoded_step": "Step (Diffusion step)",
322
+ "noise_step_layer0": "Step + Noise (First layer)",
323
+ "noise_step_layer10": "Step + Noise (Middle layer)",
324
+ "noise_step_layer19": "Step + Noise (Last layer)",
325
+ "noise_step_condition_layer0": "Step + Noise + Condition (First layer)",
326
+ "noise_step_condition_layer10": "Step + Noise + Condition (Middle layer)",
327
+ "noise_step_condition_layer19": "Step + Noise + Condition (Last layer)"
328
+ },
329
+ "picTypes": [
330
+ "encoded_step",
331
+ "noise_step_layer0",
332
+ "noise_step_layer10",
333
+ "noise_step_layer19",
334
+ "noise_step_condition_layer0",
335
+ "noise_step_condition_layer10",
336
+ "noise_step_condition_layer19"
337
+ ],
338
+ "evaluation_data": [
339
+ {
340
+ "target": "svcc_IDM1",
341
+ "sourcesinger": "SM1",
342
+ "song": "30009",
343
+ "best": [
344
+ "MCD"
345
+ ]
346
+ },
347
+ {
348
+ "target": "svcc_IDF1",
349
+ "sourcesinger": "SF1",
350
+ "song": "30016",
351
+ "best": [
352
+ "F0CORR",
353
+ "FAD"
354
+ ]
355
+ },
356
+ {
357
+ "target": "svcc_IDF1",
358
+ "sourcesinger": "SF1",
359
+ "song": "30009",
360
+ "best": [
361
+ "F0RMSE",
362
+ "CER"
363
+ ]
364
+ },
365
+ {
366
+ "target": "svcc_IDF1",
367
+ "sourcesinger": "SF1",
368
+ "song": "30019",
369
+ "best": [
370
+ "Dembed"
371
+ ]
372
+ }
373
+ ],
374
+ "colorList": [
375
+ "#FFA500",
376
+ "#1C64F2",
377
+ "#7E3AF2",
378
+ "#9F580A"
379
+ ],
380
+ "histogramData": [
381
+ {
382
+ "type": "high",
383
+ "name": "F0CORR",
384
+ "value": 0.946698913
385
+ },
386
+ {
387
+ "type": "high",
388
+ "name": "Dembed",
389
+ "value": 0.688410708
390
+ },
391
+ {
392
+ "type": "low",
393
+ "name": "MCD",
394
+ "value": 11.44773471
395
+ },
396
+ {
397
+ "type": "low",
398
+ "name": "F0RMSE",
399
+ "value": 70.81400428
400
+ },
401
+ {
402
+ "type": "low",
403
+ "name": "FAD",
404
+ "value": 10.35121372
405
+ }
406
+ ]
407
+ }
visualization/SingVisio/webpage/img/difference_bar.jpg ADDED
visualization/SingVisio/webpage/img/syllable.png ADDED
visualization/SingVisio/webpage/index.html ADDED
@@ -0,0 +1,390 @@
1
+ <!--
2
+ Copyright (c) 2023 Amphion.
3
+ This source code is licensed under the MIT license found in the
4
+ LICENSE file in the root directory of this source tree.
5
+ -->
6
+
7
+ <!DOCTYPE html>
8
+ <html>
9
+
10
+ <head>
11
+ <meta charset="utf-8">
12
+ <meta name="viewport" content="width=1200px, initial-scale=1.0">
13
+ <title>SingVisio: Visual Analytics of Diffusion Model for Singing Voice Conversion</title>
14
+ <script>
15
+ const baseLink = 'https://dsvc.openmmlab.org.cn'; // do not end with '/'
16
+ </script>
17
+ <!-- Load Tailwind CSS and D3.js -->
18
+ <script src="./resources/tailwind.js"></script>
19
+ <script src="./resources/d3.v4.min.js"></script>
20
+ <script src="./resources/htl.min.js"></script>
21
+ <script src="./resources/d3-scale-chromatic.v1.min.js"></script>
22
+ <script src="./resources/d3-contour.v1.min.js"></script>
23
+ <!-- Load the Guide driver -->
24
+ <script src="./resources/driver.js.iife.min.js"></script>
25
+ <link rel="stylesheet" href="./resources/driver.min.css">
26
+ <!-- Config Tailwind CSS -->
27
+ <script type="module">
28
+ import cfg from "./tailwind.config.js";
29
+ tailwind.config = cfg;
30
+ </script>
31
+ <style type="text/tailwindcss">
32
+ @layer components {
33
+ .btn-small {
34
+ @apply px-3 py-2 text-xs font-medium text-center text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-200 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700
35
+ }
36
+ .btn {
37
+ @apply text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 mr-2 mb-2 dark:bg-blue-600 dark:hover:bg-blue-700 focus:outline-none dark:focus:ring-blue-800;
38
+ }
39
+ .btn-sec {
40
+ @apply py-2.5 px-5 mr-2 mb-2 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-200 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700
41
+ }
42
+ .select-select {
43
+ @apply my-0 py-2 px-1 bg-gray-50 border border-gray-300 text-gray-900 text-xs rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500
44
+ }
45
+ .select-label {
46
+ @apply block text-sm font-medium text-gray-900 dark:text-white;
47
+ }
48
+ .card {
49
+ @apply p-6 bg-white border border-gray-200 rounded-lg dark:bg-gray-800 dark:border-gray-700
50
+ }
51
+ .card-title {
52
+ @apply mb-2 text-base font-bold tracking-tight text-gray-900 dark:text-white
53
+ }
54
+ .timeline-point {
55
+ @apply absolute w-3 h-3 bg-gray-200 rounded-full mt-1.5 -left-1.5 border border-white dark:border-gray-900 dark:bg-gray-700
56
+ }
57
+ .timeline-title {
58
+ @apply text-lg font-semibold text-gray-900 dark:text-white
59
+ }
60
+ .timeline-subtitle {
61
+ @apply text-base font-normal text-gray-500 dark:text-gray-400
62
+ }
63
+ .small-input {
64
+ @apply block w-full p-2 text-gray-900 border border-gray-300 rounded-lg bg-gray-50 sm:text-xs focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500;
65
+ }
66
+ .checkbox {
67
+ @apply w-4 h-4 text-blue-600 bg-gray-100 border-gray-300 rounded focus:ring-blue-500 dark:focus:ring-blue-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600
68
+ }
69
+ .dropdown_button_text {
70
+ @apply w-full text-xs font-normal text-gray-900 dark:text-white text-left
71
+ }
72
+ .dropdown_button {
73
+ @apply btn-sec text-xs flex items-center w-full px-2 py-2 my-0 disabled:cursor-not-allowed disabled:opacity-50
74
+ }
75
+ }
76
+ </style>
77
+ <style>
78
+ input.step-axis {
79
+ outline: none;
80
+ -webkit-appearance: none;
81
+ background: #0000002b;
82
+ height: 8px;
83
+ }
84
+
85
+ input.step-axis::-webkit-slider-thumb {
86
+ -webkit-appearance: none;
87
+ position: relative;
88
+ width: 18px;
89
+ height: 18px;
90
+ background: url("./img/syllable.png") no-repeat;
91
+ background-size: 18px;
92
+ border-radius: 50%;
93
+ cursor: pointer;
94
+ }
95
+
96
+ audio {
97
+ outline: none;
98
+ height: 34px;
99
+ }
100
+
101
+ /* make a input with two handles */
102
+ .inputs {
103
+ display: block;
104
+ width: 100%;
105
+ height: 10px;
106
+ /* background-color: azure; */
107
+ }
108
+
109
+ .inputs input {
110
+ position: absolute;
111
+ }
112
+
113
+ .inputs input::-webkit-slider-thumb {
114
+ pointer-events: all;
115
+ z-index: 2;
116
+ }
117
+
118
+ .inputs input::-webkit-slider-runnable-track {
119
+ pointer-events: none;
120
+ z-index: 1;
121
+ }
122
+ </style>
123
+ </head>
124
+
125
+ <body class="bg-gray-100 dark:bg-gray-900">
126
+ <div id="alert"
127
+ class="hidden fixed top-0 right-0 left-0 z-50 w-full h-[100vh] bg-black bg-opacity-50 justify-center items-center overflow-y-hidden">
128
+ <div class="card flex flex-col min-w-[400px] max-w-2xl max-h-[80vh] p-0 overflow-hidden">
129
+ <!-- Modal header -->
130
+ <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
131
+ <h3 id="alert_title" class="text-xl font-semibold text-gray-900 dark:text-white">
132
+ Title
133
+ </h3>
134
+ <button id="close_alert" type="button"
135
+ class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white">
136
+ <svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none"
137
+ viewBox="0 0 14 14">
138
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
139
+ d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6" />
140
+ </svg>
141
+ <span class="sr-only">Close modal</span>
142
+ </button>
143
+ </div>
144
+ <!-- Modal body -->
145
+ <div id="alert_text"
146
+ class="p-4 md:p-5 space-y-4 text-base leading-relaxed text-gray-700 dark:text-gray-200 overflow-y-auto">
147
+ Text
148
+ </div>
149
+ <!-- Modal footer -->
150
+ <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
151
+ <button id="finish_alert" type="button"
152
+ class="ml-auto text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800">OK</button>
153
+ </div>
154
+ </div>
155
+ </div>
156
+ <div class="bg-white dark:bg-gray-800 w-full py-4 px-6 border border-b border-gray-200 dark:border-gray-600">
157
+ <div class="mx-auto max-w-[1490px] grid grid-cols-6 align-center items-center">
158
+ <!-- <img class="dark:hidden" src="img/cuhksz_logo.png" alt="cuhksz logo" class="h-[40px]">
159
+ <img class="hidden dark:block" src="img/cuhksz_logo_white.png" alt="cuhksz logo" class="h-[40px]"> -->
160
+ <span class="col-span-1"></span>
161
+ <span id="title" class="col-span-4 mx-auto font-[800] text-[20px] dark:text-white">SingVisio: Visual
162
+ Analytics of Diffusion Model for Singing Voice Conversion</span>
163
+ <!-- <span class="ml-auto mr-0 text-sm dark:text-white">Team: <i>Human Language Technology Lab,
164
+ CUHK-Shenzhen</i></span> -->
165
+ <div class="flex">
166
+ <button class="btn-small ml-auto" id="mode_change">Switch to _</button>
167
+ <button class="btn-small ml-2" id="help">Help?</button>
168
+ </div>
169
+ </div>
170
+ </div>
171
+ <div class="max-w-[1500px] m-auto">
172
+ <div class="flex flex-row items-start gap-0.5 py-3 p-1">
173
+ <div class="w-[300px] flex flex-col flex-none">
174
+ <div id="performance" class="card p-2 mb-2 flex flex-col flex-none relative">
175
+ <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
176
+ id="metrics_help">?</button>
177
+ <div class="flex flex-row">
178
+ <div id="histogram" class="flex-none"></div>
179
+ <div id="histogram2" class="flex-none"></div>
180
+ </div>
181
+ <span class="text-[12px] mx-auto dark:text-white">Metrics</span>
182
+ </div>
183
+
184
+ <div id="touch_map" class="card p-2 relative">
185
+ <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
186
+ id="projection_help">?</button>
187
+ <div class="flex mb-1 align-center items-center space-between dark:text-white">
188
+ <div class="ml-1 text-sm">Step: <span id="current_step_display_number"></span></div>
189
+ <div class="ml-auto flex mr-2">
190
+ <button class="btn-sec h-9 w-9 p-2.5 mb-0" id="reset_map">
191
+ <svg class="w-3.5 h-3.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
192
+ fill="none" viewBox="0 0 18 20">
193
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
194
+ stroke-width="2"
195
+ d="M16 1v5h-5M2 19v-5h5m10-4a8 8 0 0 1-14.947 3.97M1 10a8 8 0 0 1 14.947-3.97" />
196
+ </svg>
197
+ </button>
198
+ </div>
199
+ </div>
200
+ <div id="dataviz_axisZoom" class="flex flex-wrap border bg-white dark:bg-gray-800 relative"></div>
201
+ </div>
202
+ </div>
203
+ <div class="w-full">
204
+ <div id="step_preview" class="flex min-w-[500px] w-full bg-white dark:bg-gray-800 p-2 card mb-2">
205
+ <div class="mx-auto" id="preview_container">
206
+ </div>
207
+ <div class="mx-auto" id="preview_container2">
208
+ </div>
209
+ <div class="flex flex-col">
210
+ <button class="btn-sec" id="refreshpreview">
211
+ <svg class="w-4 h-4" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none"
212
+ viewBox="0 0 18 20">
213
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
214
+ stroke-width="2"
215
+ d="M16 1v5h-5M2 19v-5h5m10-4a8 8 0 0 1-14.947 3.97M1 10a8 8 0 0 1 14.947-3.97" />
216
+ </svg>
217
+ </button>
218
+ </div>
219
+ </div>
220
+ <div id="mel_card_container" class="grid grid-cols-3 min-w-[915px] w-full gap-1 justify-items-center">
221
+ </div>
222
+ <div id="tips">
223
+ </div>
224
+ <div id="tooltip" role="tooltip"
225
+ class="invisible absolute z-10 inline-block px-3 py-2 text-sm font-medium text-white bg-gray-900 rounded-lg shadow-sm opacity-[0.9] dark:bg-gray-700">
226
+ Tooltip content
227
+ </div>
228
+ </div>
229
+ <div class="shrink-0 w-[180px]">
230
+
231
+ <div class="card py-2 px-3 relative">
232
+ <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
233
+ id="control_help">?</button>
234
+ <div class="flex items-center">
235
+ <h5 class="card-title my-1 text-lg">Control Panel</h5>
236
+ </div>
237
+ <div class="flex flex-col w-full rounded-lg gap-0.5" id="control_panel">
238
+ <div>
239
+ <label for="mode_id" class="select-label">Display Mode</label>
240
+ <select id="mode_id" class="select-select"></select>
241
+ </div>
242
+ <div>
243
+ <label for="sourcesinger_id" class="select-label">Source Singer</label>
244
+ <button id="sourcesinger_id" class="dropdown_button" type="button">
245
+ <span class="dropdown_button_text" id="sourcesinger_id_text">Choose Singer</span> <svg
246
+ class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
247
+ fill="none" viewBox="0 0 10 6">
248
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
249
+ stroke-width="2" d="m1 1 4 4 4-4" />
250
+ </svg>
251
+ </button>
252
+ <!-- Dropdown menu -->
253
+ <div id="sourcesinger_id_dropdown"
254
+ class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
255
+ <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
256
+ </ul>
257
+ </div>
258
+ </div>
259
+ <div>
260
+ <label for="song_id" class="select-label">Song</label>
261
+ <button id="song_id" class="dropdown_button" type="button">
262
+ <span class="dropdown_button_text" id="song_id_text">Choose Song</span> <svg
263
+ class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
264
+ fill="none" viewBox="0 0 10 6">
265
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
266
+ stroke-width="2" d="m1 1 4 4 4-4" />
267
+ </svg>
268
+ </button>
269
+ <!-- Dropdown menu -->
270
+ <div id="song_id_dropdown"
271
+ class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
272
+ <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
273
+ </ul>
274
+ </div>
275
+ </div>
276
+ <div>
277
+ <label for="target_id" class="select-label">Target Singer</label>
278
+ <button id="target_id" class="dropdown_button" type="button">
279
+ <span class="dropdown_button_text" id="target_id_text">Target Singer</span> <svg
280
+ class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
281
+ fill="none" viewBox="0 0 10 6">
282
+ <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
283
+ stroke-width="2" d="m1 1 4 4 4-4" />
284
+ </svg>
285
+ </button>
286
+ <!-- Dropdown menu -->
287
+ <div id="target_id_dropdown"
288
+ class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
289
+ <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
290
+ </ul>
291
+ </div>
292
+ </div>
293
+ <div class="relative">
294
+ <label for="pic_id" class="select-label">Projection Embedding</label>
295
+ <select id="pic_id" class="select-select"></select>
296
+ </div>
297
+ <div class="relative" id="components">
298
+ <label for="components" class="select-label">Components</label>
299
+ <div class="flex flex-col gap-0.5">
300
+ <div class="flex items-center">
301
+ <input id="components_pitch" type="checkbox" checked class="checkbox">
302
+ <label for="components_pitch"
303
+ class="ml-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">F0
304
+ contour</label>
305
+ </div>
306
+ <div class="flex items-start">
307
+ <input id="components_frequncy" type="checkbox" checked class="checkbox">
308
+ <div class="flex flex-col gap-0.5 grow">
309
+ <label for="components_frequncy"
310
+ class="ml-1 mb-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">Frequency</label>
311
+ <div class="flex inputs w-full">
312
+ <input id="inputs_min" type="range"
313
+ class="h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
314
+ value="0" min="0">
315
+ <input id="inputs_max" type="range"
316
+ class="h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
317
+ value="100" max="100">
318
+ </div>
319
+ <div class="flex w-full">
320
+ <span id="inputs_left" class="ml-1 mr-auto text-[0.7rem] font-normal text-gray-900 dark:text-white">0</span>
321
+ <span id="inputs_right" class="mr-2 ml-auto text-[0.7rem] font-normal text-gray-900 dark:text-white">100</span>
322
+ </div>
323
+ </div>
324
+ </div>
325
+
326
+ <div class="flex items-start">
327
+ <input id="sampling_steps" type="checkbox" class="checkbox">
328
+ <div class="flex flex-col grow">
329
+ <label for="sampling_steps"
330
+ class="ml-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">Sampling
331
+ steps</label>
332
+ <div class="flex flex-row h-[32px]">
333
+ <span class="my-auto mx-1 text-[0.775rem] font-normal text-gray-900 dark:text-white">Step count:</span>
334
+ <input type="text"
335
+ class="small-input flex-none w-[50px] text-center bg-white dark:bg-gray-800"
336
+ id="sampling_num" value="100">
337
+ </div>
338
+
339
+ </div>
340
+ </div>
341
+ </div>
342
+ </div>
343
+
344
+
345
+ <div id="step_axis">
346
+ <label for="range" class="select-label">Step Axis</label>
347
+
348
+ <div
349
+ class="items-center w-full rounded-lg bg-gray-50 flex flex-row gap-2 px-2 py-0.5 border border-gray-300 dark:border-gray-600 dark:text-white dark:bg-gray-700">
350
+
351
+ <input class="step-axis my-2 w-full" id="range" type="range" min="0" max="999" value="0"
352
+ step="1">
353
+ <button class="btn-small" id="controls">
354
+ <svg id="icon_play" style="display: none" class="w-3 h-3" aria-hidden="true"
355
+ xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 14 16">
356
+ <path
357
+ d="M0 .984v14.032a1 1 0 0 0 1.506.845l12.006-7.016a.974.974 0 0 0 0-1.69L1.506.139A1 1 0 0 0 0 .984Z" />
358
+ </svg>
359
+ <svg id="icon_stop" class="w-3 h-3" aria-hidden="true"
360
+ xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 12 16">
361
+ <path
362
+ d="M3 0H2a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2Zm7 0H9a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2Z" />
363
+ </svg>
364
+ </button>
365
+
366
+ </div>
367
+ <div class="flex gap-1 mt-2">
368
+ <span class="my-auto mr-1 text-sm font-medium text-gray-900 dark:text-white">Step:</span>
369
+ <input type="text"
370
+ class="small-input flex-none w-[60px] text-center bg-white dark:bg-gray-800" id="value">
371
+ <button class="btn-small" id="add_preview">
372
+ Pin
373
+ </button>
374
+ </div>
375
+ </div>
376
+
377
+ </div>
378
+ </div>
379
+ </div>
380
+ </div>
381
+
382
+ </div>
383
+
384
+ <script src="./resources/init.js"></script>
385
+ <script src="./resources/function.js"></script>
386
+ <script src="./resources/event.js"></script>
387
+ <script>
388
+ initConfig('./config/default.json')
389
+ </script>
390
+ </body>