saeki committed on
Commit
699157c
1 Parent(s): a6efb81
configs/test/melspec/audio_effect_transfer.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ preprocessed_path: "./preprocessed/audio_effect_transfer"
3
+ output_path: "./output/melspec/audio_effect_transfer"
4
+ feature_type: "melspec"
5
+ source:
6
+ dataset_path: "./data/tono"
7
+ config_path: "./configs/test/melspec/ssl_tono.yaml"
8
+ ckpt_path: "./ckpts_tono/tono_melspec_multi_nopre_0217.ckpt"
9
+ target:
10
+ dataset_path: "./data/jvs_22k-low"
11
+ config_path: "./configs/test/melspec/pretrain_jvs.yaml"
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ sampling_rate: 22050
16
+ segment_length: -1
17
+ frame_shift: 256
18
+
19
+ model: null
20
+
21
+ train:
22
+ epoch: 100
23
+ batchsize: 8
24
+ multi_gpu_mode: False
25
+ num_workers: 4
26
+ learning_rate: 0.001
27
+ grad_clip_thresh: 1.0
28
+ logger_step: 1000
configs/test/melspec/dual.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen"
4
+ source_path: "./data/jvs_22k"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/dual"
7
+ preprocess:
8
+ n_train: 90
9
+ n_val: 5
10
+ n_test: 5
11
+ sampling_rate: 22050
12
+ segment_length: 2
configs/test/melspec/pretrain_jvs.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jvs_22k-low"
5
+ aux_path: "./data/jvs_22k"
6
+ preprocessed_path: "./preprocessed/jvs"
7
+ output_path: "./output/melspec/pretrain"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 90
16
+ n_val: 5
17
+ n_test: 5
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ max_wav_value: 32768.0
28
+ segment_length: -1
29
+
30
+ train:
31
+ batchsize: 8
32
+ epoch: 50
33
+ alpha: 0.1
34
+ augment: True
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.005
38
+ grad_clip_thresh: 1.0
39
+ logger_step: 1000
40
+ load_pretrained: False
41
+ pretrained_path: null
42
+ early_stopping: False
43
+ multi_scale_loss:
44
+ use_linear: False
45
+ gamma: 1.0
46
+ feature_loss:
47
+ type: "mae"
configs/test/melspec/ssl_jsut.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jsut_22k-low"
5
+ aux_path: "./data/jsut_22k"
6
+ preprocessed_path: "./preprocessed/jsut-low"
7
+ output_path: "./output/melspec/jsut-low"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 4950
16
+ n_val: 25
17
+ n_test: 25
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ bitrate: "16k"
28
+ max_wav_value: 32768.0
29
+ segment_length: -1
30
+
31
+ train:
32
+ batchsize: 1
33
+ epoch: 50
34
+ epoch_channel: 25
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.001
38
+ alpha: 0.1
39
+ beta: 0.1
40
+ augment: False
41
+ grad_clip_thresh: 1.0
42
+ logger_step: 1000
43
+ load_pretrained: False
44
+ pretrained_path: null
45
+ fix_channel: False
46
+ early_stopping: False
47
+ multi_scale_loss:
48
+ use_linear: True
49
+ gamma: 1.0
50
+ feature_loss:
51
+ type: "mae"
52
+
53
+ dual:
54
+ enable: True
55
+ config_path: ./configs/test/melspec/dual.yaml
configs/test/melspec/ssl_tono.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/tono_22k"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/tono"
7
+ output_path: "./output/melspec/tono"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 270
16
+ n_val: 34
17
+ n_test: 30
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ bitrate: "16k"
28
+ max_wav_value: 32768.0
29
+ segment_length: -1
30
+
31
+ train:
32
+ batchsize: 4
33
+ epoch: 50
34
+ epoch_channel: 25
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.001
38
+ alpha: 0.1
39
+ beta: 0.1
40
+ grad_clip_thresh: 1.0
41
+ logger_step: 1000
42
+ load_pretrained: False
43
+ pretrained_path: null
44
+ fix_channel: False
45
+ early_stopping: False
46
+ multi_scale_loss:
47
+ use_linear: True
48
+ gamma: 1.0
49
+ feature_loss:
50
+ type: "mae"
51
+
52
+ dual:
53
+ enable: True
54
+ config_path: ./configs/test/melspec/dual.yaml
configs/test/vocfeats/audio_effect_transfer.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ preprocessed_path: "./preprocessed/audio_effect_transfer"
3
+ output_path: "./output/vocfeats/audio_effect_transfer"
4
+ feature_type: "vocfeats"
5
+ source:
6
+ dataset_path: "./data/tono"
7
+ config_path: "./configs/test/melspec/ssl_tono.yaml"
8
+ ckpt_path: "./ckpts_tono/tono_melspec_multi_nopre_0217.ckpt"
9
+ target:
10
+ dataset_path: "./data/jvs_22k-low"
11
+ config_path: "./configs/test/vocfeats/pretrain_jvs.yaml"
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ sampling_rate: 22050
16
+ segment_length: -1
17
+ frame_shift: 256
18
+
19
+ model: null
20
+
21
+ train:
22
+ epoch: 100
23
+ batchsize: 8
24
+ multi_gpu_mode: False
25
+ num_workers: 4
26
+ learning_rate: 0.001
27
+ grad_clip_thresh: 1.0
28
+ logger_step: 1000
configs/test/vocfeats/dual.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen"
4
+ source_path: "./data/jvs_22k"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/dual"
7
+ preprocess:
8
+ n_train: 90
9
+ n_val: 5
10
+ n_test: 5
11
+ sampling_rate: 22050
12
+ segment_length: 2
configs/test/vocfeats/pretrain_jvs.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jvs_22k-low"
5
+ aux_path: "./data/jvs_22k"
6
+ preprocessed_path: "./preprocessed/jvs"
7
+ output_path: "./output/vocfeats/pretrain"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 90
16
+ n_val: 5
17
+ n_test: 5
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ f0_extractor: "dio"
27
+ comp_factor: 1.0
28
+ min_magnitude: 0.00001
29
+ max_wav_value: 32768.0
30
+ segment_length: -1
31
+
32
+ train:
33
+ batchsize: 8
34
+ epoch: 50
35
+ alpha: 0.1
36
+ augment: True
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.005
40
+ grad_clip_thresh: 1.0
41
+ logger_step: 1000
42
+ load_pretrained: False
43
+ pretrained_path: null
44
+ early_stopping: False
45
+ multi_scale_loss:
46
+ use_linear: True
47
+ gamma: 1.0
48
+ feature_loss:
49
+ type: "mae"
configs/test/vocfeats/ssl_jsut.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jsut_22k-low"
5
+ aux_path: "./data/jsut_22k"
6
+ preprocessed_path: "./preprocessed/jsut-low"
7
+ output_path: "./output/vocfeats/jsut-low"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 4950
16
+ n_val: 25
17
+ n_test: 25
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ f0_extractor: "harvest"
27
+ comp_factor: 1.0
28
+ min_magnitude: 0.00001
29
+ bitrate: "16k"
30
+ max_wav_value: 32768.0
31
+ segment_length: -1
32
+
33
+ train:
34
+ batchsize: 1
35
+ epoch: 50
36
+ epoch_channel: 25
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.001
40
+ alpha: 0.1
41
+ beta: 0.1
42
+ augment: False
43
+ grad_clip_thresh: 1.0
44
+ logger_step: 1000
45
+ load_pretrained: False
46
+ pretrained_path: null
47
+ fix_channel: False
48
+ early_stopping: False
49
+ multi_scale_loss:
50
+ use_linear: True
51
+ gamma: 1.0
52
+ feature_loss:
53
+ type: "mae"
54
+
55
+ dual:
56
+ enable: True
57
+ config_path: ./configs/test/vocfeats/dual.yaml
configs/test/vocfeats/ssl_tono.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/tono"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/tono-denoise"
7
+ output_path: "./output/vocfeats/tono-denoise"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 270
16
+ n_val: 34
17
+ n_test: 30
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ comp_factor: 1.0
27
+ min_magnitude: 0.00001
28
+ bitrate: "16k"
29
+ f0_extractor: "harvest"
30
+ max_wav_value: 32768.0
31
+ segment_length: -1
32
+
33
+ train:
34
+ batchsize: 4
35
+ epoch: 50
36
+ epoch_channel: 25
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.001
40
+ alpha: 0.1
41
+ beta: 0.1
42
+ grad_clip_thresh: 1.0
43
+ logger_step: 1000
44
+ load_pretrained: False
45
+ pretrained_path: null
46
+ fix_channel: False
47
+ early_stopping: False
48
+ multi_scale_loss:
49
+ use_linear: True
50
+ gamma: 1.0
51
+ feature_loss:
52
+ type: "mae"
53
+
54
+ dual:
55
+ enable: True
56
+ config_path: ./configs/test/vocfeats/dual.yaml
configs/train/melspec/dual.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen"
4
+ source_path: "./data/jvs_22k"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/dual"
7
+ preprocess:
8
+ n_train: 90
9
+ n_val: 5
10
+ n_test: 5
11
+ sampling_rate: 22050
12
+ segment_length: 2
13
+
configs/train/melspec/pretrain_jvs.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jvs_22k-low"
5
+ aux_path: "./data/jvs_22k"
6
+ preprocessed_path: "./preprocessed/jvs"
7
+ output_path: "./output/melspec/pretrain"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 90
16
+ n_val: 5
17
+ n_test: 5
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ max_wav_value: 32768.0
28
+ segment_length: 2
29
+
30
+ train:
31
+ batchsize: 8
32
+ epoch: 50
33
+ alpha: 0.1
34
+ augment: True
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.005
38
+ grad_clip_thresh: 1.0
39
+ logger_step: 1000
40
+ load_pretrained: False
41
+ pretrained_path: null
42
+ early_stopping: False
43
+ multi_scale_loss:
44
+ use_linear: False
45
+ gamma: 1.0
46
+ feature_loss:
47
+ type: "mae"
configs/train/melspec/ssl_jsut.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jsut_22k-low"
5
+ aux_path: "./data/jsut_22k"
6
+ preprocessed_path: "./preprocessed/jsut-low"
7
+ output_path: "./output/melspec/jsut-low"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 4950
16
+ n_val: 25
17
+ n_test: 25
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ bitrate: "16k"
28
+ max_wav_value: 32768.0
29
+ segment_length: 2
30
+
31
+ train:
32
+ batchsize: 4
33
+ epoch: 50
34
+ epoch_channel: 25
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.001
38
+ alpha: 0.1
39
+ beta: 0.1
40
+ grad_clip_thresh: 1.0
41
+ logger_step: 1000
42
+ load_pretrained: True
43
+ pretrained_path: null
44
+ fix_channel: False
45
+ early_stopping: False
46
+ multi_scale_loss:
47
+ use_linear: True
48
+ gamma: 1.0
49
+ feature_loss:
50
+ type: "mae"
51
+
52
+ dual:
53
+ enable: True
54
+ config_path: ./configs/train/melspec/dual.yaml
configs/train/melspec/ssl_tono.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/tono"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/tono"
7
+ output_path: "./output/melspec/tono"
8
+ test_wav_path: null
9
+ feature_type: "melspec"
10
+ hifigan_path: "./hifigan/hifigan_melspec_universal"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 270
16
+ n_val: 34
17
+ n_test: 30
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ comp_factor: 1.0
26
+ min_magnitude: 0.00001
27
+ bitrate: "16k"
28
+ max_wav_value: 32768.0
29
+ segment_length: 2
30
+
31
+ train:
32
+ batchsize: 4
33
+ epoch: 50
34
+ epoch_channel: 25
35
+ multi_gpu_mode: False
36
+ num_workers: 4
37
+ learning_rate: 0.001
38
+ alpha: 0.1
39
+ beta: 0.1
40
+ grad_clip_thresh: 1.0
41
+ logger_step: 1000
42
+ load_pretrained: False
43
+ pretrained_path: null
44
+ fix_channel: False
45
+ early_stopping: False
46
+ multi_scale_loss:
47
+ use_linear: True
48
+ gamma: 1.0
49
+ feature_loss:
50
+ type: "mae"
51
+
52
+ dual:
53
+ enable: True
54
+ config_path: ./configs/train/melspec/dual.yaml
configs/train/vocfeats/dual.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen"
4
+ source_path: "./data/jvs_22k"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/dual"
7
+ preprocess:
8
+ n_train: 90
9
+ n_val: 5
10
+ n_test: 5
11
+ sampling_rate: 22050
12
+ segment_length: 2
configs/train/vocfeats/pretrain_jvs.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "pretrain"
3
+ corpus_type: "multi-unseen" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jvs_22k-low"
5
+ aux_path: "./data/jvs_22k"
6
+ preprocessed_path: "./preprocessed/jvs"
7
+ output_path: "./output/vocfeats/pretrain"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 90
16
+ n_val: 5
17
+ n_test: 5
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ f0_extractor: "dio"
27
+ comp_factor: 1.0
28
+ min_magnitude: 0.00001
29
+ max_wav_value: 32768.0
30
+ segment_length: 2
31
+
32
+ train:
33
+ batchsize: 8
34
+ epoch: 50
35
+ alpha: 0.1
36
+ augment: True
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.005
40
+ grad_clip_thresh: 1.0
41
+ logger_step: 1000
42
+ load_pretrained: False
43
+ pretrained_path: null
44
+ early_stopping: False
45
+ multi_scale_loss:
46
+ use_linear: True
47
+ gamma: 1.0
48
+ feature_loss:
49
+ type: "mae"
configs/train/vocfeats/ssl_jsut.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/jsut_22k-low"
5
+ aux_path: "./data/jsut_22k"
6
+ preprocessed_path: "./preprocessed/jsut-low"
7
+ output_path: "./output/vocfeats/jsut-low"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 4950
16
+ n_val: 25
17
+ n_test: 25
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ comp_factor: 1.0
27
+ min_magnitude: 0.00001
28
+ bitrate: "16k"
29
+ f0_extractor: "harvest"
30
+ max_wav_value: 32768.0
31
+ segment_length: 2
32
+
33
+ train:
34
+ batchsize: 4
35
+ epoch: 50
36
+ epoch_channel: 25
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.001
40
+ alpha: 0.1
41
+ beta: 0.1
42
+ grad_clip_thresh: 1.0
43
+ logger_step: 1000
44
+ load_pretrained: True
45
+ pretrained_path: null
46
+ fix_channel: False
47
+ early_stopping: False
48
+ multi_scale_loss:
49
+ use_linear: True
50
+ gamma: 1.0
51
+ feature_loss:
52
+ type: "mae"
53
+
54
+ dual:
55
+ enable: True
56
+ config_path: ./configs/train/vocfeats/dual.yaml
configs/train/vocfeats/ssl_tono.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ stage: "ssl"
3
+ corpus_type: "single" # (single, multi-seen, multi-unseen)
4
+ source_path: "./data/tono"
5
+ aux_path: null
6
+ preprocessed_path: "./preprocessed/tono"
7
+ output_path: "./output/vocfeats/tono"
8
+ test_wav_path: null
9
+ feature_type: "vocfeats"
10
+ hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
11
+ power_norm: True
12
+ use_gst: False
13
+
14
+ preprocess:
15
+ n_train: 270
16
+ n_val: 34
17
+ n_test: 30
18
+ sampling_rate: 22050
19
+ frame_length: 1024
20
+ frame_shift: 256
21
+ fft_length: 1024
22
+ fmin: 0
23
+ fmax: 8000
24
+ n_mels: 80
25
+ cep_order: 40
26
+ comp_factor: 1.0
27
+ min_magnitude: 0.00001
28
+ bitrate: "16k"
29
+ f0_extractor: "harvest"
30
+ max_wav_value: 32768.0
31
+ segment_length: 2
32
+
33
+ train:
34
+ batchsize: 4
35
+ epoch: 50
36
+ epoch_channel: 25
37
+ multi_gpu_mode: False
38
+ num_workers: 4
39
+ learning_rate: 0.001
40
+ alpha: 0.1
41
+ beta: 0.1
42
+ grad_clip_thresh: 1.0
43
+ logger_step: 1000
44
+ load_pretrained: False
45
+ pretrained_path: null
46
+ fix_channel: False
47
+ early_stopping: False
48
+ multi_scale_loss:
49
+ use_linear: True
50
+ gamma: 1.0
51
+ feature_loss:
52
+ type: "mae"
53
+
54
+ dual:
55
+ enable: True
56
+ config_path: ./configs/train/vocfeats/dual.yaml