alefiury committed on
Commit
7782720
·
verified ·
1 Parent(s): 71edba2

Upload 2 files

Browse files
Files changed (2) hide show
  1. common.yaml +22 -0
  2. config.yaml +93 -0
common.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ path: ./logs/${hydra.job.config_name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
2
+
3
+ log_level: INFO
4
+ seed: 1
5
+ tb_log_dir: tensorboard
6
+ tqdm: true
7
+
8
+ hydra:
9
+ run:
10
+ dir: ${path}
11
+ job_logging:
12
+ formatters:
13
+ colorlog:
14
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s:%(lineno)s:%(funcName)s()%(reset)s][%(log_color)s%(levelname)s%(reset)s]
15
+ - %(message)s'
16
+ handlers:
17
+ file:
18
+ filename: ${hydra.run.dir}/${hydra.job.name}_${now:%Y-%m-%d}_${now:%H-%M-%S}.log
19
+
20
+ defaults:
21
+ - override hydra/job_logging: colorlog
22
+ - override hydra/hydra_logging: colorlog
config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - common
3
+
4
+ train:
5
+ batch_size: 128
6
+ betas: [0.8, 0.99]
7
+ c_kl: 1.0
8
+ c_mel: 45
9
+ distributed: false # BUG: multi-gpu is not working
10
+ use_multiprocessing: false # BUG: multi-gpu is not working
11
+ epochs: 20
12
+ eps: 1e-9
13
+ fp16_run: false
14
+ init_lr_ratio: 1
15
+ raise_error: false
16
+ learning_rate: 2e-4
17
+ log_interval: 10
18
+ log_level: ${log_level}
19
+ lr_decay: 0.98
20
+ max_speclen: 128
21
+ port: 8005
22
+ resume_training: false # set to false to finetune from a model
23
+ seed: 1234
24
+ segment_size: 8960
25
+ use_sr: false
26
+ valid_epoch_interval: 1
27
+ valid_steps_interval: 1000
28
+ save_epoch_interval: 10
29
+ save_steps_interval: 1000
30
+ warmup_epochs: 0
31
+ # weighted_batch_speaker_sampling : false
32
+ # weighted_batch_lang_sampling : false
33
+ weighted_batch_speaker_sampling : 0.5
34
+ weighted_batch_lang_sampling : 0.5
35
+
36
+ data:
37
+ dataset_dir: /raid/lucasgris/free-svc/data
38
+ filter_length: 1280
39
+ hop_length: 320
40
+ max_wav_value: 32768.0
41
+ mel_fmax: null
42
+ mel_fmin: 0.0
43
+ n_mel_channels: 80
44
+ num_workers: 64
45
+ # For pitch extraction, set either pitch_predictor (pitch is computed in the dataloader) or pitch_features_dir (pitch is loaded from disk)
46
+ pitch_predictor: rmvpe # pm | crepe | harvest | dio | rmvpe | fcpe
47
+ pitch_features_dir: ${data.dataset_dir}/pitch_features/
48
+ sampling_rate: 24000
49
+ spectrogram_dir: null #${data.dataset_dir}/spectrograms # NOT recommended if you have limited disk space
50
+ # For speaker embedding extraction, either set use_spk_emb to true together with spk_embeddings_dir (embeddings are loaded from disk), or configure the model to compute them on forward
51
+ use_spk_emb: true
52
+ spk_embeddings_dir: ${data.dataset_dir}/spk_embeddings
53
+ # SR augmentation is deprecated; set use_sr to false
54
+ sr_min_max: [68, 92]
55
+ # For content feature extraction, either set content_feature_dir (features are loaded from disk) or configure the model to compute them on forward
56
+ content_feature_dir: null
57
+ training_files: data/train.csv
58
+ validation_files: data/valid.csv
59
+ win_length: 1280
60
+
61
+ model:
62
+ save_dir: null
63
+ filter_channels: 768
64
+ finetune_from_model:
65
+ discriminator: /raid/lucasgris/free-svc/D-freevc-24.pth
66
+ generator: /raid/lucasgris/free-svc/freevc-24.pth
67
+ hidden_channels: 192
68
+ inter_channels: 192
69
+ kernel_size: 3
70
+ n_heads: 2
71
+ n_layers_q: 3
72
+ n_layers: 6
73
+ p_dropout: 0.1
74
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
75
+ resblock_kernel_sizes: [3,7,11]
76
+ resblock: 1
77
+ c_dim: 768
78
+ upsample_initial_channel: 512
79
+ upsample_kernel_sizes: [16,16,4,4]
80
+ upsample_rates: [10,8,2,2]
81
+ use_spectral_norm: false
82
+ freeze_external_spk: true
83
+ device: cuda
84
+ # For online speaker embedding extraction, set the use_spk_emb to True and spk_encoder_type
85
+ use_spk_emb: false
86
+ gin_channels: null # gin_channels = spk_encoder.embedding_dim
87
+ spk_encoder_type: null # ECAPA2SpeakerEncoder16k |
88
+ # For content feature extraction, set the content_encoder_type and content_encoder_ckpt
89
+ content_encoder_type: null # load from disk (data) - hubert | wavlm
90
+ content_encoder_ckpt: null # load from disk (data) - [path] | models/wavlm/WavLM-Large.pt | lengyue233/content-vec-best
91
+ post_content_encoder_type: vits-encoder-with-uv-emb # or freevc-bottleneck
92
+ coarse_f0: true
93
+ cond_f0_on_flow: false