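# Web-page header captured along with this file (not part of the config);
# kept as comments so the YAML below parses:
#   kevinwang676's picture | Duplicate from zlc99/M4Singer | 26925fd
#   raw | history blame | 3.59 kB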
# Base configuration file(s) referenced by this config. How they are merged
# with the keys below is decided by the loader and is not visible here.
base_config:
  - usr/configs/popcs_ds_beta6.yaml

# Binarizer class, given as a dotted path.
binarizer_cls: data_gen.singing.binarize.M4SingerBinarizer

# Data locations.
raw_data_dir: 'data/raw/m4singer'
# NOTE(review): 'xxx' looks like a placeholder — confirm whether it is used.
processed_data_dir: 'xxx'
binary_data_dir: 'data/binary/m4singer'

datasets:
  - 'm4singer'

# Item-name prefixes. Presumably these select the held-out test items —
# confirm against the binarizer.
test_prefixes:
  - 'Alto-2#岁月神偷'
  - 'Alto-2#奇妙能力歌'
  - 'Tenor-1#一千年以后'
  - 'Tenor-1#童话'
  - 'Tenor-2#消愁'
  - 'Tenor-2#一荤一素'
  - 'Soprano-1#念奴娇赤壁怀古'
  - 'Soprano-1#问春'
num_spk: 20

# Vocoder class (dotted path) and its checkpoint location.
vocoder: vocoders.hifigan.HifiGAN
vocoder_ckpt: checkpoints/m4singer_hifigan

pe_enable: true
pe_ckpt: 'checkpoints/m4singer_pe'

mel_vmin: -6.0
mel_vmax: 1.5
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-6` to the string '1e-6' instead of a float.
wav2spec_eps: 1.0e-6
audio_sample_rate: 24000
hop_size: 128  # Hop size.
fft_size: 512  # FFT size.
win_size: 512  # Window size.
fmin: 30
fmax: 12000
min_level_db: -120
# Boolean feature switches.
use_pitch_embed: true
use_spk_embed: false
use_spk_id: true
use_midi: true
use_gt_f0: false
use_gt_dur: false
# lambda_* values; presumably per-term loss weights — confirm against the
# training code. The energy term is disabled (commented out).
lambda_f0: 1.0
lambda_uv: 1.0
# lambda_energy: 0.1
lambda_ph_dur: 1.0
lambda_sent_dur: 1.0
lambda_word_dur: 1.0
predictor_grad: 0.1
hidden_size: 256
# Options for the binarization step. The flags must be nested under
# `binarization_args`: written at column 0, the key itself parses as null and
# the flags become unrelated top-level keys.
binarization_args:
  with_wav: false
  with_spk_embed: true
  with_align: true
# Empty string: no checkpoint path is set here.
fs2_ckpt: ''
use_nsf: true
# Experiment settings: batching limits, predictor sizes, validation output.
max_frames: 5000
max_tokens: 40000
max_sentences: 12
predictor_layers: 5
rel_pos: true
dur_predictor_layers: 5
dur_predictor_kernel: 3
num_valid_plots: 10
save_gt: true
spec_max: [-0.3894500136375427, -0.3796464204788208, -0.2914905250072479, -0.15550297498703003, -0.08502643555402756, 0.10698417574167252, -0.0739326998591423, -0.0541548952460289, 0.15501998364925385, 0.06483431905508041, 0.03054228238761425, -0.013737732544541359, -0.004876468330621719, 0.04368264228105545, 0.13329921662807465, 0.16471388936042786, 0.04605761915445328, -0.05680707097053528, 0.0542571023106575, -0.0076539707370102406, -0.00953489076346159, -0.04434828832745552, 0.001293870504014194, -0.12238839268684387, 0.06418416649103165, 0.02843189612030983, 0.08505241572856903, 0.07062800228595734, 0.00120724702719599, -0.07675088942050934, 0.03785804659128189, 0.04890783503651619, -0.06888376921415329, -0.0839693546295166, -0.17545585334300995, -0.2911079525947571, -0.4238220453262329, -0.262084037065506, -0.3002263605594635, -0.3845032751560211, -0.3906497061252594, -0.6550108790397644, -0.7810799479484558, -0.7503029704093933, -0.7995198965072632, -0.8092347383499146, -0.6196113228797913, -0.6684317588806152, -0.7735874056816101, -0.8324533104896545, -0.9601566791534424, -0.955253541469574, -0.748817503452301, -0.9106167554855347, -0.9707801342010498, -1.053107500076294, -1.0448424816131592, -1.1082794666290283, -1.1296544075012207, -1.071642279624939, -1.1003081798553467, -1.166810154914856, -1.1408926248550415, -1.1330615282058716, -1.1167492866516113, -1.0716774463653564, -1.035891056060791, -1.0092483758926392, -0.9675999879837036, -0.938962996006012, -1.0120564699172974, -0.9777995347976685, -1.029313564300537, -0.9459163546562195, -0.8519706130027771, -0.7751091122627258, -0.7933766841888428, -0.9019735455513, -0.9983296990394592, -1.505873441696167]
spec_min: [-6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0]