# Training configuration (keys and values as found in the collapsed dump).
#
# NOTE(review): the line structure of this file had been flattened onto a few
# physical lines. The block layout below was reconstructed by grouping keys
# under the nearest preceding `*_conf` key and by treating runs of `- -` as
# nested sequences. Confirm the nesting against the consuming tool's schema.
#
# NOTE(review): the data paths under /local/11454483.1.gpu/ are absolute;
# verify that they exist on the machine where this configuration is reused.

# --- Run / logging -----------------------------------------------------------
config: conf/train.yaml
print_config: false
log_level: INFO
dry_run: false
iterator_type: sequence
output_dir: exp/tts_train_raw_phn_none
ngpu: 1
seed: 0
num_workers: 1
num_att_plot: 1

# --- Distributed -------------------------------------------------------------
dist_backend: nccl
dist_init_method: env://
dist_world_size: null
dist_rank: null
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: null
multiprocessing_distributed: false
unused_parameters: false
sharded_ddp: false

# --- cuDNN -------------------------------------------------------------------
cudnn_enabled: true
cudnn_benchmark: false
cudnn_deterministic: true

# --- Trainer -----------------------------------------------------------------
collect_stats: false
write_collected_feats: false
max_epoch: 200
patience: null
val_scheduler_criterion:
- valid
- loss
early_stopping_criterion:
- valid
- loss
- min
best_model_criterion:
- - valid
  - loss
  - min
- - train
  - loss
  - min
keep_nbest_models: 3
nbest_averaging_interval: 0
grad_clip: 2.0
grad_clip_type: 2.0
grad_noise: false
accum_grad: 4
no_forward_run: false
resume: true
train_dtype: float32
use_amp: false
log_interval: null
use_matplotlib: true
use_tensorboard: true
create_graph_in_tensorboard: false
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false

# --- Initialisation / frozen parameters --------------------------------------
pretrain_path: null
init_param:
- ../tts_pretrain_phn_residual/exp/tts_train_phn_none/2epoch.pth:tts_pretrain.encoder:tts.encoder
- ../tts_pretrain_phn_residual/exp/tts_train_phn_none/2epoch.pth:tts_pretrain.lid_emb:tts.lid_emb
ignore_init_mismatch: false
freeze_param:
- tts.encoder.adapter
- tts.encoder.embed
- tts.lid_emb

# --- Batching ----------------------------------------------------------------
num_iters_per_epoch: null
batch_size: 20
valid_batch_size: null
batch_bins: 400000
valid_batch_bins: null
train_shape_file:
- exp/tts_stats_raw_phn_none/train/text_shape.phn
- exp/tts_stats_raw_phn_none/train/speech_shape
valid_shape_file:
- exp/tts_stats_raw_phn_none/valid/text_shape.phn
- exp/tts_stats_raw_phn_none/valid/speech_shape
batch_type: numel
valid_batch_type: null
fold_length:
- 150
- 204800
sort_in_batch: descending
sort_batch: descending
multiple_iterator: false
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024

# --- Data --------------------------------------------------------------------
# Each entry is a three-item list; going by the key name the items are
# path, name and type, in that order.
train_data_path_and_name_and_type:
- - /local/11454483.1.gpu/dump/raw/train/text
  - text
  - text
- - /local/11454483.1.gpu/dump/raw/train/wav.scp
  - speech
  - sound
- - /local/11454483.1.gpu/dump/xvector/train/xvector.scp
  - spembs
  - kaldi_ark
- - /local/11454483.1.gpu/dump/raw/train/utt2lid
  - lids
  - text_int
valid_data_path_and_name_and_type:
- - /local/11454483.1.gpu/dump/raw/dev/text
  - text
  - text
- - /local/11454483.1.gpu/dump/raw/dev/wav.scp
  - speech
  - sound
- - /local/11454483.1.gpu/dump/xvector/dev/xvector.scp
  - spembs
  - kaldi_ark
- - /local/11454483.1.gpu/dump/raw/dev/utt2lid
  - lids
  - text_int
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
valid_max_cache_size: null

# --- Optimiser / scheduler ---------------------------------------------------
optim: adam
optim_conf:
  lr: 1.0
scheduler: noamlr
scheduler_conf:
  model_size: 512
  warmup_steps: 50000

# --- Token inventory ---------------------------------------------------------
# Order is significant for a list; do not sort or deduplicate.
token_list:
# NOTE(review): the first two entries are empty in the source. Presumably
# angle-bracketed special tokens (e.g. <blank>, <unk>) were stripped as markup;
# restore the real values before using this file.
- null
- null
# Quoted because YAML 1.1 loaders may resolve a bare n / y as a boolean.
- 'n'
- t
- s
- l
- a
- e
- k
- d
- m
- ə
- r
- i
- p
- o
- v
- ɪ
- ˈa
- ɾ
- j
- z
- ˈɛ
- ˈe
- ɛ
- b
- ˈo
- f
- ˈi
- u
- ð
- ʁ
- h
- ɡ
- ɔ
- ʃ
- ˈu
- w
- ˌe
- ts
- ŋ
- ˌa
- æ
- iː
- ˈɪ
- ˈiː
- ˈaː
- ɹ
- ʊ
- ɑ
- ˈeː
- ˈɔ
- x
- aː
- tʃ
- ˌi
- ˌo
- tː
- oː
- ɣ
- ˈoː
- eː
- 'y'
- θ
- ɲ
- ə-
- ʋ
- ʒ
- ˌɛ
- ˈɑ
- β
- uː
- ˈuː
- ˈaɪ
- ç
- ˈɑ̃
- ˈɔ̃
- ˈæ
- ɚ
- ˌɪ
- ɑ̃
- ˌu
- ˌɔ
- ˈy
- ɜ
- tʲ
- ˈeɪ
- ˈɑː
- ˌeː
- ʌ
- ᵻ
- ɐ
- ˌɑ
- ɨ
- ɔ̃
- dʒ
- e-
- ˌiː
- a-
- ˈʌ
- ˌʊ
- əl
- ʎ
- ˌaɪ
- aɪ
- ˈɔː
- ss
- ˈaʊ
- rʲ
- kː
- ˈoʊ
- ˌaː
- ɑː
- nʲ
- ˌoː
- ø
- ˈɛɪ
- ɛɪ
- ˌæ
- ʂ
- ɲʲ
- ˌɑː
- ɕ
- ˈai
- vʲ
- dʲ
- ai
- ei
- ɛ̃
- mʲ
- ˈø
- ɭ
- ˈɵ
- pː
- ˈɛ̃
- ɔː
- oʊ
- ˈɜː
- ˈʊ
- tɕ
- ɟ
- ˌaʊ
- ˈœ
- kʲ
- ˈuo
- ˈoi
- æː
- dʑ
- l̩
- ˈie
- ɪː
- ie
- oi
- ˌeɪ
- ˈɨ
- yː
- ˈɪː
- ˌy
- øː
- ˈʏ
- ˈɛː
- ˈoːɹ
- ˌuː
- ˌʌ
- ˈeu
- ˈei
- aʊ
- ˌoi
- bː
- ˌai
- ˈœy
- ˈøː
- ˈɑːɹ
- œ̃
- ˈæː
- au
- y-
- r̝̊
- ɵ
- ˌɵ
- c
- ˌɛɪ
- ˈɔø
- ˈyː
- ee
- pʲ
- ˈee
- bʲ
- ˈyø
- iə
- ˈiə
- ˌɨ
- ˌøː
- ɔːɹ
- ɔø
- eɪ
- ʑ
- ˈau
- ˈʊɹ
- r̝
- dʒː
- ˌeʊ
- ˈɔːɹ
- ˌoʊ
- ˌʊɹ
- ɑːɹ
- ˈæy
- ˌyː
- s^
- eu
- ˌə
- tʃː
- ˈə
- ˌei
- ea
- tsʲ
- ẽ
- ʌʊ
- œy
- ˈʌʊ
- nʲʲ
- ˌæi
- ˌʏ
- ˌɛː
- ˈɪɹ
- æi
- ˈɛɹ
- ˈæi
- ˈɔɪ
- ã
- dzː
- r̩
- ˈẽ
- ou
- œ
- ɜː
- uo
- tʲʲ
- ˌø
- ɛɹ
- ɭʲ
- iɪ
- (en)
- ʂʲ
- tsː
- ˌuo
- ˌʌʊ
- oːɹ
- ˈou
- ˌɛ̃
- ʝ
- eʊ
- ɨ̃
- ˈɔa
- ɟː
- ʊɐ
- ˈr̩
- tʃʲ
- uɪ
- ɡʲ
- ˈea
- ˌʊɐ
- ˈʊɐ
- ɛː
- ˌyi
- t^
- tɕʲ
- ˌea
- (fr)
- ɕʲ
- ʀ
- ˌɔø
- ʏ
- ˌœ
- ˈoɪ
- ˌau
- eɑ
- ˌɪː
- ˈeʊ
- ˈiɪ
- ˈã
- ˌɔː
- ˌã
- sʲ
- ˈaɪɚ
- ˌɑ̃
- ˌæː
- ey
- ˌœy
- ˈaɪə
- d̪
- ɾʲ
- ˌøi
- dː
- ˌie
- ui
- fʲ
- n̩
- ʔ
- ˌou
- yi
- ˌɑːɹ
- tsʲʲ
- ˌɐ
- ˈœ̃
- ˌyø
- dz
- ɡː
- ɾʲʲ
- ˈl̩
- ˈøy
- ˌæy
- cː
- æy
- ʊɹ
- ʑʲ
- ˌɜː
- yʊ
- ˌɛɹ
- pf
- dʑʲ
- ˌoːɹ
- ˈɨ̃
- ˈiʊ
- õ
- ɔa
- ˌɔa
- ˌee
- ˈĩ
- ˌiɪ
- ˌɔːɹ
- ˈɒ
- ja
- ĩ
- ˈũ
- ɒ
- ũ
- ʃʲ
- ɪɹ
- ju
- (de)
- yø
- ˌeu
- d^
- ˈiu
- ˈja
- øi
- ˈeɑ
- ˈyi
- ɾʲˌʲ
- ʃʲʲ
- ʃʲˌʲ
- aɪə
- ˈuɪ
- iu
- ˈõ
- iɐ
- ˌẽ
- iʊ
- ˌr̩
- ˈui
- əʊ
- u"
- ˌɔ̃
- ˈəʊ
- iy
- ʲ
- zʲˌʲ
- (it)
- ˌɒ
- ɔɪ
- ˌɪɹ
- ˈɵː
- ˈu"
- nʲˌʲ
- (nl)
- ˌl̩
- ˈey
- βː
- lʲʲ
- oɪ
- ˈiɐ
- ˌiɐ
- lʲ
- tsʲˌʲ
- xʲ
- ˌũ
- mʲʲ
- dʒʲ
- ˌeo
- ˈju
- r̩ː
- lʲˌʲ
- ˈøi
- t^ː
- əɪ
- l̩ː
- tʃˌʲ
- eo
- zʲʲ
- ˌiy
- aʲ
- ˌoɪ
- tl#
- ˈyɪ
- ˌiə
- ˌey
- øy
- dʲʲ
- ˈl̩ː
- ˈyʊ
- ˌɨ̃
- ʀʲ
- ɣː
- ˈeo
- ˈʊə
- ˌiu
- ˌøy
- ˈəɪ
- ˈeə
- aɪɚ
- ɪ^
- eə
- ˌĩ
- t̪
- vʲʲ
- (es)
- (gn)
- zʲ
- ˌõ
- əː
- bʲʲ
- (base)
- ˌəʊ
- ˈə-
- (ru)
- ˌɔɪ
- ˈæiː
- tsˌʲ
- ˈr̩ː
- ə--
- ˌn̩
- uʲ
- ˈw
- hʲ
- ˌeə
- yɪ
- fʲʲ
- ˌyʊ
- (el)
- ˌaɪɚ
- ˈəː
- ˌʊə
- ɵː
- t̪ː
- w-
- (sl)
- eʲ
- ˈa-
- ˌr̩ː
- mʲˌʲ
- (fi)
- ʒʲʲ
- çʲ
- ˌaɪə
- ˈɚ
- (lt)
- pʲʲ
- ˈɜ
- ˌuɪ
- ˌja
- (pl)
- ˈe-
- ˌe-
- (et)
- ˈoːʲ
- (kl)
- ˈõː
- (hu)
- ˈiy
- ʊə
- ˈaʲ
- ˌl̩ː
- lˌʲ
- '1'
- ʒʲ
- (cs)
- ˈææ
- ˈts-
- ts-
- ˌʊː
- ˌy"
- cʲ
- wʲ
- ˈãː
- ˈuʲ
- (ro)
- ˌɜ
- (sk)
- oːʲ
- ʊː
- ˈtl#tl#
- ʃˈʲ
- ɬ
- ˌə-
- (hr)
- tl#tl#
- ˌœ̃
- ˈʊː
- l̩ʲ
- dʒˌʲ
- tsˈʲ
- pʲˌʲ
- ˈʌː
- ˈeʲ
- aːʲ
- vʲˌʲ
- ˈj
- ()
- eːː
- ˌãː
- ˈuːʲ
- ˈeeʲ
# NOTE(review): the final entry is empty in the source. Presumably a stripped
# angle-bracketed token (e.g. <sos/eos>); restore the real value before use.
- null
odim: null
model_conf: {}

# --- Text front-end ----------------------------------------------------------
use_preprocessor: true
token_type: phn
bpemodel: null
non_linguistic_symbols: null
cleaner: null
g2p: null

# --- Acoustic features -------------------------------------------------------
feats_extract: fbank
feats_extract_conf:
  n_fft: 1024
  hop_length: 256
  win_length: null
  fs: 16000
  fmin: 80
  fmax: 7600
  n_mels: 80
normalize: global_mvn
normalize_conf:
  stats_file: exp/tts_stats_raw_phn_none/train/feats_stats.npz

# --- Model -------------------------------------------------------------------
# NOTE(review): every key from embed_dim through use_encoder_w_lid is placed
# under tts_conf because no other *_conf key intervenes; confirm that langs
# and the adapter options belong to the model rather than the top level.
tts: transformer
tts_conf:
  embed_dim: 0
  eprenet_conv_layers: 0
  eprenet_conv_filts: 0
  eprenet_conv_chans: 0
  dprenet_layers: 2
  dprenet_units: 256
  adim: 512
  aheads: 8
  elayers: 6
  eunits: 1024
  dlayers: 6
  dunits: 1024
  positionwise_layer_type: conv1d
  positionwise_conv_kernel_size: 1
  postnet_layers: 5
  postnet_filts: 5
  postnet_chans: 256
  spk_embed_dim: 192
  spk_embed_integration_type: add
  use_gst: true
  gst_heads: 4
  gst_tokens: 16
  use_masking: true
  bce_pos_weight: 5.0
  use_scaled_pos_enc: true
  encoder_normalize_before: true
  decoder_normalize_before: true
  reduction_factor: 1
  init_type: xavier_uniform
  init_enc_alpha: 1.0
  init_dec_alpha: 1.0
  eprenet_dropout_rate: 0.0
  dprenet_dropout_rate: 0.5
  postnet_dropout_rate: 0.5
  transformer_enc_dropout_rate: 0.1
  transformer_enc_positional_dropout_rate: 0.1
  transformer_enc_attn_dropout_rate: 0.1
  transformer_dec_dropout_rate: 0.1
  transformer_dec_positional_dropout_rate: 0.1
  transformer_dec_attn_dropout_rate: 0.1
  transformer_enc_dec_attn_dropout_rate: 0.1
  use_guided_attn_loss: true
  num_heads_applied_guided_attn: 2
  num_layers_applied_guided_attn: 2
  modules_applied_guided_attn:
  - encoder-decoder
  guided_attn_loss_sigma: 0.4
  guided_attn_loss_lambda: 10.0
  langs: 21
  lang_family_encoding: false
  num_lang_family: 7
  use_adapter: true
  adapter_type: residual
  use_encoder_w_lid: true

# --- Pitch / energy (unset) --------------------------------------------------
pitch_extract: null
pitch_extract_conf: {}
pitch_normalize: null
pitch_normalize_conf: {}
energy_extract: null
energy_extract_conf: {}
energy_normalize: null
energy_normalize_conf: {}

# --- Metadata ----------------------------------------------------------------
required:
- output_dir
- token_list
# Quoted so the value stays a string and is not read as an integer.
version: '202209'
distributed: false