# Provenance (Hugging Face page residue, commented out so the YAML parses):
# uploader: soumi-maiti
# commit message: Add model files
# commit hash: f0bec87
config: conf/tuning/finetune_train_transformer_sid.yaml
print_config: false
log_level: INFO
dry_run: false
iterator_type: sequence
output_dir: exp/tts_finetune
ngpu: 1
seed: 0
num_workers: 1
num_att_plot: 3
dist_backend: nccl
dist_init_method: env://
dist_world_size: null
dist_rank: null
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: null
multiprocessing_distributed: false
unused_parameters: false
sharded_ddp: false
cudnn_enabled: true
cudnn_benchmark: false
cudnn_deterministic: true
collect_stats: false
write_collected_feats: false
max_epoch: 100
patience: null
val_scheduler_criterion:
- valid
- loss
early_stopping_criterion:
- valid
- loss
- min
best_model_criterion:
- - valid
- loss
- min
- - train
- loss
- min
keep_nbest_models: 5
nbest_averaging_interval: 0
grad_clip: 1.0
grad_clip_type: 2.0
grad_noise: false
accum_grad: 3
no_forward_run: false
resume: true
train_dtype: float32
use_amp: false
log_interval: null
use_matplotlib: true
use_tensorboard: true
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false
pretrain_path: null
init_param:
- /ocean/projects/cis210027p/smaiti/espnet1/egs2/cmu_indic/tts1/../tts1_pre_fine/exp/tts_train_transformer_sid_raw_phn_none/train.loss.ave_5best.pth
ignore_init_mismatch: false
freeze_param: []
num_iters_per_epoch: 1000
batch_size: 20
valid_batch_size: null
batch_bins: 6000000
valid_batch_bins: null
train_shape_file:
- exp/tts_stats_raw_phn_none/train/text_shape.phn
- exp/tts_stats_raw_phn_none/train/speech_shape
valid_shape_file:
- exp/tts_stats_raw_phn_none/valid/text_shape.phn
- exp/tts_stats_raw_phn_none/valid/speech_shape
batch_type: numel
valid_batch_type: null
fold_length:
- 150
- 204800
sort_in_batch: descending
sort_batch: descending
multiple_iterator: false
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
train_data_path_and_name_and_type:
- - dump/raw/hin_ab_train_no_dev/text
- text
- text
- - dump/raw/hin_ab_train_no_dev/wav.scp
- speech
- sound
- - dump/raw/hin_ab_train_no_dev/utt2sid
- sids
- text_int
valid_data_path_and_name_and_type:
- - dump/raw/hin_ab_dev/text
- text
- text
- - dump/raw/hin_ab_dev/wav.scp
- speech
- sound
- - dump/raw/hin_ab_dev/utt2sid
- sids
- text_int
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
valid_max_cache_size: null
optim: adam
optim_conf:
lr: 0.002
scheduler: noamlr
scheduler_conf:
model_size: 512
warmup_steps: 8000
token_list:
- <blank>
- <unk>
- '@'
- n
- I
- a
- k
- m
- r
- l
- s
- e
- t
- i
- j
- 'a:'
- p
- d
- U
- 'i:'
- u
- o
- w
- t_d
- r\
- g
- h\
- b
- O
- P
- A
- d_d
- '{'
- v
- tS
- z
- E
- h
- V
- dZ
- D
- r\=
- N
- S
- d`
- t`
- f
- 'e:'
- 'o:'
- 'u:'
- '4'
- n`
- b_t
- k_h
- '{:'
- s`
- J
- n_d
- d_d_t
- T
- t_h
- ts\
- s\
- l`
- 'O:'
- t`_h
- l_d
- r`
- s_d
- dz\
- d_t
- t_d_h
- g_t
- p_h
- A~
- dz
- tS_h
- e_^
- '@~'
- dZ_t
- 'tS:'
- 'a~:'
- u~
- 'u~:'
- ts\_h
- r=
- Z
- o~
- 'k:'
- 't_d:'
- O~
- dz\_t
- I~
- r`_0
- i~
- d`_t
- U~
- r\`
- p\
- e~
- E~
- 't`:'
- 'd`:'
- x
- 'g:'
- 'l:'
- 'dZ:'
- 's:'
- a~
- q
- 'p:'
- N_t
- 'd_d:'
- O_t
- r_t
- G
- e_t
- a_t
- i_t
- u_t
- 'b:'
- 'S:'
- 'n:'
- <sos/eos>
odim: null
model_conf: {}
use_preprocessor: true
token_type: phn
bpemodel: null
non_linguistic_symbols: null
cleaner: null
g2p: null
feats_extract: fbank
feats_extract_conf:
n_fft: 1024
hop_length: 256
win_length: null
fs: 16000
fmin: 80
fmax: 7600
n_mels: 80
normalize: global_mvn
normalize_conf:
stats_file: exp/tts_stats_raw_phn_none/train/feats_stats.npz
tts: transformer
tts_conf:
spks: 10
embed_dim: 0
eprenet_conv_layers: 0
eprenet_conv_filts: 0
eprenet_conv_chans: 0
dprenet_layers: 2
dprenet_units: 256
adim: 512
aheads: 8
elayers: 6
eunits: 1024
dlayers: 6
dunits: 1024
positionwise_layer_type: conv1d
positionwise_conv_kernel_size: 1
postnet_layers: 5
postnet_filts: 5
postnet_chans: 256
use_masking: true
bce_pos_weight: 5.0
use_scaled_pos_enc: true
encoder_normalize_before: true
decoder_normalize_before: true
reduction_factor: 1
init_type: xavier_uniform
init_enc_alpha: 1.0
init_dec_alpha: 1.0
eprenet_dropout_rate: 0.0
dprenet_dropout_rate: 0.5
postnet_dropout_rate: 0.5
transformer_enc_dropout_rate: 0.1
transformer_enc_positional_dropout_rate: 0.1
transformer_enc_attn_dropout_rate: 0.1
transformer_dec_dropout_rate: 0.1
transformer_dec_positional_dropout_rate: 0.1
transformer_dec_attn_dropout_rate: 0.1
transformer_enc_dec_attn_dropout_rate: 0.1
use_guided_attn_loss: true
num_heads_applied_guided_attn: 2
num_layers_applied_guided_attn: 2
modules_applied_guided_attn:
- encoder-decoder
guided_attn_loss_sigma: 0.4
guided_attn_loss_lambda: 10.0
pitch_extract: null
pitch_extract_conf: {}
pitch_normalize: null
pitch_normalize_conf: {}
energy_extract: null
energy_extract_conf: {}
energy_normalize: null
energy_normalize_conf: {}
required:
- output_dir
- token_list
version: 0.10.7a1
distributed: false