# Training configuration.
#
# NOTE(review): this file had been flattened by a table extraction -- every
# line carried a trailing " |" and all nesting indentation was lost. The
# structure below was reconstructed from the `X` / `X_conf` key pairing and
# the original section spacing; keys, values and their order are unchanged.
# Confirm the nesting against the framework that consumes this file.

# ---- Run-level settings ----
batch_size: 16
accum_grad: 128
max_epoch: 150
# NOTE(review): YAML reads `none` as the string "none", not as null. Kept
# as written; confirm the consumer's argument parser accepts this spelling.
patience: none

init: xavier_uniform
# One criterion, written as a list holding a single three-item list.
best_model_criterion:
  - - valid
    - acc_asr
    - max
keep_nbest_models: 10

# ---- Speech encoder ----
encoder: conformer
encoder_conf:
  output_size: 512
  attention_heads: 8
  linear_units: 2048
  num_blocks: 12
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: conv2d
  normalize_before: true
  macaron_style: true
  rel_pos_type: latest
  pos_enc_layer_type: rel_pos
  selfattention_layer_type: rel_selfattn
  activation_type: swish
  use_cnn_module: true
  cnn_module_kernel: 31
  # Placed under encoder_conf because it sat in the encoder section of the
  # flattened file -- TODO confirm it is an encoder option.
  interctc_layer_idx: [6]

# ---- ASR decoder ----
decoder: multi_transformer
decoder_conf:
  attention_heads: 8
  linear_units: 2048
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1
  return_hidden: true

# ---- Subtitle encoder ----
subtitle_encoder: transformer
subtitle_encoder_conf:
  output_size: 512
  attention_heads: 8
  linear_units: 2048
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  # NOTE(review): string "none" in YAML, not null -- kept as written.
  input_layer: none
  normalize_before: true

# ---- Subtitle decoder ----
subtitle_decoder: multi_transformer
subtitle_decoder_conf:
  attention_heads: 8
  linear_units: 2048
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1

# ---- Loss weighting and decoder conditioning ----
model_conf:
  asr_weight: 0.5
  subs_weight: 0.5
  ctc_weight: 0.3
  interctc_weight: 0.3
  lsm_weight_asr: 0.1
  lsm_weight_mt: 0.1
  length_normalized_loss: false
  condition_subtitle_decoder: true
  condition_asr_decoder: true
  use_asr_feats: "encoder"

# ---- Optimizer and learning-rate schedule ----
optim: adam
optim_conf:
  lr: 0.003
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 100000

# ---- Spectrogram augmentation ----
specaug: specaug
specaug_conf:
  apply_time_warp: true
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
    - 0
    - 30
  num_freq_mask: 2
  apply_time_mask: true
  time_mask_width_range:
    - 0
    - 40
  num_time_mask: 2