---
# Training configuration dump for an SSL (HuBERT-style) pretraining run.
# The structure below was restored from a whitespace-collapsed copy of the
# file; nesting follows the key order and the sequence markers of that copy.

# --- Run identity and logging ---
config: conf/tuning/train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm.yaml
print_config: false
log_level: INFO
dry_run: false
iterator_type: sequence
output_dir: exp_li/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_raw_layer_9
ngpu: 1
seed: 0
num_workers: 4
num_att_plot: 0

# --- Distributed execution ---
dist_backend: nccl
dist_init_method: file:///scratch/bbjs/chen26/espnet_01_23/egs2/librispeech/ssl1/exp_li/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_raw_layer_9/.dist_init_a1240dd2-c062-43da-80c3-13d77add8604
dist_world_size: 32
dist_rank: 0
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: slurm
multiprocessing_distributed: true
unused_parameters: true
sharded_ddp: false

# --- cuDNN ---
cudnn_enabled: true
cudnn_benchmark: false
cudnn_deterministic: true

# --- Trainer ---
collect_stats: false
write_collected_feats: false
max_epoch: 22
patience: null
val_scheduler_criterion:
  - valid
  - loss
early_stopping_criterion:
  - valid
  - loss
  - min
# A list of criteria; each criterion is itself a three-element list.
best_model_criterion:
  - - valid
    - acc_m
    - max
keep_nbest_models: 10
nbest_averaging_interval: 0
grad_clip: 5.0
grad_clip_type: 2.0
grad_noise: false
accum_grad: 1
no_forward_run: false
resume: true
train_dtype: float32
use_amp: true
drop_last: true
debug_grad: false
log_interval: null
use_matplotlib: true
use_tensorboard: true
create_graph_in_tensorboard: false
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false

# --- Initialisation ---
pretrain_path: null
init_param: []
ignore_init_mismatch: false
freeze_param: []

# --- Batching and data iteration ---
num_iters_per_epoch: 2000
batch_size: 20
valid_batch_size: null
batch_bins: 35000000
valid_batch_bins: null
train_shape_file:
  - exp_li/hubert_iter2_stats_raw/splits16/speech_shape
  - exp_li/hubert_iter2_stats_raw/splits16/text_shape.word
valid_shape_file:
  - exp_li/hubert_iter2_stats_raw/valid/speech_shape
  - exp_li/hubert_iter2_stats_raw/valid/text_shape.word
batch_type: numel
valid_batch_type: null
fold_length:
  - 80000
  - 400
sort_in_batch: descending
sort_batch: descending
multiple_iterator: true
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024

# --- Datasets: each entry is [path, name, type] ---
train_data_path_and_name_and_type:
  - - exp_li/hubert_iter2_stats_raw/splits16/wav.scp
    - speech
    - kaldi_ark
  - - exp_li/hubert_iter2_stats_raw/splits16/text.km.kmeans_iter2_hubert_train_li110_lid_portion0.1
    - text
    - text
valid_data_path_and_name_and_type:
  - - dump/raw/dev_all_li/wav.scp
    - speech
    - kaldi_ark
  - - dump/raw/dev_all_li/text.km.kmeans_iter2_hubert_train_li110_lid_portion0.1
    - text
    - text
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
valid_max_cache_size: null

# --- Optimiser and LR schedule ---
optim: adam
optim_conf:
  lr: 0.0005
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 32000

# --- Target vocabulary ---
# Entries are quoted so the numeric ids stay strings rather than integers.
token_list:
  - '55'
  - '477'
  - '43'
  - '405'
  - '468'
  - '486'
  - '41'
  - '34'
  - '0'
  - '415'
  - '409'
  - '367'
  - '224'
  - '436'
  - '17'
  - '462'
  - '32'
  - '481'
  - '332'
  - '291'
  - '395'
  - '336'
  - '147'
  - '364'
  - '430'
  - '454'
  - '44'
  - '196'
  - '269'
  - '230'
  - '189'
  - '274'
  - '305'
  - '419'
  - '444'
  - '352'
  - '72'
  - '158'
  - '349'
  - '212'
  - '148'
  - '13'
  - '140'
  - '164'
  - '329'
  - '288'
  - '259'
  - '9'
  - '249'
  - '30'
  - '198'
  - '299'
  - '427'
  - '301'
  - '107'
  - '344'
  - '213'
  - '36'
  - '203'
  - '120'
  - '208'
  - '285'
  - '69'
  - '181'
  - '166'
  - '130'
  - '402'
  - '194'
  - '37'
  - '106'
  - '330'
  - '407'
  - '242'
  - '412'
  - '167'
  - '133'
  - '40'
  - '211'
  - '57'
  - '389'
  - '325'
  - '197'
  - '170'
  - '190'
  - '260'
  - '286'
  - '257'
  - '374'
  - '116'
  - '38'
  - '221'
  - '81'
  - '87'
  - '4'
  - '173'
  - '94'
  - '83'
  - '200'
  - '331'
  - '143'
  - '248'
  - '22'
  - '26'
  - '388'
  - '174'
  - '488'
  - '320'
  - '397'
  - '188'
  - '401'
  - '126'
  - '28'
  - '11'
  - '304'
  - '135'
  - '33'
  - '109'
  - '202'
  - '267'
  - '86'
  - '487'
  - '490'
  - '482'
  - '426'
  - '103'
  - '151'
  - '324'
  - '492'
  - '238'
  - '483'
  - '467'
  - '1'
  - '20'
  - '268'
  - '21'
  - '47'
  - '377'
  - '351'
  - '297'
  - '398'
  - '348'
  - '157'
  - '303'
  - '100'
  - '68'
  - '254'
  - '216'
  - '177'
  - '491'
  - '171'
  - '361'
  - '24'
  - '338'
  - '129'
  - '154'
  - '192'
  - '222'
  - '8'
  - '156'
  - '7'
  - '78'
  - '64'
  - '29'
  - '146'
  - '90'
  - '263'
  - '393'
  - '95'
  - '102'
  - '433'
  - '480'
  - '225'
  - '59'
  - '66'
  - '82'
  - '85'
  - '54'
  - '310'
  - '429'
  - '176'
  - '366'
  - '42'
  - '298'
  - '144'
  - '215'
  - '318'
  - '136'
  - '122'
  - '459'
  - '205'
  - '498'
  - '112'
  - '52'
  - '396'
  - '282'
  - '428'
  - '335'
  - '339'
  - '386'
  - '289'
  - '187'
  - '333'
  - '449'
  - '458'
  - '233'
  - '35'
  - '400'
  - '223'
  - '375'
  - '70'
  - '134'
  - '127'
  - '410'
  - '71'
  - '312'
  - '73'
  - '341'
  - '326'
  - '273'
  - '472'
  - '23'
  - '113'
  - '117'
  - '387'
  - '207'
  - '342'
  - '12'
  - '49'
  - '281'
  - '65'
  - '356'
  - '99'
  - '423'
  - '141'
  - '493'
  - '61'
  - '494'
  - '277'
  - '453'
  - '362'
  - '185'
  - '460'
  - '256'
  - '159'
  - '302'
  - '88'
  - '53'
  - '76'
  - '243'
  - '235'
  - '306'
  - '278'
  - '15'
  - '56'
  - '25'
  - '115'
  - '48'
  - '264'
  - '363'
  - '110'
  - '204'
  - '414'
  - '287'
  - '184'
  - '172'
  - '383'
  - '316'
  - '424'
  - '169'
  - '358'
  - '14'
  - '206'
  - '91'
  - '245'
  - '447'
  - '60'
  - '125'
  - '283'
  - '246'
  - '255'
  - '313'
  - '97'
  - '89'
  - '321'
  - '214'
  - '314'
  - '464'
  - '27'
  - '294'
  - '497'
  - '128'
  - '451'
  - '365'
  - '478'
  - '337'
  - '226'
  - '422'
  - '471'
  - '381'
  - '63'
  - '452'
  - '290'
  - '118'
  - '51'
  - '261'
  - '432'
  - '376'
  - '31'
  - '80'
  - '142'
  - '295'
  - '275'
  - '272'
  - '123'
  - '270'
  - '236'
  - '195'
  - '469'
  - '50'
  - '218'
  - '435'
  - '479'
  - '315'
  - '182'
  - '372'
  - '446'
  - '132'
  - '327'
  - '229'
  - '217'
  - '373'
  - '340'
  - '153'
  - '2'
  - '163'
  - '199'
  - '378'
  - '101'
  - '79'
  - '96'
  - '434'
  - '489'
  - '247'
  - '440'
  - '448'
  - '139'
  - '466'
  - '150'
  - '465'
  - '62'
  - '421'
  - '252'
  - '104'
  - '180'
  - '232'
  - '108'
  - '307'
  - '219'
  - '228'
  - '322'
  - '455'
  - '370'
  - '39'
  - '280'
  - '114'
  - '240'
  - '137'
  - '179'
  - '162'
  - '406'
  - '168'
  - '368'
  - '473'
  - '75'
  - '441'
  - '266'
  - '442'
  - '119'
  - '347'
  - '92'
  - '209'
  - '470'
  - '296'
  - '476'
  - '93'
  - '191'
  - '437'
  - '293'
  - '186'
  - '111'
  - '265'
  - '183'
  - '145'
  - '394'
  - '155'
  - '420'
  - '438'
  - '5'
  - '463'
  - '431'
  - '334'
  - '138'
  - '3'
  - '369'
  - '403'
  - '84'
  - '152'
  - '392'
  - '18'
  - '231'
  - '417'
  - '160'
  - '357'
  - '323'
  - '475'
  - '131'
  - '485'
  - '350'
  - '450'
  - '439'
  - '353'
  - '443'
  - '384'
  - '16'
  - '201'
  - '346'
  - '253'
  - '404'
  - '445'
  - '250'
  - '165'
  - '98'
  - '193'
  - '300'
  - '328'
  - '234'
  - '496'
  - '67'
  - '359'
  - '46'
  - '345'
  - '317'
  - '354'
  - '385'
  - '276'
  - '309'
  - '425'
  - '311'
  - '456'
  - '220'
  - '178'
  - '124'
  - '244'
  - '416'
  - '399'
  - '161'
  - '413'
  - '308'
  - '371'
  - '258'
  - '45'
  - '360'
  - '149'
  - '284'
  - '241'
  - '319'
  - '411'
  - '461'
  - '237'
  - '408'
  - '390'
  - '227'
  - '382'
  - '10'
  - '292'
  - '355'
  - '262'
  - '418'
  - '379'
  - '6'
  - '271'
  - '380'
  - '105'
  - '251'
  - '175'
  - '239'
  - '210'
  - '74'
  - '495'
  - '279'
  - '457'
  - '343'
  - '77'
  - '19'
  - '391'
  - '121'
  - '499'
  - '474'
  - '484'
  - '58'
  # NOTE(review): the collapsed copy had two EMPTY entries here, which looks
  # like angle-bracket tokens stripped during extraction. '<unk>' and
  # '<sos/eos>' are the presumed originals - confirm against the run's
  # token list before using this file.
  - '<unk>'
  - '<sos/eos>'

# --- Model input/output and collation ---
init: null
# `noise_apply_prob` appears both here and at top level further down; the two
# can only coexist if this one is nested, so `mix_speech` (which precedes it)
# is nested as well.
collate_fn_conf:
  label_downsampling: 1
  pad: false
  rand_crop: true
  mix_speech: true
  noise_apply_prob: 0.2
input_size: 1
num_classes: 500

# --- Preprocessing and augmentation ---
use_preprocessor: true
use_mixing: true
cs_aug: false
mixing_splits: 16
token_type: word
bpemodel: null
non_linguistic_symbols: null
cleaner: null
g2p: null
speech_volume_normalize: null
rir_scp: null
rir_apply_prob: 1.0
noise_scp: data/noise/wav.scp
noise_apply_prob: 0.2
# Kept quoted: the value is a string, not a number.
noise_db_range: '13_15'

# --- Loss weighting ---
pred_masked_weight: 1.0
pred_nomask_weight: 0.0
loss_weights: 0.0

# --- Model components ---
frontend: null
frontend_conf: {}
specaug: null
specaug_conf: {}
normalize: null
normalize_conf: {}
preencoder: null
preencoder_conf: {}
encoder: torchaudio_hubert
encoder_conf:
  encoder_projection_dropout: 0.0
  encoder_attention_dropout: 0.0
  encoder_ff_interm_dropout: 0.0
  encoder_dropout: 0.0
  encoder_layer_drop: 0.0
  extractor_mode: layer_norm
  encoder_embed_dim: 1024
  encoder_num_layers: 24
  encoder_num_heads: 16
  encoder_ff_interm_features: 4096
  encoder_layer_norm_first: true
  final_dim: 768
  feature_grad_mult: null
model: torchaudio
model_conf: {}

# --- Dump metadata ---
required:
  - output_dir
  - token_list
# Kept quoted so the version stays a string.
version: '202211'
distributed: true