# @package _group_

common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog

checkpoint:
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true

distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 128
  distributed_port: 29671
  nprocs_per_node: 8
  find_unused_parameters: true

task:
  _name: hubert_pretraining
  data: ???
  label_dir: ???
  labels: ???
  label_rate: ${model.label_rate}
  sample_rate: 16000
  max_sample_size: 250000
  min_sample_size: 32000
  pad_audio: false
  random_crop: true
  normalize: true # must be consistent with extractor

dataset:
  num_workers: 6
  max_tokens: 900000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: 5
  validate_interval_updates: 10000

criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10,]

optimization:
  max_update: 400000
  lr: [0.0015]
  clip_norm: 1.0

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

model:
  _name: hubert
  label_rate: ???
  encoder_layers: 24
  encoder_embed_dim: 1024
  encoder_ffn_embed_dim: 4096
  encoder_attention_heads: 16
  final_dim: 768
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: layer_norm
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  encoder_layerdrop: 0.0
  dropout_input: 0.0
  dropout_features: 0.0
  dropout: 0.0
  attention_dropout: 0.0
  layer_norm_first: true
  feature_grad_mult: 1.0
  untie_final_proj: true
  activation_dropout: 0.0

hydra:
  job:
    config:
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        exclude_keys:
          - run
          - task.data
  run:
    dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt
  sweep:
    dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}
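
# ---------------------------------------------------------------------------
# Usage sketch (comments only, not part of the config). Assuming this file
# lives under a fairseq checkout's examples/hubert/config/pretrain directory
# with a config name such as hubert_large_librivox (an assumed filename), a
# run can be launched with fairseq's standard fairseq-hydra-train entry
# point. The `???` fields above (task.data, task.label_dir, task.labels,
# model.label_rate) are mandatory and must be supplied as overrides; all
# paths below are placeholders.
#
#   fairseq-hydra-train \
#     --config-dir /path/to/fairseq/examples/hubert/config/pretrain \
#     --config-name hubert_large_librivox \
#     task.data=/path/to/tsv_dir \
#     task.label_dir=/path/to/label_dir \
#     task.labels='["km"]' \
#     model.label_rate=50
#
# Note that task.label_rate is interpolated from model.label_rate via
# ${model.label_rate}, so only the model value needs to be set. The value
# must match the frame rate of the pretraining labels (e.g. 50 Hz for labels
# produced at a 20 ms stride, which matches the 320-sample total stride of
# conv_feature_layers at a 16 kHz sample_rate).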