diff --git "a/exp/log/log-train-2022-12-01-19-18-32" "b/exp/log/log-train-2022-12-01-19-18-32" new file mode 100644--- /dev/null +++ "b/exp/log/log-train-2022-12-01-19-18-32" @@ -0,0 +1,6900 @@ +2022-12-01 19:18:32,911 INFO [train.py:946] Training started +2022-12-01 19:18:32,912 INFO [train.py:956] Device: cuda:0 +2022-12-01 19:18:32,915 INFO [train.py:965] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.22', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '', 'k2-git-date': '', 'lhotse-version': '1.10.0', 'torch-version': '1.12.1', 'torch-cuda-available': True, 'torch-cuda-version': '11.6', 'python-version': '3.9', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'e5d9426-dirty', 'icefall-git-date': 'Tue Nov 22 11:45:03 2022', 'icefall-path': '/root/workspace/icefall', 'k2-path': '/root/workspace/k2/k2/python/k2/__init__.py', 'lhotse-path': '/root/miniconda3/envs/k2/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'VM-0-13-centos', 'IP address': '127.0.0.1'}, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 30, 'start_epoch': 1, 'start_batch': 0, 'exp_dir': PosixPath('pruned_transducer_stateless7/exp'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'base_lr': 0.05, 'lr_batches': 5000, 'lr_epochs': 3.5, 'context_size': 2, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 2000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': False, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200.0, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'blank_id': 0, 'vocab_size': 500} +2022-12-01 19:18:32,915 INFO [train.py:967] About to create model +2022-12-01 19:18:33,347 INFO [zipformer.py:179] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8. +2022-12-01 19:18:33,359 INFO [train.py:971] Number of model parameters: 70369391 +2022-12-01 19:18:35,435 INFO [asr_datamodule.py:394] About to get train cuts from data/fbank/xbmu_amdo31_cuts_train.jsonl.gz +2022-12-01 19:18:35,436 INFO [asr_datamodule.py:219] Enable MUSAN +2022-12-01 19:18:35,436 INFO [asr_datamodule.py:220] About to get Musan cuts +2022-12-01 19:18:37,206 INFO [asr_datamodule.py:244] Enable SpecAugment +2022-12-01 19:18:37,206 INFO [asr_datamodule.py:245] Time warp factor: 80 +2022-12-01 19:18:37,206 INFO [asr_datamodule.py:255] Num frame mask: 10 +2022-12-01 19:18:37,206 INFO [asr_datamodule.py:268] About to create train dataset +2022-12-01 19:18:37,206 INFO [asr_datamodule.py:295] Using DynamicBucketingSampler. 
+2022-12-01 19:18:37,453 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:18:38,262 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:18:38,547 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:18:38,577 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:18:39,636 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:18:39,949 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:18:40,334 INFO [asr_datamodule.py:310] About to create train dataloader +2022-12-01 19:18:40,335 INFO [asr_datamodule.py:401] About to get valid cuts from data/fbank/xbmu_amdo31_cuts_dev.jsonl.gz +2022-12-01 19:18:40,336 INFO [asr_datamodule.py:341] About to create dev dataset +2022-12-01 19:18:40,417 INFO [asr_datamodule.py:358] About to create dev dataloader +2022-12-01 19:18:40,418 INFO [train.py:1173] Sanity check -- see if any of the batches in epoch 1 would cause OOM. +2022-12-01 19:18:40,665 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:18:41,193 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:18:41,794 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:18:41,825 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:18:42,594 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:18:42,900 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:18:43,972 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:18:44,501 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. 
Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:18:44,779 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:18:44,809 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:18:45,870 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:18:46,176 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:18:47,387 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 19:18:47,628 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:18:47,762 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 19:18:48,385 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 19:18:48,717 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 19:18:49,275 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 19:18:49,426 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:18:55,332 INFO [train.py:1201] Maximum memory allocated so far is 6661MB +2022-12-01 19:18:56,448 INFO [train.py:1201] Maximum memory allocated so far is 7023MB +2022-12-01 19:18:57,856 INFO [train.py:1201] Maximum memory allocated so far is 7023MB +2022-12-01 19:18:58,844 INFO [train.py:1201] Maximum memory allocated so far is 7023MB +2022-12-01 19:19:00,819 INFO [train.py:1201] Maximum memory allocated so far is 7023MB +2022-12-01 19:19:01,967 INFO [train.py:1201] Maximum memory allocated so far is 7023MB +2022-12-01 19:19:02,337 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:19:02,893 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:19:03,550 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:19:03,581 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:19:04,356 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:19:04,666 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:19:06,862 INFO [train.py:876] Epoch 1, batch 0, loss[loss=7.044, simple_loss=6.378, pruned_loss=6.654, over 4738.00 frames. 
], tot_loss[loss=7.044, simple_loss=6.378, pruned_loss=6.654, over 4738.00 frames. ], batch size: 23, lr: 2.50e-02, +2022-12-01 19:19:06,864 INFO [train.py:901] Computing validation loss +2022-12-01 19:19:21,856 INFO [train.py:910] Epoch 1, validation: loss=6.619, simple_loss=5.967, pruned_loss=6.511, over 253132.00 frames. +2022-12-01 19:19:21,857 INFO [train.py:911] Maximum memory allocated so far is 7023MB +2022-12-01 19:19:24,229 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=5.0, num_to_drop=2, layers_to_drop={0, 3} +2022-12-01 19:19:34,645 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:19:50,482 INFO [train.py:876] Epoch 1, batch 50, loss[loss=1.019, simple_loss=0.9016, pruned_loss=1.048, over 4795.00 frames. ], tot_loss[loss=1.976, simple_loss=1.8, pruned_loss=1.695, over 215200.94 frames. ], batch size: 32, lr: 2.75e-02, +2022-12-01 19:19:56,696 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=9.80 vs. limit=2.0 +2022-12-01 19:20:04,295 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=8.75 vs. limit=2.0 +2022-12-01 19:20:07,455 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 19:20:09,347 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=83.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:20:18,053 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=8.90 vs. limit=2.0 +2022-12-01 19:20:19,380 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 2.904e+01 8.190e+01 1.403e+02 3.356e+02 8.475e+03, threshold=2.807e+02, percent-clipped=0.0 +2022-12-01 19:20:19,417 INFO [train.py:876] Epoch 1, batch 100, loss[loss=0.6686, simple_loss=0.5701, pruned_loss=0.7764, over 4689.00 frames. ], tot_loss[loss=1.426, simple_loss=1.277, pruned_loss=1.347, over 376714.21 frames. ], batch size: 21, lr: 3.00e-02, +2022-12-01 19:20:33,885 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:20:37,717 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.55 vs. limit=2.0 +2022-12-01 19:20:44,553 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=144.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:20:45,982 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=59.95 vs. limit=5.0 +2022-12-01 19:20:47,400 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 19:20:48,589 INFO [train.py:876] Epoch 1, batch 150, loss[loss=0.6836, simple_loss=0.5807, pruned_loss=0.743, over 4655.00 frames. ], tot_loss[loss=1.189, simple_loss=1.049, pruned_loss=1.183, over 504975.23 frames. ], batch size: 21, lr: 3.25e-02, +2022-12-01 19:21:18,809 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 3.731e+01 5.639e+01 6.607e+01 8.295e+01 1.493e+02, threshold=1.321e+02, percent-clipped=0.0 +2022-12-01 19:21:18,845 INFO [train.py:876] Epoch 1, batch 200, loss[loss=0.886, simple_loss=0.7601, pruned_loss=0.8616, over 4840.00 frames. ], tot_loss[loss=1.045, simple_loss=0.9133, pruned_loss=1.052, over 604879.22 frames. ], batch size: 49, lr: 3.50e-02, +2022-12-01 19:21:48,720 INFO [train.py:876] Epoch 1, batch 250, loss[loss=0.806, simple_loss=0.6795, pruned_loss=0.7946, over 4893.00 frames. 
], tot_loss[loss=0.9592, simple_loss=0.8319, pruned_loss=0.964, over 680327.86 frames. ], batch size: 38, lr: 3.75e-02, +2022-12-01 19:21:54,107 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 19:21:55,475 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5720, 4.2692, 3.8613, 4.0525, 4.2913, 4.1505, 4.5589, 4.3998], + device='cuda:0'), covar=tensor([0.0101, 0.0137, 0.0216, 0.0171, 0.0098, 0.0139, 0.0107, 0.0172], + device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0010, 0.0010, 0.0009, 0.0010, 0.0010, 0.0009], + device='cuda:0'), out_proj_covar=tensor([8.9048e-06, 8.9000e-06, 9.0381e-06, 9.4650e-06, 9.0976e-06, 9.0842e-06, + 8.9944e-06, 8.9197e-06], device='cuda:0') +2022-12-01 19:22:12,387 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5933, 4.5900, 4.5935, 4.5936, 4.5932, 4.5944, 4.5922, 4.5883], + device='cuda:0'), covar=tensor([0.0020, 0.0022, 0.0024, 0.0019, 0.0018, 0.0021, 0.0018, 0.0019], + device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009], + device='cuda:0'), out_proj_covar=tensor([8.8591e-06, 8.9126e-06, 8.7890e-06, 8.6446e-06, 8.8003e-06, 8.7499e-06, + 8.6969e-06, 8.8197e-06], device='cuda:0') +2022-12-01 19:22:15,387 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=296.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:22:17,698 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=300.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:22:18,161 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 5.001e+01 6.326e+01 7.800e+01 9.512e+01 1.835e+02, threshold=1.560e+02, percent-clipped=6.0 +2022-12-01 19:22:18,194 INFO [train.py:876] Epoch 1, batch 300, loss[loss=0.8167, simple_loss=0.6873, pruned_loss=0.7692, over 4856.00 frames. ], tot_loss[loss=0.9002, simple_loss=0.775, pruned_loss=0.8966, over 741662.68 frames. ], batch size: 49, lr: 4.00e-02, +2022-12-01 19:22:18,536 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.65 vs. limit=5.0 +2022-12-01 19:22:29,916 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 19:22:40,095 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6091, 3.6095, 3.6076, 3.6101, 3.6009, 3.6048, 3.6091, 3.6089], + device='cuda:0'), covar=tensor([0.0028, 0.0023, 0.0032, 0.0029, 0.0040, 0.0026, 0.0036, 0.0030], + device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009], + device='cuda:0'), out_proj_covar=tensor([8.9878e-06, 8.8810e-06, 9.1622e-06, 9.1449e-06, 9.3954e-06, 9.1117e-06, + 9.1673e-06, 9.0237e-06], device='cuda:0') +2022-12-01 19:22:47,520 INFO [train.py:876] Epoch 1, batch 350, loss[loss=0.7013, simple_loss=0.5793, pruned_loss=0.6698, over 4912.00 frames. ], tot_loss[loss=0.8566, simple_loss=0.7318, pruned_loss=0.8446, over 789783.69 frames. 
], batch size: 32, lr: 4.25e-02, +2022-12-01 19:22:51,127 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=357.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:23:08,539 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=387.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:23:17,197 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 4.292e+01 6.333e+01 7.416e+01 8.541e+01 1.371e+02, threshold=1.483e+02, percent-clipped=0.0 +2022-12-01 19:23:17,230 INFO [train.py:876] Epoch 1, batch 400, loss[loss=0.8446, simple_loss=0.6965, pruned_loss=0.777, over 4698.00 frames. ], tot_loss[loss=0.8291, simple_loss=0.7027, pruned_loss=0.8081, over 825540.33 frames. ], batch size: 63, lr: 4.50e-02, +2022-12-01 19:23:28,345 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 19:23:28,780 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=6.85 vs. limit=2.0 +2022-12-01 19:23:39,441 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=439.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:23:42,737 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:23:44,538 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=448.0, num_to_drop=2, layers_to_drop={0, 3} +2022-12-01 19:23:46,194 INFO [train.py:876] Epoch 1, batch 450, loss[loss=0.8585, simple_loss=0.7004, pruned_loss=0.7826, over 4794.00 frames. ], tot_loss[loss=0.8093, simple_loss=0.6803, pruned_loss=0.7793, over 852587.18 frames. ], batch size: 54, lr: 4.75e-02, +2022-12-01 19:24:15,231 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 4.423e+01 6.588e+01 7.686e+01 9.402e+01 2.139e+02, threshold=1.537e+02, percent-clipped=1.0 +2022-12-01 19:24:15,264 INFO [train.py:876] Epoch 1, batch 500, loss[loss=0.8432, simple_loss=0.6827, pruned_loss=0.756, over 4781.00 frames. ], tot_loss[loss=0.7926, simple_loss=0.6606, pruned_loss=0.755, over 871109.32 frames. ], batch size: 51, lr: 4.99e-02, +2022-12-01 19:24:31,335 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9948, 2.9946, 2.9948, 2.9937, 2.9935, 2.9945, 2.9947, 2.9926], + device='cuda:0'), covar=tensor([0.0035, 0.0032, 0.0028, 0.0028, 0.0034, 0.0027, 0.0033, 0.0033], + device='cuda:0'), in_proj_covar=tensor([0.0010, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009], + device='cuda:0'), out_proj_covar=tensor([9.0877e-06, 8.8438e-06, 8.9211e-06, 9.2128e-06, 9.3073e-06, 9.1738e-06, + 9.0988e-06, 9.1052e-06], device='cuda:0') +2022-12-01 19:24:45,331 INFO [train.py:876] Epoch 1, batch 550, loss[loss=0.8429, simple_loss=0.6746, pruned_loss=0.7509, over 4804.00 frames. ], tot_loss[loss=0.7869, simple_loss=0.6498, pruned_loss=0.7404, over 891005.23 frames. ], batch size: 54, lr: 4.98e-02, +2022-12-01 19:24:51,885 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=562.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:24:55,027 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=11.37 vs. 
limit=5.0 +2022-12-01 19:24:55,490 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=568.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:25:08,665 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=590.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:25:11,098 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7004, 4.6956, 4.6961, 4.6956, 4.7031, 4.7026, 4.7025, 4.7009], + device='cuda:0'), covar=tensor([0.0026, 0.0042, 0.0034, 0.0031, 0.0031, 0.0041, 0.0031, 0.0036], + device='cuda:0'), in_proj_covar=tensor([0.0008, 0.0009, 0.0009, 0.0008, 0.0008, 0.0009, 0.0008, 0.0009], + device='cuda:0'), out_proj_covar=tensor([8.5351e-06, 8.8921e-06, 8.5839e-06, 8.4305e-06, 8.4170e-06, 8.6259e-06, + 8.2974e-06, 8.6003e-06], device='cuda:0') +2022-12-01 19:25:14,915 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=600.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:25:15,385 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 5.523e+01 6.668e+01 7.346e+01 9.390e+01 2.419e+02, threshold=1.469e+02, percent-clipped=3.0 +2022-12-01 19:25:15,418 INFO [train.py:876] Epoch 1, batch 600, loss[loss=0.791, simple_loss=0.6236, pruned_loss=0.7048, over 4844.00 frames. ], tot_loss[loss=0.7823, simple_loss=0.6403, pruned_loss=0.7272, over 906994.98 frames. ], batch size: 40, lr: 4.98e-02, +2022-12-01 19:25:28,373 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=623.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:25:31,902 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=629.0, num_to_drop=2, layers_to_drop={2, 3} +2022-12-01 19:25:39,683 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=5.10 vs. limit=2.0 +2022-12-01 19:25:42,919 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=648.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:25:44,725 INFO [train.py:876] Epoch 1, batch 650, loss[loss=0.5422, simple_loss=0.4215, pruned_loss=0.4816, over 4683.00 frames. ], tot_loss[loss=0.774, simple_loss=0.6277, pruned_loss=0.7115, over 919276.30 frames. ], batch size: 21, lr: 4.98e-02, +2022-12-01 19:25:44,880 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=651.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:25:45,332 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=652.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:25:52,076 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.90 vs. limit=5.0 +2022-12-01 19:26:06,222 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=12.65 vs. limit=5.0 +2022-12-01 19:26:14,047 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 4.981e+01 6.648e+01 7.701e+01 9.062e+01 4.059e+02, threshold=1.540e+02, percent-clipped=4.0 +2022-12-01 19:26:14,080 INFO [train.py:876] Epoch 1, batch 700, loss[loss=0.7377, simple_loss=0.5703, pruned_loss=0.6435, over 4922.00 frames. ], tot_loss[loss=0.7662, simple_loss=0.616, pruned_loss=0.6965, over 925868.62 frames. 
], batch size: 31, lr: 4.98e-02, +2022-12-01 19:26:36,164 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=739.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:26:38,527 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=743.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:26:43,153 INFO [train.py:876] Epoch 1, batch 750, loss[loss=0.8366, simple_loss=0.6389, pruned_loss=0.7254, over 4861.00 frames. ], tot_loss[loss=0.7605, simple_loss=0.6055, pruned_loss=0.6845, over 930314.39 frames. ], batch size: 40, lr: 4.97e-02, +2022-12-01 19:27:04,385 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=787.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:27:06,919 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=36.09 vs. limit=5.0 +2022-12-01 19:27:12,837 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 5.354e+01 6.843e+01 7.951e+01 8.946e+01 1.729e+02, threshold=1.590e+02, percent-clipped=2.0 +2022-12-01 19:27:12,870 INFO [train.py:876] Epoch 1, batch 800, loss[loss=0.6944, simple_loss=0.5258, pruned_loss=0.5947, over 4745.00 frames. ], tot_loss[loss=0.7636, simple_loss=0.6026, pruned_loss=0.6789, over 934517.24 frames. ], batch size: 27, lr: 4.97e-02, +2022-12-01 19:27:40,031 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=847.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:27:42,428 INFO [train.py:876] Epoch 1, batch 850, loss[loss=0.7698, simple_loss=0.5824, pruned_loss=0.6445, over 4808.00 frames. ], tot_loss[loss=0.7646, simple_loss=0.598, pruned_loss=0.672, over 938450.16 frames. ], batch size: 33, lr: 4.96e-02, +2022-12-01 19:28:10,056 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.66 vs. limit=5.0 +2022-12-01 19:28:11,418 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 4.800e+01 7.929e+01 9.204e+01 1.165e+02 1.586e+02, threshold=1.841e+02, percent-clipped=0.0 +2022-12-01 19:28:11,451 INFO [train.py:876] Epoch 1, batch 900, loss[loss=0.8748, simple_loss=0.6648, pruned_loss=0.7117, over 4039.00 frames. ], tot_loss[loss=0.7705, simple_loss=0.597, pruned_loss=0.6697, over 942230.42 frames. ], batch size: 72, lr: 4.96e-02, +2022-12-01 19:28:15,690 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=908.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:28:21,714 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=918.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:28:25,208 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=924.0, num_to_drop=2, layers_to_drop={2, 3} +2022-12-01 19:28:38,160 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=946.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:28:41,182 INFO [train.py:876] Epoch 1, batch 950, loss[loss=0.7722, simple_loss=0.5705, pruned_loss=0.639, over 4798.00 frames. ], tot_loss[loss=0.774, simple_loss=0.5948, pruned_loss=0.6649, over 942622.75 frames. ], batch size: 32, lr: 4.96e-02, +2022-12-01 19:28:41,830 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=952.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:28:53,110 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=19.26 vs. 
limit=5.0 +2022-12-01 19:29:10,413 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1000.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:29:10,935 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 4.981e+01 8.024e+01 9.216e+01 1.097e+02 1.740e+02, threshold=1.843e+02, percent-clipped=0.0 +2022-12-01 19:29:10,968 INFO [train.py:876] Epoch 1, batch 1000, loss[loss=0.9015, simple_loss=0.6722, pruned_loss=0.7222, over 4690.00 frames. ], tot_loss[loss=0.7799, simple_loss=0.5944, pruned_loss=0.6621, over 947306.20 frames. ], batch size: 63, lr: 4.95e-02, +2022-12-01 19:29:14,065 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.73 vs. limit=2.0 +2022-12-01 19:29:37,585 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1043.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:29:42,686 INFO [train.py:876] Epoch 1, batch 1050, loss[loss=0.7601, simple_loss=0.551, pruned_loss=0.6178, over 4891.00 frames. ], tot_loss[loss=0.7807, simple_loss=0.59, pruned_loss=0.6557, over 948582.87 frames. ], batch size: 30, lr: 4.95e-02, +2022-12-01 19:30:08,007 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1091.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:30:14,370 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 5.571e+01 8.939e+01 1.020e+02 1.258e+02 1.955e+02, threshold=2.040e+02, percent-clipped=1.0 +2022-12-01 19:30:14,403 INFO [train.py:876] Epoch 1, batch 1100, loss[loss=0.8791, simple_loss=0.644, pruned_loss=0.6925, over 4772.00 frames. ], tot_loss[loss=0.7807, simple_loss=0.5854, pruned_loss=0.6485, over 949816.99 frames. ], batch size: 58, lr: 4.94e-02, +2022-12-01 19:30:19,591 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=7.29 vs. limit=2.0 +2022-12-01 19:30:31,474 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.69 vs. limit=5.0 +2022-12-01 19:30:45,876 INFO [train.py:876] Epoch 1, batch 1150, loss[loss=0.8415, simple_loss=0.6068, pruned_loss=0.6624, over 4839.00 frames. ], tot_loss[loss=0.782, simple_loss=0.582, pruned_loss=0.6424, over 952057.59 frames. ], batch size: 41, lr: 4.94e-02, +2022-12-01 19:30:47,307 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1153.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:30:55,182 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.23 vs. limit=2.0 +2022-12-01 19:31:07,801 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.88 vs. limit=5.0 +2022-12-01 19:31:17,737 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 6.580e+01 9.228e+01 1.205e+02 1.424e+02 3.289e+02, threshold=2.411e+02, percent-clipped=3.0 +2022-12-01 19:31:17,770 INFO [train.py:876] Epoch 1, batch 1200, loss[loss=0.65, simple_loss=0.4597, pruned_loss=0.5128, over 4703.00 frames. ], tot_loss[loss=0.785, simple_loss=0.5802, pruned_loss=0.6374, over 951495.17 frames. 
], batch size: 21, lr: 4.93e-02, +2022-12-01 19:31:19,139 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=1203.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:31:25,699 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1214.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:31:28,049 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1218.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:31:29,101 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=4.16 vs. limit=2.0 +2022-12-01 19:31:31,665 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1224.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:31:42,359 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=5.08 vs. limit=2.0 +2022-12-01 19:31:45,262 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1246.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 19:31:48,507 INFO [train.py:876] Epoch 1, batch 1250, loss[loss=0.8235, simple_loss=0.5908, pruned_loss=0.6298, over 4868.00 frames. ], tot_loss[loss=0.7854, simple_loss=0.5768, pruned_loss=0.6305, over 951974.23 frames. ], batch size: 39, lr: 4.92e-02, +2022-12-01 19:31:57,881 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1266.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:32:01,526 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1272.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:32:02,587 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.31 vs. limit=2.0 +2022-12-01 19:32:12,809 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=6.47 vs. limit=2.0 +2022-12-01 19:32:12,876 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.44 vs. limit=2.0 +2022-12-01 19:32:15,736 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1294.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:32:20,381 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.501e+01 9.695e+01 1.196e+02 1.433e+02 3.154e+02, threshold=2.392e+02, percent-clipped=2.0 +2022-12-01 19:32:20,417 INFO [train.py:876] Epoch 1, batch 1300, loss[loss=0.6219, simple_loss=0.4285, pruned_loss=0.4855, over 3817.00 frames. ], tot_loss[loss=0.7858, simple_loss=0.5739, pruned_loss=0.6233, over 950483.51 frames. ], batch size: 14, lr: 4.92e-02, +2022-12-01 19:32:24,790 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=10.01 vs. limit=5.0 +2022-12-01 19:32:29,766 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2153, 3.7675, 3.7324, 3.7587, 3.2075, 3.1506, 3.8093, 3.8147], + device='cuda:0'), covar=tensor([0.3855, 0.1951, 0.1375, 0.1314, 0.2917, 0.3773, 0.1431, 0.1695], + device='cuda:0'), in_proj_covar=tensor([0.0018, 0.0017, 0.0015, 0.0015, 0.0016, 0.0018, 0.0015, 0.0016], + device='cuda:0'), out_proj_covar=tensor([1.5563e-05, 1.4843e-05, 1.3000e-05, 1.4073e-05, 1.5816e-05, 1.6802e-05, + 1.3792e-05, 1.4760e-05], device='cuda:0') +2022-12-01 19:32:41,580 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=4.09 vs. limit=2.0 +2022-12-01 19:32:53,430 INFO [train.py:876] Epoch 1, batch 1350, loss[loss=0.7946, simple_loss=0.5732, pruned_loss=0.5862, over 4877.00 frames. ], tot_loss[loss=0.7875, simple_loss=0.5724, pruned_loss=0.6169, over 949432.94 frames. 
], batch size: 37, lr: 4.91e-02, +2022-12-01 19:33:27,799 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.540e+01 1.172e+02 1.442e+02 1.999e+02 3.841e+02, threshold=2.884e+02, percent-clipped=7.0 +2022-12-01 19:33:27,832 INFO [train.py:876] Epoch 1, batch 1400, loss[loss=0.891, simple_loss=0.641, pruned_loss=0.6496, over 4878.00 frames. ], tot_loss[loss=0.7838, simple_loss=0.568, pruned_loss=0.6061, over 946999.67 frames. ], batch size: 44, lr: 4.91e-02, +2022-12-01 19:33:28,313 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=8.59 vs. limit=5.0 +2022-12-01 19:33:37,043 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.55 vs. limit=2.0 +2022-12-01 19:33:41,514 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1703, 3.4978, 3.3925, 3.0609, 2.8106, 2.8415, 3.3540, 3.1076], + device='cuda:0'), covar=tensor([0.3742, 0.3941, 0.3057, 0.5171, 0.5410, 0.5525, 0.3754, 0.6281], + device='cuda:0'), in_proj_covar=tensor([0.0020, 0.0021, 0.0021, 0.0020, 0.0020, 0.0023, 0.0020, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.7919e-05, 1.8885e-05, 1.6834e-05, 1.8311e-05, 1.9218e-05, 2.1863e-05, + 1.7636e-05, 1.9168e-05], device='cuda:0') +2022-12-01 19:33:45,250 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=11.99 vs. limit=5.0 +2022-12-01 19:33:48,633 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=3.48 vs. limit=2.0 +2022-12-01 19:33:49,046 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-1.pt +2022-12-01 19:33:53,627 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:33:54,551 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:33:54,847 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:33:54,879 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:33:56,064 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:33:56,384 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:33:57,714 INFO [train.py:876] Epoch 2, batch 0, loss[loss=0.8557, simple_loss=0.6201, pruned_loss=0.6146, over 4782.00 frames. ], tot_loss[loss=0.8557, simple_loss=0.6201, pruned_loss=0.6146, over 4782.00 frames. 
], batch size: 51, lr: 4.81e-02, +2022-12-01 19:33:57,715 INFO [train.py:901] Computing validation loss +2022-12-01 19:33:59,624 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3182, 1.5315, 2.1633, 1.9687, 2.2776, 2.2317, 2.3410, 2.1875], + device='cuda:0'), covar=tensor([1.7923, 5.4535, 3.7290, 2.8523, 4.0454, 3.5979, 2.4088, 3.5075], + device='cuda:0'), in_proj_covar=tensor([0.0018, 0.0021, 0.0024, 0.0021, 0.0020, 0.0022, 0.0023, 0.0023], + device='cuda:0'), out_proj_covar=tensor([1.4148e-05, 1.4363e-05, 1.8005e-05, 1.8589e-05, 1.4273e-05, 1.6773e-05, + 1.6441e-05, 1.5357e-05], device='cuda:0') +2022-12-01 19:34:09,976 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1386, 1.6549, 1.3783, 1.3577, 1.7627, 1.9184, 1.7639, 2.0802], + device='cuda:0'), covar=tensor([0.4991, 1.1628, 1.4307, 2.1868, 1.3155, 0.7064, 0.9333, 0.4864], + device='cuda:0'), in_proj_covar=tensor([0.0014, 0.0016, 0.0019, 0.0018, 0.0016, 0.0014, 0.0017, 0.0015], + device='cuda:0'), out_proj_covar=tensor([1.2507e-05, 1.4364e-05, 1.6756e-05, 1.6165e-05, 1.4660e-05, 1.2743e-05, + 1.5008e-05, 1.3408e-05], device='cuda:0') +2022-12-01 19:34:13,354 INFO [train.py:910] Epoch 2, validation: loss=0.7885, simple_loss=0.58, pruned_loss=0.5589, over 253132.00 frames. +2022-12-01 19:34:13,354 INFO [train.py:911] Maximum memory allocated so far is 7023MB +2022-12-01 19:34:14,302 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1434.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:34:47,329 INFO [train.py:876] Epoch 2, batch 50, loss[loss=0.7611, simple_loss=0.5425, pruned_loss=0.547, over 4852.00 frames. ], tot_loss[loss=0.7642, simple_loss=0.5468, pruned_loss=0.551, over 212365.93 frames. ], batch size: 35, lr: 4.80e-02, +2022-12-01 19:34:55,622 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1495.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:34:59,569 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 6.890e+01 1.290e+02 1.605e+02 2.279e+02 4.611e+02, threshold=3.209e+02, percent-clipped=11.0 +2022-12-01 19:35:01,225 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1503.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:35:01,861 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 19:35:05,647 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=1509.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:35:22,319 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.88 vs. limit=2.0 +2022-12-01 19:35:22,534 INFO [train.py:876] Epoch 2, batch 100, loss[loss=0.7132, simple_loss=0.5135, pruned_loss=0.502, over 4799.00 frames. ], tot_loss[loss=0.7475, simple_loss=0.5382, pruned_loss=0.5324, over 375123.65 frames. ], batch size: 33, lr: 4.79e-02, +2022-12-01 19:35:28,648 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=10.57 vs. limit=5.0 +2022-12-01 19:35:36,086 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1551.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:35:37,444 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. 
Duration: 0.9818125 +2022-12-01 19:35:39,586 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4729, 3.4389, 3.5936, 3.3612, 3.4058, 3.0780, 3.4490, 3.4123], + device='cuda:0'), covar=tensor([0.2542, 0.2237, 0.2267, 0.2080, 0.3167, 0.2771, 0.1904, 0.1990], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0022, 0.0023, 0.0022, 0.0023, 0.0024, 0.0023, 0.0022], + device='cuda:0'), out_proj_covar=tensor([2.1774e-05, 1.9765e-05, 1.9293e-05, 2.1160e-05, 2.1710e-05, 2.4388e-05, + 1.9281e-05, 1.9579e-05], device='cuda:0') +2022-12-01 19:35:46,713 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1566.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:35:53,606 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 19:35:58,474 INFO [train.py:876] Epoch 2, batch 150, loss[loss=0.6892, simple_loss=0.5102, pruned_loss=0.4688, over 4838.00 frames. ], tot_loss[loss=0.7481, simple_loss=0.5402, pruned_loss=0.5276, over 504700.91 frames. ], batch size: 34, lr: 4.79e-02, +2022-12-01 19:36:11,859 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.542e+02 1.949e+02 2.627e+02 4.932e+02, threshold=3.898e+02, percent-clipped=11.0 +2022-12-01 19:36:19,101 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1611.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:36:20,241 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.30 vs. limit=2.0 +2022-12-01 19:36:22,680 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4696, 2.6629, 2.3042, 2.2397, 2.3107, 2.9155, 2.5944, 2.5387], + device='cuda:0'), covar=tensor([0.5965, 0.4614, 0.7060, 0.6137, 0.3852, 0.4199, 0.6038, 0.6735], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0019, 0.0024, 0.0024, 0.0020, 0.0023, 0.0025, 0.0025], + device='cuda:0'), out_proj_covar=tensor([1.9127e-05, 2.1364e-05, 2.0177e-05, 2.0812e-05, 1.6029e-05, 1.9187e-05, + 1.9562e-05, 1.7447e-05], device='cuda:0') +2022-12-01 19:36:28,505 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=4.91 vs. limit=2.0 +2022-12-01 19:36:30,261 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1627.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:36:34,148 INFO [train.py:876] Epoch 2, batch 200, loss[loss=0.7745, simple_loss=0.5528, pruned_loss=0.5359, over 4799.00 frames. ], tot_loss[loss=0.7403, simple_loss=0.5371, pruned_loss=0.5163, over 603493.78 frames. ], batch size: 32, lr: 4.78e-02, +2022-12-01 19:36:38,713 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.32 vs. limit=2.0 +2022-12-01 19:37:01,477 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=10.44 vs. limit=5.0 +2022-12-01 19:37:02,082 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1672.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:37:10,027 INFO [train.py:876] Epoch 2, batch 250, loss[loss=0.7915, simple_loss=0.5877, pruned_loss=0.5262, over 4882.00 frames. ], tot_loss[loss=0.7357, simple_loss=0.5366, pruned_loss=0.5071, over 680343.65 frames. ], batch size: 44, lr: 4.77e-02, +2022-12-01 19:37:14,343 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=8.11 vs. limit=5.0 +2022-12-01 19:37:15,239 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. 
Duration: 0.9454375 +2022-12-01 19:37:22,913 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.213e+02 1.955e+02 2.469e+02 3.199e+02 6.193e+02, threshold=4.937e+02, percent-clipped=7.0 +2022-12-01 19:37:25,416 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5478, 2.5300, 2.5004, 2.4639, 2.5099, 2.1658, 2.6409, 2.5615], + device='cuda:0'), covar=tensor([0.3901, 0.3777, 0.3816, 0.3205, 0.3767, 0.4944, 0.3707, 0.3155], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0022, 0.0024, 0.0020, 0.0021, 0.0023, 0.0022, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.7983e-05, 1.7687e-05, 1.8137e-05, 1.6120e-05, 1.8195e-05, 2.0149e-05, + 1.7942e-05, 1.7565e-05], device='cuda:0') +2022-12-01 19:37:25,692 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.09 vs. limit=2.0 +2022-12-01 19:37:36,099 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.51 vs. limit=2.0 +2022-12-01 19:37:39,539 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3835, 4.4857, 4.2996, 4.3510, 4.4696, 3.7159, 3.9427, 4.2811], + device='cuda:0'), covar=tensor([0.1685, 0.1744, 0.1503, 0.1646, 0.1439, 0.3124, 0.1840, 0.1629], + device='cuda:0'), in_proj_covar=tensor([0.0010, 0.0009, 0.0010, 0.0010, 0.0010, 0.0014, 0.0010, 0.0011], + device='cuda:0'), out_proj_covar=tensor([9.3117e-06, 8.7292e-06, 9.3012e-06, 9.1360e-06, 1.0326e-05, 1.5068e-05, + 1.0247e-05, 1.0409e-05], device='cuda:0') +2022-12-01 19:37:46,982 INFO [train.py:876] Epoch 2, batch 300, loss[loss=0.6801, simple_loss=0.5269, pruned_loss=0.4336, over 4928.00 frames. ], tot_loss[loss=0.7277, simple_loss=0.5344, pruned_loss=0.4953, over 742416.07 frames. ], batch size: 32, lr: 4.77e-02, +2022-12-01 19:37:47,302 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=22.71 vs. limit=5.0 +2022-12-01 19:37:57,346 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 19:38:19,301 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=4.75 vs. limit=2.0 +2022-12-01 19:38:20,161 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=9.00 vs. limit=5.0 +2022-12-01 19:38:23,328 INFO [train.py:876] Epoch 2, batch 350, loss[loss=0.7375, simple_loss=0.5624, pruned_loss=0.4719, over 4866.00 frames. ], tot_loss[loss=0.7138, simple_loss=0.5279, pruned_loss=0.4799, over 788055.44 frames. ], batch size: 39, lr: 4.76e-02, +2022-12-01 19:38:28,323 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=1790.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 19:38:36,189 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.189e+02 2.177e+02 2.902e+02 3.978e+02 7.507e+02, threshold=5.804e+02, percent-clipped=10.0 +2022-12-01 19:38:42,425 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=1809.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:39:00,481 INFO [train.py:876] Epoch 2, batch 400, loss[loss=0.642, simple_loss=0.4937, pruned_loss=0.405, over 4861.00 frames. ], tot_loss[loss=0.7027, simple_loss=0.5231, pruned_loss=0.4667, over 824245.91 frames. ], batch size: 36, lr: 4.75e-02, +2022-12-01 19:39:04,340 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1838.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:39:11,617 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. 
Duration: 0.97775 +2022-12-01 19:39:13,351 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0942, 2.5998, 2.7057, 2.4769, 2.9310, 2.6281, 2.8059, 2.8522], + device='cuda:0'), covar=tensor([1.3731, 1.3569, 1.4232, 1.0721, 0.8609, 1.3350, 1.0127, 1.0092], + device='cuda:0'), in_proj_covar=tensor([0.0019, 0.0019, 0.0019, 0.0016, 0.0017, 0.0018, 0.0021, 0.0021], + device='cuda:0'), out_proj_covar=tensor([1.2314e-05, 1.1944e-05, 1.4990e-05, 1.3695e-05, 1.0367e-05, 1.1442e-05, + 1.2542e-05, 1.4460e-05], device='cuda:0') +2022-12-01 19:39:18,289 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=1857.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:39:18,480 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1857.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:39:28,623 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=1870.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:39:30,429 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=3.73 vs. limit=2.0 +2022-12-01 19:39:31,506 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:39:33,154 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2113, 1.8421, 2.1841, 1.8555, 2.3872, 2.6103, 1.8346, 2.4176], + device='cuda:0'), covar=tensor([0.6583, 0.8772, 0.4599, 0.6412, 0.6907, 0.4494, 0.6046, 0.4516], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0036, 0.0027, 0.0032, 0.0031, 0.0029, 0.0033, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.1442e-05, 2.4165e-05, 2.0409e-05, 2.4370e-05, 2.4465e-05, 2.0568e-05, + 2.1878e-05, 2.1703e-05], device='cuda:0') +2022-12-01 19:39:37,747 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5667, 2.7219, 3.1022, 2.7150, 2.7802, 2.4709, 2.8417, 2.5674], + device='cuda:0'), covar=tensor([0.6569, 0.4708, 0.3168, 0.3655, 0.2972, 0.5468, 0.2906, 0.4870], + device='cuda:0'), in_proj_covar=tensor([0.0022, 0.0021, 0.0018, 0.0020, 0.0018, 0.0021, 0.0019, 0.0022], + device='cuda:0'), out_proj_covar=tensor([1.9685e-05, 1.7751e-05, 1.5186e-05, 1.6233e-05, 1.5650e-05, 1.7359e-05, + 1.5098e-05, 1.9210e-05], device='cuda:0') +2022-12-01 19:39:38,033 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 19:39:38,312 INFO [train.py:876] Epoch 2, batch 450, loss[loss=0.5375, simple_loss=0.4284, pruned_loss=0.3281, over 4793.00 frames. ], tot_loss[loss=0.6905, simple_loss=0.5184, pruned_loss=0.4525, over 853748.11 frames. ], batch size: 32, lr: 4.74e-02, +2022-12-01 19:39:50,481 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1899.0, num_to_drop=2, layers_to_drop={2, 3} +2022-12-01 19:39:51,856 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.536e+02 2.460e+02 2.941e+02 4.327e+02 8.468e+02, threshold=5.882e+02, percent-clipped=9.0 +2022-12-01 19:40:05,211 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1918.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:40:08,125 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=1922.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 19:40:14,862 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=1931.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:40:16,021 INFO [train.py:876] Epoch 2, batch 500, loss[loss=0.6887, simple_loss=0.4814, pruned_loss=0.4536, over 4899.00 frames. 
], tot_loss[loss=0.6811, simple_loss=0.5143, pruned_loss=0.4413, over 874820.63 frames. ], batch size: 29, lr: 4.74e-02, +2022-12-01 19:40:41,735 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=1967.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:40:42,682 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1216, 3.9048, 4.0466, 3.7114, 3.6763, 3.3332, 3.6314, 3.7192], + device='cuda:0'), covar=tensor([0.1040, 0.1511, 0.0949, 0.1278, 0.1798, 0.2078, 0.1340, 0.1846], + device='cuda:0'), in_proj_covar=tensor([0.0014, 0.0015, 0.0013, 0.0013, 0.0017, 0.0016, 0.0013, 0.0015], + device='cuda:0'), out_proj_covar=tensor([1.2611e-05, 1.2532e-05, 1.1641e-05, 1.1752e-05, 1.6664e-05, 1.9263e-05, + 1.3101e-05, 1.4897e-05], device='cuda:0') +2022-12-01 19:40:42,951 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.23 vs. limit=2.0 +2022-12-01 19:40:53,583 INFO [train.py:876] Epoch 2, batch 550, loss[loss=0.6442, simple_loss=0.5099, pruned_loss=0.39, over 4900.00 frames. ], tot_loss[loss=0.6685, simple_loss=0.5087, pruned_loss=0.4279, over 889309.05 frames. ], batch size: 31, lr: 4.73e-02, +2022-12-01 19:40:58,564 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=9.57 vs. limit=5.0 +2022-12-01 19:40:59,207 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6001, 2.6035, 2.5148, 2.5911, 2.4480, 2.5533, 2.3138, 2.4401], + device='cuda:0'), covar=tensor([0.5634, 0.3139, 0.6275, 0.5561, 0.5818, 0.7709, 0.4973, 0.7194], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0019, 0.0025, 0.0024, 0.0023, 0.0029, 0.0024, 0.0027], + device='cuda:0'), out_proj_covar=tensor([1.9165e-05, 1.6949e-05, 2.0592e-05, 1.9861e-05, 1.8581e-05, 2.3673e-05, + 1.9205e-05, 1.9475e-05], device='cuda:0') +2022-12-01 19:41:06,845 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-2000.pt +2022-12-01 19:41:09,933 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.510e+02 2.660e+02 3.288e+02 4.548e+02 8.155e+02, threshold=6.575e+02, percent-clipped=13.0 +2022-12-01 19:41:17,366 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.37 vs. limit=5.0 +2022-12-01 19:41:19,256 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.20 vs. limit=5.0 +2022-12-01 19:41:20,126 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.02 vs. limit=2.0 +2022-12-01 19:41:37,058 INFO [train.py:876] Epoch 2, batch 600, loss[loss=0.5264, simple_loss=0.4246, pruned_loss=0.3141, over 4811.00 frames. ], tot_loss[loss=0.6545, simple_loss=0.5035, pruned_loss=0.4134, over 904637.33 frames. ], batch size: 33, lr: 4.72e-02, +2022-12-01 19:41:49,840 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.79 vs. limit=2.0 +2022-12-01 19:41:54,758 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7931, 1.8219, 2.1560, 1.7867, 2.0041, 2.0066, 1.6931, 1.6656], + device='cuda:0'), covar=tensor([0.2581, 0.2279, 0.1761, 0.1760, 0.2429, 0.2166, 0.2131, 0.2852], + device='cuda:0'), in_proj_covar=tensor([0.0017, 0.0014, 0.0015, 0.0013, 0.0015, 0.0015, 0.0015, 0.0017], + device='cuda:0'), out_proj_covar=tensor([1.3456e-05, 1.2735e-05, 1.1170e-05, 1.0666e-05, 1.1009e-05, 1.1453e-05, + 1.3238e-05, 1.1807e-05], device='cuda:0') +2022-12-01 19:42:19,708 INFO [train.py:876] Epoch 2, batch 650, loss[loss=0.4268, simple_loss=0.3232, pruned_loss=0.2652, over 4123.00 frames. ], tot_loss[loss=0.6323, simple_loss=0.4917, pruned_loss=0.3946, over 914203.96 frames. 
], batch size: 16, lr: 4.71e-02, +2022-12-01 19:42:26,046 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2090.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:42:34,920 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.302e+02 2.313e+02 2.790e+02 3.362e+02 8.512e+02, threshold=5.581e+02, percent-clipped=1.0 +2022-12-01 19:43:01,152 INFO [train.py:876] Epoch 2, batch 700, loss[loss=0.6009, simple_loss=0.4881, pruned_loss=0.3569, over 4800.00 frames. ], tot_loss[loss=0.618, simple_loss=0.4855, pruned_loss=0.3815, over 922085.36 frames. ], batch size: 33, lr: 4.70e-02, +2022-12-01 19:43:02,335 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.75 vs. limit=2.0 +2022-12-01 19:43:03,204 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. limit=2.0 +2022-12-01 19:43:05,045 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2138.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:43:09,645 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.46 vs. limit=5.0 +2022-12-01 19:43:42,644 INFO [train.py:876] Epoch 2, batch 750, loss[loss=0.5727, simple_loss=0.4873, pruned_loss=0.329, over 4840.00 frames. ], tot_loss[loss=0.603, simple_loss=0.4796, pruned_loss=0.3681, over 930754.51 frames. ], batch size: 49, lr: 4.69e-02, +2022-12-01 19:43:51,625 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=2194.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:43:54,839 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.89 vs. limit=5.0 +2022-12-01 19:43:57,624 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.094e+02 2.281e+02 2.784e+02 3.872e+02 7.698e+02, threshold=5.569e+02, percent-clipped=6.0 +2022-12-01 19:44:07,855 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=2213.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:44:15,332 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2222.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 19:44:18,729 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=2226.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:44:24,671 INFO [train.py:876] Epoch 2, batch 800, loss[loss=0.5944, simple_loss=0.4894, pruned_loss=0.3497, over 4803.00 frames. ], tot_loss[loss=0.5861, simple_loss=0.4712, pruned_loss=0.3542, over 937034.86 frames. ], batch size: 58, lr: 4.68e-02, +2022-12-01 19:44:53,747 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2267.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:44:56,225 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2270.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:44:57,342 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=9.01 vs. limit=5.0 +2022-12-01 19:45:06,009 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.04 vs. limit=2.0 +2022-12-01 19:45:07,239 INFO [train.py:876] Epoch 2, batch 850, loss[loss=0.7021, simple_loss=0.5503, pruned_loss=0.4269, over 4001.00 frames. ], tot_loss[loss=0.5723, simple_loss=0.4641, pruned_loss=0.3431, over 939639.06 frames. 
], batch size: 72, lr: 4.68e-02, +2022-12-01 19:45:22,234 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.315e+02 2.241e+02 2.661e+02 3.516e+02 8.130e+02, threshold=5.322e+02, percent-clipped=6.0 +2022-12-01 19:45:23,590 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.91 vs. limit=2.0 +2022-12-01 19:45:34,477 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2315.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:45:49,624 INFO [train.py:876] Epoch 2, batch 900, loss[loss=0.4956, simple_loss=0.4313, pruned_loss=0.28, over 4813.00 frames. ], tot_loss[loss=0.5612, simple_loss=0.4587, pruned_loss=0.3341, over 941788.39 frames. ], batch size: 32, lr: 4.67e-02, +2022-12-01 19:45:55,832 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=2340.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:46:05,455 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.18 vs. limit=2.0 +2022-12-01 19:46:21,352 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.18 vs. limit=5.0 +2022-12-01 19:46:32,028 INFO [train.py:876] Epoch 2, batch 950, loss[loss=0.5105, simple_loss=0.4351, pruned_loss=0.293, over 4745.00 frames. ], tot_loss[loss=0.5521, simple_loss=0.455, pruned_loss=0.3264, over 944921.26 frames. ], batch size: 27, lr: 4.66e-02, +2022-12-01 19:46:47,467 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.415e+02 2.305e+02 2.939e+02 3.593e+02 8.604e+02, threshold=5.879e+02, percent-clipped=6.0 +2022-12-01 19:46:47,684 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=2401.0, num_to_drop=2, layers_to_drop={0, 3} +2022-12-01 19:46:56,391 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.03 vs. limit=5.0 +2022-12-01 19:47:06,766 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=10.54 vs. limit=5.0 +2022-12-01 19:47:14,930 INFO [train.py:876] Epoch 2, batch 1000, loss[loss=0.5183, simple_loss=0.4418, pruned_loss=0.2974, over 4744.00 frames. ], tot_loss[loss=0.5423, simple_loss=0.4504, pruned_loss=0.3184, over 948660.53 frames. ], batch size: 27, lr: 4.65e-02, +2022-12-01 19:47:15,156 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8439, 4.2870, 3.7839, 4.3680, 4.1997, 4.1471, 4.5330, 3.8434], + device='cuda:0'), covar=tensor([0.0664, 0.3141, 0.1253, 0.0550, 0.0641, 0.0603, 0.0794, 0.1168], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0021, 0.0020, 0.0020, 0.0022, 0.0020, 0.0018, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.4761e-05, 1.8823e-05, 1.5627e-05, 1.6469e-05, 1.8501e-05, 1.5992e-05, + 1.3548e-05, 1.5734e-05], device='cuda:0') +2022-12-01 19:47:57,658 INFO [train.py:876] Epoch 2, batch 1050, loss[loss=0.494, simple_loss=0.4418, pruned_loss=0.2731, over 4869.00 frames. ], tot_loss[loss=0.5352, simple_loss=0.4476, pruned_loss=0.3124, over 949420.11 frames. 
], batch size: 39, lr: 4.64e-02, +2022-12-01 19:48:07,316 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2494.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 19:48:08,046 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0418, 3.6312, 4.0995, 4.1149, 3.6327, 3.3348, 3.7051, 3.7037], + device='cuda:0'), covar=tensor([0.0535, 0.0673, 0.0437, 0.0419, 0.0720, 0.0991, 0.0559, 0.0609], + device='cuda:0'), in_proj_covar=tensor([0.0016, 0.0017, 0.0014, 0.0014, 0.0019, 0.0019, 0.0016, 0.0015], + device='cuda:0'), out_proj_covar=tensor([1.3716e-05, 1.4949e-05, 1.2604e-05, 1.3149e-05, 1.9026e-05, 2.2346e-05, + 1.6057e-05, 1.5079e-05], device='cuda:0') +2022-12-01 19:48:13,161 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.285e+02 2.251e+02 2.718e+02 3.359e+02 5.861e+02, threshold=5.435e+02, percent-clipped=0.0 +2022-12-01 19:48:13,663 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.16 vs. limit=5.0 +2022-12-01 19:48:23,372 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2513.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:48:34,757 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2526.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:48:40,659 INFO [train.py:876] Epoch 2, batch 1100, loss[loss=0.6312, simple_loss=0.518, pruned_loss=0.3722, over 4883.00 frames. ], tot_loss[loss=0.5279, simple_loss=0.4447, pruned_loss=0.3064, over 953490.52 frames. ], batch size: 44, lr: 4.63e-02, +2022-12-01 19:48:48,678 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2542.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:49:05,153 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2561.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:49:16,337 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=2574.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:49:24,164 INFO [train.py:876] Epoch 2, batch 1150, loss[loss=0.4447, simple_loss=0.383, pruned_loss=0.2532, over 4894.00 frames. ], tot_loss[loss=0.5161, simple_loss=0.4377, pruned_loss=0.2979, over 952282.40 frames. ], batch size: 30, lr: 4.62e-02, +2022-12-01 19:49:27,233 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.17 vs. limit=2.0 +2022-12-01 19:49:39,923 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.648e+02 2.436e+02 3.205e+02 3.744e+02 8.546e+02, threshold=6.410e+02, percent-clipped=9.0 +2022-12-01 19:49:46,699 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.70 vs. limit=5.0 +2022-12-01 19:49:46,718 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.14 vs. 
limit=2.0 +2022-12-01 19:49:48,192 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6650, 3.4709, 3.3171, 3.0717, 3.3739, 3.5921, 3.2797, 3.0168], + device='cuda:0'), covar=tensor([0.1253, 0.0756, 0.1191, 0.1255, 0.0596, 0.1454, 0.1068, 0.2235], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0019, 0.0021, 0.0026, 0.0024, 0.0025, 0.0020, 0.0029], + device='cuda:0'), out_proj_covar=tensor([2.0243e-05, 1.6318e-05, 1.8595e-05, 2.2173e-05, 1.9688e-05, 2.1880e-05, + 1.7582e-05, 2.3723e-05], device='cuda:0') +2022-12-01 19:50:04,081 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5935, 2.6084, 2.4518, 2.5675, 2.4789, 2.8027, 2.3741, 3.0466], + device='cuda:0'), covar=tensor([0.1023, 0.1360, 0.1826, 0.0929, 0.1122, 0.0862, 0.1181, 0.0715], + device='cuda:0'), in_proj_covar=tensor([0.0017, 0.0020, 0.0020, 0.0018, 0.0018, 0.0018, 0.0018, 0.0017], + device='cuda:0'), out_proj_covar=tensor([1.2646e-05, 1.5778e-05, 1.5021e-05, 1.2650e-05, 1.4724e-05, 1.3146e-05, + 1.3628e-05, 1.1915e-05], device='cuda:0') +2022-12-01 19:50:08,195 INFO [train.py:876] Epoch 2, batch 1200, loss[loss=0.5035, simple_loss=0.4346, pruned_loss=0.2862, over 4808.00 frames. ], tot_loss[loss=0.5123, simple_loss=0.4372, pruned_loss=0.2942, over 953003.27 frames. ], batch size: 45, lr: 4.61e-02, +2022-12-01 19:50:10,932 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2196, 3.4867, 3.1504, 3.5232, 3.3439, 2.9237, 3.5863, 3.7149], + device='cuda:0'), covar=tensor([0.0893, 0.0909, 0.1094, 0.0710, 0.0776, 0.1115, 0.0849, 0.0667], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0020, 0.0020, 0.0020, 0.0021, 0.0021, 0.0017, 0.0018], + device='cuda:0'), out_proj_covar=tensor([1.6310e-05, 1.7682e-05, 1.5311e-05, 1.6809e-05, 1.8352e-05, 1.6981e-05, + 1.3675e-05, 1.4394e-05], device='cuda:0') +2022-12-01 19:50:46,554 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.16 vs. limit=5.0 +2022-12-01 19:50:52,090 INFO [train.py:876] Epoch 2, batch 1250, loss[loss=0.5275, simple_loss=0.4264, pruned_loss=0.3144, over 4722.00 frames. ], tot_loss[loss=0.5084, simple_loss=0.4359, pruned_loss=0.2908, over 951436.36 frames. ], batch size: 27, lr: 4.60e-02, +2022-12-01 19:50:54,089 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=2685.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:50:59,583 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.45 vs. limit=5.0 +2022-12-01 19:51:03,688 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=2696.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:51:07,813 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.457e+02 2.302e+02 2.898e+02 3.873e+02 7.959e+02, threshold=5.796e+02, percent-clipped=3.0 +2022-12-01 19:51:36,370 INFO [train.py:876] Epoch 2, batch 1300, loss[loss=0.4268, simple_loss=0.3878, pruned_loss=0.233, over 4905.00 frames. ], tot_loss[loss=0.5038, simple_loss=0.4341, pruned_loss=0.2871, over 951344.68 frames. ], batch size: 31, lr: 4.59e-02, +2022-12-01 19:51:38,683 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.37 vs. limit=2.0 +2022-12-01 19:51:48,192 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=2746.0, num_to_drop=2, layers_to_drop={1, 2} +2022-12-01 19:51:48,378 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.16 vs. limit=2.0 +2022-12-01 19:51:55,364 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.23 vs. 
limit=2.0 +2022-12-01 19:52:13,954 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8399, 2.7195, 2.9845, 2.9770, 2.7497, 2.6322, 3.1375, 3.2376], + device='cuda:0'), covar=tensor([0.1252, 0.2325, 0.1025, 0.0861, 0.1248, 0.0862, 0.0765, 0.0823], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0020, 0.0020, 0.0021, 0.0023, 0.0021, 0.0018, 0.0019], + device='cuda:0'), out_proj_covar=tensor([1.6796e-05, 1.9124e-05, 1.5184e-05, 1.7509e-05, 1.9818e-05, 1.7474e-05, + 1.4230e-05, 1.5007e-05], device='cuda:0') +2022-12-01 19:52:19,888 INFO [train.py:876] Epoch 2, batch 1350, loss[loss=0.4677, simple_loss=0.4335, pruned_loss=0.251, over 4801.00 frames. ], tot_loss[loss=0.5012, simple_loss=0.4334, pruned_loss=0.2847, over 952187.73 frames. ], batch size: 51, lr: 4.58e-02, +2022-12-01 19:52:35,577 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.487e+02 2.448e+02 3.004e+02 3.570e+02 6.537e+02, threshold=6.008e+02, percent-clipped=3.0 +2022-12-01 19:52:51,228 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.15 vs. limit=2.0 +2022-12-01 19:52:56,202 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=2824.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 19:53:03,776 INFO [train.py:876] Epoch 2, batch 1400, loss[loss=0.7506, simple_loss=0.5883, pruned_loss=0.4564, over 3930.00 frames. ], tot_loss[loss=0.4964, simple_loss=0.4303, pruned_loss=0.2814, over 947806.79 frames. ], batch size: 72, lr: 4.57e-02, +2022-12-01 19:53:20,415 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1529, 4.0931, 3.7595, 3.9640, 3.9378, 3.5064, 3.9446, 2.9754], + device='cuda:0'), covar=tensor([0.0325, 0.0228, 0.0461, 0.0351, 0.0307, 0.0534, 0.0426, 0.0629], + device='cuda:0'), in_proj_covar=tensor([0.0018, 0.0016, 0.0018, 0.0017, 0.0018, 0.0016, 0.0016, 0.0015], + device='cuda:0'), out_proj_covar=tensor([1.1606e-05, 9.3108e-06, 1.1040e-05, 9.7766e-06, 1.0260e-05, 9.4725e-06, + 9.6093e-06, 8.9272e-06], device='cuda:0') +2022-12-01 19:53:30,601 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-2.pt +2022-12-01 19:53:46,896 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 19:53:47,800 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 19:53:48,095 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:53:48,127 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 19:53:49,269 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 19:53:49,589 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 19:53:51,153 INFO [train.py:876] Epoch 3, batch 0, loss[loss=0.3463, simple_loss=0.3114, pruned_loss=0.1905, over 4688.00 frames. ], tot_loss[loss=0.3463, simple_loss=0.3114, pruned_loss=0.1905, over 4688.00 frames. 
], batch size: 23, lr: 4.34e-02, +2022-12-01 19:53:51,154 INFO [train.py:901] Computing validation loss +2022-12-01 19:54:06,716 INFO [train.py:910] Epoch 3, validation: loss=0.3989, simple_loss=0.3922, pruned_loss=0.2028, over 253132.00 frames. +2022-12-01 19:54:06,717 INFO [train.py:911] Maximum memory allocated so far is 7118MB +2022-12-01 19:54:24,206 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=2885.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 19:54:37,968 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.288e+02 2.458e+02 3.024e+02 4.055e+02 7.236e+02, threshold=6.047e+02, percent-clipped=3.0 +2022-12-01 19:54:50,565 INFO [train.py:876] Epoch 3, batch 50, loss[loss=0.466, simple_loss=0.426, pruned_loss=0.253, over 4824.00 frames. ], tot_loss[loss=0.4442, simple_loss=0.398, pruned_loss=0.2452, over 213189.98 frames. ], batch size: 49, lr: 4.33e-02, +2022-12-01 19:54:51,648 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7455, 3.1529, 3.1540, 3.1808, 2.9924, 2.9097, 3.5233, 3.5316], + device='cuda:0'), covar=tensor([0.1057, 0.0827, 0.0914, 0.0790, 0.0844, 0.0875, 0.0628, 0.0644], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0020, 0.0020, 0.0020, 0.0023, 0.0020, 0.0018, 0.0019], + device='cuda:0'), out_proj_covar=tensor([1.6971e-05, 1.8624e-05, 1.5881e-05, 1.7561e-05, 2.0609e-05, 1.6926e-05, + 1.4065e-05, 1.5491e-05], device='cuda:0') +2022-12-01 19:55:02,903 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.76 vs. limit=5.0 +2022-12-01 19:55:10,882 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=2937.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:55:12,682 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6204, 3.9332, 3.8547, 3.6893, 3.6427, 4.1545, 4.0982, 3.6359], + device='cuda:0'), covar=tensor([0.0782, 0.0393, 0.0544, 0.0542, 0.0404, 0.0565, 0.0286, 0.0831], + device='cuda:0'), in_proj_covar=tensor([0.0024, 0.0020, 0.0021, 0.0025, 0.0024, 0.0023, 0.0019, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.0191e-05, 1.6645e-05, 1.7562e-05, 2.2150e-05, 1.9744e-05, 2.0167e-05, + 1.6948e-05, 2.4328e-05], device='cuda:0') +2022-12-01 19:55:14,326 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 19:55:35,289 INFO [train.py:876] Epoch 3, batch 100, loss[loss=0.3456, simple_loss=0.3138, pruned_loss=0.1887, over 4672.00 frames. ], tot_loss[loss=0.4376, simple_loss=0.3951, pruned_loss=0.24, over 377722.63 frames. ], batch size: 21, lr: 4.32e-02, +2022-12-01 19:55:37,778 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.03 vs. limit=2.0 +2022-12-01 19:56:01,448 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 19:56:03,331 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=2996.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:56:05,126 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=2998.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:56:07,906 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.433e+02 2.371e+02 2.918e+02 3.571e+02 7.072e+02, threshold=5.836e+02, percent-clipped=3.0 +2022-12-01 19:56:20,615 INFO [train.py:876] Epoch 3, batch 150, loss[loss=0.3603, simple_loss=0.3353, pruned_loss=0.1926, over 4836.00 frames. ], tot_loss[loss=0.431, simple_loss=0.3905, pruned_loss=0.2358, over 505222.81 frames. 
], batch size: 25, lr: 4.31e-02, +2022-12-01 19:56:24,307 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=3019.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:56:25,018 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 19:56:44,167 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3041.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:56:46,955 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3044.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 19:57:00,434 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3275, 2.9132, 2.8344, 3.2743, 2.9174, 2.9153, 3.4000, 3.3924], + device='cuda:0'), covar=tensor([0.0258, 0.0690, 0.0473, 0.0245, 0.0464, 0.0328, 0.0284, 0.0304], + device='cuda:0'), in_proj_covar=tensor([0.0020, 0.0021, 0.0020, 0.0020, 0.0023, 0.0020, 0.0017, 0.0019], + device='cuda:0'), out_proj_covar=tensor([1.6283e-05, 1.9517e-05, 1.6444e-05, 1.7363e-05, 2.0893e-05, 1.7815e-05, + 1.4203e-05, 1.5472e-05], device='cuda:0') +2022-12-01 19:57:05,740 INFO [train.py:876] Epoch 3, batch 200, loss[loss=0.4649, simple_loss=0.4268, pruned_loss=0.2515, over 4849.00 frames. ], tot_loss[loss=0.4383, simple_loss=0.3956, pruned_loss=0.2405, over 599255.50 frames. ], batch size: 40, lr: 4.30e-02, +2022-12-01 19:57:19,389 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=3080.0, num_to_drop=2, layers_to_drop={1, 3} +2022-12-01 19:57:36,358 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8910, 2.9063, 3.2926, 3.2851, 3.5198, 2.8367, 3.3916, 2.8133], + device='cuda:0'), covar=tensor([0.0520, 0.0575, 0.0462, 0.0432, 0.0355, 0.0543, 0.0394, 0.0460], + device='cuda:0'), in_proj_covar=tensor([0.0019, 0.0019, 0.0016, 0.0017, 0.0016, 0.0020, 0.0019, 0.0017], + device='cuda:0'), out_proj_covar=tensor([1.6085e-05, 1.4651e-05, 1.2301e-05, 1.3440e-05, 1.1666e-05, 1.6426e-05, + 1.3589e-05, 1.4129e-05], device='cuda:0') +2022-12-01 19:57:37,655 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.586e+02 2.202e+02 2.758e+02 3.383e+02 7.337e+02, threshold=5.516e+02, percent-clipped=2.0 +2022-12-01 19:57:50,456 INFO [train.py:876] Epoch 3, batch 250, loss[loss=0.4002, simple_loss=0.3583, pruned_loss=0.221, over 4847.00 frames. ], tot_loss[loss=0.4387, simple_loss=0.397, pruned_loss=0.2402, over 677561.23 frames. ], batch size: 25, lr: 4.29e-02, +2022-12-01 19:57:58,006 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.38 vs. limit=5.0 +2022-12-01 19:58:00,069 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 19:58:17,343 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0085, 2.8402, 2.9781, 2.6016, 2.4398, 2.8148, 2.7854, 3.0484], + device='cuda:0'), covar=tensor([0.0474, 0.0601, 0.0498, 0.0616, 0.0808, 0.0448, 0.0549, 0.0522], + device='cuda:0'), in_proj_covar=tensor([0.0022, 0.0023, 0.0021, 0.0021, 0.0022, 0.0021, 0.0022, 0.0022], + device='cuda:0'), out_proj_covar=tensor([1.5996e-05, 1.7989e-05, 1.7035e-05, 1.5269e-05, 1.8460e-05, 1.5619e-05, + 1.7059e-05, 1.6560e-05], device='cuda:0') +2022-12-01 19:58:34,430 INFO [train.py:876] Epoch 3, batch 300, loss[loss=0.3732, simple_loss=0.3396, pruned_loss=0.2034, over 4806.00 frames. ], tot_loss[loss=0.445, simple_loss=0.4024, pruned_loss=0.2438, over 739859.40 frames. 
], batch size: 26, lr: 4.28e-02, +2022-12-01 19:58:34,851 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.55 vs. limit=5.0 +2022-12-01 19:58:38,097 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4005, 4.1889, 4.2531, 4.5114, 3.7330, 3.7045, 3.5550, 3.7354], + device='cuda:0'), covar=tensor([0.0246, 0.0316, 0.0243, 0.0273, 0.0429, 0.0477, 0.0408, 0.0412], + device='cuda:0'), in_proj_covar=tensor([0.0017, 0.0017, 0.0014, 0.0014, 0.0020, 0.0019, 0.0017, 0.0017], + device='cuda:0'), out_proj_covar=tensor([1.5909e-05, 1.5974e-05, 1.3089e-05, 1.3688e-05, 2.1399e-05, 2.3073e-05, + 1.8247e-05, 1.7603e-05], device='cuda:0') +2022-12-01 19:58:41,016 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5807, 3.1563, 3.2941, 3.5440, 3.7879, 2.5364, 2.6529, 3.4156], + device='cuda:0'), covar=tensor([0.1403, 0.1361, 0.0787, 0.0651, 0.0631, 0.1769, 0.1378, 0.1010], + device='cuda:0'), in_proj_covar=tensor([0.0015, 0.0020, 0.0015, 0.0016, 0.0017, 0.0017, 0.0017, 0.0019], + device='cuda:0'), out_proj_covar=tensor([9.1953e-06, 1.1872e-05, 9.6018e-06, 1.0358e-05, 8.2096e-06, 9.8357e-06, + 9.2590e-06, 1.1924e-05], device='cuda:0') +2022-12-01 19:58:44,574 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.33 vs. limit=2.0 +2022-12-01 19:58:47,648 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3180.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 19:58:48,442 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 19:58:49,180 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 19:59:06,676 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.648e+02 2.448e+02 2.941e+02 3.690e+02 6.158e+02, threshold=5.882e+02, percent-clipped=2.0 +2022-12-01 19:59:18,606 INFO [train.py:876] Epoch 3, batch 350, loss[loss=0.3771, simple_loss=0.3641, pruned_loss=0.1951, over 4800.00 frames. ], tot_loss[loss=0.449, simple_loss=0.4057, pruned_loss=0.2461, over 787265.17 frames. ], batch size: 32, lr: 4.27e-02, +2022-12-01 19:59:19,713 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5130, 3.7521, 3.7697, 4.1747, 3.9335, 3.4580, 4.0612, 3.4692], + device='cuda:0'), covar=tensor([0.1485, 0.0576, 0.1056, 0.0571, 0.0535, 0.1311, 0.0618, 0.0516], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0023, 0.0023, 0.0019, 0.0021, 0.0025, 0.0017, 0.0025], + device='cuda:0'), out_proj_covar=tensor([3.0437e-05, 2.0317e-05, 2.0327e-05, 1.6343e-05, 1.8307e-05, 2.2766e-05, + 1.6182e-05, 2.3064e-05], device='cuda:0') +2022-12-01 19:59:42,692 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7391, 3.0371, 3.1196, 3.4260, 3.0290, 2.7460, 3.3247, 2.8792], + device='cuda:0'), covar=tensor([0.2170, 0.0720, 0.1005, 0.0741, 0.0736, 0.1762, 0.0649, 0.0695], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0021, 0.0022, 0.0018, 0.0020, 0.0023, 0.0016, 0.0023], + device='cuda:0'), out_proj_covar=tensor([2.9483e-05, 1.8741e-05, 1.9523e-05, 1.5409e-05, 1.7409e-05, 2.1811e-05, + 1.5273e-05, 2.1798e-05], device='cuda:0') +2022-12-01 19:59:48,421 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.54 vs. limit=2.0 +2022-12-01 20:00:02,433 INFO [train.py:876] Epoch 3, batch 400, loss[loss=0.411, simple_loss=0.3961, pruned_loss=0.2129, over 4854.00 frames. ], tot_loss[loss=0.4503, simple_loss=0.4065, pruned_loss=0.2471, over 822088.83 frames. 
], batch size: 40, lr: 4.26e-02, +2022-12-01 20:00:10,638 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 20:00:23,185 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=3288.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:00:26,612 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8924, 3.2323, 3.2866, 3.2550, 3.2122, 3.7959, 3.4202, 3.4176], + device='cuda:0'), covar=tensor([0.0388, 0.0548, 0.0536, 0.0771, 0.0726, 0.0634, 0.0633, 0.0718], + device='cuda:0'), in_proj_covar=tensor([0.0024, 0.0023, 0.0023, 0.0028, 0.0028, 0.0027, 0.0024, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.1233e-05, 1.9676e-05, 1.9173e-05, 2.4154e-05, 2.4383e-05, 2.5572e-05, + 2.2254e-05, 2.8777e-05], device='cuda:0') +2022-12-01 20:00:27,439 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3293.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:00:32,777 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:00:34,495 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.712e+02 2.698e+02 3.339e+02 4.359e+02 1.180e+03, threshold=6.679e+02, percent-clipped=6.0 +2022-12-01 20:00:35,148 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=3.90 vs. limit=2.0 +2022-12-01 20:00:44,504 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2317, 3.1607, 3.6334, 3.4854, 2.6419, 2.8146, 3.7936, 3.7779], + device='cuda:0'), covar=tensor([0.0358, 0.0582, 0.0335, 0.0441, 0.0394, 0.0432, 0.0257, 0.0323], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0021, 0.0021, 0.0022, 0.0024, 0.0021, 0.0019, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.7678e-05, 2.0609e-05, 1.7472e-05, 2.0151e-05, 2.3187e-05, 1.9226e-05, + 1.5254e-05, 1.6328e-05], device='cuda:0') +2022-12-01 20:00:47,033 INFO [train.py:876] Epoch 3, batch 450, loss[loss=0.414, simple_loss=0.3883, pruned_loss=0.2198, over 4848.00 frames. ], tot_loss[loss=0.4479, simple_loss=0.4058, pruned_loss=0.245, over 852993.81 frames. ], batch size: 35, lr: 4.25e-02, +2022-12-01 20:01:06,570 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.65 vs. limit=5.0 +2022-12-01 20:01:10,766 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=3341.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:01:17,959 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=3349.0, num_to_drop=2, layers_to_drop={0, 2} +2022-12-01 20:01:32,139 INFO [train.py:876] Epoch 3, batch 500, loss[loss=0.4945, simple_loss=0.4453, pruned_loss=0.2718, over 4828.00 frames. ], tot_loss[loss=0.4392, simple_loss=0.4, pruned_loss=0.2393, over 874874.47 frames. 
], batch size: 45, lr: 4.24e-02, +2022-12-01 20:01:38,366 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6999, 3.7065, 3.8001, 3.3975, 3.1755, 3.1630, 3.0413, 3.7930], + device='cuda:0'), covar=tensor([0.0243, 0.0343, 0.0395, 0.0450, 0.0476, 0.0484, 0.0978, 0.0279], + device='cuda:0'), in_proj_covar=tensor([0.0019, 0.0021, 0.0026, 0.0021, 0.0023, 0.0021, 0.0023, 0.0024], + device='cuda:0'), out_proj_covar=tensor([1.8099e-05, 2.0115e-05, 2.4291e-05, 2.0513e-05, 2.2072e-05, 2.0910e-05, + 2.4847e-05, 2.3482e-05], device='cuda:0') +2022-12-01 20:01:40,916 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3375.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:01:53,539 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3389.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:02:04,653 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.538e+02 2.396e+02 2.924e+02 3.556e+02 5.754e+02, threshold=5.849e+02, percent-clipped=0.0 +2022-12-01 20:02:17,170 INFO [train.py:876] Epoch 3, batch 550, loss[loss=0.4288, simple_loss=0.4111, pruned_loss=0.2232, over 4883.00 frames. ], tot_loss[loss=0.4351, simple_loss=0.3974, pruned_loss=0.2364, over 894260.04 frames. ], batch size: 44, lr: 4.23e-02, +2022-12-01 20:02:19,264 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.27 vs. limit=2.0 +2022-12-01 20:02:45,602 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.59 vs. limit=5.0 +2022-12-01 20:03:03,005 INFO [train.py:876] Epoch 3, batch 600, loss[loss=0.3434, simple_loss=0.3016, pruned_loss=0.1926, over 4018.00 frames. ], tot_loss[loss=0.4297, simple_loss=0.3933, pruned_loss=0.233, over 905997.90 frames. ], batch size: 16, lr: 4.22e-02, +2022-12-01 20:03:14,365 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.76 vs. limit=5.0 +2022-12-01 20:03:16,759 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=3480.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:03:29,494 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.39 vs. limit=5.0 +2022-12-01 20:03:29,619 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.33 vs. limit=2.0 +2022-12-01 20:03:30,787 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8684, 3.9320, 3.8997, 4.3962, 3.3113, 3.6165, 3.5825, 3.5485], + device='cuda:0'), covar=tensor([0.0230, 0.0199, 0.0143, 0.0100, 0.0351, 0.0254, 0.0227, 0.0303], + device='cuda:0'), in_proj_covar=tensor([0.0018, 0.0016, 0.0014, 0.0014, 0.0019, 0.0018, 0.0016, 0.0017], + device='cuda:0'), out_proj_covar=tensor([1.7349e-05, 1.5148e-05, 1.2388e-05, 1.4110e-05, 2.0579e-05, 2.1405e-05, + 1.6775e-05, 1.7647e-05], device='cuda:0') +2022-12-01 20:03:35,179 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.453e+02 2.575e+02 3.044e+02 3.852e+02 8.768e+02, threshold=6.088e+02, percent-clipped=5.0 +2022-12-01 20:03:48,554 INFO [train.py:876] Epoch 3, batch 650, loss[loss=0.5067, simple_loss=0.444, pruned_loss=0.2847, over 4886.00 frames. ], tot_loss[loss=0.4283, simple_loss=0.3926, pruned_loss=0.232, over 915257.42 frames. 
], batch size: 44, lr: 4.21e-02, +2022-12-01 20:04:00,415 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3528.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:04:16,505 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=3546.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 20:04:33,644 INFO [train.py:876] Epoch 3, batch 700, loss[loss=0.3367, simple_loss=0.329, pruned_loss=0.1721, over 4896.00 frames. ], tot_loss[loss=0.4308, simple_loss=0.3945, pruned_loss=0.2336, over 924300.56 frames. ], batch size: 29, lr: 4.20e-02, +2022-12-01 20:04:40,397 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1469, 2.9644, 3.3765, 3.4691, 2.7502, 2.3628, 3.7262, 3.6408], + device='cuda:0'), covar=tensor([0.0245, 0.0488, 0.0472, 0.0328, 0.0427, 0.0691, 0.0254, 0.0301], + device='cuda:0'), in_proj_covar=tensor([0.0020, 0.0021, 0.0022, 0.0022, 0.0025, 0.0020, 0.0019, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.6763e-05, 2.1442e-05, 1.9614e-05, 1.9427e-05, 2.4060e-05, 1.8493e-05, + 1.6565e-05, 1.6256e-05], device='cuda:0') +2022-12-01 20:04:40,582 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.93 vs. limit=5.0 +2022-12-01 20:04:59,631 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=3593.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:05:06,891 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.513e+02 2.694e+02 3.375e+02 4.271e+02 9.273e+02, threshold=6.749e+02, percent-clipped=6.0 +2022-12-01 20:05:12,823 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=3607.0, num_to_drop=2, layers_to_drop={0, 1} +2022-12-01 20:05:20,018 INFO [train.py:876] Epoch 3, batch 750, loss[loss=0.4664, simple_loss=0.4303, pruned_loss=0.2513, over 4840.00 frames. ], tot_loss[loss=0.4257, simple_loss=0.3919, pruned_loss=0.2297, over 933113.62 frames. ], batch size: 41, lr: 4.19e-02, +2022-12-01 20:05:28,826 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.01 vs. limit=2.0 +2022-12-01 20:05:40,842 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.97 vs. limit=2.0 +2022-12-01 20:05:44,018 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3641.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:05:46,811 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3644.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 20:06:06,060 INFO [train.py:876] Epoch 3, batch 800, loss[loss=0.3081, simple_loss=0.3138, pruned_loss=0.1511, over 4747.00 frames. ], tot_loss[loss=0.4192, simple_loss=0.3884, pruned_loss=0.225, over 939853.94 frames. ], batch size: 27, lr: 4.18e-02, +2022-12-01 20:06:15,895 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=3675.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:06:16,929 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=3676.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:06:40,007 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.98 vs. limit=2.0 +2022-12-01 20:06:40,201 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.523e+02 2.456e+02 3.025e+02 3.602e+02 7.500e+02, threshold=6.050e+02, percent-clipped=1.0 +2022-12-01 20:06:48,390 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.16 vs. 
limit=2.0 +2022-12-01 20:06:53,600 INFO [train.py:876] Epoch 3, batch 850, loss[loss=0.3288, simple_loss=0.3269, pruned_loss=0.1653, over 4818.00 frames. ], tot_loss[loss=0.4148, simple_loss=0.3854, pruned_loss=0.2221, over 940177.71 frames. ], batch size: 25, lr: 4.17e-02, +2022-12-01 20:07:01,403 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3723.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:07:02,577 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9445, 2.5286, 2.7075, 3.1944, 2.9356, 2.8386, 2.3973, 2.3908], + device='cuda:0'), covar=tensor([0.0295, 0.0372, 0.0582, 0.0259, 0.0310, 0.0449, 0.0633, 0.0465], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0022, 0.0026, 0.0020, 0.0023, 0.0023, 0.0025, 0.0023], + device='cuda:0'), out_proj_covar=tensor([1.5970e-05, 1.6808e-05, 1.8925e-05, 1.2729e-05, 1.7209e-05, 1.6300e-05, + 1.8035e-05, 1.6618e-05], device='cuda:0') +2022-12-01 20:07:15,030 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=3737.0, num_to_drop=2, layers_to_drop={0, 3} +2022-12-01 20:07:15,339 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.02 vs. limit=2.0 +2022-12-01 20:07:24,855 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=3.00 vs. limit=2.0 +2022-12-01 20:07:40,927 INFO [train.py:876] Epoch 3, batch 900, loss[loss=0.4844, simple_loss=0.4383, pruned_loss=0.2653, over 4842.00 frames. ], tot_loss[loss=0.4154, simple_loss=0.3865, pruned_loss=0.2221, over 944891.99 frames. ], batch size: 41, lr: 4.16e-02, +2022-12-01 20:07:46,109 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.99 vs. limit=2.0 +2022-12-01 20:07:46,698 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0177, 3.7962, 4.1828, 3.9590, 4.1712, 3.9790, 3.9136, 3.4651], + device='cuda:0'), covar=tensor([0.0256, 0.0293, 0.0191, 0.0206, 0.0138, 0.0253, 0.0251, 0.0324], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0030, 0.0025, 0.0024, 0.0024, 0.0031, 0.0029, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.5295e-05, 2.3169e-05, 1.8915e-05, 1.8408e-05, 1.6636e-05, 2.6080e-05, + 2.2359e-05, 1.8969e-05], device='cuda:0') +2022-12-01 20:08:12,107 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.27 vs. limit=5.0 +2022-12-01 20:08:14,279 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.699e+02 2.766e+02 3.703e+02 4.433e+02 1.040e+03, threshold=7.406e+02, percent-clipped=5.0 +2022-12-01 20:08:21,181 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.07 vs. limit=2.0 +2022-12-01 20:08:27,261 INFO [train.py:876] Epoch 3, batch 950, loss[loss=0.4667, simple_loss=0.4389, pruned_loss=0.2472, over 4817.00 frames. ], tot_loss[loss=0.4156, simple_loss=0.3872, pruned_loss=0.222, over 949196.96 frames. ], batch size: 42, lr: 4.15e-02, +2022-12-01 20:08:59,367 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.99 vs. limit=5.0 +2022-12-01 20:09:14,781 INFO [train.py:876] Epoch 3, batch 1000, loss[loss=0.4642, simple_loss=0.4179, pruned_loss=0.2552, over 4804.00 frames. ], tot_loss[loss=0.4205, simple_loss=0.3908, pruned_loss=0.2251, over 947446.86 frames. ], batch size: 33, lr: 4.14e-02, +2022-12-01 20:09:30,371 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. 
limit=2.0 +2022-12-01 20:09:46,500 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7689, 2.6673, 2.6972, 2.4981, 2.6662, 2.3527, 2.6076, 2.2643], + device='cuda:0'), covar=tensor([0.1760, 0.0796, 0.0617, 0.0770, 0.0696, 0.0754, 0.0923, 0.0970], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0035, 0.0032, 0.0034, 0.0032, 0.0032, 0.0038, 0.0033], + device='cuda:0'), out_proj_covar=tensor([2.7949e-05, 3.0151e-05, 2.5531e-05, 2.6637e-05, 2.6075e-05, 2.4735e-05, + 3.1715e-05, 2.5529e-05], device='cuda:0') +2022-12-01 20:09:50,083 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.702e+02 2.850e+02 3.397e+02 4.139e+02 1.104e+03, threshold=6.793e+02, percent-clipped=4.0 +2022-12-01 20:09:51,022 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=3902.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:10:03,513 INFO [train.py:876] Epoch 3, batch 1050, loss[loss=0.338, simple_loss=0.3103, pruned_loss=0.1828, over 4775.00 frames. ], tot_loss[loss=0.4175, simple_loss=0.3892, pruned_loss=0.2229, over 950245.32 frames. ], batch size: 26, lr: 4.13e-02, +2022-12-01 20:10:14,214 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=3926.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:10:31,770 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=3944.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 20:10:52,284 INFO [train.py:876] Epoch 3, batch 1100, loss[loss=0.3325, simple_loss=0.3134, pruned_loss=0.1758, over 4773.00 frames. ], tot_loss[loss=0.4134, simple_loss=0.3864, pruned_loss=0.2202, over 951934.89 frames. ], batch size: 26, lr: 4.12e-02, +2022-12-01 20:11:06,313 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1599, 3.0761, 3.6351, 3.1522, 3.8027, 3.3837, 3.1058, 3.3488], + device='cuda:0'), covar=tensor([0.0845, 0.1146, 0.0446, 0.0757, 0.0406, 0.0617, 0.0563, 0.0563], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0035, 0.0031, 0.0034, 0.0032, 0.0033, 0.0039, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.7639e-05, 3.0809e-05, 2.5200e-05, 2.6750e-05, 2.6570e-05, 2.5105e-05, + 3.3985e-05, 2.6432e-05], device='cuda:0') +2022-12-01 20:11:14,226 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=3987.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:11:18,956 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=3992.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 20:11:26,639 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-4000.pt +2022-12-01 20:11:30,116 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.779e+02 2.783e+02 3.423e+02 4.083e+02 9.865e+02, threshold=6.845e+02, percent-clipped=2.0 +2022-12-01 20:11:43,855 INFO [train.py:876] Epoch 3, batch 1150, loss[loss=0.4922, simple_loss=0.4499, pruned_loss=0.2672, over 4825.00 frames. ], tot_loss[loss=0.4115, simple_loss=0.3846, pruned_loss=0.2192, over 951680.91 frames. ], batch size: 47, lr: 4.11e-02, +2022-12-01 20:11:46,406 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.14 vs. limit=5.0 +2022-12-01 20:11:53,959 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.94 vs. 
limit=2.0 +2022-12-01 20:12:01,255 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=4032.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 20:12:08,171 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4469, 3.1060, 3.6783, 3.4380, 2.7414, 2.9588, 3.0760, 3.8578], + device='cuda:0'), covar=tensor([0.0222, 0.0418, 0.0285, 0.0357, 0.0412, 0.0295, 0.0471, 0.0168], + device='cuda:0'), in_proj_covar=tensor([0.0022, 0.0024, 0.0023, 0.0024, 0.0027, 0.0021, 0.0024, 0.0020], + device='cuda:0'), out_proj_covar=tensor([1.9897e-05, 2.4397e-05, 2.1539e-05, 2.2068e-05, 2.7626e-05, 2.0431e-05, + 2.1686e-05, 1.6301e-05], device='cuda:0') +2022-12-01 20:12:28,334 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3918, 2.2848, 2.4075, 2.8804, 2.4481, 2.2107, 2.2984, 2.3046], + device='cuda:0'), covar=tensor([0.0537, 0.0348, 0.0616, 0.0363, 0.0433, 0.0743, 0.0591, 0.0376], + device='cuda:0'), in_proj_covar=tensor([0.0026, 0.0025, 0.0029, 0.0024, 0.0029, 0.0029, 0.0028, 0.0027], + device='cuda:0'), out_proj_covar=tensor([1.8430e-05, 1.8336e-05, 2.1226e-05, 1.6595e-05, 2.2672e-05, 2.1505e-05, + 2.0051e-05, 1.9569e-05], device='cuda:0') +2022-12-01 20:12:33,949 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.53 vs. limit=2.0 +2022-12-01 20:12:34,048 INFO [train.py:876] Epoch 3, batch 1200, loss[loss=0.466, simple_loss=0.4243, pruned_loss=0.2538, over 4818.00 frames. ], tot_loss[loss=0.4078, simple_loss=0.3822, pruned_loss=0.2167, over 951281.55 frames. ], batch size: 42, lr: 4.10e-02, +2022-12-01 20:13:09,648 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.121e+02 2.470e+02 3.058e+02 3.917e+02 1.673e+03, threshold=6.116e+02, percent-clipped=3.0 +2022-12-01 20:13:23,539 INFO [train.py:876] Epoch 3, batch 1250, loss[loss=0.5246, simple_loss=0.4722, pruned_loss=0.2885, over 4700.00 frames. ], tot_loss[loss=0.4039, simple_loss=0.3794, pruned_loss=0.2142, over 951170.14 frames. ], batch size: 63, lr: 4.09e-02, +2022-12-01 20:13:49,631 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=4141.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:14:08,368 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3924, 2.3525, 2.4076, 2.5046, 2.3915, 2.7696, 2.5102, 2.2177], + device='cuda:0'), covar=tensor([0.0334, 0.0265, 0.0376, 0.0194, 0.0463, 0.0235, 0.0209, 0.0480], + device='cuda:0'), in_proj_covar=tensor([0.0027, 0.0028, 0.0026, 0.0024, 0.0028, 0.0027, 0.0024, 0.0025], + device='cuda:0'), out_proj_covar=tensor([1.9313e-05, 2.0360e-05, 1.9474e-05, 1.7179e-05, 2.1546e-05, 1.9163e-05, + 1.7105e-05, 1.8950e-05], device='cuda:0') +2022-12-01 20:14:12,875 INFO [train.py:876] Epoch 3, batch 1300, loss[loss=0.4057, simple_loss=0.3898, pruned_loss=0.2108, over 4835.00 frames. ], tot_loss[loss=0.4061, simple_loss=0.3809, pruned_loss=0.2157, over 950220.83 frames. ], batch size: 49, lr: 4.08e-02, +2022-12-01 20:14:17,554 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.01 vs. 
limit=2.0 +2022-12-01 20:14:37,183 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1490, 4.3156, 3.8550, 4.3754, 4.6719, 4.4354, 4.3993, 4.3465], + device='cuda:0'), covar=tensor([0.0646, 0.0569, 0.0563, 0.0497, 0.0557, 0.0511, 0.0438, 0.0516], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0041, 0.0040, 0.0042, 0.0043, 0.0044, 0.0046, 0.0041], + device='cuda:0'), out_proj_covar=tensor([4.6908e-05, 3.8748e-05, 3.9053e-05, 3.8665e-05, 4.1269e-05, 4.0300e-05, + 4.6129e-05, 3.9943e-05], device='cuda:0') +2022-12-01 20:14:40,840 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.88 vs. limit=2.0 +2022-12-01 20:14:49,175 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.788e+02 2.688e+02 3.291e+02 4.344e+02 1.003e+03, threshold=6.582e+02, percent-clipped=4.0 +2022-12-01 20:14:50,334 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=4202.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:14:50,438 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=4202.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:15:03,017 INFO [train.py:876] Epoch 3, batch 1350, loss[loss=0.4494, simple_loss=0.4231, pruned_loss=0.2378, over 4798.00 frames. ], tot_loss[loss=0.4066, simple_loss=0.3816, pruned_loss=0.2157, over 950004.95 frames. ], batch size: 58, lr: 4.07e-02, +2022-12-01 20:15:22,352 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.20 vs. limit=2.0 +2022-12-01 20:15:38,002 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=4250.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:15:52,712 INFO [train.py:876] Epoch 3, batch 1400, loss[loss=0.438, simple_loss=0.4035, pruned_loss=0.2363, over 4832.00 frames. ], tot_loss[loss=0.4066, simple_loss=0.3814, pruned_loss=0.2159, over 946145.60 frames. ], batch size: 34, lr: 4.06e-02, +2022-12-01 20:16:09,482 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=4282.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:16:15,482 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=4288.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:16:17,526 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6832, 2.2936, 2.5538, 2.8720, 2.5377, 2.6923, 2.0734, 2.6253], + device='cuda:0'), covar=tensor([0.0463, 0.0434, 0.0452, 0.0365, 0.0435, 0.0485, 0.0684, 0.0388], + device='cuda:0'), in_proj_covar=tensor([0.0025, 0.0024, 0.0028, 0.0025, 0.0030, 0.0027, 0.0027, 0.0026], + device='cuda:0'), out_proj_covar=tensor([1.7729e-05, 1.8137e-05, 2.1109e-05, 1.6668e-05, 2.3388e-05, 2.1222e-05, + 2.0040e-05, 2.0126e-05], device='cuda:0') +2022-12-01 20:16:23,187 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-3.pt +2022-12-01 20:16:39,662 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 20:16:40,561 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. 
Number of tokens: 29 +2022-12-01 20:16:40,854 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:16:40,885 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 20:16:42,015 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 20:16:42,333 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 20:16:43,924 INFO [train.py:876] Epoch 4, batch 0, loss[loss=0.3888, simple_loss=0.3802, pruned_loss=0.1987, over 4815.00 frames. ], tot_loss[loss=0.3888, simple_loss=0.3802, pruned_loss=0.1987, over 4815.00 frames. ], batch size: 45, lr: 3.79e-02, +2022-12-01 20:16:43,925 INFO [train.py:901] Computing validation loss +2022-12-01 20:16:59,412 INFO [train.py:910] Epoch 4, validation: loss=0.3323, simple_loss=0.3537, pruned_loss=0.1555, over 253132.00 frames. +2022-12-01 20:16:59,413 INFO [train.py:911] Maximum memory allocated so far is 7415MB +2022-12-01 20:17:03,329 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.679e+02 2.632e+02 3.201e+02 4.256e+02 8.086e+02, threshold=6.402e+02, percent-clipped=3.0 +2022-12-01 20:17:04,678 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2044, 2.7547, 3.5175, 3.5885, 2.7026, 3.3133, 3.7848, 3.8006], + device='cuda:0'), covar=tensor([0.0176, 0.0631, 0.0265, 0.0280, 0.0460, 0.0236, 0.0321, 0.0162], + device='cuda:0'), in_proj_covar=tensor([0.0023, 0.0027, 0.0024, 0.0025, 0.0029, 0.0023, 0.0025, 0.0022], + device='cuda:0'), out_proj_covar=tensor([2.1187e-05, 2.9322e-05, 2.2788e-05, 2.4339e-05, 2.9983e-05, 2.1647e-05, + 2.3022e-05, 1.8954e-05], device='cuda:0') +2022-12-01 20:17:06,915 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.58 vs. limit=2.0 +2022-12-01 20:17:12,637 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.92 vs. limit=5.0 +2022-12-01 20:17:34,868 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=4332.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 20:17:45,815 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4681, 4.1207, 3.3462, 3.3542, 3.3142, 3.5471, 3.8068, 3.9678], + device='cuda:0'), covar=tensor([0.0437, 0.0133, 0.0347, 0.0633, 0.0435, 0.0378, 0.0281, 0.0269], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0025, 0.0024, 0.0031, 0.0026, 0.0028, 0.0027, 0.0022], + device='cuda:0'), out_proj_covar=tensor([2.0692e-05, 1.3363e-05, 1.5503e-05, 2.1794e-05, 1.5959e-05, 1.5661e-05, + 1.7253e-05, 1.5767e-05], device='cuda:0') +2022-12-01 20:17:49,428 INFO [train.py:876] Epoch 4, batch 50, loss[loss=0.2441, simple_loss=0.2532, pruned_loss=0.1176, over 4729.00 frames. ], tot_loss[loss=0.3745, simple_loss=0.3594, pruned_loss=0.1948, over 212539.86 frames. ], batch size: 23, lr: 3.78e-02, +2022-12-01 20:17:51,503 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=4349.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:18:15,285 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 20:18:22,181 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=4380.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 20:18:38,725 INFO [train.py:876] Epoch 4, batch 100, loss[loss=0.2763, simple_loss=0.2708, pruned_loss=0.1409, over 4707.00 frames. 
], tot_loss[loss=0.3642, simple_loss=0.3524, pruned_loss=0.188, over 374826.63 frames. ], batch size: 23, lr: 3.78e-02, +2022-12-01 20:18:43,234 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.620e+02 2.423e+02 3.045e+02 3.829e+02 7.826e+02, threshold=6.089e+02, percent-clipped=3.0 +2022-12-01 20:18:45,643 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0769, 3.9901, 3.6750, 3.8644, 3.7673, 3.8458, 4.0275, 4.0922], + device='cuda:0'), covar=tensor([0.0381, 0.0391, 0.0417, 0.0436, 0.0544, 0.0352, 0.0303, 0.0302], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0036, 0.0031, 0.0040, 0.0037, 0.0030, 0.0034, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.3244e-05, 2.2801e-05, 1.7944e-05, 2.4568e-05, 2.3756e-05, 1.6586e-05, + 1.8954e-05, 1.6025e-05], device='cuda:0') +2022-12-01 20:19:02,966 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:19:14,348 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.17 vs. limit=2.0 +2022-12-01 20:19:23,916 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4947, 3.4089, 2.8900, 2.9107, 3.3729, 3.7398, 3.1330, 2.8509], + device='cuda:0'), covar=tensor([0.0171, 0.0354, 0.0338, 0.0391, 0.0350, 0.0122, 0.0369, 0.0691], + device='cuda:0'), in_proj_covar=tensor([0.0018, 0.0026, 0.0022, 0.0024, 0.0026, 0.0022, 0.0024, 0.0031], + device='cuda:0'), out_proj_covar=tensor([9.7299e-06, 1.5509e-05, 1.2317e-05, 1.4538e-05, 1.4855e-05, 1.1518e-05, + 1.3306e-05, 2.0070e-05], device='cuda:0') +2022-12-01 20:19:25,415 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 20:19:28,338 INFO [train.py:876] Epoch 4, batch 150, loss[loss=0.3525, simple_loss=0.353, pruned_loss=0.176, over 4833.00 frames. ], tot_loss[loss=0.3751, simple_loss=0.3605, pruned_loss=0.1948, over 504468.14 frames. ], batch size: 41, lr: 3.77e-02, +2022-12-01 20:19:47,440 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.01 vs. limit=2.0 +2022-12-01 20:20:07,492 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.10 vs. limit=2.0 +2022-12-01 20:20:17,811 INFO [train.py:876] Epoch 4, batch 200, loss[loss=0.3799, simple_loss=0.3777, pruned_loss=0.1911, over 4825.00 frames. ], tot_loss[loss=0.3774, simple_loss=0.3631, pruned_loss=0.1958, over 605122.32 frames. ], batch size: 45, lr: 3.76e-02, +2022-12-01 20:20:17,923 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=4497.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:20:18,955 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1233, 4.1204, 3.7786, 3.9831, 4.5285, 4.2825, 4.0057, 3.5823], + device='cuda:0'), covar=tensor([0.0547, 0.0505, 0.0575, 0.0548, 0.0504, 0.0454, 0.0558, 0.0723], + device='cuda:0'), in_proj_covar=tensor([0.0051, 0.0045, 0.0040, 0.0045, 0.0044, 0.0047, 0.0049, 0.0044], + device='cuda:0'), out_proj_covar=tensor([5.0794e-05, 4.2232e-05, 3.8466e-05, 4.1292e-05, 4.1275e-05, 4.3710e-05, + 4.9379e-05, 4.3937e-05], device='cuda:0') +2022-12-01 20:20:21,834 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.675e+02 2.646e+02 3.418e+02 4.542e+02 9.380e+02, threshold=6.835e+02, percent-clipped=7.0 +2022-12-01 20:21:07,314 INFO [train.py:876] Epoch 4, batch 250, loss[loss=0.3608, simple_loss=0.3448, pruned_loss=0.1884, over 4844.00 frames. ], tot_loss[loss=0.3814, simple_loss=0.3664, pruned_loss=0.1982, over 681413.67 frames. 
], batch size: 41, lr: 3.75e-02, +2022-12-01 20:21:15,250 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 20:21:41,257 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=4582.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:21:46,335 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. limit=2.0 +2022-12-01 20:21:56,783 INFO [train.py:876] Epoch 4, batch 300, loss[loss=0.4027, simple_loss=0.3924, pruned_loss=0.2065, over 4890.00 frames. ], tot_loss[loss=0.3816, simple_loss=0.3667, pruned_loss=0.1982, over 743603.73 frames. ], batch size: 44, lr: 3.74e-02, +2022-12-01 20:22:00,772 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.619e+02 2.478e+02 3.073e+02 3.552e+02 7.859e+02, threshold=6.147e+02, percent-clipped=2.0 +2022-12-01 20:22:17,014 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 20:22:30,059 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=4630.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:22:44,029 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=4644.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:22:47,008 INFO [train.py:876] Epoch 4, batch 350, loss[loss=0.3211, simple_loss=0.325, pruned_loss=0.1586, over 4882.00 frames. ], tot_loss[loss=0.3768, simple_loss=0.3631, pruned_loss=0.1952, over 791049.31 frames. ], batch size: 38, lr: 3.73e-02, +2022-12-01 20:23:10,093 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.90 vs. limit=2.0 +2022-12-01 20:23:37,467 INFO [train.py:876] Epoch 4, batch 400, loss[loss=0.304, simple_loss=0.314, pruned_loss=0.147, over 4854.00 frames. ], tot_loss[loss=0.3729, simple_loss=0.36, pruned_loss=0.1929, over 824720.01 frames. ], batch size: 35, lr: 3.72e-02, +2022-12-01 20:23:41,464 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.168e+02 2.778e+02 3.350e+02 3.990e+02 8.124e+02, threshold=6.700e+02, percent-clipped=4.0 +2022-12-01 20:23:44,302 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.08 vs. limit=2.0 +2022-12-01 20:23:57,309 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 20:24:25,060 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:24:26,933 INFO [train.py:876] Epoch 4, batch 450, loss[loss=0.3728, simple_loss=0.3656, pruned_loss=0.19, over 4859.00 frames. ], tot_loss[loss=0.3694, simple_loss=0.3581, pruned_loss=0.1903, over 851743.92 frames. ], batch size: 36, lr: 3.71e-02, +2022-12-01 20:25:09,331 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2026, 2.0837, 2.2022, 2.3201, 1.9021, 2.1108, 2.5843, 2.4106], + device='cuda:0'), covar=tensor([0.0459, 0.0491, 0.0427, 0.0286, 0.0554, 0.0497, 0.0324, 0.0373], + device='cuda:0'), in_proj_covar=tensor([0.0029, 0.0030, 0.0026, 0.0025, 0.0030, 0.0029, 0.0020, 0.0026], + device='cuda:0'), out_proj_covar=tensor([2.1500e-05, 2.2695e-05, 1.9592e-05, 1.7611e-05, 2.3006e-05, 2.1591e-05, + 1.4647e-05, 1.9176e-05], device='cuda:0') +2022-12-01 20:25:15,818 INFO [train.py:876] Epoch 4, batch 500, loss[loss=0.3711, simple_loss=0.3584, pruned_loss=0.1919, over 4913.00 frames. ], tot_loss[loss=0.3749, simple_loss=0.3625, pruned_loss=0.1936, over 874992.94 frames. 
], batch size: 32, lr: 3.70e-02, +2022-12-01 20:25:16,033 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=4797.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:25:19,937 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.407e+02 2.559e+02 3.194e+02 4.263e+02 9.507e+02, threshold=6.389e+02, percent-clipped=3.0 +2022-12-01 20:26:00,592 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9110, 2.5085, 2.8740, 3.1260, 2.2057, 1.7332, 3.0628, 3.4402], + device='cuda:0'), covar=tensor([0.0264, 0.0593, 0.0432, 0.0340, 0.0482, 0.0427, 0.0258, 0.0170], + device='cuda:0'), in_proj_covar=tensor([0.0026, 0.0030, 0.0027, 0.0029, 0.0032, 0.0025, 0.0026, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.5408e-05, 3.2357e-05, 2.6546e-05, 2.8847e-05, 3.4516e-05, 2.3992e-05, + 2.5585e-05, 2.1519e-05], device='cuda:0') +2022-12-01 20:26:04,376 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=4845.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:26:06,414 INFO [train.py:876] Epoch 4, batch 550, loss[loss=0.2747, simple_loss=0.2868, pruned_loss=0.1313, over 4906.00 frames. ], tot_loss[loss=0.3702, simple_loss=0.3592, pruned_loss=0.1906, over 890428.75 frames. ], batch size: 29, lr: 3.69e-02, +2022-12-01 20:26:42,336 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6229, 3.6595, 3.5131, 4.2045, 3.2917, 3.4521, 3.4623, 3.3415], + device='cuda:0'), covar=tensor([0.0264, 0.0201, 0.0199, 0.0178, 0.0240, 0.0275, 0.0230, 0.0289], + device='cuda:0'), in_proj_covar=tensor([0.0021, 0.0020, 0.0019, 0.0017, 0.0022, 0.0022, 0.0020, 0.0020], + device='cuda:0'), out_proj_covar=tensor([2.3057e-05, 2.0108e-05, 1.8464e-05, 1.7318e-05, 2.4334e-05, 2.5943e-05, + 2.1182e-05, 2.1462e-05], device='cuda:0') +2022-12-01 20:26:56,198 INFO [train.py:876] Epoch 4, batch 600, loss[loss=0.5424, simple_loss=0.4799, pruned_loss=0.3025, over 4670.00 frames. ], tot_loss[loss=0.3687, simple_loss=0.3579, pruned_loss=0.1897, over 903694.42 frames. ], batch size: 63, lr: 3.68e-02, +2022-12-01 20:27:00,236 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.377e+02 2.544e+02 3.150e+02 3.771e+02 7.691e+02, threshold=6.299e+02, percent-clipped=2.0 +2022-12-01 20:27:42,616 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=4944.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:27:45,605 INFO [train.py:876] Epoch 4, batch 650, loss[loss=0.2823, simple_loss=0.286, pruned_loss=0.1393, over 4911.00 frames. ], tot_loss[loss=0.3667, simple_loss=0.3569, pruned_loss=0.1882, over 917138.04 frames. ], batch size: 29, lr: 3.67e-02, +2022-12-01 20:28:10,339 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8258, 2.8181, 3.0048, 3.3456, 2.3243, 2.1427, 3.3196, 3.0443], + device='cuda:0'), covar=tensor([0.0246, 0.0473, 0.0330, 0.0323, 0.0356, 0.0287, 0.0260, 0.0340], + device='cuda:0'), in_proj_covar=tensor([0.0024, 0.0028, 0.0025, 0.0027, 0.0030, 0.0023, 0.0025, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.3338e-05, 3.0221e-05, 2.4903e-05, 2.6831e-05, 3.2678e-05, 2.3183e-05, + 2.4661e-05, 2.1730e-05], device='cuda:0') +2022-12-01 20:28:29,774 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=4992.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:28:34,682 INFO [train.py:876] Epoch 4, batch 700, loss[loss=0.3681, simple_loss=0.3667, pruned_loss=0.1847, over 4808.00 frames. ], tot_loss[loss=0.3662, simple_loss=0.3567, pruned_loss=0.1878, over 925120.23 frames. 
], batch size: 33, lr: 3.66e-02, +2022-12-01 20:28:38,629 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.640e+02 2.437e+02 3.329e+02 4.657e+02 9.475e+02, threshold=6.657e+02, percent-clipped=8.0 +2022-12-01 20:28:42,068 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4578, 1.5887, 1.5620, 2.0296, 1.5515, 1.8396, 1.9162, 2.0191], + device='cuda:0'), covar=tensor([0.0877, 0.0534, 0.0537, 0.0417, 0.0753, 0.0472, 0.0725, 0.0444], + device='cuda:0'), in_proj_covar=tensor([0.0043, 0.0038, 0.0038, 0.0032, 0.0043, 0.0035, 0.0039, 0.0036], + device='cuda:0'), out_proj_covar=tensor([3.7876e-05, 3.0431e-05, 3.0338e-05, 2.6105e-05, 3.7102e-05, 2.8185e-05, + 3.2951e-05, 2.8004e-05], device='cuda:0') +2022-12-01 20:29:04,464 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.58 vs. limit=2.0 +2022-12-01 20:29:09,486 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.99 vs. limit=2.0 +2022-12-01 20:29:24,790 INFO [train.py:876] Epoch 4, batch 750, loss[loss=0.3514, simple_loss=0.3563, pruned_loss=0.1732, over 4794.00 frames. ], tot_loss[loss=0.362, simple_loss=0.3536, pruned_loss=0.1852, over 930635.59 frames. ], batch size: 32, lr: 3.65e-02, +2022-12-01 20:29:29,561 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.49 vs. limit=2.0 +2022-12-01 20:29:47,933 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.10 vs. limit=2.0 +2022-12-01 20:29:56,345 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.29 vs. limit=5.0 +2022-12-01 20:30:13,461 INFO [train.py:876] Epoch 4, batch 800, loss[loss=0.4324, simple_loss=0.4216, pruned_loss=0.2216, over 4876.00 frames. ], tot_loss[loss=0.3648, simple_loss=0.3562, pruned_loss=0.1866, over 934691.51 frames. ], batch size: 44, lr: 3.65e-02, +2022-12-01 20:30:17,346 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.642e+02 2.618e+02 3.294e+02 3.993e+02 9.345e+02, threshold=6.588e+02, percent-clipped=5.0 +2022-12-01 20:30:20,231 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.12 vs. limit=2.0 +2022-12-01 20:30:38,662 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0 +2022-12-01 20:30:51,013 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.11 vs. limit=2.0 +2022-12-01 20:31:02,202 INFO [train.py:876] Epoch 4, batch 850, loss[loss=0.4295, simple_loss=0.4181, pruned_loss=0.2204, over 4800.00 frames. ], tot_loss[loss=0.3614, simple_loss=0.3541, pruned_loss=0.1843, over 941427.66 frames. ], batch size: 51, lr: 3.64e-02, +2022-12-01 20:31:11,434 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.23 vs. limit=5.0 +2022-12-01 20:31:30,282 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.97 vs. limit=2.0 +2022-12-01 20:31:50,838 INFO [train.py:876] Epoch 4, batch 900, loss[loss=0.3428, simple_loss=0.3425, pruned_loss=0.1715, over 4833.00 frames. ], tot_loss[loss=0.3612, simple_loss=0.3544, pruned_loss=0.184, over 945930.99 frames. ], batch size: 34, lr: 3.63e-02, +2022-12-01 20:31:54,916 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.606e+02 2.599e+02 3.172e+02 4.347e+02 1.117e+03, threshold=6.344e+02, percent-clipped=4.0 +2022-12-01 20:32:37,165 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.14 vs. limit=2.0 +2022-12-01 20:32:40,553 INFO [train.py:876] Epoch 4, batch 950, loss[loss=0.3752, simple_loss=0.3676, pruned_loss=0.1914, over 4858.00 frames. 
], tot_loss[loss=0.3589, simple_loss=0.3524, pruned_loss=0.1827, over 949596.06 frames. ], batch size: 40, lr: 3.62e-02, +2022-12-01 20:33:30,086 INFO [train.py:876] Epoch 4, batch 1000, loss[loss=0.3223, simple_loss=0.3272, pruned_loss=0.1587, over 4915.00 frames. ], tot_loss[loss=0.3576, simple_loss=0.3514, pruned_loss=0.1819, over 949730.11 frames. ], batch size: 32, lr: 3.61e-02, +2022-12-01 20:33:33,913 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.532e+02 2.573e+02 3.377e+02 4.450e+02 7.336e+02, threshold=6.755e+02, percent-clipped=5.0 +2022-12-01 20:33:43,074 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.47 vs. limit=2.0 +2022-12-01 20:33:49,579 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1224, 3.9793, 3.2116, 2.9341, 3.5203, 3.4579, 3.6835, 3.2971], + device='cuda:0'), covar=tensor([0.1859, 0.0330, 0.0524, 0.1443, 0.0560, 0.0563, 0.0558, 0.0624], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0060, 0.0054, 0.0073, 0.0057, 0.0064, 0.0061, 0.0055], + device='cuda:0'), out_proj_covar=tensor([6.5203e-05, 3.2208e-05, 3.3837e-05, 5.4445e-05, 3.5081e-05, 3.6818e-05, + 3.7596e-05, 3.5878e-05], device='cuda:0') +2022-12-01 20:34:19,332 INFO [train.py:876] Epoch 4, batch 1050, loss[loss=0.3306, simple_loss=0.3401, pruned_loss=0.1606, over 4855.00 frames. ], tot_loss[loss=0.358, simple_loss=0.3521, pruned_loss=0.182, over 950455.60 frames. ], batch size: 40, lr: 3.60e-02, +2022-12-01 20:34:47,479 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.02 vs. limit=2.0 +2022-12-01 20:35:03,785 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0420, 2.4466, 3.0031, 3.1731, 2.8523, 2.9592, 3.3582, 2.4634], + device='cuda:0'), covar=tensor([0.3865, 0.0764, 0.0857, 0.0263, 0.0591, 0.0870, 0.0400, 0.0506], + device='cuda:0'), in_proj_covar=tensor([0.0109, 0.0038, 0.0054, 0.0037, 0.0048, 0.0046, 0.0037, 0.0050], + device='cuda:0'), out_proj_covar=tensor([1.2319e-04, 4.1038e-05, 5.4071e-05, 3.5033e-05, 4.7706e-05, 4.8556e-05, + 3.8068e-05, 5.0142e-05], device='cuda:0') +2022-12-01 20:35:08,502 INFO [train.py:876] Epoch 4, batch 1100, loss[loss=0.4213, simple_loss=0.4075, pruned_loss=0.2176, over 4786.00 frames. ], tot_loss[loss=0.3589, simple_loss=0.3531, pruned_loss=0.1824, over 951629.50 frames. ], batch size: 51, lr: 3.59e-02, +2022-12-01 20:35:12,491 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.614e+02 2.802e+02 3.506e+02 4.528e+02 9.875e+02, threshold=7.011e+02, percent-clipped=4.0 +2022-12-01 20:35:57,786 INFO [train.py:876] Epoch 4, batch 1150, loss[loss=0.4466, simple_loss=0.4228, pruned_loss=0.2352, over 4780.00 frames. ], tot_loss[loss=0.3647, simple_loss=0.358, pruned_loss=0.1857, over 950752.43 frames. ], batch size: 54, lr: 3.58e-02, +2022-12-01 20:36:16,110 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.06 vs. limit=2.0 +2022-12-01 20:36:47,166 INFO [train.py:876] Epoch 4, batch 1200, loss[loss=0.3408, simple_loss=0.3537, pruned_loss=0.164, over 4847.00 frames. ], tot_loss[loss=0.365, simple_loss=0.3582, pruned_loss=0.1859, over 949901.37 frames. 
], batch size: 40, lr: 3.57e-02, +2022-12-01 20:36:47,403 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1174, 2.3469, 2.5722, 2.4649, 2.1388, 1.8677, 1.7662, 2.1875], + device='cuda:0'), covar=tensor([0.0448, 0.0270, 0.0208, 0.0230, 0.0389, 0.0378, 0.0658, 0.0336], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0031, 0.0021, 0.0030, 0.0034, 0.0032, 0.0035, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.5234e-05, 2.4045e-05, 1.6658e-05, 2.3558e-05, 2.6872e-05, 2.6089e-05, + 2.9421e-05, 2.5906e-05], device='cuda:0') +2022-12-01 20:36:50,948 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.688e+02 2.696e+02 3.538e+02 4.457e+02 7.944e+02, threshold=7.077e+02, percent-clipped=2.0 +2022-12-01 20:37:20,392 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2019, 2.2417, 2.0969, 2.1776, 2.5216, 2.4211, 2.1689, 1.9181], + device='cuda:0'), covar=tensor([0.0416, 0.0293, 0.0492, 0.0396, 0.0199, 0.0972, 0.0437, 0.0330], + device='cuda:0'), in_proj_covar=tensor([0.0025, 0.0022, 0.0024, 0.0023, 0.0024, 0.0023, 0.0023, 0.0023], + device='cuda:0'), out_proj_covar=tensor([1.9098e-05, 1.6626e-05, 1.8722e-05, 1.7124e-05, 1.8970e-05, 1.8180e-05, + 1.7638e-05, 1.7870e-05], device='cuda:0') +2022-12-01 20:37:22,756 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.54 vs. limit=5.0 +2022-12-01 20:37:32,376 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.48 vs. limit=5.0 +2022-12-01 20:37:35,832 INFO [train.py:876] Epoch 4, batch 1250, loss[loss=0.3205, simple_loss=0.3222, pruned_loss=0.1594, over 4928.00 frames. ], tot_loss[loss=0.359, simple_loss=0.3535, pruned_loss=0.1823, over 951304.72 frames. ], batch size: 32, lr: 3.56e-02, +2022-12-01 20:37:37,594 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.13 vs. limit=5.0 +2022-12-01 20:37:42,347 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.36 vs. limit=5.0 +2022-12-01 20:37:47,964 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0363, 2.6772, 3.2568, 2.8300, 3.0051, 2.9403, 3.6804, 2.8579], + device='cuda:0'), covar=tensor([0.4075, 0.0942, 0.0599, 0.0216, 0.0487, 0.0860, 0.0253, 0.0545], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0041, 0.0057, 0.0039, 0.0051, 0.0047, 0.0038, 0.0052], + device='cuda:0'), out_proj_covar=tensor([1.3158e-04, 4.4504e-05, 5.6779e-05, 3.7434e-05, 5.0464e-05, 4.9012e-05, + 3.9436e-05, 5.3311e-05], device='cuda:0') +2022-12-01 20:38:25,399 INFO [train.py:876] Epoch 4, batch 1300, loss[loss=0.2979, simple_loss=0.3069, pruned_loss=0.1444, over 4741.00 frames. ], tot_loss[loss=0.358, simple_loss=0.3527, pruned_loss=0.1816, over 950148.82 frames. ], batch size: 27, lr: 3.56e-02, +2022-12-01 20:38:29,449 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.618e+02 2.494e+02 2.988e+02 3.754e+02 9.296e+02, threshold=5.976e+02, percent-clipped=3.0 +2022-12-01 20:38:59,501 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. 
limit=2.0 +2022-12-01 20:39:05,664 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3699, 2.6065, 2.6514, 2.9936, 3.5379, 3.0983, 2.6604, 3.5543], + device='cuda:0'), covar=tensor([0.0244, 0.0570, 0.0689, 0.0419, 0.0183, 0.0412, 0.0410, 0.0170], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0037, 0.0040, 0.0030, 0.0034, 0.0035, 0.0026, 0.0036], + device='cuda:0'), out_proj_covar=tensor([3.0873e-05, 3.9372e-05, 4.1509e-05, 3.4908e-05, 3.6157e-05, 3.9703e-05, + 3.2019e-05, 3.4479e-05], device='cuda:0') +2022-12-01 20:39:14,324 INFO [train.py:876] Epoch 4, batch 1350, loss[loss=0.2639, simple_loss=0.29, pruned_loss=0.1189, over 4893.00 frames. ], tot_loss[loss=0.3595, simple_loss=0.3544, pruned_loss=0.1824, over 951261.19 frames. ], batch size: 30, lr: 3.55e-02, +2022-12-01 20:39:17,379 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=5650.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:40:03,316 INFO [train.py:876] Epoch 4, batch 1400, loss[loss=0.4584, simple_loss=0.4234, pruned_loss=0.2467, over 4140.00 frames. ], tot_loss[loss=0.3627, simple_loss=0.3561, pruned_loss=0.1847, over 946354.30 frames. ], batch size: 72, lr: 3.54e-02, +2022-12-01 20:40:06,982 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.550e+02 2.482e+02 3.200e+02 4.136e+02 9.789e+02, threshold=6.399e+02, percent-clipped=2.0 +2022-12-01 20:40:17,036 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=5711.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 20:40:33,330 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-4.pt +2022-12-01 20:40:50,020 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 20:40:50,595 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 20:40:51,239 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:40:51,270 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 20:40:52,069 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 20:40:52,389 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 20:40:54,357 INFO [train.py:876] Epoch 5, batch 0, loss[loss=0.3819, simple_loss=0.376, pruned_loss=0.1939, over 4830.00 frames. ], tot_loss[loss=0.3819, simple_loss=0.376, pruned_loss=0.1939, over 4830.00 frames. ], batch size: 45, lr: 3.29e-02, +2022-12-01 20:40:54,358 INFO [train.py:901] Computing validation loss +2022-12-01 20:41:09,779 INFO [train.py:910] Epoch 5, validation: loss=0.3051, simple_loss=0.3372, pruned_loss=0.1365, over 253132.00 frames. 
+2022-12-01 20:41:09,780 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 20:41:35,147 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=5755.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:41:58,939 INFO [train.py:876] Epoch 5, batch 50, loss[loss=0.3603, simple_loss=0.3803, pruned_loss=0.1702, over 4838.00 frames. ], tot_loss[loss=0.331, simple_loss=0.3343, pruned_loss=0.1638, over 214302.54 frames. ], batch size: 41, lr: 3.28e-02, +2022-12-01 20:42:20,725 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.435e+02 2.499e+02 3.085e+02 4.040e+02 6.673e+02, threshold=6.171e+02, percent-clipped=1.0 +2022-12-01 20:42:21,315 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.37 vs. limit=2.0 +2022-12-01 20:42:21,759 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 20:42:31,753 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6845, 3.1659, 3.5570, 3.3628, 3.0584, 3.3890, 3.1592, 3.9602], + device='cuda:0'), covar=tensor([0.0131, 0.0453, 0.0305, 0.0359, 0.0312, 0.0236, 0.0400, 0.0122], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0037, 0.0039, 0.0030, 0.0034, 0.0033, 0.0027, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.0239e-05, 3.9237e-05, 4.0198e-05, 3.4817e-05, 3.5356e-05, 3.7081e-05, + 3.1726e-05, 3.3622e-05], device='cuda:0') +2022-12-01 20:42:35,905 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=5816.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:42:44,474 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6316, 1.5536, 2.0103, 2.1140, 1.4982, 1.7256, 1.2870, 1.3141], + device='cuda:0'), covar=tensor([0.0560, 0.0484, 0.0171, 0.0237, 0.0610, 0.0415, 0.0596, 0.0698], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0037, 0.0026, 0.0034, 0.0042, 0.0036, 0.0041, 0.0040], + device='cuda:0'), out_proj_covar=tensor([3.1109e-05, 2.9743e-05, 1.9117e-05, 2.6305e-05, 3.5282e-05, 2.9891e-05, + 3.5774e-05, 3.3473e-05], device='cuda:0') +2022-12-01 20:42:46,194 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7338, 4.8244, 4.5633, 4.9104, 5.0862, 4.8501, 4.8103, 4.6439], + device='cuda:0'), covar=tensor([0.0354, 0.0326, 0.0283, 0.0232, 0.0345, 0.0219, 0.0345, 0.0358], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0051, 0.0045, 0.0054, 0.0050, 0.0052, 0.0059, 0.0049], + device='cuda:0'), out_proj_covar=tensor([6.0177e-05, 4.6951e-05, 4.2289e-05, 5.1313e-05, 4.7805e-05, 4.8133e-05, + 5.7056e-05, 4.6755e-05], device='cuda:0') +2022-12-01 20:42:47,221 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2642, 3.1673, 3.5322, 3.3312, 2.8944, 2.4373, 3.2186, 3.1759], + device='cuda:0'), covar=tensor([0.0366, 0.0416, 0.0213, 0.0323, 0.0540, 0.1950, 0.0334, 0.0782], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0044, 0.0044, 0.0057, 0.0059, 0.0072, 0.0037, 0.0065], + device='cuda:0'), out_proj_covar=tensor([4.1700e-05, 4.0699e-05, 3.8883e-05, 5.3455e-05, 5.8122e-05, 7.8201e-05, + 3.5479e-05, 6.2987e-05], device='cuda:0') +2022-12-01 20:42:47,919 INFO [train.py:876] Epoch 5, batch 100, loss[loss=0.4135, simple_loss=0.3736, pruned_loss=0.2267, over 4834.00 frames. ], tot_loss[loss=0.3292, simple_loss=0.3333, pruned_loss=0.1626, over 379393.24 frames. 
], batch size: 41, lr: 3.27e-02, +2022-12-01 20:42:49,092 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6349, 2.5487, 3.3720, 3.1075, 3.3844, 2.8354, 3.7100, 2.9486], + device='cuda:0'), covar=tensor([0.3485, 0.1429, 0.0617, 0.0207, 0.0446, 0.1065, 0.0233, 0.0633], + device='cuda:0'), in_proj_covar=tensor([0.0119, 0.0042, 0.0059, 0.0041, 0.0052, 0.0047, 0.0036, 0.0052], + device='cuda:0'), out_proj_covar=tensor([1.3414e-04, 4.5600e-05, 5.9562e-05, 3.9597e-05, 5.0999e-05, 4.9128e-05, + 3.8475e-05, 5.3061e-05], device='cuda:0') +2022-12-01 20:43:03,839 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:43:16,551 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=5858.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:43:26,224 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 20:43:36,602 INFO [train.py:876] Epoch 5, batch 150, loss[loss=0.49, simple_loss=0.4393, pruned_loss=0.2704, over 3994.00 frames. ], tot_loss[loss=0.337, simple_loss=0.3399, pruned_loss=0.167, over 507171.65 frames. ], batch size: 72, lr: 3.27e-02, +2022-12-01 20:43:40,041 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.09 vs. limit=2.0 +2022-12-01 20:43:58,957 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.704e+02 2.498e+02 3.191e+02 4.171e+02 9.020e+02, threshold=6.382e+02, percent-clipped=2.0 +2022-12-01 20:44:09,278 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.19 vs. limit=2.0 +2022-12-01 20:44:16,782 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=5919.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:44:26,233 INFO [train.py:876] Epoch 5, batch 200, loss[loss=0.2742, simple_loss=0.2816, pruned_loss=0.1334, over 4925.00 frames. ], tot_loss[loss=0.3391, simple_loss=0.3421, pruned_loss=0.1681, over 607083.94 frames. ], batch size: 31, lr: 3.26e-02, +2022-12-01 20:44:39,597 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.04 vs. limit=2.0 +2022-12-01 20:44:54,889 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3714, 4.1507, 4.1208, 4.5508, 4.2908, 4.5634, 3.7453, 4.4462], + device='cuda:0'), covar=tensor([0.0391, 0.0318, 0.0176, 0.0224, 0.0244, 0.0226, 0.0222, 0.0196], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0056, 0.0047, 0.0061, 0.0062, 0.0049, 0.0051, 0.0046], + device='cuda:0'), out_proj_covar=tensor([4.0200e-05, 3.5393e-05, 2.8691e-05, 3.9225e-05, 4.0886e-05, 2.9403e-05, + 3.0788e-05, 2.8212e-05], device='cuda:0') +2022-12-01 20:45:08,648 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.08 vs. limit=5.0 +2022-12-01 20:45:09,913 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 20:45:14,735 INFO [train.py:876] Epoch 5, batch 250, loss[loss=0.3944, simple_loss=0.3987, pruned_loss=0.195, over 4726.00 frames. ], tot_loss[loss=0.3414, simple_loss=0.3448, pruned_loss=0.169, over 685575.81 frames. ], batch size: 63, lr: 3.25e-02, +2022-12-01 20:45:26,121 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=5990.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:45:26,383 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-01 20:45:31,122 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.80 vs. 
limit=5.0 +2022-12-01 20:45:32,168 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.35 vs. limit=5.0 +2022-12-01 20:45:35,703 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-6000.pt +2022-12-01 20:45:38,997 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.434e+02 2.677e+02 3.542e+02 4.478e+02 1.276e+03, threshold=7.084e+02, percent-clipped=9.0 +2022-12-01 20:45:43,955 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6006.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 20:46:05,967 INFO [train.py:876] Epoch 5, batch 300, loss[loss=0.3002, simple_loss=0.3129, pruned_loss=0.1438, over 4905.00 frames. ], tot_loss[loss=0.3377, simple_loss=0.3406, pruned_loss=0.1674, over 743569.00 frames. ], batch size: 31, lr: 3.24e-02, +2022-12-01 20:46:17,002 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 20:46:20,251 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6043.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:46:27,803 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6051.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:46:44,799 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.34 vs. limit=2.0 +2022-12-01 20:46:55,339 INFO [train.py:876] Epoch 5, batch 350, loss[loss=0.3384, simple_loss=0.335, pruned_loss=0.1709, over 4799.00 frames. ], tot_loss[loss=0.3344, simple_loss=0.3379, pruned_loss=0.1655, over 789396.59 frames. ], batch size: 33, lr: 3.23e-02, +2022-12-01 20:47:01,777 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.26 vs. limit=2.0 +2022-12-01 20:47:02,506 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8302, 2.1515, 2.4181, 2.3567, 2.2274, 1.8066, 1.7451, 2.0292], + device='cuda:0'), covar=tensor([0.1313, 0.0980, 0.0598, 0.0537, 0.0757, 0.0781, 0.1534, 0.0698], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0041, 0.0058, 0.0048, 0.0053, 0.0049, 0.0074, 0.0050], + device='cuda:0'), out_proj_covar=tensor([4.4703e-05, 3.8490e-05, 4.9183e-05, 4.0865e-05, 4.6672e-05, 4.2317e-05, + 7.0950e-05, 4.1366e-05], device='cuda:0') +2022-12-01 20:47:05,853 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.49 vs. limit=5.0 +2022-12-01 20:47:17,278 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.338e+02 2.532e+02 3.190e+02 4.191e+02 1.602e+03, threshold=6.379e+02, percent-clipped=3.0 +2022-12-01 20:47:20,802 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6104.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:47:25,472 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9178, 2.5026, 2.9837, 2.8319, 2.4597, 1.8284, 2.9930, 3.2681], + device='cuda:0'), covar=tensor([0.0153, 0.0456, 0.0280, 0.0382, 0.0330, 0.0338, 0.0250, 0.0171], + device='cuda:0'), in_proj_covar=tensor([0.0026, 0.0034, 0.0026, 0.0032, 0.0031, 0.0025, 0.0027, 0.0025], + device='cuda:0'), out_proj_covar=tensor([2.6584e-05, 3.8766e-05, 2.7289e-05, 3.2866e-05, 3.5718e-05, 2.6458e-05, + 2.7115e-05, 2.3774e-05], device='cuda:0') +2022-12-01 20:47:27,279 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6111.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:47:44,943 INFO [train.py:876] Epoch 5, batch 400, loss[loss=0.2756, simple_loss=0.2895, pruned_loss=0.1309, over 4777.00 frames. 
], tot_loss[loss=0.3311, simple_loss=0.3356, pruned_loss=0.1633, over 823965.66 frames. ], batch size: 26, lr: 3.23e-02, +2022-12-01 20:47:50,146 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5347, 4.7780, 4.4757, 5.2452, 4.3418, 3.8846, 4.8336, 4.1531], + device='cuda:0'), covar=tensor([0.0228, 0.0146, 0.0132, 0.0108, 0.0214, 0.0216, 0.0090, 0.0162], + device='cuda:0'), in_proj_covar=tensor([0.0027, 0.0026, 0.0024, 0.0022, 0.0029, 0.0030, 0.0025, 0.0025], + device='cuda:0'), out_proj_covar=tensor([3.2192e-05, 2.8187e-05, 2.4581e-05, 2.3974e-05, 3.4710e-05, 3.5030e-05, + 2.6765e-05, 2.7664e-05], device='cuda:0') +2022-12-01 20:47:51,322 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6135.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:47:59,199 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 20:48:06,007 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.34 vs. limit=2.0 +2022-12-01 20:48:20,651 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4239, 1.7642, 3.0904, 2.6906, 3.0275, 3.0697, 3.5078, 2.5174], + device='cuda:0'), covar=tensor([0.0942, 0.0207, 0.0168, 0.0253, 0.0140, 0.0259, 0.0103, 0.0170], + device='cuda:0'), in_proj_covar=tensor([0.0025, 0.0022, 0.0025, 0.0025, 0.0026, 0.0023, 0.0024, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.0520e-05, 1.6980e-05, 2.0386e-05, 1.9235e-05, 2.0403e-05, 1.9185e-05, + 1.8376e-05, 1.8506e-05], device='cuda:0') +2022-12-01 20:48:24,151 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 20:48:26,021 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6170.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:48:34,596 INFO [train.py:876] Epoch 5, batch 450, loss[loss=0.3447, simple_loss=0.3631, pruned_loss=0.1631, over 4853.00 frames. ], tot_loss[loss=0.3316, simple_loss=0.3355, pruned_loss=0.1638, over 853302.51 frames. ], batch size: 36, lr: 3.22e-02, +2022-12-01 20:48:48,584 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9379, 3.6976, 3.4817, 3.8642, 3.3460, 2.9767, 4.1347, 2.8382], + device='cuda:0'), covar=tensor([0.0210, 0.0179, 0.0176, 0.0217, 0.0282, 0.1223, 0.0102, 0.0791], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0045, 0.0046, 0.0062, 0.0062, 0.0081, 0.0039, 0.0070], + device='cuda:0'), out_proj_covar=tensor([4.8148e-05, 4.1700e-05, 4.2350e-05, 5.9039e-05, 6.1604e-05, 8.6062e-05, + 3.7011e-05, 6.8403e-05], device='cuda:0') +2022-12-01 20:48:51,530 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6196.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:48:56,447 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.558e+02 2.563e+02 3.024e+02 3.997e+02 1.101e+03, threshold=6.049e+02, percent-clipped=3.0 +2022-12-01 20:48:58,712 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6203.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:49:06,762 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6211.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:49:09,613 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6214.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:49:24,306 INFO [train.py:876] Epoch 5, batch 500, loss[loss=0.2416, simple_loss=0.2544, pruned_loss=0.1144, over 4829.00 frames. 
], tot_loss[loss=0.3283, simple_loss=0.3331, pruned_loss=0.1617, over 873689.47 frames. ], batch size: 25, lr: 3.21e-02, +2022-12-01 20:49:26,473 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6231.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:49:59,137 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6264.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:50:00,982 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6266.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:50:06,941 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6272.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:50:13,473 INFO [train.py:876] Epoch 5, batch 550, loss[loss=0.3013, simple_loss=0.3173, pruned_loss=0.1427, over 4854.00 frames. ], tot_loss[loss=0.3262, simple_loss=0.3319, pruned_loss=0.1603, over 889193.09 frames. ], batch size: 35, lr: 3.20e-02, +2022-12-01 20:50:14,739 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9409, 3.3735, 3.4980, 3.9760, 3.3936, 2.7528, 4.1350, 3.4603], + device='cuda:0'), covar=tensor([0.0254, 0.0287, 0.0178, 0.0200, 0.0371, 0.1715, 0.0152, 0.0527], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0044, 0.0047, 0.0062, 0.0063, 0.0079, 0.0039, 0.0070], + device='cuda:0'), out_proj_covar=tensor([4.9507e-05, 4.1561e-05, 4.2975e-05, 5.9012e-05, 6.2807e-05, 8.4786e-05, + 3.7788e-05, 6.9163e-05], device='cuda:0') +2022-12-01 20:50:35,399 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.481e+02 2.482e+02 3.175e+02 4.041e+02 1.492e+03, threshold=6.349e+02, percent-clipped=6.0 +2022-12-01 20:50:40,302 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6306.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:50:52,136 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.67 vs. limit=5.0 +2022-12-01 20:51:00,521 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6327.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:51:02,327 INFO [train.py:876] Epoch 5, batch 600, loss[loss=0.3309, simple_loss=0.3398, pruned_loss=0.161, over 4879.00 frames. ], tot_loss[loss=0.3256, simple_loss=0.3311, pruned_loss=0.16, over 902591.64 frames. ], batch size: 38, lr: 3.19e-02, +2022-12-01 20:51:10,486 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5496, 3.2838, 3.4546, 3.3445, 3.9380, 3.4697, 3.7604, 3.8758], + device='cuda:0'), covar=tensor([0.0167, 0.0348, 0.0084, 0.0254, 0.0207, 0.0378, 0.0143, 0.0252], + device='cuda:0'), in_proj_covar=tensor([0.0040, 0.0051, 0.0032, 0.0042, 0.0038, 0.0055, 0.0037, 0.0037], + device='cuda:0'), out_proj_covar=tensor([3.2721e-05, 4.3647e-05, 2.4011e-05, 3.3005e-05, 3.0072e-05, 4.9697e-05, + 3.1408e-05, 2.8849e-05], device='cuda:0') +2022-12-01 20:51:18,786 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6346.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:51:22,536 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.06 vs. limit=5.0 +2022-12-01 20:51:26,738 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6354.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:51:50,646 INFO [train.py:876] Epoch 5, batch 650, loss[loss=0.3659, simple_loss=0.3749, pruned_loss=0.1785, over 4795.00 frames. ], tot_loss[loss=0.3298, simple_loss=0.3345, pruned_loss=0.1625, over 912827.20 frames. 
], batch size: 54, lr: 3.19e-02, +2022-12-01 20:52:10,334 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6399.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:52:12,382 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.385e+02 2.623e+02 3.618e+02 4.498e+02 1.122e+03, threshold=7.236e+02, percent-clipped=8.0 +2022-12-01 20:52:22,282 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6411.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:52:40,128 INFO [train.py:876] Epoch 5, batch 700, loss[loss=0.3352, simple_loss=0.3415, pruned_loss=0.1645, over 4867.00 frames. ], tot_loss[loss=0.3293, simple_loss=0.3341, pruned_loss=0.1623, over 921206.15 frames. ], batch size: 38, lr: 3.18e-02, +2022-12-01 20:52:41,618 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 20:53:09,905 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6459.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:53:13,015 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6462.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:53:17,824 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6467.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:53:29,152 INFO [train.py:876] Epoch 5, batch 750, loss[loss=0.382, simple_loss=0.3797, pruned_loss=0.1922, over 4879.00 frames. ], tot_loss[loss=0.3278, simple_loss=0.333, pruned_loss=0.1613, over 926490.47 frames. ], batch size: 37, lr: 3.17e-02, +2022-12-01 20:53:41,336 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6491.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:53:50,868 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.737e+02 2.761e+02 3.335e+02 3.977e+02 8.125e+02, threshold=6.669e+02, percent-clipped=2.0 +2022-12-01 20:54:03,554 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6514.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:54:10,430 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3218, 2.8108, 2.8247, 3.1056, 2.6766, 2.5926, 3.7500, 2.5253], + device='cuda:0'), covar=tensor([0.0360, 0.0317, 0.0351, 0.0516, 0.0534, 0.1670, 0.0212, 0.0947], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0046, 0.0048, 0.0062, 0.0063, 0.0079, 0.0039, 0.0071], + device='cuda:0'), out_proj_covar=tensor([5.1061e-05, 4.4577e-05, 4.5148e-05, 5.9013e-05, 6.3040e-05, 8.3687e-05, + 3.8628e-05, 7.0281e-05], device='cuda:0') +2022-12-01 20:54:12,349 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6523.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:54:14,865 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6526.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:54:16,949 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6528.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:54:17,749 INFO [train.py:876] Epoch 5, batch 800, loss[loss=0.3296, simple_loss=0.3314, pruned_loss=0.1639, over 4791.00 frames. ], tot_loss[loss=0.3251, simple_loss=0.3316, pruned_loss=0.1593, over 933834.15 frames. 
], batch size: 32, lr: 3.16e-02, +2022-12-01 20:54:47,513 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6559.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:54:50,513 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6562.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:54:55,596 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6567.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:55:07,340 INFO [train.py:876] Epoch 5, batch 850, loss[loss=0.287, simple_loss=0.2961, pruned_loss=0.1389, over 4911.00 frames. ], tot_loss[loss=0.3221, simple_loss=0.3292, pruned_loss=0.1575, over 935762.93 frames. ], batch size: 29, lr: 3.16e-02, +2022-12-01 20:55:29,553 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.777e+02 2.686e+02 3.164e+02 4.136e+02 7.207e+02, threshold=6.327e+02, percent-clipped=2.0 +2022-12-01 20:55:41,014 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.54 vs. limit=2.0 +2022-12-01 20:55:50,095 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6622.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:55:56,961 INFO [train.py:876] Epoch 5, batch 900, loss[loss=0.3402, simple_loss=0.3602, pruned_loss=0.1601, over 4823.00 frames. ], tot_loss[loss=0.3216, simple_loss=0.3292, pruned_loss=0.157, over 940220.45 frames. ], batch size: 45, lr: 3.15e-02, +2022-12-01 20:55:57,049 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6629.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:56:13,703 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6646.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:56:16,975 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.02 vs. limit=2.0 +2022-12-01 20:56:45,412 INFO [train.py:876] Epoch 5, batch 950, loss[loss=0.3485, simple_loss=0.3588, pruned_loss=0.1691, over 4829.00 frames. ], tot_loss[loss=0.3219, simple_loss=0.3292, pruned_loss=0.1573, over 942024.24 frames. ], batch size: 49, lr: 3.14e-02, +2022-12-01 20:56:53,206 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7622, 3.4779, 3.2685, 3.8426, 3.4369, 2.9834, 4.2097, 3.2540], + device='cuda:0'), covar=tensor([0.0258, 0.0228, 0.0228, 0.0280, 0.0375, 0.1545, 0.0147, 0.0687], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0045, 0.0049, 0.0063, 0.0067, 0.0083, 0.0042, 0.0075], + device='cuda:0'), out_proj_covar=tensor([5.0514e-05, 4.5152e-05, 4.6528e-05, 6.0160e-05, 6.7562e-05, 8.7004e-05, + 4.1448e-05, 7.4480e-05], device='cuda:0') +2022-12-01 20:56:56,442 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6690.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:57:00,349 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6694.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:57:05,496 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6699.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:57:07,236 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.592e+02 2.575e+02 3.418e+02 4.510e+02 1.385e+03, threshold=6.835e+02, percent-clipped=6.0 +2022-12-01 20:57:34,327 INFO [train.py:876] Epoch 5, batch 1000, loss[loss=0.3505, simple_loss=0.3633, pruned_loss=0.1689, over 4872.00 frames. ], tot_loss[loss=0.3196, simple_loss=0.3281, pruned_loss=0.1555, over 945699.39 frames. 
], batch size: 39, lr: 3.13e-02, +2022-12-01 20:57:51,967 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6747.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:58:22,661 INFO [train.py:876] Epoch 5, batch 1050, loss[loss=0.2989, simple_loss=0.3103, pruned_loss=0.1438, over 4801.00 frames. ], tot_loss[loss=0.323, simple_loss=0.3309, pruned_loss=0.1575, over 947048.52 frames. ], batch size: 33, lr: 3.13e-02, +2022-12-01 20:58:34,855 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6791.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:58:44,165 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.64 vs. limit=5.0 +2022-12-01 20:58:44,609 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.557e+02 2.522e+02 3.042e+02 4.513e+02 1.040e+03, threshold=6.085e+02, percent-clipped=4.0 +2022-12-01 20:59:01,317 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6818.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 20:59:06,105 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6823.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:09,034 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6826.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:11,913 INFO [train.py:876] Epoch 5, batch 1100, loss[loss=0.4258, simple_loss=0.4176, pruned_loss=0.217, over 4867.00 frames. ], tot_loss[loss=0.3229, simple_loss=0.3309, pruned_loss=0.1574, over 948825.81 frames. ], batch size: 39, lr: 3.12e-02, +2022-12-01 20:59:21,708 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6839.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:40,261 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6689, 3.6433, 3.4364, 3.0439, 3.6158, 3.4441, 3.5981, 3.2963], + device='cuda:0'), covar=tensor([0.1465, 0.0171, 0.0176, 0.0586, 0.0187, 0.0254, 0.0175, 0.0221], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0076, 0.0072, 0.0089, 0.0072, 0.0084, 0.0074, 0.0067], + device='cuda:0'), out_proj_covar=tensor([9.0343e-05, 4.5949e-05, 4.6242e-05, 6.3573e-05, 4.4845e-05, 5.1144e-05, + 4.6786e-05, 4.3038e-05], device='cuda:0') +2022-12-01 20:59:40,903 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6859.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:48,852 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6867.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:55,572 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6874.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 20:59:59,695 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6878.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:00:00,523 INFO [train.py:876] Epoch 5, batch 1150, loss[loss=0.4425, simple_loss=0.4228, pruned_loss=0.2312, over 4698.00 frames. ], tot_loss[loss=0.3236, simple_loss=0.3313, pruned_loss=0.1579, over 948338.82 frames. ], batch size: 63, lr: 3.11e-02, +2022-12-01 21:00:02,875 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.10 vs. 
limit=2.0 +2022-12-01 21:00:21,792 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.839e+02 3.091e+02 3.531e+02 4.649e+02 1.337e+03, threshold=7.062e+02, percent-clipped=12.0 +2022-12-01 21:00:27,972 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6907.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:00:30,108 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=6909.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:00:35,823 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6915.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:00:42,735 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=6922.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:00:49,423 INFO [train.py:876] Epoch 5, batch 1200, loss[loss=0.2494, simple_loss=0.2742, pruned_loss=0.1124, over 4770.00 frames. ], tot_loss[loss=0.3249, simple_loss=0.3324, pruned_loss=0.1587, over 948447.65 frames. ], batch size: 26, lr: 3.10e-02, +2022-12-01 21:00:59,191 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6939.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 21:01:29,349 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=6970.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:01:29,589 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=6970.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:01:38,275 INFO [train.py:876] Epoch 5, batch 1250, loss[loss=0.2768, simple_loss=0.2923, pruned_loss=0.1306, over 4891.00 frames. ], tot_loss[loss=0.3208, simple_loss=0.3297, pruned_loss=0.1559, over 949670.42 frames. ], batch size: 30, lr: 3.10e-02, +2022-12-01 21:01:44,355 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=6985.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:02:00,286 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.491e+02 2.471e+02 2.938e+02 3.651e+02 1.185e+03, threshold=5.876e+02, percent-clipped=4.0 +2022-12-01 21:02:19,334 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. limit=2.0 +2022-12-01 21:02:27,232 INFO [train.py:876] Epoch 5, batch 1300, loss[loss=0.4134, simple_loss=0.3954, pruned_loss=0.2157, over 4793.00 frames. ], tot_loss[loss=0.3224, simple_loss=0.3306, pruned_loss=0.1571, over 952077.46 frames. ], batch size: 51, lr: 3.09e-02, +2022-12-01 21:02:29,286 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6974, 2.1423, 2.8203, 2.4153, 2.5575, 3.0168, 2.7481, 3.1282], + device='cuda:0'), covar=tensor([0.0094, 0.0579, 0.0343, 0.0464, 0.0239, 0.0188, 0.0270, 0.0167], + device='cuda:0'), in_proj_covar=tensor([0.0026, 0.0037, 0.0030, 0.0035, 0.0033, 0.0026, 0.0027, 0.0027], + device='cuda:0'), out_proj_covar=tensor([2.6408e-05, 4.2754e-05, 3.2324e-05, 3.7489e-05, 3.8304e-05, 2.9015e-05, + 2.8648e-05, 2.6616e-05], device='cuda:0') +2022-12-01 21:02:45,975 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7048.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 21:03:15,876 INFO [train.py:876] Epoch 5, batch 1350, loss[loss=0.3079, simple_loss=0.323, pruned_loss=0.1464, over 4861.00 frames. ], tot_loss[loss=0.3219, simple_loss=0.3306, pruned_loss=0.1566, over 952970.73 frames. 
], batch size: 36, lr: 3.08e-02, +2022-12-01 21:03:37,530 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.374e+02 2.492e+02 3.148e+02 4.311e+02 9.177e+02, threshold=6.297e+02, percent-clipped=12.0 +2022-12-01 21:03:45,489 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7109.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:03:47,638 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.03 vs. limit=5.0 +2022-12-01 21:03:54,283 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7118.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 21:03:58,646 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7123.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:04:04,547 INFO [train.py:876] Epoch 5, batch 1400, loss[loss=0.2656, simple_loss=0.2933, pruned_loss=0.119, over 4904.00 frames. ], tot_loss[loss=0.3187, simple_loss=0.3284, pruned_loss=0.1545, over 955179.61 frames. ], batch size: 29, lr: 3.07e-02, +2022-12-01 21:04:22,475 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=8.21 vs. limit=5.0 +2022-12-01 21:04:22,718 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.36 vs. limit=2.0 +2022-12-01 21:04:29,439 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. limit=2.0 +2022-12-01 21:04:35,665 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-5.pt +2022-12-01 21:04:52,127 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 21:04:53,049 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 21:04:53,345 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:04:53,376 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 21:04:54,521 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 21:04:54,839 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 21:04:56,413 INFO [train.py:876] Epoch 6, batch 0, loss[loss=0.3179, simple_loss=0.3228, pruned_loss=0.1565, over 4791.00 frames. ], tot_loss[loss=0.3179, simple_loss=0.3228, pruned_loss=0.1565, over 4791.00 frames. 
], batch size: 32, lr: 2.86e-02, +2022-12-01 21:04:56,414 INFO [train.py:901] Computing validation loss +2022-12-01 21:05:11,562 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4671, 1.6051, 1.2095, 1.5130, 1.6758, 1.6721, 1.4788, 1.6577], + device='cuda:0'), covar=tensor([0.0407, 0.0283, 0.0398, 0.0255, 0.0307, 0.0291, 0.0401, 0.0311], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0025, 0.0032, 0.0022, 0.0030, 0.0025, 0.0029, 0.0026], + device='cuda:0'), out_proj_covar=tensor([2.9069e-05, 2.0409e-05, 2.8551e-05, 1.8988e-05, 2.6638e-05, 2.1371e-05, + 2.5578e-05, 2.2480e-05], device='cuda:0') +2022-12-01 21:05:11,991 INFO [train.py:910] Epoch 6, validation: loss=0.2903, simple_loss=0.3276, pruned_loss=0.1265, over 253132.00 frames. +2022-12-01 21:05:11,992 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 21:05:13,191 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7163.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:05:15,954 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7166.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:05:20,694 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7171.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:05:50,214 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.676e+02 2.538e+02 3.410e+02 4.631e+02 1.066e+03, threshold=6.820e+02, percent-clipped=9.0 +2022-12-01 21:05:54,656 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.98 vs. limit=2.0 +2022-12-01 21:06:00,872 INFO [train.py:876] Epoch 6, batch 50, loss[loss=0.3418, simple_loss=0.3411, pruned_loss=0.1712, over 4122.00 frames. ], tot_loss[loss=0.2982, simple_loss=0.3129, pruned_loss=0.1417, over 214114.45 frames. ], batch size: 72, lr: 2.86e-02, +2022-12-01 21:06:02,604 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.86 vs. limit=5.0 +2022-12-01 21:06:12,683 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7224.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 21:06:20,237 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 21:06:22,768 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7234.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:06:26,813 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3333, 2.1584, 2.1385, 2.1273, 2.8152, 1.0706, 2.7301, 2.0920], + device='cuda:0'), covar=tensor([0.0712, 0.0356, 0.0584, 0.0269, 0.0228, 0.2615, 0.0546, 0.0188], + device='cuda:0'), in_proj_covar=tensor([0.0028, 0.0026, 0.0028, 0.0027, 0.0027, 0.0025, 0.0025, 0.0026], + device='cuda:0'), out_proj_covar=tensor([2.3087e-05, 2.1040e-05, 2.2923e-05, 2.1653e-05, 2.0670e-05, 2.2055e-05, + 2.0015e-05, 2.0750e-05], device='cuda:0') +2022-12-01 21:06:28,653 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6021, 3.8777, 3.8358, 4.5689, 3.6031, 3.4694, 3.8467, 3.2327], + device='cuda:0'), covar=tensor([0.0289, 0.0200, 0.0187, 0.0112, 0.0220, 0.0242, 0.0157, 0.0259], + device='cuda:0'), in_proj_covar=tensor([0.0029, 0.0027, 0.0027, 0.0023, 0.0031, 0.0031, 0.0026, 0.0027], + device='cuda:0'), out_proj_covar=tensor([3.4147e-05, 2.8877e-05, 2.8529e-05, 2.3984e-05, 3.6202e-05, 3.6207e-05, + 2.8351e-05, 3.0268e-05], device='cuda:0') +2022-12-01 21:06:29,081 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.30 vs. 
limit=5.0 +2022-12-01 21:06:30,007 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.02 vs. limit=2.0 +2022-12-01 21:06:38,258 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5940, 2.3300, 2.2347, 2.6543, 2.1601, 2.5805, 2.8195, 2.8070], + device='cuda:0'), covar=tensor([0.0270, 0.0324, 0.0334, 0.0161, 0.0355, 0.0232, 0.0115, 0.0199], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0032, 0.0028, 0.0025, 0.0031, 0.0028, 0.0022, 0.0025], + device='cuda:0'), out_proj_covar=tensor([2.3020e-05, 2.4706e-05, 2.0772e-05, 1.7859e-05, 2.4228e-05, 2.0722e-05, + 1.5219e-05, 1.7928e-05], device='cuda:0') +2022-12-01 21:06:49,721 INFO [train.py:876] Epoch 6, batch 100, loss[loss=0.246, simple_loss=0.2867, pruned_loss=0.1027, over 4863.00 frames. ], tot_loss[loss=0.3047, simple_loss=0.3189, pruned_loss=0.1453, over 377729.88 frames. ], batch size: 36, lr: 2.85e-02, +2022-12-01 21:06:52,684 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7265.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:07:01,167 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:07:02,454 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7275.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:07:05,929 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 21:07:11,488 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0088, 2.8112, 3.1512, 3.1650, 2.5057, 3.4112, 2.9185, 2.7623], + device='cuda:0'), covar=tensor([0.4758, 0.0630, 0.0625, 0.0194, 0.0666, 0.0359, 0.0555, 0.0450], + device='cuda:0'), in_proj_covar=tensor([0.0149, 0.0053, 0.0072, 0.0053, 0.0069, 0.0058, 0.0049, 0.0066], + device='cuda:0'), out_proj_covar=tensor([1.6856e-04, 6.0487e-05, 7.7066e-05, 5.2892e-05, 6.8661e-05, 6.3779e-05, + 5.4299e-05, 7.1888e-05], device='cuda:0') +2022-12-01 21:07:12,424 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7285.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:07:25,112 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 21:07:27,895 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.412e+02 2.626e+02 3.154e+02 4.236e+02 9.382e+02, threshold=6.307e+02, percent-clipped=1.0 +2022-12-01 21:07:32,598 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.89 vs. limit=2.0 +2022-12-01 21:07:35,223 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4502, 2.3950, 2.2887, 2.7468, 2.2643, 2.5334, 2.5928, 2.7328], + device='cuda:0'), covar=tensor([0.0293, 0.0277, 0.0276, 0.0141, 0.0329, 0.0218, 0.0129, 0.0201], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0031, 0.0026, 0.0024, 0.0030, 0.0027, 0.0021, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.2675e-05, 2.3590e-05, 1.9754e-05, 1.7241e-05, 2.3527e-05, 1.9652e-05, + 1.5134e-05, 1.7211e-05], device='cuda:0') +2022-12-01 21:07:38,913 INFO [train.py:876] Epoch 6, batch 150, loss[loss=0.4009, simple_loss=0.3883, pruned_loss=0.2067, over 4791.00 frames. ], tot_loss[loss=0.3038, simple_loss=0.3179, pruned_loss=0.1449, over 504903.30 frames. ], batch size: 58, lr: 2.84e-02, +2022-12-01 21:07:49,595 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=3.16 vs. 
limit=2.0 +2022-12-01 21:07:55,812 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3316, 2.3609, 2.2327, 2.6880, 2.1780, 2.4295, 2.6621, 2.5756], + device='cuda:0'), covar=tensor([0.0321, 0.0292, 0.0274, 0.0139, 0.0314, 0.0244, 0.0113, 0.0217], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0030, 0.0026, 0.0024, 0.0030, 0.0027, 0.0021, 0.0024], + device='cuda:0'), out_proj_covar=tensor([2.2231e-05, 2.3232e-05, 1.9371e-05, 1.7100e-05, 2.2988e-05, 1.9781e-05, + 1.4788e-05, 1.7055e-05], device='cuda:0') +2022-12-01 21:07:59,469 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7333.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:08:02,806 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7336.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:08:08,950 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.45 vs. limit=2.0 +2022-12-01 21:08:28,024 INFO [train.py:876] Epoch 6, batch 200, loss[loss=0.2821, simple_loss=0.289, pruned_loss=0.1376, over 4752.00 frames. ], tot_loss[loss=0.3036, simple_loss=0.3185, pruned_loss=0.1444, over 603857.33 frames. ], batch size: 26, lr: 2.84e-02, +2022-12-01 21:09:06,586 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.606e+02 2.507e+02 3.286e+02 4.316e+02 1.031e+03, threshold=6.572e+02, percent-clipped=8.0 +2022-12-01 21:09:09,573 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7404.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 21:09:12,488 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7407.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:09:16,300 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 21:09:17,274 INFO [train.py:876] Epoch 6, batch 250, loss[loss=0.2733, simple_loss=0.3045, pruned_loss=0.121, over 4848.00 frames. ], tot_loss[loss=0.303, simple_loss=0.3183, pruned_loss=0.1438, over 683066.51 frames. ], batch size: 41, lr: 2.83e-02, +2022-12-01 21:10:06,543 INFO [train.py:876] Epoch 6, batch 300, loss[loss=0.2957, simple_loss=0.3158, pruned_loss=0.1378, over 4854.00 frames. ], tot_loss[loss=0.2976, simple_loss=0.3147, pruned_loss=0.1402, over 745084.69 frames. ], batch size: 35, lr: 2.82e-02, +2022-12-01 21:10:12,825 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7468.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:10:21,204 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 21:10:44,871 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.344e+02 2.394e+02 3.034e+02 4.061e+02 7.419e+02, threshold=6.068e+02, percent-clipped=2.0 +2022-12-01 21:10:56,089 INFO [train.py:876] Epoch 6, batch 350, loss[loss=0.3094, simple_loss=0.3153, pruned_loss=0.1518, over 4898.00 frames. ], tot_loss[loss=0.2938, simple_loss=0.3113, pruned_loss=0.1381, over 793816.51 frames. 
], batch size: 30, lr: 2.82e-02, +2022-12-01 21:10:56,285 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0679, 2.5264, 3.0324, 2.4894, 2.5873, 2.3484, 2.8358, 3.3077], + device='cuda:0'), covar=tensor([0.0110, 0.0649, 0.0358, 0.0614, 0.0292, 0.0205, 0.0350, 0.0203], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0042, 0.0034, 0.0040, 0.0036, 0.0030, 0.0032, 0.0032], + device='cuda:0'), out_proj_covar=tensor([3.0685e-05, 4.8187e-05, 3.6972e-05, 4.3766e-05, 4.1708e-05, 3.3140e-05, + 3.4699e-05, 3.1574e-05], device='cuda:0') +2022-12-01 21:11:02,943 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7519.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:11:14,290 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.51 vs. limit=2.0 +2022-12-01 21:11:16,912 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.14 vs. limit=2.0 +2022-12-01 21:11:17,346 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7534.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:11:44,154 INFO [train.py:876] Epoch 6, batch 400, loss[loss=0.2496, simple_loss=0.2736, pruned_loss=0.1129, over 4695.00 frames. ], tot_loss[loss=0.2983, simple_loss=0.3145, pruned_loss=0.141, over 829317.56 frames. ], batch size: 23, lr: 2.81e-02, +2022-12-01 21:11:47,334 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7565.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:11:55,157 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.25 vs. limit=2.0 +2022-12-01 21:11:58,471 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 21:12:02,991 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7582.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:12:21,439 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.765e+02 2.787e+02 3.299e+02 4.156e+02 7.611e+02, threshold=6.598e+02, percent-clipped=8.0 +2022-12-01 21:12:23,549 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:12:24,885 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6808, 3.8547, 3.6071, 3.4247, 3.6129, 3.7337, 3.8409, 3.5313], + device='cuda:0'), covar=tensor([0.1507, 0.0231, 0.0348, 0.0524, 0.0415, 0.0309, 0.0287, 0.0317], + device='cuda:0'), in_proj_covar=tensor([0.0116, 0.0075, 0.0070, 0.0087, 0.0070, 0.0078, 0.0074, 0.0066], + device='cuda:0'), out_proj_covar=tensor([8.4381e-05, 4.5324e-05, 4.4315e-05, 6.0312e-05, 4.4167e-05, 4.8229e-05, + 4.5648e-05, 4.2289e-05], device='cuda:0') +2022-12-01 21:12:32,232 INFO [train.py:876] Epoch 6, batch 450, loss[loss=0.348, simple_loss=0.3592, pruned_loss=0.1684, over 4786.00 frames. ], tot_loss[loss=0.2993, simple_loss=0.3163, pruned_loss=0.1412, over 858232.28 frames. 
], batch size: 51, lr: 2.81e-02, +2022-12-01 21:12:33,271 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7613.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:12:44,264 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7624.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:12:50,860 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7631.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:12:58,869 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1234, 4.0724, 4.2415, 4.2324, 4.6368, 4.4290, 4.3524, 4.2332], + device='cuda:0'), covar=tensor([0.0655, 0.0581, 0.0375, 0.0438, 0.0457, 0.0362, 0.0426, 0.0414], + device='cuda:0'), in_proj_covar=tensor([0.0077, 0.0068, 0.0056, 0.0069, 0.0060, 0.0066, 0.0076, 0.0062], + device='cuda:0'), out_proj_covar=tensor([7.8216e-05, 6.0562e-05, 5.0662e-05, 6.5502e-05, 5.6690e-05, 6.2234e-05, + 7.2394e-05, 5.8961e-05], device='cuda:0') +2022-12-01 21:13:12,984 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.47 vs. limit=5.0 +2022-12-01 21:13:21,092 INFO [train.py:876] Epoch 6, batch 500, loss[loss=0.351, simple_loss=0.3589, pruned_loss=0.1716, over 4813.00 frames. ], tot_loss[loss=0.2988, simple_loss=0.3163, pruned_loss=0.1406, over 877951.92 frames. ], batch size: 45, lr: 2.80e-02, +2022-12-01 21:13:43,309 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.21 vs. limit=5.0 +2022-12-01 21:13:43,895 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7685.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:13:48,134 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 21:13:59,419 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.616e+02 2.541e+02 3.098e+02 3.936e+02 9.202e+02, threshold=6.195e+02, percent-clipped=3.0 +2022-12-01 21:14:02,401 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7704.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:14:10,293 INFO [train.py:876] Epoch 6, batch 550, loss[loss=0.2002, simple_loss=0.2139, pruned_loss=0.09327, over 4208.00 frames. ], tot_loss[loss=0.2977, simple_loss=0.315, pruned_loss=0.1402, over 892800.90 frames. ], batch size: 16, lr: 2.79e-02, +2022-12-01 21:14:40,872 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7743.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:14:49,497 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7752.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 21:14:58,812 INFO [train.py:876] Epoch 6, batch 600, loss[loss=0.2745, simple_loss=0.2932, pruned_loss=0.1279, over 4806.00 frames. ], tot_loss[loss=0.2957, simple_loss=0.3129, pruned_loss=0.1392, over 906767.59 frames. ], batch size: 33, lr: 2.79e-02, +2022-12-01 21:14:59,586 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.74 vs. limit=5.0 +2022-12-01 21:14:59,944 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7763.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:15:20,415 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. 
limit=2.0 +2022-12-01 21:15:37,554 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.616e+02 2.462e+02 3.295e+02 3.962e+02 8.398e+02, threshold=6.591e+02, percent-clipped=3.0 +2022-12-01 21:15:38,860 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=7802.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:15:40,869 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7804.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:15:48,466 INFO [train.py:876] Epoch 6, batch 650, loss[loss=0.3917, simple_loss=0.3815, pruned_loss=0.201, over 4798.00 frames. ], tot_loss[loss=0.2964, simple_loss=0.3136, pruned_loss=0.1396, over 917275.35 frames. ], batch size: 58, lr: 2.78e-02, +2022-12-01 21:15:55,425 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7819.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 21:16:13,703 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.79 vs. limit=5.0 +2022-12-01 21:16:36,540 INFO [train.py:876] Epoch 6, batch 700, loss[loss=0.2268, simple_loss=0.2489, pruned_loss=0.1023, over 4635.00 frames. ], tot_loss[loss=0.2974, simple_loss=0.315, pruned_loss=0.1399, over 927047.92 frames. ], batch size: 21, lr: 2.77e-02, +2022-12-01 21:16:37,850 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=7863.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:16:41,450 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7867.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:16:52,548 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6828, 1.9669, 1.5726, 1.5494, 1.4963, 1.9321, 1.8056, 1.6630], + device='cuda:0'), covar=tensor([0.0448, 0.0203, 0.0544, 0.0387, 0.0356, 0.0465, 0.0352, 0.0290], + device='cuda:0'), in_proj_covar=tensor([0.0025, 0.0024, 0.0026, 0.0027, 0.0026, 0.0023, 0.0024, 0.0025], + device='cuda:0'), out_proj_covar=tensor([2.1618e-05, 1.9498e-05, 2.1764e-05, 2.2485e-05, 2.1170e-05, 2.0261e-05, + 1.9129e-05, 2.0169e-05], device='cuda:0') +2022-12-01 21:17:11,257 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.99 vs. limit=2.0 +2022-12-01 21:17:14,334 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.553e+02 2.394e+02 3.020e+02 3.986e+02 7.920e+02, threshold=6.040e+02, percent-clipped=1.0 +2022-12-01 21:17:21,281 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4209, 2.6043, 1.9849, 2.6015, 2.1393, 2.5130, 1.5785, 2.2719], + device='cuda:0'), covar=tensor([0.0615, 0.0561, 0.0939, 0.0422, 0.1319, 0.0632, 0.1722, 0.0411], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0046, 0.0067, 0.0051, 0.0068, 0.0057, 0.0083, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.3098e-05, 4.2997e-05, 6.0132e-05, 4.4436e-05, 6.1276e-05, 5.0209e-05, + 8.1794e-05, 4.5428e-05], device='cuda:0') +2022-12-01 21:17:24,962 INFO [train.py:876] Epoch 6, batch 750, loss[loss=0.3476, simple_loss=0.3527, pruned_loss=0.1713, over 4772.00 frames. ], tot_loss[loss=0.2964, simple_loss=0.3137, pruned_loss=0.1395, over 933228.06 frames. ], batch size: 58, lr: 2.77e-02, +2022-12-01 21:17:43,486 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=7931.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:18:13,906 INFO [train.py:876] Epoch 6, batch 800, loss[loss=0.1698, simple_loss=0.2016, pruned_loss=0.06903, over 3960.00 frames. ], tot_loss[loss=0.2943, simple_loss=0.3124, pruned_loss=0.1381, over 938195.02 frames. 
], batch size: 15, lr: 2.76e-02, +2022-12-01 21:18:30,539 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=7979.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:18:31,566 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=7980.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:18:36,526 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8412, 1.9366, 1.9416, 1.3443, 1.4459, 1.8835, 1.4646, 2.3074], + device='cuda:0'), covar=tensor([0.0222, 0.0147, 0.0202, 0.0325, 0.0299, 0.0215, 0.0344, 0.0196], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0031, 0.0028, 0.0034, 0.0039, 0.0030, 0.0036, 0.0032], + device='cuda:0'), out_proj_covar=tensor([3.1368e-05, 2.4930e-05, 2.3135e-05, 2.8840e-05, 3.2568e-05, 2.5092e-05, + 3.1544e-05, 2.8592e-05], device='cuda:0') +2022-12-01 21:18:50,860 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-8000.pt +2022-12-01 21:18:54,082 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.673e+02 2.306e+02 3.004e+02 3.659e+02 7.825e+02, threshold=6.008e+02, percent-clipped=2.0 +2022-12-01 21:19:01,006 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8091, 2.8232, 3.2412, 3.5210, 2.6604, 3.1292, 3.2722, 2.6618], + device='cuda:0'), covar=tensor([0.2861, 0.0571, 0.0330, 0.0104, 0.0397, 0.0430, 0.0189, 0.0460], + device='cuda:0'), in_proj_covar=tensor([0.0160, 0.0062, 0.0081, 0.0057, 0.0078, 0.0064, 0.0052, 0.0073], + device='cuda:0'), out_proj_covar=tensor([1.8007e-04, 7.0647e-05, 8.9046e-05, 6.0564e-05, 7.8279e-05, 7.0348e-05, + 5.8473e-05, 8.1538e-05], device='cuda:0') +2022-12-01 21:19:04,637 INFO [train.py:876] Epoch 6, batch 850, loss[loss=0.3155, simple_loss=0.3312, pruned_loss=0.1499, over 4827.00 frames. ], tot_loss[loss=0.2936, simple_loss=0.3124, pruned_loss=0.1374, over 941097.27 frames. ], batch size: 34, lr: 2.76e-02, +2022-12-01 21:19:15,858 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.23 vs. limit=5.0 +2022-12-01 21:19:52,333 INFO [train.py:876] Epoch 6, batch 900, loss[loss=0.3235, simple_loss=0.3317, pruned_loss=0.1576, over 4876.00 frames. ], tot_loss[loss=0.2953, simple_loss=0.3137, pruned_loss=0.1385, over 945019.81 frames. ], batch size: 37, lr: 2.75e-02, +2022-12-01 21:19:53,426 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=8063.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:19:53,711 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0928, 3.5454, 3.9448, 3.3757, 3.3256, 3.6296, 3.6997, 3.5152], + device='cuda:0'), covar=tensor([0.0772, 0.0311, 0.0192, 0.0338, 0.0379, 0.0374, 0.0211, 0.0362], + device='cuda:0'), in_proj_covar=tensor([0.0099, 0.0074, 0.0062, 0.0082, 0.0083, 0.0073, 0.0066, 0.0063], + device='cuda:0'), out_proj_covar=tensor([6.7675e-05, 4.8467e-05, 3.9057e-05, 5.5208e-05, 5.5603e-05, 4.6431e-05, + 4.1248e-05, 4.1122e-05], device='cuda:0') +2022-12-01 21:20:28,605 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=8099.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:20:30,446 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.283e+02 2.812e+02 3.355e+02 4.747e+02 9.548e+02, threshold=6.710e+02, percent-clipped=13.0 +2022-12-01 21:20:38,778 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. 
limit=2.0 +2022-12-01 21:20:40,044 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=8111.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:20:41,010 INFO [train.py:876] Epoch 6, batch 950, loss[loss=0.2807, simple_loss=0.3047, pruned_loss=0.1284, over 4862.00 frames. ], tot_loss[loss=0.297, simple_loss=0.3157, pruned_loss=0.1391, over 948990.52 frames. ], batch size: 40, lr: 2.74e-02, +2022-12-01 21:21:25,907 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=8158.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:21:29,759 INFO [train.py:876] Epoch 6, batch 1000, loss[loss=0.2844, simple_loss=0.3111, pruned_loss=0.1289, over 4860.00 frames. ], tot_loss[loss=0.2953, simple_loss=0.3144, pruned_loss=0.1381, over 950698.16 frames. ], batch size: 36, lr: 2.74e-02, +2022-12-01 21:22:07,285 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.415e+02 2.582e+02 3.142e+02 4.040e+02 7.803e+02, threshold=6.284e+02, percent-clipped=1.0 +2022-12-01 21:22:18,238 INFO [train.py:876] Epoch 6, batch 1050, loss[loss=0.2021, simple_loss=0.2304, pruned_loss=0.08695, over 4704.00 frames. ], tot_loss[loss=0.2938, simple_loss=0.3129, pruned_loss=0.1373, over 949406.36 frames. ], batch size: 23, lr: 2.73e-02, +2022-12-01 21:22:41,470 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.72 vs. limit=2.0 +2022-12-01 21:23:06,464 INFO [train.py:876] Epoch 6, batch 1100, loss[loss=0.274, simple_loss=0.298, pruned_loss=0.125, over 4900.00 frames. ], tot_loss[loss=0.297, simple_loss=0.3159, pruned_loss=0.139, over 948305.96 frames. ], batch size: 30, lr: 2.72e-02, +2022-12-01 21:23:09,703 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=8265.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:23:11,044 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.06 vs. limit=2.0 +2022-12-01 21:23:24,143 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=8280.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:23:44,293 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.570e+02 2.614e+02 3.165e+02 4.096e+02 1.026e+03, threshold=6.331e+02, percent-clipped=5.0 +2022-12-01 21:23:55,010 INFO [train.py:876] Epoch 6, batch 1150, loss[loss=0.2683, simple_loss=0.2911, pruned_loss=0.1227, over 4805.00 frames. ], tot_loss[loss=0.297, simple_loss=0.316, pruned_loss=0.139, over 950218.14 frames. ], batch size: 32, lr: 2.72e-02, +2022-12-01 21:24:05,979 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2259, 2.8529, 3.5678, 3.4676, 3.3727, 3.2556, 3.7997, 2.8594], + device='cuda:0'), covar=tensor([0.4690, 0.0854, 0.0604, 0.0172, 0.0367, 0.1356, 0.0157, 0.0553], + device='cuda:0'), in_proj_covar=tensor([0.0153, 0.0058, 0.0080, 0.0055, 0.0074, 0.0063, 0.0048, 0.0070], + device='cuda:0'), out_proj_covar=tensor([1.7254e-04, 6.7477e-05, 8.7865e-05, 5.8081e-05, 7.3929e-05, 7.0290e-05, + 5.4720e-05, 7.9383e-05], device='cuda:0') +2022-12-01 21:24:09,141 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=8326.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:24:10,851 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=8328.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:24:15,455 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. 
limit=2.0 +2022-12-01 21:24:43,881 INFO [train.py:876] Epoch 6, batch 1200, loss[loss=0.4037, simple_loss=0.3982, pruned_loss=0.2046, over 4851.00 frames. ], tot_loss[loss=0.2949, simple_loss=0.3139, pruned_loss=0.1379, over 950497.68 frames. ], batch size: 49, lr: 2.71e-02, +2022-12-01 21:25:19,845 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=8399.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:25:21,787 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.337e+02 2.663e+02 3.385e+02 4.099e+02 1.394e+03, threshold=6.770e+02, percent-clipped=7.0 +2022-12-01 21:25:32,674 INFO [train.py:876] Epoch 6, batch 1250, loss[loss=0.2559, simple_loss=0.293, pruned_loss=0.1094, over 4825.00 frames. ], tot_loss[loss=0.2931, simple_loss=0.3123, pruned_loss=0.1369, over 947402.25 frames. ], batch size: 34, lr: 2.71e-02, +2022-12-01 21:25:36,587 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6601, 1.7703, 2.7016, 3.0697, 2.9801, 3.0928, 3.0598, 3.7118], + device='cuda:0'), covar=tensor([0.0175, 0.1049, 0.0607, 0.0331, 0.0287, 0.0303, 0.0375, 0.0158], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0046, 0.0047, 0.0033, 0.0037, 0.0034, 0.0034, 0.0040], + device='cuda:0'), out_proj_covar=tensor([3.6481e-05, 5.2859e-05, 5.0220e-05, 3.8841e-05, 3.9212e-05, 3.7882e-05, + 3.9697e-05, 3.8535e-05], device='cuda:0') +2022-12-01 21:26:06,176 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=8447.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:26:17,035 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=8458.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:26:20,746 INFO [train.py:876] Epoch 6, batch 1300, loss[loss=0.3196, simple_loss=0.3496, pruned_loss=0.1448, over 4821.00 frames. ], tot_loss[loss=0.2951, simple_loss=0.3134, pruned_loss=0.1384, over 948176.42 frames. 
], batch size: 54, lr: 2.70e-02, +2022-12-01 21:26:30,789 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4332, 1.5564, 1.2292, 1.7607, 1.6580, 1.3746, 1.5470, 1.3292], + device='cuda:0'), covar=tensor([0.0378, 0.0374, 0.0390, 0.0203, 0.0265, 0.0354, 0.0389, 0.0450], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0029, 0.0033, 0.0024, 0.0031, 0.0028, 0.0031, 0.0029], + device='cuda:0'), out_proj_covar=tensor([3.1695e-05, 2.5198e-05, 3.1160e-05, 2.0128e-05, 2.7980e-05, 2.4785e-05, + 2.8571e-05, 2.6318e-05], device='cuda:0') +2022-12-01 21:26:41,422 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8439, 4.6126, 4.6889, 4.3776, 4.1719, 4.4155, 4.0591, 4.3947], + device='cuda:0'), covar=tensor([0.0614, 0.0109, 0.0056, 0.0192, 0.0225, 0.0150, 0.0133, 0.0160], + device='cuda:0'), in_proj_covar=tensor([0.0105, 0.0078, 0.0062, 0.0086, 0.0086, 0.0073, 0.0070, 0.0064], + device='cuda:0'), out_proj_covar=tensor([7.1300e-05, 5.0697e-05, 3.9235e-05, 5.7634e-05, 5.7254e-05, 4.6489e-05, + 4.4946e-05, 4.1622e-05], device='cuda:0') +2022-12-01 21:26:48,872 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.0333, 1.1798, 1.2446, 1.0741, 1.0234, 1.1896, 1.0823, 1.3476], + device='cuda:0'), covar=tensor([0.0456, 0.0141, 0.0216, 0.0345, 0.0321, 0.0324, 0.0323, 0.0260], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0029, 0.0029, 0.0033, 0.0037, 0.0032, 0.0036, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.9613e-05, 2.3603e-05, 2.4013e-05, 2.8750e-05, 3.1276e-05, 2.6953e-05, + 3.1857e-05, 2.7364e-05], device='cuda:0') +2022-12-01 21:26:58,354 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 2.016e+02 2.881e+02 3.526e+02 4.623e+02 1.053e+03, threshold=7.052e+02, percent-clipped=7.0 +2022-12-01 21:27:03,327 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=8506.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:27:09,097 INFO [train.py:876] Epoch 6, batch 1350, loss[loss=0.2896, simple_loss=0.3092, pruned_loss=0.1351, over 4927.00 frames. ], tot_loss[loss=0.2952, simple_loss=0.3135, pruned_loss=0.1384, over 949238.03 frames. ], batch size: 32, lr: 2.69e-02, +2022-12-01 21:27:38,915 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 21:27:57,916 INFO [train.py:876] Epoch 6, batch 1400, loss[loss=0.2985, simple_loss=0.3217, pruned_loss=0.1376, over 4823.00 frames. ], tot_loss[loss=0.291, simple_loss=0.3093, pruned_loss=0.1363, over 944208.73 frames. ], batch size: 34, lr: 2.69e-02, +2022-12-01 21:28:09,598 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.09 vs. limit=2.0 +2022-12-01 21:28:16,486 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.34 vs. limit=2.0 +2022-12-01 21:28:28,340 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-6.pt +2022-12-01 21:28:44,884 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 21:28:45,794 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. 
Number of tokens: 29 +2022-12-01 21:28:46,090 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:28:46,122 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 21:28:47,245 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 21:28:47,562 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 21:28:49,131 INFO [train.py:876] Epoch 7, batch 0, loss[loss=0.21, simple_loss=0.2512, pruned_loss=0.08442, over 4771.00 frames. ], tot_loss[loss=0.21, simple_loss=0.2512, pruned_loss=0.08442, over 4771.00 frames. ], batch size: 26, lr: 2.52e-02, +2022-12-01 21:28:49,132 INFO [train.py:901] Computing validation loss +2022-12-01 21:29:02,364 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3934, 2.3303, 2.5068, 2.7303, 2.5472, 2.3503, 2.9959, 2.6286], + device='cuda:0'), covar=tensor([0.0268, 0.0305, 0.0242, 0.0138, 0.0323, 0.0352, 0.0096, 0.0191], + device='cuda:0'), in_proj_covar=tensor([0.0028, 0.0031, 0.0028, 0.0024, 0.0032, 0.0032, 0.0021, 0.0026], + device='cuda:0'), out_proj_covar=tensor([2.0769e-05, 2.3735e-05, 2.0854e-05, 1.6741e-05, 2.4906e-05, 2.3952e-05, + 1.5049e-05, 1.8783e-05], device='cuda:0') +2022-12-01 21:29:04,679 INFO [train.py:910] Epoch 7, validation: loss=0.2681, simple_loss=0.3105, pruned_loss=0.1129, over 253132.00 frames. +2022-12-01 21:29:04,679 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 21:29:11,834 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.247e+02 2.318e+02 3.192e+02 4.178e+02 9.666e+02, threshold=6.384e+02, percent-clipped=2.0 +2022-12-01 21:29:31,273 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=8621.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:29:46,834 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2264, 1.9346, 2.2699, 2.5875, 2.1517, 2.0924, 1.9728, 2.5209], + device='cuda:0'), covar=tensor([0.1431, 0.1991, 0.0830, 0.0418, 0.0985, 0.0893, 0.1290, 0.0507], + device='cuda:0'), in_proj_covar=tensor([0.0051, 0.0047, 0.0066, 0.0049, 0.0065, 0.0056, 0.0079, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.6687e-05, 4.5093e-05, 5.9591e-05, 4.3543e-05, 5.8329e-05, 5.0220e-05, + 7.8781e-05, 4.5694e-05], device='cuda:0') +2022-12-01 21:29:53,334 INFO [train.py:876] Epoch 7, batch 50, loss[loss=0.1913, simple_loss=0.2304, pruned_loss=0.07611, over 4776.00 frames. ], tot_loss[loss=0.2704, simple_loss=0.2946, pruned_loss=0.1231, over 216241.57 frames. ], batch size: 26, lr: 2.51e-02, +2022-12-01 21:30:20,791 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 21:30:41,917 INFO [train.py:876] Epoch 7, batch 100, loss[loss=0.2419, simple_loss=0.2715, pruned_loss=0.1062, over 4793.00 frames. ], tot_loss[loss=0.2696, simple_loss=0.2938, pruned_loss=0.1227, over 377786.23 frames. 
], batch size: 32, lr: 2.51e-02, +2022-12-01 21:30:42,208 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4113, 1.2932, 1.2271, 1.1570, 0.9986, 1.2212, 1.3025, 1.3279], + device='cuda:0'), covar=tensor([0.0345, 0.0262, 0.0336, 0.0377, 0.0371, 0.0299, 0.0415, 0.0373], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0027, 0.0029, 0.0031, 0.0035, 0.0031, 0.0035, 0.0029], + device='cuda:0'), out_proj_covar=tensor([2.7337e-05, 2.1650e-05, 2.4023e-05, 2.6631e-05, 2.9887e-05, 2.6301e-05, + 3.0449e-05, 2.4824e-05], device='cuda:0') +2022-12-01 21:30:48,861 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.582e+02 2.507e+02 3.292e+02 4.230e+02 7.642e+02, threshold=6.584e+02, percent-clipped=4.0 +2022-12-01 21:31:01,712 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:31:09,208 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.67 vs. limit=5.0 +2022-12-01 21:31:15,018 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 21:31:25,060 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 21:31:30,993 INFO [train.py:876] Epoch 7, batch 150, loss[loss=0.2696, simple_loss=0.2937, pruned_loss=0.1227, over 4793.00 frames. ], tot_loss[loss=0.2733, simple_loss=0.2976, pruned_loss=0.1245, over 506708.47 frames. ], batch size: 32, lr: 2.50e-02, +2022-12-01 21:31:46,885 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=8760.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:32:19,705 INFO [train.py:876] Epoch 7, batch 200, loss[loss=0.2914, simple_loss=0.3158, pruned_loss=0.1335, over 4883.00 frames. ], tot_loss[loss=0.2717, simple_loss=0.2974, pruned_loss=0.123, over 605968.08 frames. ], batch size: 44, lr: 2.50e-02, +2022-12-01 21:32:26,803 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.773e+02 2.549e+02 3.270e+02 4.241e+02 1.006e+03, threshold=6.539e+02, percent-clipped=4.0 +2022-12-01 21:32:43,692 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.34 vs. limit=2.0 +2022-12-01 21:32:46,250 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=8821.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:33:08,340 INFO [train.py:876] Epoch 7, batch 250, loss[loss=0.2529, simple_loss=0.277, pruned_loss=0.1144, over 4792.00 frames. ], tot_loss[loss=0.2727, simple_loss=0.2984, pruned_loss=0.1235, over 683672.14 frames. ], batch size: 32, lr: 2.49e-02, +2022-12-01 21:33:18,043 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 21:33:55,264 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.8028, 3.8261, 4.1903, 4.5088, 4.5635, 4.0088, 3.9026, 4.6638], + device='cuda:0'), covar=tensor([0.0143, 0.0356, 0.0295, 0.0116, 0.0146, 0.0643, 0.0424, 0.0110], + device='cuda:0'), in_proj_covar=tensor([0.0040, 0.0049, 0.0051, 0.0036, 0.0041, 0.0036, 0.0039, 0.0043], + device='cuda:0'), out_proj_covar=tensor([3.9303e-05, 5.7695e-05, 5.4621e-05, 4.0922e-05, 4.3274e-05, 4.1236e-05, + 4.4531e-05, 4.1304e-05], device='cuda:0') +2022-12-01 21:33:56,166 INFO [train.py:876] Epoch 7, batch 300, loss[loss=0.1726, simple_loss=0.2042, pruned_loss=0.07055, over 3607.00 frames. ], tot_loss[loss=0.2747, simple_loss=0.2995, pruned_loss=0.125, over 743209.55 frames. 
], batch size: 14, lr: 2.48e-02, +2022-12-01 21:34:03,261 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.448e+02 2.662e+02 3.358e+02 3.972e+02 7.945e+02, threshold=6.716e+02, percent-clipped=3.0 +2022-12-01 21:34:05,743 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.08 vs. limit=2.0 +2022-12-01 21:34:15,841 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 21:34:23,417 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=8921.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:34:23,851 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.20 vs. limit=5.0 +2022-12-01 21:34:29,247 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=8927.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:34:45,840 INFO [train.py:876] Epoch 7, batch 350, loss[loss=0.2883, simple_loss=0.3058, pruned_loss=0.1354, over 4813.00 frames. ], tot_loss[loss=0.2738, simple_loss=0.2992, pruned_loss=0.1242, over 791190.36 frames. ], batch size: 45, lr: 2.48e-02, +2022-12-01 21:35:09,697 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=8969.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:35:15,631 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0256, 2.1137, 2.1327, 2.6603, 2.1402, 2.1243, 1.7424, 2.2142], + device='cuda:0'), covar=tensor([0.0915, 0.0623, 0.0660, 0.0269, 0.0777, 0.0555, 0.1203, 0.0324], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0046, 0.0066, 0.0049, 0.0066, 0.0056, 0.0079, 0.0052], + device='cuda:0'), out_proj_covar=tensor([4.5683e-05, 4.4412e-05, 5.9786e-05, 4.3054e-05, 6.0238e-05, 4.9666e-05, + 7.8408e-05, 4.5326e-05], device='cuda:0') +2022-12-01 21:35:26,307 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=8986.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:35:28,373 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=8988.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:35:33,967 INFO [train.py:876] Epoch 7, batch 400, loss[loss=0.2825, simple_loss=0.3087, pruned_loss=0.1282, over 4819.00 frames. ], tot_loss[loss=0.274, simple_loss=0.2996, pruned_loss=0.1242, over 827289.96 frames. ], batch size: 45, lr: 2.47e-02, +2022-12-01 21:35:36,346 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.20 vs. limit=2.0 +2022-12-01 21:35:40,845 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.451e+02 2.437e+02 3.064e+02 4.235e+02 8.437e+02, threshold=6.129e+02, percent-clipped=4.0 +2022-12-01 21:35:49,396 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 21:35:52,518 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=9013.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:36:15,844 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:36:22,346 INFO [train.py:876] Epoch 7, batch 450, loss[loss=0.3295, simple_loss=0.3478, pruned_loss=0.1556, over 4709.00 frames. ], tot_loss[loss=0.274, simple_loss=0.2998, pruned_loss=0.124, over 856749.60 frames. 
], batch size: 63, lr: 2.47e-02, +2022-12-01 21:36:25,316 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9047.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:36:32,123 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8412, 3.5014, 3.4093, 3.3157, 3.2884, 2.4509, 3.7228, 2.1376], + device='cuda:0'), covar=tensor([0.0178, 0.0196, 0.0220, 0.0283, 0.0547, 0.2356, 0.0183, 0.1863], + device='cuda:0'), in_proj_covar=tensor([0.0057, 0.0049, 0.0051, 0.0070, 0.0080, 0.0097, 0.0045, 0.0096], + device='cuda:0'), out_proj_covar=tensor([5.6073e-05, 5.1916e-05, 5.2909e-05, 7.0468e-05, 8.2325e-05, 1.0365e-04, + 4.6333e-05, 9.9207e-05], device='cuda:0') +2022-12-01 21:36:34,721 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.92 vs. limit=5.0 +2022-12-01 21:36:47,232 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 21:36:51,929 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9074.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:37:10,976 INFO [train.py:876] Epoch 7, batch 500, loss[loss=0.2691, simple_loss=0.2955, pruned_loss=0.1214, over 4905.00 frames. ], tot_loss[loss=0.2741, simple_loss=0.2998, pruned_loss=0.1242, over 878376.62 frames. ], batch size: 30, lr: 2.46e-02, +2022-12-01 21:37:17,587 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.581e+02 2.614e+02 3.173e+02 4.041e+02 8.389e+02, threshold=6.346e+02, percent-clipped=4.0 +2022-12-01 21:37:32,477 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9116.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:37:59,559 INFO [train.py:876] Epoch 7, batch 550, loss[loss=0.3861, simple_loss=0.3769, pruned_loss=0.1977, over 4021.00 frames. ], tot_loss[loss=0.2753, simple_loss=0.3006, pruned_loss=0.125, over 894665.83 frames. ], batch size: 72, lr: 2.46e-02, +2022-12-01 21:38:47,631 INFO [train.py:876] Epoch 7, batch 600, loss[loss=0.253, simple_loss=0.2992, pruned_loss=0.1034, over 4874.00 frames. ], tot_loss[loss=0.2757, simple_loss=0.3016, pruned_loss=0.1249, over 908857.15 frames. ], batch size: 39, lr: 2.45e-02, +2022-12-01 21:38:54,672 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.611e+02 2.571e+02 3.313e+02 4.449e+02 1.323e+03, threshold=6.626e+02, percent-clipped=7.0 +2022-12-01 21:38:59,753 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4153, 4.0431, 4.1656, 3.8660, 3.6059, 4.0951, 3.9419, 4.1261], + device='cuda:0'), covar=tensor([0.0892, 0.0172, 0.0146, 0.0278, 0.0291, 0.0239, 0.0303, 0.0211], + device='cuda:0'), in_proj_covar=tensor([0.0115, 0.0080, 0.0065, 0.0088, 0.0089, 0.0077, 0.0071, 0.0069], + device='cuda:0'), out_proj_covar=tensor([7.8227e-05, 5.1696e-05, 4.1656e-05, 5.8765e-05, 5.9392e-05, 5.0438e-05, + 4.5255e-05, 4.4558e-05], device='cuda:0') +2022-12-01 21:39:08,412 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0589, 2.1296, 3.1642, 2.4438, 2.7813, 2.7321, 2.8533, 3.6315], + device='cuda:0'), covar=tensor([0.0095, 0.0770, 0.0308, 0.0708, 0.0243, 0.0188, 0.0340, 0.0161], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0052, 0.0040, 0.0053, 0.0041, 0.0038, 0.0038, 0.0040], + device='cuda:0'), out_proj_covar=tensor([3.6042e-05, 6.2071e-05, 4.4451e-05, 6.0392e-05, 4.7160e-05, 4.2596e-05, + 4.1641e-05, 4.0677e-05], device='cuda:0') +2022-12-01 21:39:36,715 INFO [train.py:876] Epoch 7, batch 650, loss[loss=0.217, simple_loss=0.2521, pruned_loss=0.09096, over 4789.00 frames. 
], tot_loss[loss=0.2724, simple_loss=0.2987, pruned_loss=0.1231, over 918015.63 frames. ], batch size: 26, lr: 2.45e-02, +2022-12-01 21:39:47,615 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0947, 2.5290, 3.2761, 2.8137, 2.9954, 2.7353, 3.2043, 3.8824], + device='cuda:0'), covar=tensor([0.0102, 0.0849, 0.0338, 0.0777, 0.0346, 0.0309, 0.0334, 0.0138], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0052, 0.0039, 0.0053, 0.0041, 0.0037, 0.0038, 0.0040], + device='cuda:0'), out_proj_covar=tensor([3.5412e-05, 6.2192e-05, 4.3715e-05, 6.0349e-05, 4.7415e-05, 4.2296e-05, + 4.1351e-05, 4.0087e-05], device='cuda:0') +2022-12-01 21:40:06,666 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.98 vs. limit=2.0 +2022-12-01 21:40:14,766 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9283.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:40:25,452 INFO [train.py:876] Epoch 7, batch 700, loss[loss=0.2401, simple_loss=0.2835, pruned_loss=0.09837, over 4823.00 frames. ], tot_loss[loss=0.2702, simple_loss=0.2969, pruned_loss=0.1218, over 926841.53 frames. ], batch size: 45, lr: 2.44e-02, +2022-12-01 21:40:32,024 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.293e+02 2.568e+02 3.196e+02 4.246e+02 1.002e+03, threshold=6.393e+02, percent-clipped=2.0 +2022-12-01 21:40:41,208 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 21:41:12,073 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9342.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:41:13,936 INFO [train.py:876] Epoch 7, batch 750, loss[loss=0.2624, simple_loss=0.2989, pruned_loss=0.1129, over 4778.00 frames. ], tot_loss[loss=0.2689, simple_loss=0.2962, pruned_loss=0.1208, over 931908.66 frames. ], batch size: 51, lr: 2.44e-02, +2022-12-01 21:41:21,018 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0337, 3.3556, 4.0858, 3.4853, 4.0404, 3.1661, 3.9684, 3.8785], + device='cuda:0'), covar=tensor([0.0157, 0.0327, 0.0095, 0.0276, 0.0137, 0.0512, 0.0168, 0.0128], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0070, 0.0041, 0.0058, 0.0045, 0.0072, 0.0046, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.0230e-05, 5.9291e-05, 3.3202e-05, 4.7656e-05, 3.6442e-05, 6.3210e-05, + 3.9350e-05, 3.4277e-05], device='cuda:0') +2022-12-01 21:41:38,386 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9369.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:42:02,895 INFO [train.py:876] Epoch 7, batch 800, loss[loss=0.351, simple_loss=0.3512, pruned_loss=0.1754, over 4699.00 frames. ], tot_loss[loss=0.2676, simple_loss=0.2946, pruned_loss=0.1202, over 936221.30 frames. 
], batch size: 63, lr: 2.43e-02, +2022-12-01 21:42:09,635 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.768e+02 2.606e+02 3.275e+02 4.103e+02 9.545e+02, threshold=6.551e+02, percent-clipped=5.0 +2022-12-01 21:42:20,269 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.9770, 4.8937, 5.2301, 5.0334, 5.3094, 5.1476, 5.1825, 4.8976], + device='cuda:0'), covar=tensor([0.0507, 0.0444, 0.0244, 0.0371, 0.0511, 0.0294, 0.0385, 0.0447], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0070, 0.0062, 0.0074, 0.0064, 0.0072, 0.0082, 0.0065], + device='cuda:0'), out_proj_covar=tensor([8.8011e-05, 5.8280e-05, 5.5440e-05, 6.7671e-05, 5.8890e-05, 6.6302e-05, + 7.7270e-05, 6.0138e-05], device='cuda:0') +2022-12-01 21:42:22,496 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=9414.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:42:24,224 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=9416.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:42:28,176 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0277, 3.1383, 3.4749, 3.3695, 2.8209, 3.2037, 3.3661, 2.3655], + device='cuda:0'), covar=tensor([0.4188, 0.0390, 0.0509, 0.0168, 0.0537, 0.0607, 0.0192, 0.0782], + device='cuda:0'), in_proj_covar=tensor([0.0176, 0.0065, 0.0092, 0.0063, 0.0088, 0.0074, 0.0058, 0.0079], + device='cuda:0'), out_proj_covar=tensor([1.9703e-04, 7.6507e-05, 1.0422e-04, 6.8460e-05, 9.0097e-05, 8.5418e-05, + 6.7678e-05, 9.0861e-05], device='cuda:0') +2022-12-01 21:42:32,223 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.8119, 4.2656, 4.6552, 4.3371, 4.8471, 4.4405, 4.4649, 4.8106], + device='cuda:0'), covar=tensor([0.0062, 0.0125, 0.0053, 0.0160, 0.0093, 0.0178, 0.0239, 0.0052], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0069, 0.0041, 0.0057, 0.0045, 0.0072, 0.0046, 0.0044], + device='cuda:0'), out_proj_covar=tensor([4.0181e-05, 5.8742e-05, 3.3181e-05, 4.7365e-05, 3.7082e-05, 6.2772e-05, + 3.9670e-05, 3.4427e-05], device='cuda:0') +2022-12-01 21:42:39,344 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 21:42:51,604 INFO [train.py:876] Epoch 7, batch 850, loss[loss=0.2752, simple_loss=0.3088, pruned_loss=0.1208, over 4799.00 frames. ], tot_loss[loss=0.2688, simple_loss=0.2957, pruned_loss=0.121, over 939153.76 frames. 
], batch size: 45, lr: 2.43e-02, +2022-12-01 21:42:56,645 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9247, 1.7050, 1.2440, 1.6018, 1.6010, 1.5593, 1.4206, 1.4866], + device='cuda:0'), covar=tensor([0.0540, 0.0574, 0.0993, 0.0469, 0.0626, 0.0873, 0.0524, 0.0323], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0031, 0.0033, 0.0033, 0.0031, 0.0028, 0.0030, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.7563e-05, 2.5696e-05, 2.7909e-05, 2.7554e-05, 2.6479e-05, 2.4948e-05, + 2.5217e-05, 2.4901e-05], device='cuda:0') +2022-12-01 21:43:10,949 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=9464.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:43:21,643 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9475.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:43:35,392 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6409, 2.6243, 3.3389, 3.5879, 3.2441, 3.4222, 2.9514, 3.1800], + device='cuda:0'), covar=tensor([0.0180, 0.0447, 0.0180, 0.0157, 0.0268, 0.0171, 0.0202, 0.0226], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0073, 0.0051, 0.0055, 0.0069, 0.0051, 0.0053, 0.0074], + device='cuda:0'), out_proj_covar=tensor([3.4172e-05, 4.9857e-05, 3.3157e-05, 3.5410e-05, 4.6782e-05, 3.2434e-05, + 3.3757e-05, 5.1969e-05], device='cuda:0') +2022-12-01 21:43:39,942 INFO [train.py:876] Epoch 7, batch 900, loss[loss=0.2851, simple_loss=0.3161, pruned_loss=0.1271, over 4847.00 frames. ], tot_loss[loss=0.2674, simple_loss=0.2948, pruned_loss=0.12, over 943531.27 frames. ], batch size: 41, lr: 2.42e-02, +2022-12-01 21:43:46,639 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.429e+02 2.505e+02 3.154e+02 4.131e+02 7.912e+02, threshold=6.309e+02, percent-clipped=3.0 +2022-12-01 21:44:28,410 INFO [train.py:876] Epoch 7, batch 950, loss[loss=0.3141, simple_loss=0.3288, pruned_loss=0.1497, over 4875.00 frames. ], tot_loss[loss=0.2674, simple_loss=0.295, pruned_loss=0.1199, over 945097.59 frames. ], batch size: 39, lr: 2.42e-02, +2022-12-01 21:45:00,008 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9013, 3.2144, 3.2722, 3.4277, 2.5419, 3.4107, 3.0960, 2.6207], + device='cuda:0'), covar=tensor([0.4616, 0.0491, 0.0731, 0.0192, 0.0725, 0.0674, 0.0284, 0.0772], + device='cuda:0'), in_proj_covar=tensor([0.0171, 0.0063, 0.0092, 0.0063, 0.0086, 0.0072, 0.0058, 0.0077], + device='cuda:0'), out_proj_covar=tensor([1.9105e-04, 7.5079e-05, 1.0334e-04, 6.9221e-05, 8.9168e-05, 8.3150e-05, + 6.7949e-05, 8.8529e-05], device='cuda:0') +2022-12-01 21:45:05,837 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=9583.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:45:06,963 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=9584.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:45:16,609 INFO [train.py:876] Epoch 7, batch 1000, loss[loss=0.2287, simple_loss=0.2639, pruned_loss=0.09671, over 4765.00 frames. ], tot_loss[loss=0.2676, simple_loss=0.2954, pruned_loss=0.1199, over 947025.13 frames. 
], batch size: 26, lr: 2.41e-02, +2022-12-01 21:45:23,619 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.590e+02 2.472e+02 3.007e+02 3.883e+02 8.856e+02, threshold=6.013e+02, percent-clipped=6.0 +2022-12-01 21:45:31,440 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4861, 1.7057, 1.4244, 1.2795, 1.4835, 1.6596, 1.6248, 1.3958], + device='cuda:0'), covar=tensor([0.0882, 0.0389, 0.0945, 0.0615, 0.0768, 0.0937, 0.0561, 0.0336], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0032, 0.0033, 0.0035, 0.0034, 0.0030, 0.0032, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.9038e-05, 2.6456e-05, 2.8187e-05, 2.9292e-05, 2.8790e-05, 2.6118e-05, + 2.6267e-05, 2.6171e-05], device='cuda:0') +2022-12-01 21:45:52,758 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=9631.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:03,956 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=9642.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:05,711 INFO [train.py:876] Epoch 7, batch 1050, loss[loss=0.315, simple_loss=0.3308, pruned_loss=0.1496, over 4850.00 frames. ], tot_loss[loss=0.2667, simple_loss=0.2951, pruned_loss=0.1192, over 948579.51 frames. ], batch size: 40, lr: 2.41e-02, +2022-12-01 21:46:07,019 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9645.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:19,010 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=9658.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:24,065 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6087, 1.4424, 1.5910, 1.2874, 1.2662, 1.3414, 1.3120, 1.4679], + device='cuda:0'), covar=tensor([0.0200, 0.0240, 0.0259, 0.0256, 0.0300, 0.0280, 0.0305, 0.0288], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0031, 0.0031, 0.0032, 0.0036, 0.0033, 0.0038, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.9042e-05, 2.4631e-05, 2.5654e-05, 2.6989e-05, 3.0874e-05, 2.8415e-05, + 3.3144e-05, 2.7210e-05], device='cuda:0') +2022-12-01 21:46:29,848 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=9669.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:50,581 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=9690.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:46:54,369 INFO [train.py:876] Epoch 7, batch 1100, loss[loss=0.2385, simple_loss=0.2822, pruned_loss=0.09743, over 4857.00 frames. ], tot_loss[loss=0.2679, simple_loss=0.2957, pruned_loss=0.12, over 949682.14 frames. ], batch size: 36, lr: 2.40e-02, +2022-12-01 21:47:00,762 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.331e+02 2.491e+02 3.031e+02 4.170e+02 8.031e+02, threshold=6.061e+02, percent-clipped=6.0 +2022-12-01 21:47:10,237 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3440, 2.1464, 2.5325, 3.5402, 2.9390, 3.1877, 3.0860, 3.3967], + device='cuda:0'), covar=tensor([0.0226, 0.0859, 0.0782, 0.0260, 0.0274, 0.0379, 0.0612, 0.0246], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0050, 0.0052, 0.0038, 0.0042, 0.0041, 0.0040, 0.0045], + device='cuda:0'), out_proj_covar=tensor([3.9020e-05, 5.8194e-05, 5.6867e-05, 4.2998e-05, 4.4457e-05, 4.5310e-05, + 4.5571e-05, 4.4385e-05], device='cuda:0') +2022-12-01 21:47:14,820 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.34 vs. 
limit=2.0 +2022-12-01 21:47:16,102 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=9717.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:47:18,329 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9719.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:47:25,852 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4576, 4.2525, 4.8475, 4.3785, 4.7568, 4.5353, 4.5014, 4.2691], + device='cuda:0'), covar=tensor([0.0353, 0.0404, 0.0196, 0.0317, 0.0281, 0.0216, 0.0314, 0.0340], + device='cuda:0'), in_proj_covar=tensor([0.0082, 0.0067, 0.0059, 0.0070, 0.0059, 0.0069, 0.0080, 0.0061], + device='cuda:0'), out_proj_covar=tensor([8.0088e-05, 5.5566e-05, 5.3244e-05, 6.3066e-05, 5.3169e-05, 6.1942e-05, + 7.4195e-05, 5.4946e-05], device='cuda:0') +2022-12-01 21:47:41,611 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=9743.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:47:42,260 INFO [train.py:876] Epoch 7, batch 1150, loss[loss=0.2354, simple_loss=0.272, pruned_loss=0.09946, over 4841.00 frames. ], tot_loss[loss=0.2675, simple_loss=0.295, pruned_loss=0.12, over 946834.21 frames. ], batch size: 35, lr: 2.40e-02, +2022-12-01 21:47:53,736 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5671, 2.2885, 2.1331, 2.7632, 2.1854, 2.5365, 2.7442, 2.4274], + device='cuda:0'), covar=tensor([0.0211, 0.0259, 0.0265, 0.0142, 0.0354, 0.0245, 0.0131, 0.0218], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0036, 0.0033, 0.0029, 0.0039, 0.0036, 0.0026, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.3792e-05, 2.7529e-05, 2.4588e-05, 2.0295e-05, 3.0225e-05, 2.6472e-05, + 1.8592e-05, 2.2172e-05], device='cuda:0') +2022-12-01 21:48:07,091 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9770.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:48:07,483 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.80 vs. limit=2.0 +2022-12-01 21:48:30,039 INFO [train.py:876] Epoch 7, batch 1200, loss[loss=0.2345, simple_loss=0.2757, pruned_loss=0.09667, over 4913.00 frames. ], tot_loss[loss=0.2673, simple_loss=0.2952, pruned_loss=0.1197, over 946823.81 frames. 
], batch size: 31, lr: 2.39e-02, +2022-12-01 21:48:31,302 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1959, 2.9813, 3.7040, 3.6403, 3.3465, 3.8877, 3.7007, 2.7796], + device='cuda:0'), covar=tensor([0.4938, 0.0766, 0.0673, 0.0157, 0.0347, 0.1489, 0.0331, 0.0465], + device='cuda:0'), in_proj_covar=tensor([0.0174, 0.0067, 0.0094, 0.0065, 0.0087, 0.0075, 0.0061, 0.0079], + device='cuda:0'), out_proj_covar=tensor([1.9486e-04, 7.9726e-05, 1.0689e-04, 7.1947e-05, 9.0418e-05, 8.6588e-05, + 7.1702e-05, 9.1616e-05], device='cuda:0') +2022-12-01 21:48:36,992 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.621e+02 2.531e+02 3.210e+02 4.198e+02 8.403e+02, threshold=6.421e+02, percent-clipped=8.0 +2022-12-01 21:48:40,150 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=9804.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:48:50,689 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4066, 1.3234, 1.4928, 0.9864, 1.1464, 1.0735, 1.2606, 1.1132], + device='cuda:0'), covar=tensor([0.0382, 0.0224, 0.0412, 0.0449, 0.0323, 0.0282, 0.0349, 0.0337], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0030, 0.0031, 0.0032, 0.0036, 0.0032, 0.0037, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.9756e-05, 2.4255e-05, 2.5715e-05, 2.7515e-05, 3.0649e-05, 2.7276e-05, + 3.1883e-05, 2.6702e-05], device='cuda:0') +2022-12-01 21:49:18,384 INFO [train.py:876] Epoch 7, batch 1250, loss[loss=0.321, simple_loss=0.3479, pruned_loss=0.1471, over 4779.00 frames. ], tot_loss[loss=0.2685, simple_loss=0.2958, pruned_loss=0.1207, over 948452.93 frames. ], batch size: 51, lr: 2.39e-02, +2022-12-01 21:49:33,824 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.09 vs. limit=2.0 +2022-12-01 21:49:34,481 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.31 vs. limit=2.0 +2022-12-01 21:50:05,159 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. limit=2.0 +2022-12-01 21:50:06,498 INFO [train.py:876] Epoch 7, batch 1300, loss[loss=0.2182, simple_loss=0.2546, pruned_loss=0.09096, over 4835.00 frames. ], tot_loss[loss=0.2667, simple_loss=0.2948, pruned_loss=0.1193, over 949697.76 frames. ], batch size: 34, lr: 2.38e-02, +2022-12-01 21:50:13,400 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.590e+02 2.346e+02 3.011e+02 4.042e+02 1.309e+03, threshold=6.022e+02, percent-clipped=5.0 +2022-12-01 21:50:31,881 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.69 vs. limit=5.0 +2022-12-01 21:50:51,690 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=9940.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:50:55,333 INFO [train.py:876] Epoch 7, batch 1350, loss[loss=0.2906, simple_loss=0.3196, pruned_loss=0.1308, over 4837.00 frames. ], tot_loss[loss=0.2691, simple_loss=0.2972, pruned_loss=0.1205, over 946644.08 frames. ], batch size: 41, lr: 2.38e-02, +2022-12-01 21:51:43,380 INFO [train.py:876] Epoch 7, batch 1400, loss[loss=0.2694, simple_loss=0.2974, pruned_loss=0.1207, over 4806.00 frames. ], tot_loss[loss=0.2679, simple_loss=0.2959, pruned_loss=0.1199, over 947638.26 frames. 
], batch size: 42, lr: 2.37e-02, +2022-12-01 21:51:49,197 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-10000.pt +2022-12-01 21:51:52,369 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.381e+02 2.558e+02 3.089e+02 4.012e+02 7.105e+02, threshold=6.177e+02, percent-clipped=3.0 +2022-12-01 21:52:05,143 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10014.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:52:09,079 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8504, 2.0322, 1.5073, 1.2658, 1.1129, 1.5674, 1.2313, 2.0031], + device='cuda:0'), covar=tensor([0.0571, 0.0239, 0.0448, 0.0422, 0.0561, 0.0326, 0.0489, 0.0257], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0032, 0.0032, 0.0034, 0.0038, 0.0033, 0.0040, 0.0033], + device='cuda:0'), out_proj_covar=tensor([3.0794e-05, 2.5517e-05, 2.6338e-05, 2.8928e-05, 3.2269e-05, 2.8512e-05, + 3.4268e-05, 2.9952e-05], device='cuda:0') +2022-12-01 21:52:16,074 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-7.pt +2022-12-01 21:52:43,050 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 21:52:43,961 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 21:52:44,258 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:52:44,290 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 21:52:45,398 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 21:52:45,719 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 21:52:47,308 INFO [train.py:876] Epoch 8, batch 0, loss[loss=0.2867, simple_loss=0.3121, pruned_loss=0.1306, over 4841.00 frames. ], tot_loss[loss=0.2867, simple_loss=0.3121, pruned_loss=0.1306, over 4841.00 frames. ], batch size: 41, lr: 2.23e-02, +2022-12-01 21:52:47,309 INFO [train.py:901] Computing validation loss +2022-12-01 21:53:02,793 INFO [train.py:910] Epoch 8, validation: loss=0.2662, simple_loss=0.3078, pruned_loss=0.1123, over 253132.00 frames. +2022-12-01 21:53:02,794 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 21:53:16,073 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10039.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:53:26,389 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. limit=2.0 +2022-12-01 21:53:37,682 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10062.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 21:53:45,293 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10070.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:53:46,048 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. 
limit=2.0 +2022-12-01 21:53:50,792 INFO [train.py:876] Epoch 8, batch 50, loss[loss=0.2215, simple_loss=0.244, pruned_loss=0.09953, over 4815.00 frames. ], tot_loss[loss=0.2627, simple_loss=0.2953, pruned_loss=0.1151, over 216560.46 frames. ], batch size: 25, lr: 2.23e-02, +2022-12-01 21:54:05,763 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 21:54:12,577 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10099.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:54:13,677 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10100.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 21:54:14,479 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.323e+02 2.362e+02 3.114e+02 4.335e+02 7.372e+02, threshold=6.227e+02, percent-clipped=4.0 +2022-12-01 21:54:30,817 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10118.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:54:35,647 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10123.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 21:54:38,139 INFO [train.py:876] Epoch 8, batch 100, loss[loss=0.3794, simple_loss=0.3746, pruned_loss=0.1921, over 4827.00 frames. ], tot_loss[loss=0.2594, simple_loss=0.2928, pruned_loss=0.113, over 380589.99 frames. ], batch size: 49, lr: 2.22e-02, +2022-12-01 21:54:49,804 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 21:55:04,874 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0442, 3.3593, 3.6770, 3.1995, 3.1947, 3.3111, 3.3081, 3.4259], + device='cuda:0'), covar=tensor([0.0559, 0.0240, 0.0128, 0.0278, 0.0284, 0.0264, 0.0198, 0.0249], + device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0086, 0.0073, 0.0095, 0.0096, 0.0087, 0.0077, 0.0075], + device='cuda:0'), out_proj_covar=tensor([8.6554e-05, 5.6404e-05, 4.6401e-05, 6.2999e-05, 6.4378e-05, 5.7279e-05, + 5.0237e-05, 4.8720e-05], device='cuda:0') +2022-12-01 21:55:14,224 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 21:55:27,536 INFO [train.py:876] Epoch 8, batch 150, loss[loss=0.2337, simple_loss=0.2684, pruned_loss=0.0995, over 4846.00 frames. ], tot_loss[loss=0.2534, simple_loss=0.2863, pruned_loss=0.1102, over 507421.54 frames. ], batch size: 40, lr: 2.22e-02, +2022-12-01 21:55:51,917 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.502e+02 2.490e+02 2.907e+02 4.083e+02 1.116e+03, threshold=5.815e+02, percent-clipped=5.0 +2022-12-01 21:56:16,098 INFO [train.py:876] Epoch 8, batch 200, loss[loss=0.2645, simple_loss=0.2936, pruned_loss=0.1177, over 4810.00 frames. ], tot_loss[loss=0.2564, simple_loss=0.2883, pruned_loss=0.1122, over 607436.21 frames. 
], batch size: 42, lr: 2.22e-02, +2022-12-01 21:56:30,081 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10240.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:56:46,465 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7229, 4.6208, 5.0990, 4.5105, 4.9351, 4.7444, 4.6700, 4.5326], + device='cuda:0'), covar=tensor([0.0417, 0.0398, 0.0304, 0.0416, 0.0582, 0.0316, 0.0470, 0.0419], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0069, 0.0066, 0.0078, 0.0065, 0.0075, 0.0085, 0.0064], + device='cuda:0'), out_proj_covar=tensor([8.5384e-05, 5.7065e-05, 5.8637e-05, 7.0090e-05, 5.8134e-05, 6.7133e-05, + 7.8216e-05, 5.7676e-05], device='cuda:0') +2022-12-01 21:56:59,826 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 21:57:04,497 INFO [train.py:876] Epoch 8, batch 250, loss[loss=0.2826, simple_loss=0.3258, pruned_loss=0.1197, over 4835.00 frames. ], tot_loss[loss=0.2575, simple_loss=0.2896, pruned_loss=0.1127, over 684748.78 frames. ], batch size: 40, lr: 2.21e-02, +2022-12-01 21:57:16,815 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10288.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:57:28,933 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.442e+02 2.570e+02 3.091e+02 4.197e+02 1.386e+03, threshold=6.182e+02, percent-clipped=7.0 +2022-12-01 21:57:35,099 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8128, 2.6762, 2.6891, 3.5946, 3.1819, 3.3950, 2.5643, 3.9358], + device='cuda:0'), covar=tensor([0.0129, 0.0679, 0.0882, 0.0248, 0.0259, 0.0364, 0.0426, 0.0167], + device='cuda:0'), in_proj_covar=tensor([0.0041, 0.0056, 0.0058, 0.0041, 0.0045, 0.0043, 0.0043, 0.0047], + device='cuda:0'), out_proj_covar=tensor([4.0690e-05, 6.5466e-05, 6.3073e-05, 4.6326e-05, 4.7013e-05, 4.9515e-05, + 4.7935e-05, 4.5798e-05], device='cuda:0') +2022-12-01 21:57:41,942 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10314.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:57:43,997 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2626, 2.0456, 2.0611, 2.6105, 2.1354, 1.9764, 1.5363, 2.2367], + device='cuda:0'), covar=tensor([0.1009, 0.1597, 0.1189, 0.0515, 0.1304, 0.1175, 0.1640, 0.0996], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0052, 0.0069, 0.0052, 0.0074, 0.0062, 0.0084, 0.0055], + device='cuda:0'), out_proj_covar=tensor([4.7776e-05, 4.9279e-05, 6.4064e-05, 4.7000e-05, 6.8895e-05, 5.6978e-05, + 8.2159e-05, 4.9574e-05], device='cuda:0') +2022-12-01 21:57:53,560 INFO [train.py:876] Epoch 8, batch 300, loss[loss=0.2314, simple_loss=0.2604, pruned_loss=0.1012, over 4917.00 frames. ], tot_loss[loss=0.256, simple_loss=0.2888, pruned_loss=0.1115, over 744292.71 frames. ], batch size: 30, lr: 2.21e-02, +2022-12-01 21:57:56,461 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 21:58:01,748 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 21:58:28,334 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10362.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:58:42,062 INFO [train.py:876] Epoch 8, batch 350, loss[loss=0.2656, simple_loss=0.3101, pruned_loss=0.1106, over 4849.00 frames. ], tot_loss[loss=0.2525, simple_loss=0.2862, pruned_loss=0.1094, over 790286.93 frames. 
], batch size: 40, lr: 2.20e-02, +2022-12-01 21:58:57,474 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 21:59:00,878 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10395.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 21:59:04,862 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10399.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:59:06,961 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.369e+02 2.349e+02 3.009e+02 3.671e+02 6.559e+02, threshold=6.019e+02, percent-clipped=3.0 +2022-12-01 21:59:14,945 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10409.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:59:23,673 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10418.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 21:59:24,130 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. limit=2.0 +2022-12-01 21:59:31,006 INFO [train.py:876] Epoch 8, batch 400, loss[loss=0.2188, simple_loss=0.2676, pruned_loss=0.08502, over 4803.00 frames. ], tot_loss[loss=0.2518, simple_loss=0.2854, pruned_loss=0.109, over 825961.37 frames. ], batch size: 32, lr: 2.20e-02, +2022-12-01 21:59:37,080 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 21:59:52,172 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10447.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 21:59:59,132 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10454.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:00:01,685 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.77 vs. limit=2.0 +2022-12-01 22:00:02,031 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:00:14,689 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10470.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:00:20,246 INFO [train.py:876] Epoch 8, batch 450, loss[loss=0.1619, simple_loss=0.1988, pruned_loss=0.06252, over 4617.00 frames. ], tot_loss[loss=0.2489, simple_loss=0.2828, pruned_loss=0.1075, over 852523.49 frames. ], batch size: 21, lr: 2.19e-02, +2022-12-01 22:00:44,919 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.669e+02 2.398e+02 3.109e+02 3.748e+02 6.669e+02, threshold=6.217e+02, percent-clipped=2.0 +2022-12-01 22:00:59,158 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10515.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 22:01:09,662 INFO [train.py:876] Epoch 8, batch 500, loss[loss=0.2916, simple_loss=0.3274, pruned_loss=0.1279, over 4713.00 frames. ], tot_loss[loss=0.249, simple_loss=0.2828, pruned_loss=0.1076, over 874643.88 frames. 
], batch size: 63, lr: 2.19e-02, +2022-12-01 22:01:25,922 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1617, 2.0624, 2.5873, 2.2564, 2.5465, 1.4899, 2.7019, 2.9350], + device='cuda:0'), covar=tensor([0.0150, 0.1043, 0.0399, 0.0929, 0.0228, 0.0431, 0.0486, 0.0332], + device='cuda:0'), in_proj_covar=tensor([0.0040, 0.0058, 0.0042, 0.0061, 0.0046, 0.0043, 0.0047, 0.0045], + device='cuda:0'), out_proj_covar=tensor([4.2353e-05, 6.9225e-05, 4.7517e-05, 7.0474e-05, 5.2628e-05, 4.8093e-05, + 5.1792e-05, 4.6159e-05], device='cuda:0') +2022-12-01 22:01:50,664 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4172, 2.2133, 1.9728, 2.5747, 1.9748, 1.9762, 2.7994, 2.4128], + device='cuda:0'), covar=tensor([0.0240, 0.0267, 0.0280, 0.0145, 0.0392, 0.0342, 0.0093, 0.0186], + device='cuda:0'), in_proj_covar=tensor([0.0030, 0.0033, 0.0030, 0.0027, 0.0037, 0.0033, 0.0023, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.2307e-05, 2.5174e-05, 2.1796e-05, 1.8781e-05, 2.8146e-05, 2.4094e-05, + 1.6325e-05, 2.0281e-05], device='cuda:0') +2022-12-01 22:01:58,638 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.14 vs. limit=2.0 +2022-12-01 22:01:59,032 INFO [train.py:876] Epoch 8, batch 550, loss[loss=0.2847, simple_loss=0.306, pruned_loss=0.1317, over 4816.00 frames. ], tot_loss[loss=0.2484, simple_loss=0.2826, pruned_loss=0.1071, over 891084.15 frames. ], batch size: 42, lr: 2.19e-02, +2022-12-01 22:02:23,436 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.457e+02 2.238e+02 2.837e+02 3.582e+02 8.415e+02, threshold=5.674e+02, percent-clipped=3.0 +2022-12-01 22:02:43,258 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4535, 1.5567, 1.3905, 1.5386, 1.2261, 1.1389, 1.1410, 1.6989], + device='cuda:0'), covar=tensor([0.0308, 0.0158, 0.0199, 0.0172, 0.0252, 0.0204, 0.0298, 0.0165], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0029, 0.0029, 0.0029, 0.0034, 0.0032, 0.0037, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.7624e-05, 2.2800e-05, 2.3663e-05, 2.4495e-05, 2.8872e-05, 2.7001e-05, + 3.1809e-05, 2.4676e-05], device='cuda:0') +2022-12-01 22:02:45,807 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2626, 4.1881, 4.6420, 4.3125, 4.5959, 4.4540, 4.4359, 4.2706], + device='cuda:0'), covar=tensor([0.0533, 0.0412, 0.0552, 0.0439, 0.0497, 0.0325, 0.0440, 0.0289], + device='cuda:0'), in_proj_covar=tensor([0.0091, 0.0073, 0.0070, 0.0080, 0.0068, 0.0077, 0.0087, 0.0067], + device='cuda:0'), out_proj_covar=tensor([8.7804e-05, 6.0467e-05, 6.2178e-05, 7.2100e-05, 6.1512e-05, 6.7940e-05, + 7.8829e-05, 5.9526e-05], device='cuda:0') +2022-12-01 22:02:47,642 INFO [train.py:876] Epoch 8, batch 600, loss[loss=0.2835, simple_loss=0.3077, pruned_loss=0.1296, over 4853.00 frames. ], tot_loss[loss=0.2494, simple_loss=0.2829, pruned_loss=0.1079, over 905468.82 frames. ], batch size: 35, lr: 2.18e-02, +2022-12-01 22:02:51,691 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4183, 1.4094, 2.1346, 3.2899, 2.7227, 3.2359, 2.9820, 3.7769], + device='cuda:0'), covar=tensor([0.0242, 0.1195, 0.1038, 0.0262, 0.0291, 0.0293, 0.0351, 0.0124], + device='cuda:0'), in_proj_covar=tensor([0.0043, 0.0055, 0.0059, 0.0040, 0.0044, 0.0043, 0.0043, 0.0047], + device='cuda:0'), out_proj_covar=tensor([4.3516e-05, 6.4789e-05, 6.4541e-05, 4.5668e-05, 4.6750e-05, 4.9534e-05, + 4.8320e-05, 4.5487e-05], device='cuda:0') +2022-12-01 22:03:30,238 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.99 vs. 
limit=2.0 +2022-12-01 22:03:36,608 INFO [train.py:876] Epoch 8, batch 650, loss[loss=0.1807, simple_loss=0.2077, pruned_loss=0.07685, over 4634.00 frames. ], tot_loss[loss=0.2492, simple_loss=0.2828, pruned_loss=0.1078, over 914723.74 frames. ], batch size: 21, lr: 2.18e-02, +2022-12-01 22:03:36,850 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4998, 1.4234, 1.1704, 1.8512, 1.5684, 1.4467, 1.6761, 1.5838], + device='cuda:0'), covar=tensor([0.0346, 0.0321, 0.0299, 0.0192, 0.0222, 0.0308, 0.0265, 0.0393], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0030, 0.0036, 0.0028, 0.0032, 0.0030, 0.0032, 0.0030], + device='cuda:0'), out_proj_covar=tensor([3.5347e-05, 2.6830e-05, 3.3658e-05, 2.4196e-05, 2.9357e-05, 2.8163e-05, + 2.9895e-05, 2.7365e-05], device='cuda:0') +2022-12-01 22:03:54,991 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10695.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:04:00,649 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.268e+02 2.505e+02 3.318e+02 4.249e+02 8.688e+02, threshold=6.636e+02, percent-clipped=8.0 +2022-12-01 22:04:17,261 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=10718.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 22:04:24,888 INFO [train.py:876] Epoch 8, batch 700, loss[loss=0.208, simple_loss=0.2567, pruned_loss=0.07968, over 4862.00 frames. ], tot_loss[loss=0.2494, simple_loss=0.2836, pruned_loss=0.1076, over 922761.16 frames. ], batch size: 36, lr: 2.17e-02, +2022-12-01 22:04:41,815 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10743.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:04:42,994 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10744.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:04:59,516 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1322, 2.7761, 3.2908, 3.0107, 3.4636, 2.3538, 3.3499, 3.6671], + device='cuda:0'), covar=tensor([0.0182, 0.0430, 0.0121, 0.0211, 0.0114, 0.0592, 0.0120, 0.0081], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0073, 0.0046, 0.0059, 0.0047, 0.0082, 0.0046, 0.0046], + device='cuda:0'), out_proj_covar=tensor([4.4132e-05, 6.1914e-05, 3.7375e-05, 4.8825e-05, 3.8109e-05, 7.1659e-05, + 3.9875e-05, 3.7007e-05], device='cuda:0') +2022-12-01 22:05:03,132 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10765.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:05:04,058 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=10766.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 22:05:11,884 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10774.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:05:13,734 INFO [train.py:876] Epoch 8, batch 750, loss[loss=0.2433, simple_loss=0.2763, pruned_loss=0.1051, over 4883.00 frames. ], tot_loss[loss=0.2475, simple_loss=0.2821, pruned_loss=0.1064, over 929682.24 frames. ], batch size: 37, lr: 2.17e-02, +2022-12-01 22:05:29,840 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 22:05:37,850 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. 
limit=2.0 +2022-12-01 22:05:38,270 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.655e+02 2.458e+02 2.999e+02 3.581e+02 1.108e+03, threshold=5.998e+02, percent-clipped=4.0 +2022-12-01 22:05:42,449 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10805.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:05:47,271 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=10810.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 22:06:02,599 INFO [train.py:876] Epoch 8, batch 800, loss[loss=0.2745, simple_loss=0.3108, pruned_loss=0.1191, over 4788.00 frames. ], tot_loss[loss=0.2504, simple_loss=0.2847, pruned_loss=0.108, over 933852.16 frames. ], batch size: 51, lr: 2.17e-02, +2022-12-01 22:06:10,598 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 22:06:11,262 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10835.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:06:51,274 INFO [train.py:876] Epoch 8, batch 850, loss[loss=0.2151, simple_loss=0.2557, pruned_loss=0.08726, over 4906.00 frames. ], tot_loss[loss=0.2468, simple_loss=0.2807, pruned_loss=0.1064, over 937880.88 frames. ], batch size: 30, lr: 2.16e-02, +2022-12-01 22:07:00,064 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=10885.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 22:07:15,431 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.516e+02 2.384e+02 2.993e+02 3.683e+02 6.160e+02, threshold=5.986e+02, percent-clipped=1.0 +2022-12-01 22:07:39,046 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1170, 2.9031, 2.8067, 2.6114, 2.5907, 2.0941, 3.2515, 1.5452], + device='cuda:0'), covar=tensor([0.0276, 0.0225, 0.0434, 0.0805, 0.0821, 0.2837, 0.0185, 0.2465], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0058, 0.0064, 0.0086, 0.0093, 0.0114, 0.0053, 0.0120], + device='cuda:0'), out_proj_covar=tensor([6.7447e-05, 6.3516e-05, 6.8648e-05, 9.1618e-05, 9.6936e-05, 1.2083e-04, + 5.7034e-05, 1.2675e-04], device='cuda:0') +2022-12-01 22:07:39,784 INFO [train.py:876] Epoch 8, batch 900, loss[loss=0.2875, simple_loss=0.326, pruned_loss=0.1245, over 4703.00 frames. ], tot_loss[loss=0.246, simple_loss=0.2801, pruned_loss=0.106, over 941566.21 frames. ], batch size: 63, lr: 2.16e-02, +2022-12-01 22:07:42,455 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.0947, 1.6225, 1.4513, 1.4906, 1.2656, 1.5554, 1.2035, 1.7330], + device='cuda:0'), covar=tensor([0.0362, 0.0162, 0.0260, 0.0183, 0.0235, 0.0194, 0.0338, 0.0172], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0028, 0.0028, 0.0029, 0.0034, 0.0032, 0.0037, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.6886e-05, 2.2116e-05, 2.3321e-05, 2.4465e-05, 2.8910e-05, 2.7073e-05, + 3.1549e-05, 2.4559e-05], device='cuda:0') +2022-12-01 22:07:59,742 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=10946.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 22:08:28,372 INFO [train.py:876] Epoch 8, batch 950, loss[loss=0.2379, simple_loss=0.2665, pruned_loss=0.1046, over 4893.00 frames. ], tot_loss[loss=0.2472, simple_loss=0.2814, pruned_loss=0.1065, over 945482.31 frames. ], batch size: 38, lr: 2.15e-02, +2022-12-01 22:08:35,873 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.99 vs. 
limit=2.0 +2022-12-01 22:08:52,748 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.501e+02 2.402e+02 3.087e+02 3.795e+02 8.213e+02, threshold=6.175e+02, percent-clipped=4.0 +2022-12-01 22:09:17,179 INFO [train.py:876] Epoch 8, batch 1000, loss[loss=0.2438, simple_loss=0.2683, pruned_loss=0.1097, over 4729.00 frames. ], tot_loss[loss=0.2476, simple_loss=0.2823, pruned_loss=0.1064, over 947053.66 frames. ], batch size: 27, lr: 2.15e-02, +2022-12-01 22:09:49,763 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2657, 3.1927, 3.0419, 3.3358, 2.9537, 3.1533, 3.0187, 3.2868], + device='cuda:0'), covar=tensor([0.0118, 0.0091, 0.0147, 0.0082, 0.0222, 0.0156, 0.0092, 0.0119], + device='cuda:0'), in_proj_covar=tensor([0.0032, 0.0035, 0.0031, 0.0028, 0.0038, 0.0036, 0.0027, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.3964e-05, 2.7049e-05, 2.2704e-05, 2.0208e-05, 2.9305e-05, 2.7007e-05, + 1.8893e-05, 2.1506e-05], device='cuda:0') +2022-12-01 22:09:55,598 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11065.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:10:06,101 INFO [train.py:876] Epoch 8, batch 1050, loss[loss=0.2321, simple_loss=0.2737, pruned_loss=0.09528, over 4790.00 frames. ], tot_loss[loss=0.2457, simple_loss=0.2806, pruned_loss=0.1054, over 948200.20 frames. ], batch size: 32, lr: 2.14e-02, +2022-12-01 22:10:08,409 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4903, 3.6504, 3.8454, 3.6674, 3.4484, 3.7640, 3.5638, 3.8480], + device='cuda:0'), covar=tensor([0.0619, 0.0219, 0.0125, 0.0256, 0.0240, 0.0250, 0.0177, 0.0186], + device='cuda:0'), in_proj_covar=tensor([0.0141, 0.0090, 0.0079, 0.0099, 0.0098, 0.0095, 0.0087, 0.0077], + device='cuda:0'), out_proj_covar=tensor([9.5521e-05, 5.9151e-05, 5.0908e-05, 6.5799e-05, 6.5706e-05, 6.2273e-05, + 5.6997e-05, 5.0574e-05], device='cuda:0') +2022-12-01 22:10:27,191 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.14 vs. limit=5.0 +2022-12-01 22:10:29,717 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11100.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:10:30,535 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.428e+02 2.405e+02 2.999e+02 4.009e+02 8.788e+02, threshold=5.997e+02, percent-clipped=5.0 +2022-12-01 22:10:39,230 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11110.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 22:10:42,120 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11113.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:10:54,493 INFO [train.py:876] Epoch 8, batch 1100, loss[loss=0.2534, simple_loss=0.2922, pruned_loss=0.1073, over 4802.00 frames. ], tot_loss[loss=0.2456, simple_loss=0.2812, pruned_loss=0.105, over 951995.07 frames. 
], batch size: 58, lr: 2.14e-02, +2022-12-01 22:10:58,383 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11130.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:11:14,238 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5158, 3.3505, 2.9986, 3.2399, 3.2934, 2.4468, 3.7810, 1.7867], + device='cuda:0'), covar=tensor([0.0325, 0.0215, 0.0283, 0.0427, 0.0482, 0.2201, 0.0129, 0.2001], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0058, 0.0063, 0.0086, 0.0093, 0.0114, 0.0054, 0.0122], + device='cuda:0'), out_proj_covar=tensor([6.7407e-05, 6.5055e-05, 6.8369e-05, 9.0815e-05, 9.7381e-05, 1.2221e-04, + 5.8330e-05, 1.2920e-04], device='cuda:0') +2022-12-01 22:11:22,153 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1690, 2.9136, 3.5775, 3.6156, 2.9152, 3.6680, 3.5436, 2.7194], + device='cuda:0'), covar=tensor([0.4563, 0.0666, 0.0533, 0.0165, 0.0482, 0.0457, 0.0215, 0.0669], + device='cuda:0'), in_proj_covar=tensor([0.0177, 0.0074, 0.0100, 0.0071, 0.0095, 0.0077, 0.0066, 0.0082], + device='cuda:0'), out_proj_covar=tensor([1.9965e-04, 8.7879e-05, 1.1720e-04, 7.9772e-05, 1.0085e-04, 9.2514e-05, + 7.9371e-05, 1.0040e-04], device='cuda:0') +2022-12-01 22:11:25,855 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11158.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:11:41,687 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11174.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:11:43,410 INFO [train.py:876] Epoch 8, batch 1150, loss[loss=0.2396, simple_loss=0.2667, pruned_loss=0.1062, over 4826.00 frames. ], tot_loss[loss=0.2434, simple_loss=0.2795, pruned_loss=0.1036, over 953789.31 frames. ], batch size: 25, lr: 2.14e-02, +2022-12-01 22:12:07,784 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.177e+02 2.357e+02 2.871e+02 3.719e+02 5.713e+02, threshold=5.742e+02, percent-clipped=0.0 +2022-12-01 22:12:32,466 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.06 vs. limit=2.0 +2022-12-01 22:12:32,907 INFO [train.py:876] Epoch 8, batch 1200, loss[loss=0.167, simple_loss=0.2148, pruned_loss=0.05961, over 4829.00 frames. ], tot_loss[loss=0.2423, simple_loss=0.278, pruned_loss=0.1033, over 948760.00 frames. ], batch size: 25, lr: 2.13e-02, +2022-12-01 22:12:42,063 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11235.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:12:47,902 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11241.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 22:12:48,252 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.97 vs. limit=2.0 +2022-12-01 22:13:22,334 INFO [train.py:876] Epoch 8, batch 1250, loss[loss=0.2867, simple_loss=0.3049, pruned_loss=0.1342, over 4860.00 frames. ], tot_loss[loss=0.2435, simple_loss=0.2786, pruned_loss=0.1043, over 948708.19 frames. ], batch size: 36, lr: 2.13e-02, +2022-12-01 22:13:46,785 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.381e+02 2.223e+02 3.005e+02 3.752e+02 6.179e+02, threshold=6.011e+02, percent-clipped=4.0 +2022-12-01 22:13:59,246 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.54 vs. 
limit=2.0 +2022-12-01 22:14:10,282 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0267, 3.4649, 3.6647, 3.5975, 3.4589, 3.5512, 3.4951, 3.6239], + device='cuda:0'), covar=tensor([0.0868, 0.0218, 0.0135, 0.0213, 0.0235, 0.0244, 0.0211, 0.0161], + device='cuda:0'), in_proj_covar=tensor([0.0147, 0.0093, 0.0083, 0.0103, 0.0102, 0.0095, 0.0088, 0.0078], + device='cuda:0'), out_proj_covar=tensor([9.9932e-05, 6.1212e-05, 5.3491e-05, 6.8905e-05, 6.8062e-05, 6.2935e-05, + 5.7120e-05, 5.1639e-05], device='cuda:0') +2022-12-01 22:14:10,932 INFO [train.py:876] Epoch 8, batch 1300, loss[loss=0.2464, simple_loss=0.2895, pruned_loss=0.1017, over 4849.00 frames. ], tot_loss[loss=0.2451, simple_loss=0.2797, pruned_loss=0.1052, over 948858.74 frames. ], batch size: 40, lr: 2.12e-02, +2022-12-01 22:14:21,676 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2804, 4.3011, 4.5833, 4.0697, 4.4536, 4.5171, 4.2590, 3.8964], + device='cuda:0'), covar=tensor([0.0470, 0.0352, 0.0449, 0.0477, 0.0700, 0.0323, 0.0450, 0.0388], + device='cuda:0'), in_proj_covar=tensor([0.0096, 0.0077, 0.0075, 0.0084, 0.0072, 0.0081, 0.0090, 0.0069], + device='cuda:0'), out_proj_covar=tensor([9.0449e-05, 6.2491e-05, 6.6452e-05, 7.5215e-05, 6.4477e-05, 7.0934e-05, + 8.0192e-05, 5.9635e-05], device='cuda:0') +2022-12-01 22:14:59,642 INFO [train.py:876] Epoch 8, batch 1350, loss[loss=0.2281, simple_loss=0.2443, pruned_loss=0.1059, over 4874.00 frames. ], tot_loss[loss=0.2425, simple_loss=0.2776, pruned_loss=0.1037, over 949672.70 frames. ], batch size: 29, lr: 2.12e-02, +2022-12-01 22:15:24,350 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11400.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:15:25,021 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.352e+02 2.223e+02 2.806e+02 3.824e+02 7.810e+02, threshold=5.612e+02, percent-clipped=3.0 +2022-12-01 22:15:40,063 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4195, 2.5370, 2.2702, 2.4942, 2.3842, 2.5392, 2.2747, 2.5393], + device='cuda:0'), covar=tensor([0.0228, 0.0219, 0.0228, 0.0174, 0.0287, 0.0248, 0.0177, 0.0199], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0038, 0.0031, 0.0031, 0.0041, 0.0037, 0.0028, 0.0033], + device='cuda:0'), out_proj_covar=tensor([2.6039e-05, 2.8623e-05, 2.2687e-05, 2.2229e-05, 3.1532e-05, 2.7780e-05, + 2.0113e-05, 2.3869e-05], device='cuda:0') +2022-12-01 22:15:49,343 INFO [train.py:876] Epoch 8, batch 1400, loss[loss=0.2695, simple_loss=0.3027, pruned_loss=0.1181, over 4873.00 frames. ], tot_loss[loss=0.2398, simple_loss=0.2747, pruned_loss=0.1025, over 945070.57 frames. ], batch size: 44, lr: 2.12e-02, +2022-12-01 22:15:53,380 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11430.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:16:11,250 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11448.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:16:21,202 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-8.pt +2022-12-01 22:16:38,063 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 22:16:38,622 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. 
Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 22:16:38,915 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:16:39,248 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 22:16:40,042 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 22:16:40,358 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 22:16:42,356 INFO [train.py:876] Epoch 9, batch 0, loss[loss=0.2574, simple_loss=0.2967, pruned_loss=0.109, over 4818.00 frames. ], tot_loss[loss=0.2574, simple_loss=0.2967, pruned_loss=0.109, over 4818.00 frames. ], batch size: 45, lr: 2.00e-02, +2022-12-01 22:16:42,357 INFO [train.py:901] Computing validation loss +2022-12-01 22:16:57,912 INFO [train.py:910] Epoch 9, validation: loss=0.2488, simple_loss=0.2936, pruned_loss=0.102, over 253132.00 frames. +2022-12-01 22:16:57,913 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 22:17:16,260 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11478.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:17:38,821 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.324e+02 2.395e+02 2.922e+02 3.756e+02 1.096e+03, threshold=5.844e+02, percent-clipped=5.0 +2022-12-01 22:17:46,614 INFO [train.py:876] Epoch 9, batch 50, loss[loss=0.2822, simple_loss=0.3267, pruned_loss=0.1188, over 4824.00 frames. ], tot_loss[loss=0.2357, simple_loss=0.2736, pruned_loss=0.09892, over 215681.36 frames. ], batch size: 54, lr: 2.00e-02, +2022-12-01 22:18:01,443 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6332, 3.8866, 3.4756, 3.5622, 3.9916, 3.6917, 3.8425, 3.7379], + device='cuda:0'), covar=tensor([0.1272, 0.0257, 0.0324, 0.0415, 0.0190, 0.0278, 0.0259, 0.0259], + device='cuda:0'), in_proj_covar=tensor([0.0150, 0.0102, 0.0095, 0.0105, 0.0084, 0.0100, 0.0098, 0.0089], + device='cuda:0'), out_proj_covar=tensor([9.9868e-05, 6.5113e-05, 6.2504e-05, 7.0881e-05, 5.4770e-05, 6.3512e-05, + 6.3426e-05, 5.8399e-05], device='cuda:0') +2022-12-01 22:18:04,976 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 22:18:07,022 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11530.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:18:09,139 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11532.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:18:14,307 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. limit=2.0 +2022-12-01 22:18:17,741 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11541.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 22:18:29,019 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 22:18:35,570 INFO [train.py:876] Epoch 9, batch 100, loss[loss=0.1957, simple_loss=0.2442, pruned_loss=0.07357, over 4861.00 frames. ], tot_loss[loss=0.2288, simple_loss=0.2679, pruned_loss=0.09486, over 381253.77 frames. 
], batch size: 35, lr: 1.99e-02, +2022-12-01 22:18:44,564 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2212, 1.3002, 0.9580, 1.6996, 1.3525, 1.3177, 1.3539, 1.3815], + device='cuda:0'), covar=tensor([0.0420, 0.0509, 0.0459, 0.0232, 0.0308, 0.0306, 0.0321, 0.0447], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0032, 0.0039, 0.0028, 0.0033, 0.0030, 0.0033, 0.0029], + device='cuda:0'), out_proj_covar=tensor([3.6867e-05, 2.9348e-05, 3.7979e-05, 2.5165e-05, 3.0305e-05, 2.8417e-05, + 2.9994e-05, 2.7050e-05], device='cuda:0') +2022-12-01 22:18:52,982 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:19:04,649 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11589.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 22:19:08,678 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11593.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:19:13,663 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.05 vs. limit=2.0 +2022-12-01 22:19:14,063 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 22:19:16,582 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.323e+02 2.056e+02 2.706e+02 3.543e+02 8.236e+02, threshold=5.413e+02, percent-clipped=3.0 +2022-12-01 22:19:24,488 INFO [train.py:876] Epoch 9, batch 150, loss[loss=0.3317, simple_loss=0.3369, pruned_loss=0.1632, over 4086.00 frames. ], tot_loss[loss=0.2302, simple_loss=0.2688, pruned_loss=0.09579, over 506698.46 frames. ], batch size: 72, lr: 1.99e-02, +2022-12-01 22:19:33,864 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.66 vs. limit=5.0 +2022-12-01 22:19:49,731 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11635.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:20:07,026 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3165, 3.1115, 3.1527, 2.9345, 3.4124, 2.7661, 3.3418, 3.5403], + device='cuda:0'), covar=tensor([0.0257, 0.0375, 0.0183, 0.0329, 0.0147, 0.0546, 0.0220, 0.0196], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0080, 0.0053, 0.0065, 0.0052, 0.0092, 0.0054, 0.0052], + device='cuda:0'), out_proj_covar=tensor([5.1028e-05, 6.7417e-05, 4.3018e-05, 5.4623e-05, 4.2415e-05, 8.1111e-05, + 4.7146e-05, 4.1581e-05], device='cuda:0') +2022-12-01 22:20:08,986 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6140, 3.7821, 3.4956, 3.4251, 3.8056, 3.5494, 3.6217, 3.7681], + device='cuda:0'), covar=tensor([0.1457, 0.0302, 0.0291, 0.0516, 0.0216, 0.0372, 0.0318, 0.0300], + device='cuda:0'), in_proj_covar=tensor([0.0148, 0.0103, 0.0096, 0.0105, 0.0086, 0.0099, 0.0100, 0.0089], + device='cuda:0'), out_proj_covar=tensor([9.9088e-05, 6.5874e-05, 6.3020e-05, 7.1353e-05, 5.5722e-05, 6.3170e-05, + 6.4495e-05, 5.8218e-05], device='cuda:0') +2022-12-01 22:20:13,533 INFO [train.py:876] Epoch 9, batch 200, loss[loss=0.268, simple_loss=0.3124, pruned_loss=0.1118, over 4812.00 frames. ], tot_loss[loss=0.2354, simple_loss=0.2741, pruned_loss=0.09834, over 605684.06 frames. 
], batch size: 42, lr: 1.99e-02, +2022-12-01 22:20:49,943 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11696.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:20:52,901 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11699.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:20:54,529 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.190e+02 2.326e+02 3.027e+02 3.872e+02 8.166e+02, threshold=6.053e+02, percent-clipped=10.0 +2022-12-01 22:21:02,128 INFO [train.py:876] Epoch 9, batch 250, loss[loss=0.2358, simple_loss=0.272, pruned_loss=0.09976, over 4807.00 frames. ], tot_loss[loss=0.2329, simple_loss=0.2721, pruned_loss=0.09686, over 684028.29 frames. ], batch size: 33, lr: 1.98e-02, +2022-12-01 22:21:02,134 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 22:21:08,988 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3053, 1.4352, 0.9529, 1.7703, 1.4430, 1.5479, 1.4166, 1.4916], + device='cuda:0'), covar=tensor([0.0400, 0.0343, 0.0352, 0.0184, 0.0284, 0.0269, 0.0227, 0.0278], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0032, 0.0040, 0.0028, 0.0033, 0.0031, 0.0033, 0.0030], + device='cuda:0'), out_proj_covar=tensor([3.6905e-05, 2.9507e-05, 3.8476e-05, 2.5370e-05, 2.9974e-05, 2.8776e-05, + 3.0694e-05, 2.8052e-05], device='cuda:0') +2022-12-01 22:21:16,717 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2436, 3.1588, 3.7894, 3.8304, 3.2309, 4.0015, 3.6731, 2.7499], + device='cuda:0'), covar=tensor([0.4251, 0.0778, 0.0562, 0.0173, 0.0351, 0.0567, 0.0265, 0.0801], + device='cuda:0'), in_proj_covar=tensor([0.0183, 0.0080, 0.0107, 0.0073, 0.0099, 0.0080, 0.0069, 0.0086], + device='cuda:0'), out_proj_covar=tensor([2.0746e-04, 9.5356e-05, 1.2602e-04, 8.2914e-05, 1.0564e-04, 9.7515e-05, + 8.2765e-05, 1.0645e-04], device='cuda:0') +2022-12-01 22:21:50,994 INFO [train.py:876] Epoch 9, batch 300, loss[loss=0.229, simple_loss=0.2676, pruned_loss=0.09516, over 4828.00 frames. ], tot_loss[loss=0.2334, simple_loss=0.2728, pruned_loss=0.09705, over 743598.49 frames. ], batch size: 41, lr: 1.98e-02, +2022-12-01 22:21:52,336 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11760.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:22:00,040 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 22:22:03,526 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.89 vs. 
limit=2.0 +2022-12-01 22:22:14,079 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7232, 4.6421, 5.1386, 4.5090, 4.8209, 4.7284, 4.6366, 4.4274], + device='cuda:0'), covar=tensor([0.0496, 0.0381, 0.0325, 0.0371, 0.0747, 0.0331, 0.0410, 0.0353], + device='cuda:0'), in_proj_covar=tensor([0.0100, 0.0079, 0.0077, 0.0085, 0.0074, 0.0083, 0.0095, 0.0070], + device='cuda:0'), out_proj_covar=tensor([9.4064e-05, 6.3897e-05, 6.8636e-05, 7.5298e-05, 6.5669e-05, 7.2688e-05, + 8.4215e-05, 6.0776e-05], device='cuda:0') +2022-12-01 22:22:30,961 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2846, 3.8155, 3.7735, 4.4664, 4.1184, 4.5509, 4.6045, 4.0803], + device='cuda:0'), covar=tensor([0.0161, 0.0179, 0.0131, 0.0101, 0.0099, 0.0075, 0.0063, 0.0123], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0083, 0.0063, 0.0063, 0.0073, 0.0059, 0.0061, 0.0084], + device='cuda:0'), out_proj_covar=tensor([4.2986e-05, 5.7425e-05, 4.2922e-05, 4.0916e-05, 4.9771e-05, 3.8686e-05, + 4.0110e-05, 5.9697e-05], device='cuda:0') +2022-12-01 22:22:32,856 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.282e+02 2.414e+02 3.062e+02 3.932e+02 7.619e+02, threshold=6.124e+02, percent-clipped=6.0 +2022-12-01 22:22:40,750 INFO [train.py:876] Epoch 9, batch 350, loss[loss=0.219, simple_loss=0.2552, pruned_loss=0.0914, over 4793.00 frames. ], tot_loss[loss=0.2285, simple_loss=0.2683, pruned_loss=0.09438, over 788473.32 frames. ], batch size: 32, lr: 1.98e-02, +2022-12-01 22:22:47,709 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11816.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:23:01,365 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=11830.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:23:17,250 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11846.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:23:29,921 INFO [train.py:876] Epoch 9, batch 400, loss[loss=0.1262, simple_loss=0.1725, pruned_loss=0.03996, over 4653.00 frames. ], tot_loss[loss=0.2254, simple_loss=0.2658, pruned_loss=0.09255, over 827690.11 frames. ], batch size: 21, lr: 1.97e-02, +2022-12-01 22:23:45,527 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 22:23:47,891 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11877.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:23:48,747 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=11878.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:23:54,128 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.83 vs. limit=2.0 +2022-12-01 22:23:58,413 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11888.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:24:11,352 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.330e+02 2.266e+02 2.795e+02 3.434e+02 7.173e+02, threshold=5.590e+02, percent-clipped=3.0 +2022-12-01 22:24:12,396 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:24:17,538 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11907.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:24:19,249 INFO [train.py:876] Epoch 9, batch 450, loss[loss=0.2067, simple_loss=0.2535, pruned_loss=0.07999, over 4905.00 frames. ], tot_loss[loss=0.2248, simple_loss=0.2653, pruned_loss=0.09216, over 853103.75 frames. 
], batch size: 32, lr: 1.97e-02, +2022-12-01 22:24:24,630 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 22:24:30,962 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11920.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:24:54,620 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.16 vs. limit=5.0 +2022-12-01 22:24:57,869 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11948.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:25:08,297 INFO [train.py:876] Epoch 9, batch 500, loss[loss=0.2467, simple_loss=0.2911, pruned_loss=0.1012, over 4860.00 frames. ], tot_loss[loss=0.2266, simple_loss=0.2673, pruned_loss=0.09301, over 876220.55 frames. ], batch size: 47, lr: 1.97e-02, +2022-12-01 22:25:29,116 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4358, 1.5918, 2.3935, 3.1247, 2.7988, 2.8521, 3.1717, 3.6642], + device='cuda:0'), covar=tensor([0.0172, 0.1229, 0.1113, 0.0314, 0.0371, 0.0401, 0.0376, 0.0174], + device='cuda:0'), in_proj_covar=tensor([0.0044, 0.0061, 0.0068, 0.0045, 0.0049, 0.0047, 0.0048, 0.0051], + device='cuda:0'), out_proj_covar=tensor([4.5980e-05, 7.0448e-05, 7.3952e-05, 5.0885e-05, 5.0992e-05, 5.3426e-05, + 5.3482e-05, 5.0639e-05], device='cuda:0') +2022-12-01 22:25:30,018 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=11981.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:25:32,858 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=11984.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:25:37,717 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7502, 4.2610, 4.4823, 4.3563, 3.9966, 4.1842, 4.1968, 4.5802], + device='cuda:0'), covar=tensor([0.0797, 0.0219, 0.0126, 0.0284, 0.0180, 0.0242, 0.0153, 0.0151], + device='cuda:0'), in_proj_covar=tensor([0.0154, 0.0098, 0.0090, 0.0109, 0.0103, 0.0101, 0.0091, 0.0082], + device='cuda:0'), out_proj_covar=tensor([1.0463e-04, 6.4660e-05, 5.7699e-05, 7.2526e-05, 6.8028e-05, 6.6927e-05, + 5.8762e-05, 5.4977e-05], device='cuda:0') +2022-12-01 22:25:39,309 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=11991.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:25:42,502 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4771, 2.2319, 2.2486, 2.6172, 2.1442, 2.4616, 2.4567, 2.5801], + device='cuda:0'), covar=tensor([0.0230, 0.0283, 0.0225, 0.0132, 0.0311, 0.0279, 0.0169, 0.0176], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0039, 0.0032, 0.0031, 0.0042, 0.0039, 0.0030, 0.0033], + device='cuda:0'), out_proj_covar=tensor([2.7029e-05, 3.0179e-05, 2.3535e-05, 2.2567e-05, 3.1999e-05, 2.8636e-05, + 2.1634e-05, 2.3691e-05], device='cuda:0') +2022-12-01 22:25:48,201 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-12000.pt +2022-12-01 22:25:51,374 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.419e+02 2.374e+02 2.901e+02 3.489e+02 1.046e+03, threshold=5.801e+02, percent-clipped=8.0 +2022-12-01 22:25:56,809 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8938, 2.9558, 3.4416, 3.7802, 3.1790, 3.4850, 3.6145, 3.1477], + device='cuda:0'), covar=tensor([0.0191, 0.0395, 0.0193, 0.0135, 0.0264, 0.0150, 0.0122, 0.0270], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0086, 0.0066, 0.0066, 0.0076, 0.0062, 0.0062, 0.0086], + device='cuda:0'), out_proj_covar=tensor([4.5103e-05, 5.9738e-05, 4.5171e-05, 4.2894e-05, 5.1973e-05, 
4.0984e-05, + 4.1319e-05, 6.1442e-05], device='cuda:0') +2022-12-01 22:25:59,525 INFO [train.py:876] Epoch 9, batch 550, loss[loss=0.1869, simple_loss=0.2405, pruned_loss=0.06664, over 4852.00 frames. ], tot_loss[loss=0.2247, simple_loss=0.2654, pruned_loss=0.09194, over 893458.23 frames. ], batch size: 36, lr: 1.96e-02, +2022-12-01 22:25:59,765 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=12009.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:26:35,019 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=12045.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:26:44,748 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12055.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:26:48,668 INFO [train.py:876] Epoch 9, batch 600, loss[loss=0.1893, simple_loss=0.2269, pruned_loss=0.07585, over 4730.00 frames. ], tot_loss[loss=0.2242, simple_loss=0.2651, pruned_loss=0.09162, over 908326.54 frames. ], batch size: 23, lr: 1.96e-02, +2022-12-01 22:26:49,326 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. limit=2.0 +2022-12-01 22:27:05,241 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=12076.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:27:09,652 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.89 vs. limit=2.0 +2022-12-01 22:27:28,953 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.419e+02 2.453e+02 2.952e+02 3.730e+02 7.701e+02, threshold=5.903e+02, percent-clipped=1.0 +2022-12-01 22:27:37,015 INFO [train.py:876] Epoch 9, batch 650, loss[loss=0.191, simple_loss=0.2316, pruned_loss=0.07523, over 4900.00 frames. ], tot_loss[loss=0.2247, simple_loss=0.2651, pruned_loss=0.09222, over 920225.13 frames. ], batch size: 30, lr: 1.96e-02, +2022-12-01 22:28:02,740 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.78 vs. limit=2.0 +2022-12-01 22:28:04,568 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=12137.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:28:25,504 INFO [train.py:876] Epoch 9, batch 700, loss[loss=0.2258, simple_loss=0.2687, pruned_loss=0.09151, over 4916.00 frames. ], tot_loss[loss=0.2258, simple_loss=0.2661, pruned_loss=0.09277, over 929105.89 frames. 
], batch size: 32, lr: 1.95e-02, +2022-12-01 22:28:38,182 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12172.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:28:53,502 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12188.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:29:02,338 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2137, 1.9669, 2.2427, 2.5824, 1.9947, 2.5703, 2.2516, 2.5773], + device='cuda:0'), covar=tensor([0.0263, 0.0341, 0.0196, 0.0153, 0.0442, 0.0240, 0.0215, 0.0171], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0041, 0.0033, 0.0032, 0.0044, 0.0040, 0.0031, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.7204e-05, 3.1340e-05, 2.3763e-05, 2.3294e-05, 3.4014e-05, 3.0181e-05, + 2.2482e-05, 2.4431e-05], device='cuda:0') +2022-12-01 22:29:06,164 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.148e+02 2.248e+02 2.919e+02 3.864e+02 1.119e+03, threshold=5.839e+02, percent-clipped=6.0 +2022-12-01 22:29:07,239 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12202.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:29:13,879 INFO [train.py:876] Epoch 9, batch 750, loss[loss=0.2552, simple_loss=0.2917, pruned_loss=0.1093, over 4823.00 frames. ], tot_loss[loss=0.2276, simple_loss=0.2675, pruned_loss=0.09384, over 934236.64 frames. ], batch size: 42, lr: 1.95e-02, +2022-12-01 22:29:17,199 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.24 vs. limit=2.0 +2022-12-01 22:29:40,570 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12236.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:30:03,193 INFO [train.py:876] Epoch 9, batch 800, loss[loss=0.2729, simple_loss=0.3131, pruned_loss=0.1164, over 4784.00 frames. ], tot_loss[loss=0.2283, simple_loss=0.268, pruned_loss=0.09431, over 938017.75 frames. ], batch size: 58, lr: 1.95e-02, +2022-12-01 22:30:19,723 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12276.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:30:34,549 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12291.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:30:43,785 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.350e+02 2.273e+02 2.750e+02 3.633e+02 8.469e+02, threshold=5.501e+02, percent-clipped=2.0 +2022-12-01 22:30:46,802 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12304.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:30:50,325 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-01 22:30:51,585 INFO [train.py:876] Epoch 9, batch 850, loss[loss=0.1978, simple_loss=0.2448, pruned_loss=0.07536, over 4882.00 frames. ], tot_loss[loss=0.2257, simple_loss=0.2658, pruned_loss=0.09276, over 941623.80 frames. ], batch size: 37, lr: 1.94e-02, +2022-12-01 22:31:20,617 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12339.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:31:21,582 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12340.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:31:35,954 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12355.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:31:39,747 INFO [train.py:876] Epoch 9, batch 900, loss[loss=0.2345, simple_loss=0.2751, pruned_loss=0.09702, over 4898.00 frames. 
], tot_loss[loss=0.2257, simple_loss=0.2664, pruned_loss=0.09246, over 945596.72 frames. ], batch size: 31, lr: 1.94e-02, +2022-12-01 22:32:01,623 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0 +2022-12-01 22:32:20,593 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.411e+02 2.417e+02 3.002e+02 4.061e+02 8.480e+02, threshold=6.004e+02, percent-clipped=9.0 +2022-12-01 22:32:22,540 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12403.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:32:28,449 INFO [train.py:876] Epoch 9, batch 950, loss[loss=0.1907, simple_loss=0.2448, pruned_loss=0.06829, over 4856.00 frames. ], tot_loss[loss=0.2272, simple_loss=0.2674, pruned_loss=0.09348, over 943730.13 frames. ], batch size: 35, lr: 1.94e-02, +2022-12-01 22:32:39,459 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0552, 2.7105, 2.9543, 2.6184, 2.6090, 2.3292, 3.0815, 2.9555], + device='cuda:0'), covar=tensor([0.0127, 0.0641, 0.0375, 0.0822, 0.0228, 0.0395, 0.0439, 0.0360], + device='cuda:0'), in_proj_covar=tensor([0.0042, 0.0067, 0.0049, 0.0070, 0.0053, 0.0050, 0.0057, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.7689e-05, 7.7973e-05, 5.5887e-05, 8.1084e-05, 5.9632e-05, 5.7665e-05, + 6.5032e-05, 5.7018e-05], device='cuda:0') +2022-12-01 22:32:51,065 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12432.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:32:54,625 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.84 vs. limit=2.0 +2022-12-01 22:32:56,969 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4955, 1.7911, 0.9904, 1.8127, 1.4226, 1.2873, 1.7106, 1.3147], + device='cuda:0'), covar=tensor([0.0390, 0.0274, 0.0437, 0.0426, 0.0285, 0.0320, 0.0256, 0.0674], + device='cuda:0'), in_proj_covar=tensor([0.0040, 0.0034, 0.0041, 0.0031, 0.0036, 0.0033, 0.0036, 0.0031], + device='cuda:0'), out_proj_covar=tensor([3.8940e-05, 3.1610e-05, 3.9876e-05, 2.7854e-05, 3.2298e-05, 3.0605e-05, + 3.3355e-05, 2.9805e-05], device='cuda:0') +2022-12-01 22:33:04,643 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=12446.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:33:07,789 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 22:33:16,651 INFO [train.py:876] Epoch 9, batch 1000, loss[loss=0.2175, simple_loss=0.2641, pruned_loss=0.08546, over 4915.00 frames. ], tot_loss[loss=0.2246, simple_loss=0.265, pruned_loss=0.09213, over 947286.26 frames. ], batch size: 32, lr: 1.93e-02, +2022-12-01 22:33:29,302 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12472.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:33:33,781 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.13 vs. limit=5.0 +2022-12-01 22:33:44,362 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 22:33:46,942 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. 
limit=2.0 +2022-12-01 22:33:56,624 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.486e+02 2.197e+02 2.991e+02 3.767e+02 2.430e+03, threshold=5.983e+02, percent-clipped=7.0 +2022-12-01 22:33:57,703 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12502.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:34:02,320 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=12507.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:34:04,064 INFO [train.py:876] Epoch 9, batch 1050, loss[loss=0.2406, simple_loss=0.2951, pruned_loss=0.09304, over 4864.00 frames. ], tot_loss[loss=0.2266, simple_loss=0.2669, pruned_loss=0.09317, over 950333.71 frames. ], batch size: 39, lr: 1.93e-02, +2022-12-01 22:34:09,001 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9242, 4.1693, 4.0557, 4.3866, 3.8384, 3.7088, 4.4447, 4.1527], + device='cuda:0'), covar=tensor([0.0488, 0.0316, 0.0202, 0.0472, 0.0375, 0.0313, 0.0237, 0.0262], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0036, 0.0034, 0.0028, 0.0038, 0.0038, 0.0033, 0.0035], + device='cuda:0'), out_proj_covar=tensor([4.3822e-05, 3.6128e-05, 3.4744e-05, 2.8450e-05, 4.0940e-05, 4.1097e-05, + 3.1209e-05, 3.6575e-05], device='cuda:0') +2022-12-01 22:34:14,836 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12520.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:34:43,794 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12550.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:34:52,738 INFO [train.py:876] Epoch 9, batch 1100, loss[loss=0.2884, simple_loss=0.3179, pruned_loss=0.1295, over 4838.00 frames. ], tot_loss[loss=0.2287, simple_loss=0.2689, pruned_loss=0.0943, over 951455.55 frames. ], batch size: 47, lr: 1.93e-02, +2022-12-01 22:35:09,731 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12576.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:35:19,885 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.61 vs. limit=5.0 +2022-12-01 22:35:23,519 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2872, 3.3692, 3.1585, 3.1797, 3.3699, 3.3306, 3.2827, 3.4163], + device='cuda:0'), covar=tensor([0.1402, 0.0352, 0.0473, 0.0389, 0.0271, 0.0364, 0.0353, 0.0308], + device='cuda:0'), in_proj_covar=tensor([0.0154, 0.0112, 0.0109, 0.0107, 0.0095, 0.0105, 0.0106, 0.0096], + device='cuda:0'), out_proj_covar=tensor([1.0306e-04, 7.3054e-05, 7.1801e-05, 7.2240e-05, 6.1368e-05, 6.7498e-05, + 6.8780e-05, 6.2659e-05], device='cuda:0') +2022-12-01 22:35:34,006 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.402e+02 2.185e+02 2.681e+02 3.427e+02 8.652e+02, threshold=5.362e+02, percent-clipped=2.0 +2022-12-01 22:35:37,086 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12604.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:35:41,902 INFO [train.py:876] Epoch 9, batch 1150, loss[loss=0.2232, simple_loss=0.2637, pruned_loss=0.09136, over 4877.00 frames. ], tot_loss[loss=0.2289, simple_loss=0.2693, pruned_loss=0.0943, over 952254.80 frames. 
], batch size: 37, lr: 1.92e-02, +2022-12-01 22:35:47,808 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8597, 2.2795, 2.8688, 2.2944, 2.4736, 1.8723, 3.0084, 3.0863], + device='cuda:0'), covar=tensor([0.0159, 0.0845, 0.0369, 0.0926, 0.0276, 0.0379, 0.0452, 0.0271], + device='cuda:0'), in_proj_covar=tensor([0.0043, 0.0069, 0.0050, 0.0071, 0.0054, 0.0051, 0.0058, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.8570e-05, 7.9740e-05, 5.7676e-05, 8.1498e-05, 6.0816e-05, 5.8737e-05, + 6.6985e-05, 5.7385e-05], device='cuda:0') +2022-12-01 22:35:56,629 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12624.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:36:11,841 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12640.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:36:23,415 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12652.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:36:30,428 INFO [train.py:876] Epoch 9, batch 1200, loss[loss=0.2274, simple_loss=0.2647, pruned_loss=0.09504, over 4863.00 frames. ], tot_loss[loss=0.2284, simple_loss=0.269, pruned_loss=0.09396, over 952204.94 frames. ], batch size: 39, lr: 1.92e-02, +2022-12-01 22:36:56,594 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5230, 2.3581, 2.7185, 2.9198, 2.4248, 2.2989, 2.0346, 2.6475], + device='cuda:0'), covar=tensor([0.0796, 0.1093, 0.0729, 0.0493, 0.1003, 0.1778, 0.1348, 0.0664], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0057, 0.0077, 0.0061, 0.0083, 0.0069, 0.0088, 0.0056], + device='cuda:0'), out_proj_covar=tensor([5.1752e-05, 5.6520e-05, 7.3084e-05, 5.7833e-05, 7.8287e-05, 6.5604e-05, + 8.6300e-05, 5.3548e-05], device='cuda:0') +2022-12-01 22:36:58,371 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12688.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:37:11,146 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.272e+02 2.138e+02 2.929e+02 3.430e+02 1.241e+03, threshold=5.858e+02, percent-clipped=4.0 +2022-12-01 22:37:19,036 INFO [train.py:876] Epoch 9, batch 1250, loss[loss=0.2089, simple_loss=0.26, pruned_loss=0.07884, over 4915.00 frames. ], tot_loss[loss=0.2277, simple_loss=0.2683, pruned_loss=0.0936, over 950689.93 frames. ], batch size: 31, lr: 1.92e-02, +2022-12-01 22:37:41,588 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=12732.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:37:53,523 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9783, 2.7231, 3.4843, 3.4612, 3.3389, 3.7964, 3.3758, 2.8172], + device='cuda:0'), covar=tensor([0.5247, 0.1019, 0.0719, 0.0180, 0.0451, 0.0554, 0.0378, 0.0803], + device='cuda:0'), in_proj_covar=tensor([0.0183, 0.0083, 0.0110, 0.0075, 0.0103, 0.0083, 0.0070, 0.0090], + device='cuda:0'), out_proj_covar=tensor([2.0823e-04, 1.0065e-04, 1.3017e-04, 8.6017e-05, 1.1164e-04, 1.0269e-04, + 8.5675e-05, 1.1296e-04], device='cuda:0') +2022-12-01 22:37:53,910 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.45 vs. limit=5.0 +2022-12-01 22:38:08,156 INFO [train.py:876] Epoch 9, batch 1300, loss[loss=0.2229, simple_loss=0.2737, pruned_loss=0.08607, over 4849.00 frames. ], tot_loss[loss=0.2274, simple_loss=0.2673, pruned_loss=0.09373, over 945319.95 frames. 
], batch size: 47, lr: 1.91e-02, +2022-12-01 22:38:12,214 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=12763.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:38:28,809 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=12780.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:38:30,979 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6006, 3.5796, 3.4876, 3.6860, 3.2504, 2.8023, 3.6659, 1.8563], + device='cuda:0'), covar=tensor([0.0317, 0.0356, 0.0306, 0.0368, 0.0624, 0.2082, 0.0177, 0.2951], + device='cuda:0'), in_proj_covar=tensor([0.0077, 0.0067, 0.0067, 0.0093, 0.0097, 0.0124, 0.0059, 0.0136], + device='cuda:0'), out_proj_covar=tensor([8.2380e-05, 7.5782e-05, 7.4231e-05, 9.8462e-05, 1.0326e-04, 1.3300e-04, + 6.3430e-05, 1.4355e-04], device='cuda:0') +2022-12-01 22:38:49,736 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.444e+02 2.320e+02 2.853e+02 3.545e+02 9.804e+02, threshold=5.706e+02, percent-clipped=2.0 +2022-12-01 22:38:50,583 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=12802.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:38:57,264 INFO [train.py:876] Epoch 9, batch 1350, loss[loss=0.223, simple_loss=0.2678, pruned_loss=0.08913, over 4836.00 frames. ], tot_loss[loss=0.2258, simple_loss=0.2654, pruned_loss=0.09306, over 944036.15 frames. ], batch size: 34, lr: 1.91e-02, +2022-12-01 22:39:12,376 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=12824.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 22:39:44,028 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.88 vs. limit=5.0 +2022-12-01 22:39:45,410 INFO [train.py:876] Epoch 9, batch 1400, loss[loss=0.1859, simple_loss=0.2333, pruned_loss=0.06928, over 4730.00 frames. ], tot_loss[loss=0.2279, simple_loss=0.2674, pruned_loss=0.0942, over 941484.04 frames. ], batch size: 27, lr: 1.91e-02, +2022-12-01 22:40:15,707 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-9.pt +2022-12-01 22:40:32,218 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 22:40:33,130 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 22:40:33,423 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:40:33,454 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 22:40:34,246 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 22:40:34,918 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 22:40:36,510 INFO [train.py:876] Epoch 10, batch 0, loss[loss=0.1973, simple_loss=0.2452, pruned_loss=0.07471, over 4880.00 frames. ], tot_loss[loss=0.1973, simple_loss=0.2452, pruned_loss=0.07471, over 4880.00 frames. 
], batch size: 37, lr: 1.81e-02, +2022-12-01 22:40:36,512 INFO [train.py:901] Computing validation loss +2022-12-01 22:40:52,086 INFO [train.py:910] Epoch 10, validation: loss=0.2428, simple_loss=0.2887, pruned_loss=0.09846, over 253132.00 frames. +2022-12-01 22:40:52,086 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 22:41:01,483 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.329e+01 2.414e+02 3.022e+02 3.975e+02 7.997e+02, threshold=6.045e+02, percent-clipped=9.0 +2022-12-01 22:41:29,631 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 22:41:38,026 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.90 vs. limit=2.0 +2022-12-01 22:41:40,255 INFO [train.py:876] Epoch 10, batch 50, loss[loss=0.2108, simple_loss=0.2661, pruned_loss=0.07775, over 4820.00 frames. ], tot_loss[loss=0.213, simple_loss=0.2566, pruned_loss=0.08473, over 214469.87 frames. ], batch size: 45, lr: 1.81e-02, +2022-12-01 22:42:02,961 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 22:42:15,003 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2110, 1.2546, 1.4625, 1.0323, 1.0761, 1.0070, 1.0603, 1.4509], + device='cuda:0'), covar=tensor([0.0226, 0.0182, 0.0206, 0.0259, 0.0247, 0.0242, 0.0299, 0.0153], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0030, 0.0027, 0.0027, 0.0033, 0.0032, 0.0035, 0.0028], + device='cuda:0'), out_proj_covar=tensor([2.6294e-05, 2.4340e-05, 2.2346e-05, 2.2403e-05, 2.7547e-05, 2.7139e-05, + 2.9415e-05, 2.3840e-05], device='cuda:0') +2022-12-01 22:42:29,818 INFO [train.py:876] Epoch 10, batch 100, loss[loss=0.2676, simple_loss=0.2856, pruned_loss=0.1249, over 3997.00 frames. ], tot_loss[loss=0.2105, simple_loss=0.2543, pruned_loss=0.08333, over 373335.15 frames. ], batch size: 72, lr: 1.80e-02, +2022-12-01 22:42:39,768 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.301e+02 2.110e+02 2.868e+02 3.675e+02 6.986e+02, threshold=5.736e+02, percent-clipped=3.0 +2022-12-01 22:42:44,477 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:43:07,446 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 22:43:17,941 INFO [train.py:876] Epoch 10, batch 150, loss[loss=0.2342, simple_loss=0.2663, pruned_loss=0.1011, over 4794.00 frames. ], tot_loss[loss=0.2138, simple_loss=0.2576, pruned_loss=0.08494, over 503247.75 frames. ], batch size: 33, lr: 1.80e-02, +2022-12-01 22:43:52,777 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-01 22:44:06,540 INFO [train.py:876] Epoch 10, batch 200, loss[loss=0.1959, simple_loss=0.2401, pruned_loss=0.07588, over 4807.00 frames. ], tot_loss[loss=0.2169, simple_loss=0.2605, pruned_loss=0.08664, over 603591.93 frames. 
], batch size: 33, lr: 1.80e-02, +2022-12-01 22:44:16,065 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.369e+02 2.245e+02 2.797e+02 3.527e+02 1.042e+03, threshold=5.594e+02, percent-clipped=3.0 +2022-12-01 22:44:17,158 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=13102.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:44:32,012 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.1184, 1.3579, 1.3813, 1.2578, 0.9859, 1.0374, 1.2514, 1.5543], + device='cuda:0'), covar=tensor([0.0273, 0.0177, 0.0182, 0.0191, 0.0239, 0.0248, 0.0232, 0.0121], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0029, 0.0027, 0.0027, 0.0032, 0.0031, 0.0034, 0.0027], + device='cuda:0'), out_proj_covar=tensor([2.6329e-05, 2.3561e-05, 2.2332e-05, 2.1861e-05, 2.6653e-05, 2.6044e-05, + 2.9096e-05, 2.3008e-05], device='cuda:0') +2022-12-01 22:44:33,802 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=13119.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 22:44:49,732 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 22:44:54,540 INFO [train.py:876] Epoch 10, batch 250, loss[loss=0.1786, simple_loss=0.2291, pruned_loss=0.06402, over 4892.00 frames. ], tot_loss[loss=0.2179, simple_loss=0.2621, pruned_loss=0.08679, over 681346.62 frames. ], batch size: 29, lr: 1.80e-02, +2022-12-01 22:45:03,063 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=13150.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:45:37,844 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 22:45:42,008 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6346, 3.5583, 3.1661, 3.4459, 3.1510, 2.3054, 3.6846, 1.9702], + device='cuda:0'), covar=tensor([0.0312, 0.0250, 0.0359, 0.0484, 0.0679, 0.2307, 0.0130, 0.2648], + device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0070, 0.0069, 0.0099, 0.0101, 0.0128, 0.0061, 0.0144], + device='cuda:0'), out_proj_covar=tensor([8.6486e-05, 7.9904e-05, 7.7850e-05, 1.0499e-04, 1.0885e-04, 1.3779e-04, + 6.6055e-05, 1.5287e-04], device='cuda:0') +2022-12-01 22:45:42,827 INFO [train.py:876] Epoch 10, batch 300, loss[loss=0.2014, simple_loss=0.2488, pruned_loss=0.07704, over 4835.00 frames. ], tot_loss[loss=0.2149, simple_loss=0.2594, pruned_loss=0.08516, over 742576.53 frames. ], batch size: 34, lr: 1.79e-02, +2022-12-01 22:45:47,566 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-01 22:45:52,605 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.255e+02 2.140e+02 2.651e+02 3.600e+02 6.101e+02, threshold=5.302e+02, percent-clipped=3.0 +2022-12-01 22:46:04,411 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3163, 1.5047, 1.4761, 1.2612, 1.0281, 1.0399, 1.1942, 1.8818], + device='cuda:0'), covar=tensor([0.0490, 0.0300, 0.0338, 0.0304, 0.0337, 0.0390, 0.0384, 0.0173], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0030, 0.0027, 0.0027, 0.0032, 0.0031, 0.0034, 0.0027], + device='cuda:0'), out_proj_covar=tensor([2.6490e-05, 2.4136e-05, 2.2565e-05, 2.2297e-05, 2.6983e-05, 2.6560e-05, + 2.9270e-05, 2.3337e-05], device='cuda:0') +2022-12-01 22:46:06,441 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2600, 1.4687, 1.3753, 1.5140, 1.3803, 1.1356, 1.3213, 1.9762], + device='cuda:0'), covar=tensor([0.0806, 0.0418, 0.0452, 0.0245, 0.0303, 0.0473, 0.0422, 0.0186], + device='cuda:0'), in_proj_covar=tensor([0.0031, 0.0030, 0.0027, 0.0027, 0.0032, 0.0031, 0.0034, 0.0027], + device='cuda:0'), out_proj_covar=tensor([2.6485e-05, 2.4123e-05, 2.2546e-05, 2.2280e-05, 2.6985e-05, 2.6555e-05, + 2.9243e-05, 2.3305e-05], device='cuda:0') +2022-12-01 22:46:23,011 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.1247, 4.7397, 5.3576, 5.0078, 5.2763, 5.1137, 4.9718, 4.9051], + device='cuda:0'), covar=tensor([0.0593, 0.0467, 0.0531, 0.0354, 0.0478, 0.0372, 0.0647, 0.0316], + device='cuda:0'), in_proj_covar=tensor([0.0104, 0.0082, 0.0089, 0.0092, 0.0080, 0.0092, 0.0102, 0.0076], + device='cuda:0'), out_proj_covar=tensor([9.3837e-05, 6.4465e-05, 7.7979e-05, 7.9313e-05, 6.9251e-05, 7.7931e-05, + 8.7595e-05, 6.4966e-05], device='cuda:0') +2022-12-01 22:46:31,685 INFO [train.py:876] Epoch 10, batch 350, loss[loss=0.2759, simple_loss=0.3134, pruned_loss=0.1192, over 4833.00 frames. ], tot_loss[loss=0.2152, simple_loss=0.2598, pruned_loss=0.08532, over 789790.36 frames. ], batch size: 45, lr: 1.79e-02, +2022-12-01 22:46:59,493 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 22:47:20,014 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 22:47:20,933 INFO [train.py:876] Epoch 10, batch 400, loss[loss=0.2476, simple_loss=0.3003, pruned_loss=0.09741, over 4836.00 frames. ], tot_loss[loss=0.2159, simple_loss=0.2607, pruned_loss=0.08551, over 825896.85 frames. ], batch size: 49, lr: 1.79e-02, +2022-12-01 22:47:30,452 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.257e+02 2.188e+02 2.914e+02 3.486e+02 1.088e+03, threshold=5.828e+02, percent-clipped=6.0 +2022-12-01 22:47:42,112 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 22:48:09,222 INFO [train.py:876] Epoch 10, batch 450, loss[loss=0.233, simple_loss=0.2818, pruned_loss=0.09212, over 4669.00 frames. ], tot_loss[loss=0.2166, simple_loss=0.2619, pruned_loss=0.08571, over 853240.68 frames. 
], batch size: 63, lr: 1.78e-02, +2022-12-01 22:48:18,075 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4404, 1.5595, 2.2218, 3.1647, 2.9284, 2.9756, 2.9632, 3.2482], + device='cuda:0'), covar=tensor([0.0165, 0.1306, 0.1284, 0.0227, 0.0327, 0.0330, 0.0363, 0.0219], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0064, 0.0072, 0.0047, 0.0049, 0.0051, 0.0050, 0.0056], + device='cuda:0'), out_proj_covar=tensor([4.7670e-05, 7.4396e-05, 7.9219e-05, 5.2671e-05, 4.9916e-05, 5.7821e-05, + 5.5904e-05, 5.4016e-05], device='cuda:0') +2022-12-01 22:48:20,645 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 22:48:57,963 INFO [train.py:876] Epoch 10, batch 500, loss[loss=0.1941, simple_loss=0.2383, pruned_loss=0.075, over 4797.00 frames. ], tot_loss[loss=0.2159, simple_loss=0.2605, pruned_loss=0.08568, over 874297.89 frames. ], batch size: 32, lr: 1.78e-02, +2022-12-01 22:49:07,686 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.613e+02 2.324e+02 3.011e+02 3.720e+02 8.003e+02, threshold=6.023e+02, percent-clipped=4.0 +2022-12-01 22:49:25,115 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=13419.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:49:46,502 INFO [train.py:876] Epoch 10, batch 550, loss[loss=0.2097, simple_loss=0.2308, pruned_loss=0.09426, over 4710.00 frames. ], tot_loss[loss=0.2159, simple_loss=0.2603, pruned_loss=0.0857, over 891264.18 frames. ], batch size: 23, lr: 1.78e-02, +2022-12-01 22:50:11,418 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=13467.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:50:35,555 INFO [train.py:876] Epoch 10, batch 600, loss[loss=0.1964, simple_loss=0.259, pruned_loss=0.06688, over 4836.00 frames. ], tot_loss[loss=0.2139, simple_loss=0.2586, pruned_loss=0.08458, over 904076.71 frames. ], batch size: 49, lr: 1.77e-02, +2022-12-01 22:50:45,133 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.188e+02 2.035e+02 2.877e+02 3.481e+02 6.857e+02, threshold=5.753e+02, percent-clipped=1.0 +2022-12-01 22:50:51,981 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.88 vs. limit=2.0 +2022-12-01 22:51:24,428 INFO [train.py:876] Epoch 10, batch 650, loss[loss=0.2358, simple_loss=0.2901, pruned_loss=0.09074, over 4852.00 frames. ], tot_loss[loss=0.2133, simple_loss=0.2586, pruned_loss=0.08404, over 913968.03 frames. ], batch size: 47, lr: 1.77e-02, +2022-12-01 22:52:12,364 INFO [train.py:876] Epoch 10, batch 700, loss[loss=0.2216, simple_loss=0.2634, pruned_loss=0.08992, over 4881.00 frames. ], tot_loss[loss=0.213, simple_loss=0.2578, pruned_loss=0.08414, over 920689.85 frames. ], batch size: 30, lr: 1.77e-02, +2022-12-01 22:52:22,065 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.269e+02 2.319e+02 2.824e+02 3.846e+02 7.302e+02, threshold=5.647e+02, percent-clipped=8.0 +2022-12-01 22:53:00,886 INFO [train.py:876] Epoch 10, batch 750, loss[loss=0.1449, simple_loss=0.1763, pruned_loss=0.05679, over 4646.00 frames. ], tot_loss[loss=0.21, simple_loss=0.2552, pruned_loss=0.08242, over 927730.81 frames. ], batch size: 21, lr: 1.77e-02, +2022-12-01 22:53:49,215 INFO [train.py:876] Epoch 10, batch 800, loss[loss=0.2283, simple_loss=0.2859, pruned_loss=0.0854, over 4820.00 frames. ], tot_loss[loss=0.2113, simple_loss=0.2566, pruned_loss=0.08298, over 932119.60 frames. 
], batch size: 42, lr: 1.76e-02, +2022-12-01 22:53:58,753 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.246e+02 2.400e+02 2.909e+02 3.588e+02 5.439e+02, threshold=5.818e+02, percent-clipped=0.0 +2022-12-01 22:54:02,777 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.4957, 5.4069, 5.7127, 5.4131, 5.6098, 5.5420, 5.4819, 5.2913], + device='cuda:0'), covar=tensor([0.0402, 0.0192, 0.0321, 0.0199, 0.0401, 0.0211, 0.0472, 0.0195], + device='cuda:0'), in_proj_covar=tensor([0.0104, 0.0081, 0.0088, 0.0091, 0.0078, 0.0090, 0.0102, 0.0075], + device='cuda:0'), out_proj_covar=tensor([9.2876e-05, 6.2302e-05, 7.6845e-05, 7.7997e-05, 6.7906e-05, 7.6254e-05, + 8.7140e-05, 6.1866e-05], device='cuda:0') +2022-12-01 22:54:33,926 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=13737.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 22:54:37,381 INFO [train.py:876] Epoch 10, batch 850, loss[loss=0.1609, simple_loss=0.199, pruned_loss=0.06134, over 4648.00 frames. ], tot_loss[loss=0.2137, simple_loss=0.2587, pruned_loss=0.08433, over 935522.96 frames. ], batch size: 21, lr: 1.76e-02, +2022-12-01 22:55:25,073 INFO [train.py:876] Epoch 10, batch 900, loss[loss=0.2705, simple_loss=0.3024, pruned_loss=0.1193, over 4782.00 frames. ], tot_loss[loss=0.2118, simple_loss=0.2573, pruned_loss=0.0832, over 940652.18 frames. ], batch size: 54, lr: 1.76e-02, +2022-12-01 22:55:32,077 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=13798.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 22:55:32,861 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0271, 4.4772, 4.1680, 4.8082, 4.2450, 3.8822, 4.5542, 4.0954], + device='cuda:0'), covar=tensor([0.0242, 0.0122, 0.0143, 0.0106, 0.0159, 0.0173, 0.0095, 0.0174], + device='cuda:0'), in_proj_covar=tensor([0.0041, 0.0038, 0.0037, 0.0031, 0.0039, 0.0039, 0.0035, 0.0037], + device='cuda:0'), out_proj_covar=tensor([4.4475e-05, 3.7648e-05, 3.7662e-05, 3.1208e-05, 4.0989e-05, 4.1731e-05, + 3.2766e-05, 3.7006e-05], device='cuda:0') +2022-12-01 22:55:34,901 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.228e+02 2.484e+02 3.117e+02 3.777e+02 7.282e+02, threshold=6.235e+02, percent-clipped=7.0 +2022-12-01 22:55:52,575 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4674, 2.4106, 2.4417, 2.5434, 2.2627, 2.6979, 2.5846, 2.5881], + device='cuda:0'), covar=tensor([0.0246, 0.0251, 0.0217, 0.0209, 0.0326, 0.0253, 0.0192, 0.0180], + device='cuda:0'), in_proj_covar=tensor([0.0043, 0.0044, 0.0038, 0.0038, 0.0046, 0.0043, 0.0034, 0.0036], + device='cuda:0'), out_proj_covar=tensor([3.1716e-05, 3.3528e-05, 2.7730e-05, 2.6953e-05, 3.5318e-05, 3.2313e-05, + 2.5104e-05, 2.6224e-05], device='cuda:0') +2022-12-01 22:56:13,872 INFO [train.py:876] Epoch 10, batch 950, loss[loss=0.2256, simple_loss=0.2831, pruned_loss=0.08405, over 4833.00 frames. ], tot_loss[loss=0.2131, simple_loss=0.2585, pruned_loss=0.08385, over 943475.36 frames. 
], batch size: 45, lr: 1.75e-02, +2022-12-01 22:56:44,730 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1827, 3.2717, 3.9341, 3.4990, 3.6883, 3.6075, 4.0153, 2.8056], + device='cuda:0'), covar=tensor([0.4076, 0.1002, 0.0524, 0.0202, 0.0349, 0.0700, 0.0242, 0.1078], + device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0088, 0.0116, 0.0081, 0.0107, 0.0091, 0.0076, 0.0097], + device='cuda:0'), out_proj_covar=tensor([2.1641e-04, 1.0788e-04, 1.3864e-04, 9.6095e-05, 1.1784e-04, 1.1386e-04, + 9.6252e-05, 1.2263e-04], device='cuda:0') +2022-12-01 22:57:01,643 INFO [train.py:876] Epoch 10, batch 1000, loss[loss=0.1597, simple_loss=0.2055, pruned_loss=0.05701, over 4915.00 frames. ], tot_loss[loss=0.2124, simple_loss=0.2576, pruned_loss=0.08357, over 947640.30 frames. ], batch size: 30, lr: 1.75e-02, +2022-12-01 22:57:11,336 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.471e+02 2.174e+02 2.612e+02 3.407e+02 6.157e+02, threshold=5.225e+02, percent-clipped=0.0 +2022-12-01 22:57:17,421 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7858, 2.7032, 3.1778, 3.4716, 3.0058, 3.3806, 3.2804, 2.9505], + device='cuda:0'), covar=tensor([0.0123, 0.0348, 0.0217, 0.0139, 0.0242, 0.0153, 0.0147, 0.0268], + device='cuda:0'), in_proj_covar=tensor([0.0076, 0.0094, 0.0075, 0.0075, 0.0082, 0.0072, 0.0074, 0.0097], + device='cuda:0'), out_proj_covar=tensor([5.2482e-05, 6.4363e-05, 5.1329e-05, 4.9822e-05, 5.5795e-05, 4.8450e-05, + 4.9699e-05, 6.9070e-05], device='cuda:0') +2022-12-01 22:57:49,934 INFO [train.py:876] Epoch 10, batch 1050, loss[loss=0.1921, simple_loss=0.2417, pruned_loss=0.07123, over 4854.00 frames. ], tot_loss[loss=0.2112, simple_loss=0.2565, pruned_loss=0.08295, over 948759.88 frames. ], batch size: 35, lr: 1.75e-02, +2022-12-01 22:58:23,053 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7464, 2.6460, 3.3089, 3.1129, 2.8354, 3.1744, 3.2357, 2.6414], + device='cuda:0'), covar=tensor([0.4749, 0.0720, 0.0589, 0.0297, 0.0557, 0.0888, 0.0321, 0.0936], + device='cuda:0'), in_proj_covar=tensor([0.0196, 0.0091, 0.0120, 0.0085, 0.0110, 0.0095, 0.0079, 0.0101], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-01 22:58:38,042 INFO [train.py:876] Epoch 10, batch 1100, loss[loss=0.177, simple_loss=0.2292, pruned_loss=0.06239, over 4737.00 frames. ], tot_loss[loss=0.212, simple_loss=0.2572, pruned_loss=0.08342, over 949626.09 frames. ], batch size: 27, lr: 1.75e-02, +2022-12-01 22:58:46,140 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5539, 2.4180, 2.3758, 2.5439, 2.3325, 2.5684, 2.4774, 2.6179], + device='cuda:0'), covar=tensor([0.0249, 0.0279, 0.0254, 0.0218, 0.0339, 0.0287, 0.0161, 0.0186], + device='cuda:0'), in_proj_covar=tensor([0.0045, 0.0047, 0.0041, 0.0041, 0.0050, 0.0047, 0.0036, 0.0038], + device='cuda:0'), out_proj_covar=tensor([3.3143e-05, 3.6094e-05, 3.0348e-05, 2.9115e-05, 3.7709e-05, 3.4982e-05, + 2.6297e-05, 2.7507e-05], device='cuda:0') +2022-12-01 22:58:47,144 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-14000.pt +2022-12-01 22:58:50,809 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.218e+02 2.087e+02 2.770e+02 3.369e+02 7.936e+02, threshold=5.540e+02, percent-clipped=3.0 +2022-12-01 22:59:07,810 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.05 vs. limit=2.0 +2022-12-01 22:59:12,060 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.89 vs. 
limit=5.0 +2022-12-01 22:59:16,292 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6158, 2.3629, 2.1925, 2.8230, 2.0293, 2.1052, 1.4757, 2.3983], + device='cuda:0'), covar=tensor([0.0568, 0.0807, 0.0734, 0.0326, 0.0959, 0.1016, 0.1143, 0.0283], + device='cuda:0'), in_proj_covar=tensor([0.0057, 0.0056, 0.0073, 0.0059, 0.0082, 0.0066, 0.0082, 0.0057], + device='cuda:0'), out_proj_covar=tensor([5.4839e-05, 5.5689e-05, 6.9799e-05, 5.6504e-05, 7.9063e-05, 6.3629e-05, + 8.0201e-05, 5.4645e-05], device='cuda:0') +2022-12-01 22:59:29,889 INFO [train.py:876] Epoch 10, batch 1150, loss[loss=0.1646, simple_loss=0.1995, pruned_loss=0.06486, over 3473.00 frames. ], tot_loss[loss=0.2108, simple_loss=0.2562, pruned_loss=0.08276, over 951060.16 frames. ], batch size: 13, lr: 1.74e-02, +2022-12-01 22:59:30,543 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.89 vs. limit=5.0 +2022-12-01 22:59:31,093 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2930, 3.4318, 3.3894, 3.0450, 3.2427, 3.3191, 3.4904, 3.4243], + device='cuda:0'), covar=tensor([0.1186, 0.0289, 0.0299, 0.0466, 0.0345, 0.0274, 0.0232, 0.0233], + device='cuda:0'), in_proj_covar=tensor([0.0155, 0.0116, 0.0113, 0.0117, 0.0103, 0.0113, 0.0109, 0.0104], + device='cuda:0'), out_proj_covar=tensor([1.0276e-04, 7.5208e-05, 7.5006e-05, 7.9021e-05, 6.7740e-05, 7.2452e-05, + 7.0585e-05, 6.8853e-05], device='cuda:0') +2022-12-01 22:59:54,428 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. limit=2.0 +2022-12-01 22:59:59,884 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14072.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:00:09,565 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6315, 1.4229, 2.2202, 1.3842, 1.6507, 1.6857, 1.4544, 1.7520], + device='cuda:0'), covar=tensor([0.0575, 0.0361, 0.0307, 0.0571, 0.0454, 0.0358, 0.0515, 0.0295], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0047, 0.0046, 0.0050, 0.0044, 0.0038, 0.0042, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.2213e-05, 4.0394e-05, 4.0485e-05, 4.3207e-05, 3.9081e-05, 3.4362e-05, + 3.7206e-05, 3.6961e-05], device='cuda:0') +2022-12-01 23:00:18,106 INFO [train.py:876] Epoch 10, batch 1200, loss[loss=0.2597, simple_loss=0.3056, pruned_loss=0.1069, over 4829.00 frames. ], tot_loss[loss=0.2118, simple_loss=0.2572, pruned_loss=0.08322, over 951313.93 frames. ], batch size: 47, lr: 1.74e-02, +2022-12-01 23:00:20,087 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14093.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:00:27,677 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.329e+02 2.284e+02 2.758e+02 3.576e+02 7.112e+02, threshold=5.515e+02, percent-clipped=3.0 +2022-12-01 23:00:29,854 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14103.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:00:46,111 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14120.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 23:00:58,526 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14133.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:01:06,181 INFO [train.py:876] Epoch 10, batch 1250, loss[loss=0.2195, simple_loss=0.2744, pruned_loss=0.08226, over 4879.00 frames. ], tot_loss[loss=0.2118, simple_loss=0.2574, pruned_loss=0.08312, over 951289.91 frames. 
], batch size: 44, lr: 1.74e-02, +2022-12-01 23:01:14,498 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.14 vs. limit=5.0 +2022-12-01 23:01:28,289 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14164.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:01:44,695 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14181.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 23:01:53,845 INFO [train.py:876] Epoch 10, batch 1300, loss[loss=0.2231, simple_loss=0.2745, pruned_loss=0.08581, over 4796.00 frames. ], tot_loss[loss=0.2128, simple_loss=0.2583, pruned_loss=0.08362, over 951061.35 frames. ], batch size: 54, lr: 1.74e-02, +2022-12-01 23:01:58,994 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0493, 4.6083, 4.2472, 4.7491, 4.3147, 3.9223, 4.5193, 4.3847], + device='cuda:0'), covar=tensor([0.0189, 0.0084, 0.0122, 0.0128, 0.0124, 0.0150, 0.0091, 0.0105], + device='cuda:0'), in_proj_covar=tensor([0.0040, 0.0038, 0.0037, 0.0031, 0.0039, 0.0039, 0.0035, 0.0036], + device='cuda:0'), out_proj_covar=tensor([4.3616e-05, 3.7498e-05, 3.7149e-05, 3.0787e-05, 4.0521e-05, 4.0916e-05, + 3.2056e-05, 3.6392e-05], device='cuda:0') +2022-12-01 23:02:03,863 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.519e+02 2.227e+02 2.703e+02 3.822e+02 1.068e+03, threshold=5.405e+02, percent-clipped=5.0 +2022-12-01 23:02:05,260 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3357, 4.2056, 4.2321, 3.9906, 3.6597, 4.0639, 4.0700, 4.2981], + device='cuda:0'), covar=tensor([0.1062, 0.0156, 0.0187, 0.0241, 0.0235, 0.0290, 0.0137, 0.0194], + device='cuda:0'), in_proj_covar=tensor([0.0175, 0.0110, 0.0103, 0.0120, 0.0120, 0.0118, 0.0107, 0.0102], + device='cuda:0'), out_proj_covar=tensor([1.1685e-04, 7.1306e-05, 6.6762e-05, 7.9856e-05, 7.9859e-05, 7.7842e-05, + 6.9664e-05, 6.7826e-05], device='cuda:0') +2022-12-01 23:02:28,652 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.79 vs. limit=2.0 +2022-12-01 23:02:42,706 INFO [train.py:876] Epoch 10, batch 1350, loss[loss=0.1756, simple_loss=0.2325, pruned_loss=0.05937, over 4881.00 frames. ], tot_loss[loss=0.2103, simple_loss=0.2556, pruned_loss=0.0825, over 953025.77 frames. ], batch size: 37, lr: 1.73e-02, +2022-12-01 23:03:31,684 INFO [train.py:876] Epoch 10, batch 1400, loss[loss=0.1816, simple_loss=0.2409, pruned_loss=0.06114, over 4854.00 frames. ], tot_loss[loss=0.2075, simple_loss=0.2527, pruned_loss=0.08113, over 952247.51 frames. ], batch size: 40, lr: 1.73e-02, +2022-12-01 23:03:41,560 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.422e+02 2.073e+02 2.637e+02 3.210e+02 1.131e+03, threshold=5.275e+02, percent-clipped=4.0 +2022-12-01 23:04:02,253 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-10.pt +2022-12-01 23:04:18,809 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 23:04:19,745 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. 
Number of tokens: 29 +2022-12-01 23:04:20,040 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:04:20,072 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 23:04:21,216 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 23:04:21,538 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 23:04:23,107 INFO [train.py:876] Epoch 11, batch 0, loss[loss=0.2101, simple_loss=0.2473, pruned_loss=0.08643, over 4818.00 frames. ], tot_loss[loss=0.2101, simple_loss=0.2473, pruned_loss=0.08643, over 4818.00 frames. ], batch size: 33, lr: 1.65e-02, +2022-12-01 23:04:23,108 INFO [train.py:901] Computing validation loss +2022-12-01 23:04:38,665 INFO [train.py:910] Epoch 11, validation: loss=0.2428, simple_loss=0.2875, pruned_loss=0.09908, over 253132.00 frames. +2022-12-01 23:04:38,665 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 23:05:10,495 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4276, 3.2866, 3.0565, 2.9877, 2.9230, 2.3249, 3.6050, 1.6083], + device='cuda:0'), covar=tensor([0.0487, 0.0260, 0.0368, 0.0671, 0.0716, 0.2602, 0.0185, 0.3398], + device='cuda:0'), in_proj_covar=tensor([0.0087, 0.0075, 0.0075, 0.0107, 0.0111, 0.0135, 0.0064, 0.0151], + device='cuda:0'), out_proj_covar=tensor([9.4364e-05, 8.6766e-05, 8.6121e-05, 1.1478e-04, 1.1917e-04, 1.4652e-04, + 7.1427e-05, 1.6059e-04], device='cuda:0') +2022-12-01 23:05:26,596 INFO [train.py:876] Epoch 11, batch 50, loss[loss=0.1751, simple_loss=0.2183, pruned_loss=0.06596, over 4821.00 frames. ], tot_loss[loss=0.2063, simple_loss=0.2525, pruned_loss=0.07998, over 217334.41 frames. ], batch size: 33, lr: 1.65e-02, +2022-12-01 23:05:28,689 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4542, 3.4625, 3.3030, 3.1858, 3.4039, 3.4223, 3.4723, 3.5753], + device='cuda:0'), covar=tensor([0.1096, 0.0294, 0.0342, 0.0416, 0.0287, 0.0270, 0.0249, 0.0249], + device='cuda:0'), in_proj_covar=tensor([0.0156, 0.0115, 0.0116, 0.0116, 0.0105, 0.0112, 0.0111, 0.0105], + device='cuda:0'), out_proj_covar=tensor([1.0318e-04, 7.4968e-05, 7.7063e-05, 7.8246e-05, 6.9101e-05, 7.1575e-05, + 7.1294e-05, 6.9474e-05], device='cuda:0') +2022-12-01 23:05:45,611 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 23:05:45,682 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=14393.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:05:54,426 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.330e+02 2.206e+02 2.726e+02 3.383e+02 7.745e+02, threshold=5.453e+02, percent-clipped=2.0 +2022-12-01 23:06:04,977 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 23:06:06,250 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14413.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:06:15,752 INFO [train.py:876] Epoch 11, batch 100, loss[loss=0.1432, simple_loss=0.2021, pruned_loss=0.0421, over 4724.00 frames. ], tot_loss[loss=0.1992, simple_loss=0.2474, pruned_loss=0.07552, over 379698.90 frames. ], batch size: 27, lr: 1.64e-02, +2022-12-01 23:06:16,282 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. 
limit=2.0 +2022-12-01 23:06:19,862 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4329, 1.4672, 0.9358, 1.7349, 1.5686, 1.3102, 1.5889, 1.5167], + device='cuda:0'), covar=tensor([0.0268, 0.0308, 0.0396, 0.0199, 0.0253, 0.0324, 0.0225, 0.0335], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0035, 0.0041, 0.0028, 0.0035, 0.0033, 0.0035, 0.0032], + device='cuda:0'), out_proj_covar=tensor([3.5863e-05, 3.3083e-05, 3.9632e-05, 2.5759e-05, 3.2552e-05, 3.0582e-05, + 3.2538e-05, 3.0566e-05], device='cuda:0') +2022-12-01 23:06:20,675 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14428.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:06:29,230 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:06:33,082 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=14441.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:06:42,675 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1150, 2.4380, 2.8203, 2.6633, 2.9326, 2.9717, 2.7722, 3.5943], + device='cuda:0'), covar=tensor([0.0105, 0.0799, 0.0377, 0.0893, 0.0206, 0.0223, 0.0616, 0.0192], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0075, 0.0057, 0.0082, 0.0062, 0.0055, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.5082e-05, 8.7572e-05, 6.5718e-05, 9.5816e-05, 6.9830e-05, 6.4267e-05, + 8.1087e-05, 6.5126e-05], device='cuda:0') +2022-12-01 23:06:50,027 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14459.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:06:50,969 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 23:07:03,132 INFO [train.py:876] Epoch 11, batch 150, loss[loss=0.1303, simple_loss=0.163, pruned_loss=0.04878, over 3877.00 frames. ], tot_loss[loss=0.2023, simple_loss=0.2503, pruned_loss=0.07711, over 508005.39 frames. 
], batch size: 15, lr: 1.64e-02, +2022-12-01 23:07:04,280 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14474.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:07:05,986 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14476.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:07:10,970 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3773, 3.1479, 2.9637, 2.9086, 2.3620, 1.8097, 3.4507, 1.4691], + device='cuda:0'), covar=tensor([0.0451, 0.0333, 0.0484, 0.0784, 0.1279, 0.3516, 0.0203, 0.3053], + device='cuda:0'), in_proj_covar=tensor([0.0089, 0.0076, 0.0076, 0.0111, 0.0115, 0.0136, 0.0065, 0.0152], + device='cuda:0'), out_proj_covar=tensor([9.6325e-05, 8.7719e-05, 8.7281e-05, 1.1883e-04, 1.2313e-04, 1.4808e-04, + 7.2105e-05, 1.6191e-04], device='cuda:0') +2022-12-01 23:07:16,769 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14487.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:07:30,312 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.133e+02 1.996e+02 2.413e+02 3.465e+02 6.097e+02, threshold=4.826e+02, percent-clipped=1.0 +2022-12-01 23:07:44,205 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7231, 2.0474, 2.7189, 2.0749, 2.4817, 2.7118, 2.4544, 2.9416], + device='cuda:0'), covar=tensor([0.0137, 0.1162, 0.0438, 0.1276, 0.0277, 0.0230, 0.0772, 0.0353], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0077, 0.0058, 0.0084, 0.0063, 0.0056, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.5937e-05, 8.9293e-05, 6.7321e-05, 9.7900e-05, 7.0476e-05, 6.5031e-05, + 8.1893e-05, 6.5633e-05], device='cuda:0') +2022-12-01 23:07:50,610 INFO [train.py:876] Epoch 11, batch 200, loss[loss=0.2021, simple_loss=0.238, pruned_loss=0.08313, over 4782.00 frames. ], tot_loss[loss=0.2035, simple_loss=0.2512, pruned_loss=0.0779, over 608120.58 frames. ], batch size: 26, lr: 1.64e-02, +2022-12-01 23:08:11,644 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-01 23:08:15,125 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14548.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:08:17,732 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0 +2022-12-01 23:08:20,851 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.16 vs. limit=2.0 +2022-12-01 23:08:21,081 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1088, 4.5869, 4.2440, 4.8355, 4.1057, 3.8350, 4.5825, 4.2164], + device='cuda:0'), covar=tensor([0.0207, 0.0095, 0.0134, 0.0163, 0.0166, 0.0160, 0.0092, 0.0135], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0038, 0.0036, 0.0031, 0.0039, 0.0038, 0.0033, 0.0036], + device='cuda:0'), out_proj_covar=tensor([4.2006e-05, 3.7128e-05, 3.6098e-05, 3.0804e-05, 4.0272e-05, 3.9610e-05, + 3.0511e-05, 3.6699e-05], device='cuda:0') +2022-12-01 23:08:39,240 INFO [train.py:876] Epoch 11, batch 250, loss[loss=0.1704, simple_loss=0.2176, pruned_loss=0.06159, over 4802.00 frames. ], tot_loss[loss=0.2004, simple_loss=0.2479, pruned_loss=0.07642, over 684119.68 frames. ], batch size: 32, lr: 1.64e-02, +2022-12-01 23:08:42,434 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. 
Duration: 0.9454375 +2022-12-01 23:08:49,517 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0795, 3.1765, 3.4462, 3.4186, 2.8464, 3.3387, 3.4090, 2.9935], + device='cuda:0'), covar=tensor([0.4282, 0.0757, 0.0717, 0.0263, 0.0678, 0.0801, 0.0380, 0.0704], + device='cuda:0'), in_proj_covar=tensor([0.0193, 0.0091, 0.0126, 0.0087, 0.0109, 0.0095, 0.0081, 0.0102], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-01 23:09:06,900 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.103e+02 2.166e+02 2.836e+02 3.675e+02 9.423e+02, threshold=5.672e+02, percent-clipped=6.0 +2022-12-01 23:09:13,314 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.00 vs. limit=5.0 +2022-12-01 23:09:28,618 INFO [train.py:876] Epoch 11, batch 300, loss[loss=0.2041, simple_loss=0.2566, pruned_loss=0.0758, over 4851.00 frames. ], tot_loss[loss=0.1998, simple_loss=0.2473, pruned_loss=0.07617, over 744715.34 frames. ], batch size: 47, lr: 1.63e-02, +2022-12-01 23:09:43,570 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 23:10:18,257 INFO [train.py:876] Epoch 11, batch 350, loss[loss=0.207, simple_loss=0.2503, pruned_loss=0.08181, over 4794.00 frames. ], tot_loss[loss=0.1983, simple_loss=0.2465, pruned_loss=0.07502, over 792331.97 frames. ], batch size: 33, lr: 1.63e-02, +2022-12-01 23:10:45,192 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.420e+02 2.182e+02 2.714e+02 3.454e+02 6.830e+02, threshold=5.429e+02, percent-clipped=2.0 +2022-12-01 23:10:58,425 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4115, 1.3223, 1.0225, 1.7125, 1.6779, 1.1903, 1.4425, 1.2948], + device='cuda:0'), covar=tensor([0.0359, 0.0501, 0.0443, 0.0211, 0.0305, 0.0388, 0.0382, 0.0467], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0036, 0.0042, 0.0029, 0.0036, 0.0034, 0.0035, 0.0032], + device='cuda:0'), out_proj_covar=tensor([3.6424e-05, 3.4172e-05, 4.0243e-05, 2.6411e-05, 3.3306e-05, 3.1699e-05, + 3.2625e-05, 3.0806e-05], device='cuda:0') +2022-12-01 23:11:06,892 INFO [train.py:876] Epoch 11, batch 400, loss[loss=0.1471, simple_loss=0.1879, pruned_loss=0.05315, over 4612.00 frames. ], tot_loss[loss=0.1979, simple_loss=0.2458, pruned_loss=0.07498, over 828167.92 frames. ], batch size: 21, lr: 1.63e-02, +2022-12-01 23:11:12,038 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=14728.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:11:24,176 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=14740.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:11:25,854 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 23:11:42,446 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=14759.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:11:52,337 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14769.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:11:53,288 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:11:56,209 INFO [train.py:876] Epoch 11, batch 450, loss[loss=0.2098, simple_loss=0.2618, pruned_loss=0.07894, over 4886.00 frames. ], tot_loss[loss=0.197, simple_loss=0.2447, pruned_loss=0.07461, over 854691.77 frames. 
], batch size: 37, lr: 1.63e-02, +2022-12-01 23:11:59,203 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=14776.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:11:59,249 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=14776.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:12:23,581 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.157e+02 2.206e+02 2.550e+02 3.094e+02 6.719e+02, threshold=5.101e+02, percent-clipped=3.0 +2022-12-01 23:12:23,868 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=14801.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:12:29,523 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=14807.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:12:44,654 INFO [train.py:876] Epoch 11, batch 500, loss[loss=0.1973, simple_loss=0.2464, pruned_loss=0.0741, over 4842.00 frames. ], tot_loss[loss=0.1973, simple_loss=0.245, pruned_loss=0.07478, over 876372.23 frames. ], batch size: 49, lr: 1.62e-02, +2022-12-01 23:12:45,602 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=14824.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:12:50,182 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1811, 4.7336, 4.4191, 4.9421, 4.0949, 4.0071, 4.7759, 4.1540], + device='cuda:0'), covar=tensor([0.0195, 0.0084, 0.0115, 0.0126, 0.0153, 0.0149, 0.0078, 0.0127], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0037, 0.0036, 0.0030, 0.0038, 0.0038, 0.0033, 0.0036], + device='cuda:0'), out_proj_covar=tensor([4.2072e-05, 3.6197e-05, 3.5660e-05, 2.9942e-05, 3.9855e-05, 3.9283e-05, + 3.0360e-05, 3.6075e-05], device='cuda:0') +2022-12-01 23:12:57,439 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.11 vs. limit=2.0 +2022-12-01 23:13:03,905 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=14843.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:13:16,906 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3294, 2.8437, 3.4631, 3.1540, 3.4578, 2.7431, 3.1258, 3.6460], + device='cuda:0'), covar=tensor([0.0238, 0.0475, 0.0153, 0.0269, 0.0162, 0.0515, 0.0179, 0.0203], + device='cuda:0'), in_proj_covar=tensor([0.0073, 0.0093, 0.0065, 0.0077, 0.0063, 0.0104, 0.0062, 0.0061], + device='cuda:0'), out_proj_covar=tensor([6.1556e-05, 7.8142e-05, 5.4275e-05, 6.6470e-05, 5.3120e-05, 9.0117e-05, + 5.4887e-05, 5.1752e-05], device='cuda:0') +2022-12-01 23:13:33,011 INFO [train.py:876] Epoch 11, batch 550, loss[loss=0.2679, simple_loss=0.2998, pruned_loss=0.118, over 4788.00 frames. ], tot_loss[loss=0.1992, simple_loss=0.2468, pruned_loss=0.07584, over 892809.46 frames. ], batch size: 54, lr: 1.62e-02, +2022-12-01 23:13:37,062 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9335, 2.5391, 2.9793, 3.1565, 2.9235, 3.0075, 3.3168, 3.5886], + device='cuda:0'), covar=tensor([0.0129, 0.0991, 0.0362, 0.0614, 0.0275, 0.0290, 0.0517, 0.0235], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0076, 0.0057, 0.0081, 0.0061, 0.0055, 0.0072, 0.0061], + device='cuda:0'), out_proj_covar=tensor([5.5443e-05, 8.8338e-05, 6.6310e-05, 9.4606e-05, 6.8943e-05, 6.4582e-05, + 8.1893e-05, 6.6704e-05], device='cuda:0') +2022-12-01 23:13:48,552 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.12 vs. 
limit=2.0 +2022-12-01 23:14:00,675 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.346e+02 2.266e+02 2.766e+02 3.499e+02 9.964e+02, threshold=5.533e+02, percent-clipped=9.0 +2022-12-01 23:14:15,297 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.66 vs. limit=5.0 +2022-12-01 23:14:22,703 INFO [train.py:876] Epoch 11, batch 600, loss[loss=0.2114, simple_loss=0.2554, pruned_loss=0.0837, over 4831.00 frames. ], tot_loss[loss=0.1969, simple_loss=0.245, pruned_loss=0.07438, over 904740.59 frames. ], batch size: 35, lr: 1.62e-02, +2022-12-01 23:14:23,979 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3998, 1.8462, 1.8783, 2.3171, 1.7874, 2.0372, 1.5453, 2.0967], + device='cuda:0'), covar=tensor([0.0831, 0.1360, 0.0946, 0.0543, 0.1248, 0.1049, 0.1230, 0.0589], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0062, 0.0079, 0.0064, 0.0090, 0.0073, 0.0088, 0.0066], + device='cuda:0'), out_proj_covar=tensor([6.1058e-05, 6.1951e-05, 7.5806e-05, 6.2082e-05, 8.6800e-05, 7.0605e-05, + 8.5665e-05, 6.3464e-05], device='cuda:0') +2022-12-01 23:14:59,924 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.99 vs. limit=2.0 +2022-12-01 23:15:11,954 INFO [train.py:876] Epoch 11, batch 650, loss[loss=0.2344, simple_loss=0.2855, pruned_loss=0.09161, over 4838.00 frames. ], tot_loss[loss=0.1997, simple_loss=0.2473, pruned_loss=0.07601, over 912592.86 frames. ], batch size: 49, lr: 1.62e-02, +2022-12-01 23:15:32,107 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7515, 2.3321, 2.3511, 2.7414, 2.1083, 2.3148, 1.7066, 2.3815], + device='cuda:0'), covar=tensor([0.0705, 0.1272, 0.0633, 0.0390, 0.0914, 0.0836, 0.1157, 0.0406], + device='cuda:0'), in_proj_covar=tensor([0.0062, 0.0061, 0.0077, 0.0064, 0.0089, 0.0072, 0.0087, 0.0065], + device='cuda:0'), out_proj_covar=tensor([6.0138e-05, 6.0963e-05, 7.4365e-05, 6.1686e-05, 8.5793e-05, 6.9270e-05, + 8.5236e-05, 6.2348e-05], device='cuda:0') +2022-12-01 23:15:38,731 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.519e+02 2.200e+02 2.869e+02 3.500e+02 8.778e+02, threshold=5.738e+02, percent-clipped=2.0 +2022-12-01 23:16:00,199 INFO [train.py:876] Epoch 11, batch 700, loss[loss=0.2076, simple_loss=0.2486, pruned_loss=0.0833, over 4796.00 frames. ], tot_loss[loss=0.2025, simple_loss=0.2503, pruned_loss=0.07739, over 922025.65 frames. ], batch size: 33, lr: 1.61e-02, +2022-12-01 23:16:45,225 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=15069.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:16:49,233 INFO [train.py:876] Epoch 11, batch 750, loss[loss=0.1242, simple_loss=0.1708, pruned_loss=0.03876, over 4682.00 frames. ], tot_loss[loss=0.2008, simple_loss=0.2484, pruned_loss=0.07654, over 928339.93 frames. 
], batch size: 21, lr: 1.61e-02, +2022-12-01 23:16:49,583 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7410, 3.2005, 3.2680, 3.5708, 3.2807, 3.5006, 3.4225, 3.1913], + device='cuda:0'), covar=tensor([0.0149, 0.0244, 0.0205, 0.0151, 0.0177, 0.0144, 0.0165, 0.0246], + device='cuda:0'), in_proj_covar=tensor([0.0076, 0.0095, 0.0078, 0.0080, 0.0080, 0.0075, 0.0079, 0.0098], + device='cuda:0'), out_proj_covar=tensor([5.2903e-05, 6.6178e-05, 5.3860e-05, 5.3651e-05, 5.5116e-05, 5.0026e-05, + 5.3138e-05, 7.0052e-05], device='cuda:0') +2022-12-01 23:17:11,955 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=15096.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:17:16,646 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.394e+02 2.071e+02 2.365e+02 3.046e+02 8.037e+02, threshold=4.730e+02, percent-clipped=2.0 +2022-12-01 23:17:21,840 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7249, 2.8080, 2.7721, 2.8423, 2.4789, 2.8696, 3.0507, 2.9093], + device='cuda:0'), covar=tensor([0.0197, 0.0245, 0.0200, 0.0157, 0.0415, 0.0256, 0.0158, 0.0156], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0051, 0.0043, 0.0041, 0.0056, 0.0051, 0.0039, 0.0041], + device='cuda:0'), out_proj_covar=tensor([3.4261e-05, 3.9176e-05, 3.1757e-05, 2.9194e-05, 4.2364e-05, 3.7781e-05, + 2.8607e-05, 2.9689e-05], device='cuda:0') +2022-12-01 23:17:32,246 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=15117.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:17:37,901 INFO [train.py:876] Epoch 11, batch 800, loss[loss=0.183, simple_loss=0.2362, pruned_loss=0.06494, over 4822.00 frames. ], tot_loss[loss=0.1998, simple_loss=0.2482, pruned_loss=0.07571, over 935872.60 frames. ], batch size: 34, lr: 1.61e-02, +2022-12-01 23:17:44,049 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. limit=2.0 +2022-12-01 23:17:57,412 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=15143.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:18:25,947 INFO [train.py:876] Epoch 11, batch 850, loss[loss=0.2063, simple_loss=0.2488, pruned_loss=0.08185, over 4852.00 frames. ], tot_loss[loss=0.1992, simple_loss=0.2479, pruned_loss=0.0752, over 941747.92 frames. ], batch size: 35, lr: 1.61e-02, +2022-12-01 23:18:43,418 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=15191.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:18:47,618 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=15195.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:18:53,461 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.522e+02 2.245e+02 2.653e+02 3.323e+02 5.381e+02, threshold=5.306e+02, percent-clipped=4.0 +2022-12-01 23:19:14,426 INFO [train.py:876] Epoch 11, batch 900, loss[loss=0.1279, simple_loss=0.1716, pruned_loss=0.04205, over 4665.00 frames. ], tot_loss[loss=0.1998, simple_loss=0.249, pruned_loss=0.0753, over 945726.14 frames. 
], batch size: 21, lr: 1.61e-02, +2022-12-01 23:19:45,764 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6037, 2.4274, 2.3050, 2.4446, 2.0502, 2.5758, 1.4190, 2.2682], + device='cuda:0'), covar=tensor([0.0595, 0.1056, 0.0744, 0.0569, 0.0992, 0.0670, 0.1222, 0.0494], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0059, 0.0076, 0.0064, 0.0086, 0.0071, 0.0085, 0.0064], + device='cuda:0'), out_proj_covar=tensor([5.9265e-05, 5.9603e-05, 7.3015e-05, 6.1786e-05, 8.3445e-05, 6.8829e-05, + 8.2889e-05, 6.1784e-05], device='cuda:0') +2022-12-01 23:19:46,742 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=15256.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 23:20:02,629 INFO [train.py:876] Epoch 11, batch 950, loss[loss=0.1545, simple_loss=0.2042, pruned_loss=0.05235, over 4780.00 frames. ], tot_loss[loss=0.199, simple_loss=0.2482, pruned_loss=0.07491, over 945118.14 frames. ], batch size: 26, lr: 1.60e-02, +2022-12-01 23:20:05,944 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-01 23:20:29,828 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.517e+02 2.333e+02 3.023e+02 3.754e+02 7.765e+02, threshold=6.045e+02, percent-clipped=6.0 +2022-12-01 23:20:43,940 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7807, 2.6362, 2.8073, 2.9405, 2.7834, 3.0156, 3.0224, 2.8885], + device='cuda:0'), covar=tensor([0.0232, 0.0320, 0.0212, 0.0180, 0.0365, 0.0260, 0.0174, 0.0167], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0054, 0.0045, 0.0044, 0.0058, 0.0053, 0.0041, 0.0042], + device='cuda:0'), out_proj_covar=tensor([3.6114e-05, 4.1515e-05, 3.3225e-05, 3.1483e-05, 4.4131e-05, 3.9573e-05, + 3.0551e-05, 3.0674e-05], device='cuda:0') +2022-12-01 23:20:51,463 INFO [train.py:876] Epoch 11, batch 1000, loss[loss=0.1397, simple_loss=0.1982, pruned_loss=0.04057, over 4825.00 frames. ], tot_loss[loss=0.1992, simple_loss=0.2481, pruned_loss=0.07514, over 947223.89 frames. ], batch size: 25, lr: 1.60e-02, +2022-12-01 23:20:54,730 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4343, 3.3436, 3.2833, 3.3866, 3.5677, 3.3890, 3.4134, 3.4955], + device='cuda:0'), covar=tensor([0.1195, 0.0395, 0.0456, 0.0353, 0.0244, 0.0376, 0.0375, 0.0295], + device='cuda:0'), in_proj_covar=tensor([0.0162, 0.0123, 0.0124, 0.0121, 0.0118, 0.0120, 0.0118, 0.0108], + device='cuda:0'), out_proj_covar=tensor([1.0766e-04, 8.0486e-05, 8.2412e-05, 8.1327e-05, 7.7136e-05, 7.8214e-05, + 7.6274e-05, 7.1939e-05], device='cuda:0') +2022-12-01 23:21:33,188 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.14 vs. limit=2.0 +2022-12-01 23:21:37,981 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4260, 1.8559, 1.3298, 1.4470, 1.2864, 1.6115, 1.6221, 1.4599], + device='cuda:0'), covar=tensor([0.0945, 0.0305, 0.1237, 0.0676, 0.1027, 0.0947, 0.0600, 0.0561], + device='cuda:0'), in_proj_covar=tensor([0.0051, 0.0049, 0.0047, 0.0052, 0.0047, 0.0040, 0.0044, 0.0044], + device='cuda:0'), out_proj_covar=tensor([4.4056e-05, 4.2514e-05, 4.2236e-05, 4.5285e-05, 4.1212e-05, 3.5654e-05, + 3.9131e-05, 3.8560e-05], device='cuda:0') +2022-12-01 23:21:40,726 INFO [train.py:876] Epoch 11, batch 1050, loss[loss=0.2043, simple_loss=0.2547, pruned_loss=0.07699, over 4848.00 frames. ], tot_loss[loss=0.1985, simple_loss=0.2475, pruned_loss=0.0747, over 948175.63 frames. 
], batch size: 40, lr: 1.60e-02, +2022-12-01 23:21:58,697 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1050, 2.9450, 2.8944, 3.1966, 2.8437, 3.1156, 3.1599, 3.0753], + device='cuda:0'), covar=tensor([0.0149, 0.0211, 0.0176, 0.0126, 0.0288, 0.0223, 0.0117, 0.0122], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0053, 0.0044, 0.0043, 0.0056, 0.0052, 0.0039, 0.0041], + device='cuda:0'), out_proj_covar=tensor([3.4566e-05, 4.0404e-05, 3.2623e-05, 3.0746e-05, 4.2630e-05, 3.8226e-05, + 2.8936e-05, 2.9823e-05], device='cuda:0') +2022-12-01 23:22:03,195 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=15396.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:22:08,121 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.135e+02 2.135e+02 2.545e+02 3.082e+02 5.457e+02, threshold=5.090e+02, percent-clipped=0.0 +2022-12-01 23:22:14,351 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5720, 2.3473, 2.4453, 2.8714, 2.4727, 2.7272, 2.7613, 2.7041], + device='cuda:0'), covar=tensor([0.0240, 0.0334, 0.0260, 0.0165, 0.0399, 0.0298, 0.0184, 0.0183], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0053, 0.0044, 0.0043, 0.0056, 0.0052, 0.0039, 0.0041], + device='cuda:0'), out_proj_covar=tensor([3.4649e-05, 4.0630e-05, 3.2581e-05, 3.0836e-05, 4.2779e-05, 3.8567e-05, + 2.9103e-05, 2.9934e-05], device='cuda:0') +2022-12-01 23:22:29,772 INFO [train.py:876] Epoch 11, batch 1100, loss[loss=0.116, simple_loss=0.1618, pruned_loss=0.03513, over 3438.00 frames. ], tot_loss[loss=0.1955, simple_loss=0.2447, pruned_loss=0.07318, over 947611.68 frames. ], batch size: 13, lr: 1.60e-02, +2022-12-01 23:22:50,175 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=15444.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:23:18,749 INFO [train.py:876] Epoch 11, batch 1150, loss[loss=0.2665, simple_loss=0.2986, pruned_loss=0.1173, over 4844.00 frames. ], tot_loss[loss=0.1946, simple_loss=0.2431, pruned_loss=0.07301, over 949339.06 frames. ], batch size: 47, lr: 1.59e-02, +2022-12-01 23:23:19,430 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.68 vs. limit=5.0 +2022-12-01 23:23:46,400 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.409e+02 2.226e+02 2.737e+02 3.390e+02 8.297e+02, threshold=5.473e+02, percent-clipped=6.0 +2022-12-01 23:24:07,486 INFO [train.py:876] Epoch 11, batch 1200, loss[loss=0.2167, simple_loss=0.2708, pruned_loss=0.08129, over 4812.00 frames. ], tot_loss[loss=0.1945, simple_loss=0.2434, pruned_loss=0.07278, over 951061.28 frames. ], batch size: 42, lr: 1.59e-02, +2022-12-01 23:24:16,105 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-01 23:24:34,664 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=15551.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 23:24:56,128 INFO [train.py:876] Epoch 11, batch 1250, loss[loss=0.2001, simple_loss=0.2548, pruned_loss=0.0727, over 4839.00 frames. ], tot_loss[loss=0.1952, simple_loss=0.2439, pruned_loss=0.07321, over 950471.55 frames. 
], batch size: 41, lr: 1.59e-02, +2022-12-01 23:25:23,891 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.201e+02 2.034e+02 2.559e+02 3.391e+02 1.091e+03, threshold=5.118e+02, percent-clipped=4.0 +2022-12-01 23:25:31,039 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7414, 4.3727, 4.1097, 4.5868, 4.0978, 3.6820, 4.4745, 4.1155], + device='cuda:0'), covar=tensor([0.0269, 0.0140, 0.0152, 0.0164, 0.0167, 0.0189, 0.0112, 0.0145], + device='cuda:0'), in_proj_covar=tensor([0.0041, 0.0038, 0.0039, 0.0031, 0.0040, 0.0040, 0.0035, 0.0037], + device='cuda:0'), out_proj_covar=tensor([4.3828e-05, 3.6836e-05, 3.8270e-05, 3.0202e-05, 4.0943e-05, 4.0883e-05, + 3.2077e-05, 3.7293e-05], device='cuda:0') +2022-12-01 23:25:37,094 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8120, 1.4992, 1.5797, 1.5043, 1.6085, 1.7172, 1.5838, 1.4726], + device='cuda:0'), covar=tensor([0.0659, 0.0456, 0.1097, 0.0515, 0.0821, 0.0804, 0.0464, 0.0651], + device='cuda:0'), in_proj_covar=tensor([0.0051, 0.0049, 0.0047, 0.0050, 0.0046, 0.0038, 0.0043, 0.0044], + device='cuda:0'), out_proj_covar=tensor([4.4292e-05, 4.2637e-05, 4.2139e-05, 4.3759e-05, 4.0521e-05, 3.4250e-05, + 3.8296e-05, 3.8751e-05], device='cuda:0') +2022-12-01 23:25:42,829 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.1931, 1.1255, 1.3442, 0.7004, 0.7569, 1.0665, 1.2667, 1.5258], + device='cuda:0'), covar=tensor([0.0347, 0.0275, 0.0259, 0.0374, 0.0468, 0.0284, 0.0255, 0.0160], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0032, 0.0032, 0.0034, 0.0039, 0.0035, 0.0041, 0.0031], + device='cuda:0'), out_proj_covar=tensor([3.1338e-05, 2.5878e-05, 2.6726e-05, 2.7171e-05, 3.2706e-05, 2.9339e-05, + 3.4415e-05, 2.5770e-05], device='cuda:0') +2022-12-01 23:25:45,389 INFO [train.py:876] Epoch 11, batch 1300, loss[loss=0.1967, simple_loss=0.2504, pruned_loss=0.07151, over 4861.00 frames. ], tot_loss[loss=0.1958, simple_loss=0.2449, pruned_loss=0.07334, over 952677.99 frames. ], batch size: 40, lr: 1.59e-02, +2022-12-01 23:26:34,419 INFO [train.py:876] Epoch 11, batch 1350, loss[loss=0.2249, simple_loss=0.2847, pruned_loss=0.08257, over 4883.00 frames. ], tot_loss[loss=0.1965, simple_loss=0.246, pruned_loss=0.07354, over 952071.46 frames. ], batch size: 44, lr: 1.58e-02, +2022-12-01 23:26:49,692 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.71 vs. 
limit=5.0 +2022-12-01 23:26:58,142 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1645, 1.7837, 2.1869, 1.7326, 1.7116, 2.1551, 1.7496, 1.6549], + device='cuda:0'), covar=tensor([0.0668, 0.0449, 0.0523, 0.0475, 0.0493, 0.0749, 0.0418, 0.0484], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0050, 0.0046, 0.0051, 0.0046, 0.0039, 0.0044, 0.0044], + device='cuda:0'), out_proj_covar=tensor([4.3807e-05, 4.3184e-05, 4.1376e-05, 4.4704e-05, 4.0578e-05, 3.5043e-05, + 3.8198e-05, 3.8758e-05], device='cuda:0') +2022-12-01 23:27:01,856 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.189e+02 2.120e+02 2.545e+02 3.252e+02 9.546e+02, threshold=5.090e+02, percent-clipped=6.0 +2022-12-01 23:27:02,905 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4912, 3.9333, 3.7231, 4.1388, 3.6873, 3.5060, 3.9816, 3.7020], + device='cuda:0'), covar=tensor([0.0209, 0.0097, 0.0142, 0.0118, 0.0154, 0.0161, 0.0088, 0.0131], + device='cuda:0'), in_proj_covar=tensor([0.0041, 0.0038, 0.0039, 0.0030, 0.0040, 0.0039, 0.0035, 0.0038], + device='cuda:0'), out_proj_covar=tensor([4.3133e-05, 3.6940e-05, 3.8330e-05, 2.9824e-05, 4.1091e-05, 4.0593e-05, + 3.2254e-05, 3.7653e-05], device='cuda:0') +2022-12-01 23:27:23,311 INFO [train.py:876] Epoch 11, batch 1400, loss[loss=0.276, simple_loss=0.3006, pruned_loss=0.1257, over 4098.00 frames. ], tot_loss[loss=0.1971, simple_loss=0.2456, pruned_loss=0.07428, over 944942.72 frames. ], batch size: 72, lr: 1.58e-02, +2022-12-01 23:27:53,349 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-11.pt +2022-12-01 23:28:02,497 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 23:28:03,470 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 23:28:03,767 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:28:03,800 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 23:28:04,985 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 23:28:05,308 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 23:28:06,896 INFO [train.py:876] Epoch 12, batch 0, loss[loss=0.1718, simple_loss=0.2391, pruned_loss=0.05221, over 4881.00 frames. ], tot_loss[loss=0.1718, simple_loss=0.2391, pruned_loss=0.05221, over 4881.00 frames. 
], batch size: 37, lr: 1.51e-02, +2022-12-01 23:28:06,897 INFO [train.py:901] Computing validation loss +2022-12-01 23:28:08,688 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1980, 2.8263, 3.3041, 3.5185, 2.9225, 3.2300, 3.3687, 3.8532], + device='cuda:0'), covar=tensor([0.0092, 0.0978, 0.0382, 0.0683, 0.0292, 0.0250, 0.0545, 0.0234], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0084, 0.0064, 0.0089, 0.0067, 0.0061, 0.0079, 0.0067], + device='cuda:0'), out_proj_covar=tensor([5.9987e-05, 9.7017e-05, 7.4354e-05, 1.0427e-04, 7.6596e-05, 7.2004e-05, + 9.1160e-05, 7.3365e-05], device='cuda:0') +2022-12-01 23:28:11,083 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.6231, 4.5540, 4.9345, 4.4218, 4.5175, 4.5839, 4.6738, 4.4714], + device='cuda:0'), covar=tensor([0.0452, 0.0356, 0.0331, 0.0398, 0.0859, 0.0376, 0.0540, 0.0316], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0094, 0.0101, 0.0102, 0.0086, 0.0104, 0.0122, 0.0089], + device='cuda:0'), out_proj_covar=tensor([1.0497e-04, 7.1153e-05, 8.5867e-05, 8.3617e-05, 7.2336e-05, 8.7518e-05, + 1.0121e-04, 7.1695e-05], device='cuda:0') +2022-12-01 23:28:22,620 INFO [train.py:910] Epoch 12, validation: loss=0.234, simple_loss=0.282, pruned_loss=0.093, over 253132.00 frames. +2022-12-01 23:28:22,621 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 23:28:24,856 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5624, 1.6779, 1.0251, 1.8613, 1.8649, 1.5916, 1.7881, 1.6697], + device='cuda:0'), covar=tensor([0.0305, 0.0387, 0.0435, 0.0406, 0.0323, 0.0408, 0.0252, 0.0337], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0037, 0.0041, 0.0031, 0.0037, 0.0035, 0.0036, 0.0032], + device='cuda:0'), out_proj_covar=tensor([3.6140e-05, 3.4683e-05, 3.9165e-05, 2.8267e-05, 3.4435e-05, 3.2927e-05, + 3.4202e-05, 3.1383e-05], device='cuda:0') +2022-12-01 23:29:06,286 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0564, 3.1491, 3.1411, 3.1303, 3.0576, 3.2659, 3.3100, 3.3501], + device='cuda:0'), covar=tensor([0.1483, 0.0379, 0.0405, 0.0410, 0.0520, 0.0335, 0.0291, 0.0267], + device='cuda:0'), in_proj_covar=tensor([0.0171, 0.0130, 0.0129, 0.0128, 0.0127, 0.0124, 0.0123, 0.0118], + device='cuda:0'), out_proj_covar=tensor([1.1386e-04, 8.6161e-05, 8.6082e-05, 8.6248e-05, 8.3353e-05, 8.0404e-05, + 7.9877e-05, 7.8299e-05], device='cuda:0') +2022-12-01 23:29:08,320 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.360e+02 2.194e+02 2.655e+02 3.407e+02 5.302e+02, threshold=5.310e+02, percent-clipped=1.0 +2022-12-01 23:29:12,221 INFO [train.py:876] Epoch 12, batch 50, loss[loss=0.2071, simple_loss=0.2522, pruned_loss=0.08105, over 4062.00 frames. ], tot_loss[loss=0.1841, simple_loss=0.2352, pruned_loss=0.06648, over 214750.16 frames. 
], batch size: 72, lr: 1.51e-02, +2022-12-01 23:29:31,692 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7894, 4.3794, 4.3694, 4.5811, 4.3935, 4.5805, 4.0445, 4.6719], + device='cuda:0'), covar=tensor([0.0897, 0.0261, 0.0405, 0.0259, 0.0341, 0.0222, 0.0238, 0.0240], + device='cuda:0'), in_proj_covar=tensor([0.0173, 0.0132, 0.0131, 0.0129, 0.0128, 0.0125, 0.0125, 0.0118], + device='cuda:0'), out_proj_covar=tensor([1.1503e-04, 8.7107e-05, 8.7195e-05, 8.6642e-05, 8.4148e-05, 8.1283e-05, + 8.0952e-05, 7.8741e-05], device='cuda:0') +2022-12-01 23:29:33,663 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0824, 1.4996, 2.1256, 2.8928, 2.5149, 2.9017, 2.7240, 3.1870], + device='cuda:0'), covar=tensor([0.0325, 0.1529, 0.1560, 0.0466, 0.0546, 0.0500, 0.0700, 0.0377], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0073, 0.0083, 0.0055, 0.0058, 0.0058, 0.0061, 0.0064], + device='cuda:0'), out_proj_covar=tensor([5.5236e-05, 8.3790e-05, 9.0089e-05, 6.0169e-05, 5.7466e-05, 6.4821e-05, + 6.5972e-05, 6.0559e-05], device='cuda:0') +2022-12-01 23:29:35,068 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 23:29:46,171 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-01 23:29:57,358 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=15851.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:30:01,417 INFO [train.py:876] Epoch 12, batch 100, loss[loss=0.2236, simple_loss=0.278, pruned_loss=0.0846, over 4714.00 frames. ], tot_loss[loss=0.1888, simple_loss=0.2392, pruned_loss=0.06926, over 375346.56 frames. ], batch size: 63, lr: 1.51e-02, +2022-12-01 23:30:08,318 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3742, 1.8928, 2.2872, 2.1482, 1.9794, 1.6344, 1.5850, 2.2491], + device='cuda:0'), covar=tensor([0.1146, 0.1624, 0.0956, 0.1042, 0.1320, 0.1616, 0.2029, 0.0589], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0066, 0.0080, 0.0068, 0.0092, 0.0074, 0.0090, 0.0067], + device='cuda:0'), out_proj_covar=tensor([6.4840e-05, 6.5583e-05, 7.7826e-05, 6.5568e-05, 8.9158e-05, 7.2159e-05, + 8.8643e-05, 6.5529e-05], device='cuda:0') +2022-12-01 23:30:13,837 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:30:28,704 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=15883.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:30:36,097 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 23:30:43,688 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=15899.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:30:45,576 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.347e+02 2.168e+02 2.658e+02 3.326e+02 6.331e+02, threshold=5.315e+02, percent-clipped=3.0 +2022-12-01 23:30:49,384 INFO [train.py:876] Epoch 12, batch 150, loss[loss=0.1682, simple_loss=0.2115, pruned_loss=0.06247, over 4700.00 frames. ], tot_loss[loss=0.1908, simple_loss=0.2421, pruned_loss=0.06975, over 503897.82 frames. 
], batch size: 23, lr: 1.51e-02, +2022-12-01 23:31:03,341 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4414, 2.8402, 3.0688, 3.1857, 3.1885, 3.2755, 3.0282, 2.9537], + device='cuda:0'), covar=tensor([0.0197, 0.0271, 0.0213, 0.0230, 0.0174, 0.0152, 0.0191, 0.0273], + device='cuda:0'), in_proj_covar=tensor([0.0078, 0.0099, 0.0082, 0.0083, 0.0082, 0.0078, 0.0084, 0.0104], + device='cuda:0'), out_proj_covar=tensor([5.4047e-05, 6.8113e-05, 5.6638e-05, 5.5854e-05, 5.5566e-05, 5.2174e-05, + 5.5938e-05, 7.4301e-05], device='cuda:0') +2022-12-01 23:31:06,994 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8821, 3.9087, 4.1756, 3.7075, 4.0447, 3.9392, 3.8398, 3.7170], + device='cuda:0'), covar=tensor([0.0627, 0.0468, 0.0461, 0.0526, 0.0612, 0.0500, 0.0604, 0.0517], + device='cuda:0'), in_proj_covar=tensor([0.0125, 0.0095, 0.0104, 0.0106, 0.0087, 0.0105, 0.0125, 0.0090], + device='cuda:0'), out_proj_covar=tensor([1.0684e-04, 7.2107e-05, 8.8414e-05, 8.6564e-05, 7.3543e-05, 8.7769e-05, + 1.0419e-04, 7.2749e-05], device='cuda:0') +2022-12-01 23:31:27,981 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=15944.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:31:30,984 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7374, 3.1317, 3.3242, 3.4907, 3.4459, 3.6205, 3.4720, 3.2803], + device='cuda:0'), covar=tensor([0.0191, 0.0252, 0.0190, 0.0153, 0.0164, 0.0141, 0.0150, 0.0255], + device='cuda:0'), in_proj_covar=tensor([0.0080, 0.0100, 0.0083, 0.0085, 0.0084, 0.0079, 0.0086, 0.0106], + device='cuda:0'), out_proj_covar=tensor([5.5458e-05, 6.9252e-05, 5.7413e-05, 5.7013e-05, 5.6849e-05, 5.2967e-05, + 5.7691e-05, 7.6088e-05], device='cuda:0') +2022-12-01 23:31:31,885 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1679, 3.2797, 3.4109, 3.2400, 3.4909, 3.3629, 3.4817, 3.5215], + device='cuda:0'), covar=tensor([0.1464, 0.0432, 0.0386, 0.0402, 0.0353, 0.0362, 0.0277, 0.0273], + device='cuda:0'), in_proj_covar=tensor([0.0168, 0.0131, 0.0129, 0.0127, 0.0126, 0.0123, 0.0123, 0.0116], + device='cuda:0'), out_proj_covar=tensor([1.1170e-04, 8.6442e-05, 8.6266e-05, 8.4980e-05, 8.2947e-05, 7.9656e-05, + 8.0210e-05, 7.6730e-05], device='cuda:0') +2022-12-01 23:31:38,527 INFO [train.py:876] Epoch 12, batch 200, loss[loss=0.2501, simple_loss=0.3012, pruned_loss=0.0995, over 4816.00 frames. ], tot_loss[loss=0.1912, simple_loss=0.2428, pruned_loss=0.06983, over 603787.94 frames. ], batch size: 42, lr: 1.51e-02, +2022-12-01 23:32:08,610 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. limit=2.0 +2022-12-01 23:32:20,833 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 23:32:22,969 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-16000.pt +2022-12-01 23:32:26,201 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.466e+02 2.092e+02 2.873e+02 3.408e+02 7.489e+02, threshold=5.747e+02, percent-clipped=5.0 +2022-12-01 23:32:26,431 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16001.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:32:30,174 INFO [train.py:876] Epoch 12, batch 250, loss[loss=0.1933, simple_loss=0.2462, pruned_loss=0.07026, over 4792.00 frames. ], tot_loss[loss=0.1897, simple_loss=0.2413, pruned_loss=0.06902, over 682262.76 frames. 
], batch size: 58, lr: 1.50e-02, +2022-12-01 23:33:16,615 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0425, 3.1798, 3.7360, 3.3681, 3.1872, 3.6974, 3.8909, 3.0768], + device='cuda:0'), covar=tensor([0.5194, 0.0740, 0.0628, 0.0257, 0.0531, 0.1195, 0.0252, 0.0651], + device='cuda:0'), in_proj_covar=tensor([0.0191, 0.0098, 0.0127, 0.0095, 0.0112, 0.0099, 0.0085, 0.0109], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-01 23:33:17,370 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 23:33:18,370 INFO [train.py:876] Epoch 12, batch 300, loss[loss=0.2053, simple_loss=0.2739, pruned_loss=0.06838, over 4840.00 frames. ], tot_loss[loss=0.1915, simple_loss=0.2432, pruned_loss=0.06994, over 739549.93 frames. ], batch size: 41, lr: 1.50e-02, +2022-12-01 23:33:25,877 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16062.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:33:37,935 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. limit=2.0 +2022-12-01 23:33:48,752 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16085.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:34:04,083 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.334e+02 2.119e+02 2.631e+02 3.294e+02 8.863e+02, threshold=5.261e+02, percent-clipped=4.0 +2022-12-01 23:34:07,715 INFO [train.py:876] Epoch 12, batch 350, loss[loss=0.1744, simple_loss=0.2192, pruned_loss=0.06484, over 4730.00 frames. ], tot_loss[loss=0.1906, simple_loss=0.242, pruned_loss=0.06967, over 785595.07 frames. ], batch size: 27, lr: 1.50e-02, +2022-12-01 23:34:40,445 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9142, 1.6341, 1.7219, 1.1583, 1.3260, 1.7124, 1.7464, 1.6045], + device='cuda:0'), covar=tensor([0.0607, 0.0604, 0.1259, 0.0938, 0.1021, 0.0924, 0.0479, 0.0352], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0049, 0.0047, 0.0054, 0.0048, 0.0040, 0.0045, 0.0046], + device='cuda:0'), out_proj_covar=tensor([4.6080e-05, 4.2893e-05, 4.1824e-05, 4.6642e-05, 4.2884e-05, 3.6271e-05, + 3.9184e-05, 4.0560e-05], device='cuda:0') +2022-12-01 23:34:48,360 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16146.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:34:51,218 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16149.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:34:55,799 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 23:34:56,789 INFO [train.py:876] Epoch 12, batch 400, loss[loss=0.2075, simple_loss=0.2645, pruned_loss=0.07526, over 4846.00 frames. ], tot_loss[loss=0.1894, simple_loss=0.241, pruned_loss=0.06886, over 822549.09 frames. 
], batch size: 47, lr: 1.50e-02, +2022-12-01 23:35:10,109 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7370, 2.6764, 3.3460, 3.3986, 3.1383, 3.3717, 3.2960, 2.8881], + device='cuda:0'), covar=tensor([0.0111, 0.0307, 0.0185, 0.0140, 0.0169, 0.0160, 0.0165, 0.0298], + device='cuda:0'), in_proj_covar=tensor([0.0079, 0.0100, 0.0083, 0.0084, 0.0082, 0.0078, 0.0086, 0.0105], + device='cuda:0'), out_proj_covar=tensor([5.4481e-05, 6.8862e-05, 5.6902e-05, 5.6288e-05, 5.5884e-05, 5.2216e-05, + 5.7970e-05, 7.4565e-05], device='cuda:0') +2022-12-01 23:35:22,368 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:35:34,458 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9453, 4.7259, 4.4160, 4.9107, 4.2367, 3.8763, 4.5858, 4.1545], + device='cuda:0'), covar=tensor([0.0244, 0.0079, 0.0113, 0.0116, 0.0135, 0.0153, 0.0094, 0.0123], + device='cuda:0'), in_proj_covar=tensor([0.0042, 0.0039, 0.0040, 0.0032, 0.0041, 0.0040, 0.0036, 0.0039], + device='cuda:0'), out_proj_covar=tensor([4.4636e-05, 3.7881e-05, 3.8737e-05, 3.0891e-05, 4.1473e-05, 4.1080e-05, + 3.2368e-05, 3.8492e-05], device='cuda:0') +2022-12-01 23:35:42,539 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.418e+02 2.106e+02 2.634e+02 3.574e+02 6.522e+02, threshold=5.267e+02, percent-clipped=5.0 +2022-12-01 23:35:45,119 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.04 vs. limit=5.0 +2022-12-01 23:35:46,336 INFO [train.py:876] Epoch 12, batch 450, loss[loss=0.1433, simple_loss=0.1829, pruned_loss=0.05184, over 4683.00 frames. ], tot_loss[loss=0.1865, simple_loss=0.2385, pruned_loss=0.06724, over 851628.09 frames. ], batch size: 23, lr: 1.49e-02, +2022-12-01 23:35:51,526 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16210.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:35:57,927 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.99 vs. 
limit=2.0 +2022-12-01 23:36:19,861 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=16239.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:36:22,065 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6399, 3.2724, 3.7969, 3.6650, 3.8874, 3.3906, 3.7061, 3.9056], + device='cuda:0'), covar=tensor([0.0194, 0.0432, 0.0156, 0.0244, 0.0153, 0.0457, 0.0158, 0.0232], + device='cuda:0'), in_proj_covar=tensor([0.0086, 0.0107, 0.0076, 0.0089, 0.0077, 0.0123, 0.0072, 0.0074], + device='cuda:0'), out_proj_covar=tensor([7.2716e-05, 9.0538e-05, 6.3253e-05, 7.6025e-05, 6.4005e-05, 1.0654e-04, + 6.4375e-05, 6.2651e-05], device='cuda:0') +2022-12-01 23:36:27,895 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2856, 1.5138, 2.4573, 3.3619, 2.3306, 3.0574, 3.0758, 3.5113], + device='cuda:0'), covar=tensor([0.0302, 0.1450, 0.1209, 0.0321, 0.0483, 0.0460, 0.0481, 0.0266], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0073, 0.0084, 0.0054, 0.0057, 0.0058, 0.0060, 0.0063], + device='cuda:0'), out_proj_covar=tensor([5.5159e-05, 8.3083e-05, 9.0721e-05, 5.8837e-05, 5.6734e-05, 6.4376e-05, + 6.6023e-05, 5.9740e-05], device='cuda:0') +2022-12-01 23:36:29,895 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4474, 1.6427, 2.6091, 3.5916, 2.5680, 3.1787, 3.1789, 3.6751], + device='cuda:0'), covar=tensor([0.0289, 0.1472, 0.1276, 0.0289, 0.0410, 0.0558, 0.0542, 0.0268], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0073, 0.0084, 0.0054, 0.0057, 0.0058, 0.0061, 0.0063], + device='cuda:0'), out_proj_covar=tensor([5.5218e-05, 8.3216e-05, 9.0787e-05, 5.8858e-05, 5.6805e-05, 6.4466e-05, + 6.6098e-05, 5.9807e-05], device='cuda:0') +2022-12-01 23:36:35,629 INFO [train.py:876] Epoch 12, batch 500, loss[loss=0.1352, simple_loss=0.1865, pruned_loss=0.04197, over 4769.00 frames. ], tot_loss[loss=0.1869, simple_loss=0.2388, pruned_loss=0.06743, over 874952.51 frames. ], batch size: 26, lr: 1.49e-02, +2022-12-01 23:37:21,137 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.363e+02 2.080e+02 2.411e+02 3.025e+02 5.440e+02, threshold=4.822e+02, percent-clipped=1.0 +2022-12-01 23:37:25,122 INFO [train.py:876] Epoch 12, batch 550, loss[loss=0.1412, simple_loss=0.1757, pruned_loss=0.05336, over 4697.00 frames. ], tot_loss[loss=0.1853, simple_loss=0.2375, pruned_loss=0.06655, over 891604.44 frames. ], batch size: 21, lr: 1.49e-02, +2022-12-01 23:38:14,271 INFO [train.py:876] Epoch 12, batch 600, loss[loss=0.2046, simple_loss=0.2488, pruned_loss=0.08015, over 4908.00 frames. ], tot_loss[loss=0.185, simple_loss=0.2369, pruned_loss=0.06657, over 905366.19 frames. 
], batch size: 31, lr: 1.49e-02, +2022-12-01 23:38:15,404 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4604, 1.7116, 2.5592, 3.5501, 2.9050, 3.1745, 3.2124, 3.7188], + device='cuda:0'), covar=tensor([0.0327, 0.1348, 0.1288, 0.0336, 0.0360, 0.0544, 0.0624, 0.0239], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0075, 0.0084, 0.0054, 0.0058, 0.0058, 0.0061, 0.0063], + device='cuda:0'), out_proj_covar=tensor([5.6481e-05, 8.5017e-05, 9.1396e-05, 5.9432e-05, 5.7841e-05, 6.4631e-05, + 6.5932e-05, 5.9647e-05], device='cuda:0') +2022-12-01 23:38:16,321 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=16357.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:38:20,588 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9604, 1.5231, 1.9272, 1.4341, 1.3631, 1.7594, 1.4231, 1.6156], + device='cuda:0'), covar=tensor([0.0634, 0.0832, 0.0746, 0.0685, 0.0754, 0.0480, 0.0730, 0.0359], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0051, 0.0048, 0.0054, 0.0049, 0.0041, 0.0046, 0.0047], + device='cuda:0'), out_proj_covar=tensor([4.6609e-05, 4.4264e-05, 4.3120e-05, 4.6815e-05, 4.3980e-05, 3.7039e-05, + 4.0381e-05, 4.1287e-05], device='cuda:0') +2022-12-01 23:38:30,173 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16371.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:38:34,368 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4704, 2.1239, 2.0484, 1.9576, 1.6595, 2.2056, 1.8752, 2.4992], + device='cuda:0'), covar=tensor([0.0810, 0.0329, 0.0995, 0.0644, 0.1959, 0.0988, 0.0930, 0.0256], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0050, 0.0047, 0.0053, 0.0048, 0.0041, 0.0045, 0.0046], + device='cuda:0'), out_proj_covar=tensor([4.5633e-05, 4.3762e-05, 4.2418e-05, 4.6322e-05, 4.2979e-05, 3.6622e-05, + 3.9494e-05, 4.0589e-05], device='cuda:0') +2022-12-01 23:38:59,256 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.170e+02 1.995e+02 2.573e+02 3.259e+02 1.211e+03, threshold=5.147e+02, percent-clipped=7.0 +2022-12-01 23:39:03,015 INFO [train.py:876] Epoch 12, batch 650, loss[loss=0.2606, simple_loss=0.3087, pruned_loss=0.1062, over 4677.00 frames. ], tot_loss[loss=0.1843, simple_loss=0.2359, pruned_loss=0.06632, over 915045.13 frames. ], batch size: 63, lr: 1.49e-02, +2022-12-01 23:39:29,126 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16432.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:39:34,022 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7854, 4.5263, 4.3170, 4.7376, 4.1107, 3.9140, 4.5060, 4.3118], + device='cuda:0'), covar=tensor([0.0257, 0.0099, 0.0112, 0.0136, 0.0150, 0.0146, 0.0097, 0.0106], + device='cuda:0'), in_proj_covar=tensor([0.0044, 0.0041, 0.0041, 0.0034, 0.0043, 0.0042, 0.0038, 0.0041], + device='cuda:0'), out_proj_covar=tensor([4.6539e-05, 3.9820e-05, 3.9389e-05, 3.2974e-05, 4.3458e-05, 4.3162e-05, + 3.4576e-05, 4.0611e-05], device='cuda:0') +2022-12-01 23:39:37,677 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=16441.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:39:50,890 INFO [train.py:876] Epoch 12, batch 700, loss[loss=0.1761, simple_loss=0.2332, pruned_loss=0.05949, over 4862.00 frames. ], tot_loss[loss=0.1867, simple_loss=0.2386, pruned_loss=0.06736, over 924067.32 frames. 
], batch size: 36, lr: 1.48e-02, +2022-12-01 23:40:26,159 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.0508, 5.5002, 5.1513, 5.6448, 5.0838, 4.7542, 5.3577, 5.0187], + device='cuda:0'), covar=tensor([0.0123, 0.0058, 0.0084, 0.0067, 0.0099, 0.0087, 0.0067, 0.0100], + device='cuda:0'), in_proj_covar=tensor([0.0043, 0.0040, 0.0040, 0.0032, 0.0042, 0.0041, 0.0037, 0.0040], + device='cuda:0'), out_proj_covar=tensor([4.4811e-05, 3.8828e-05, 3.8464e-05, 3.1276e-05, 4.2567e-05, 4.2148e-05, + 3.3187e-05, 4.0373e-05], device='cuda:0') +2022-12-01 23:40:35,554 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.319e+02 2.031e+02 2.339e+02 3.019e+02 8.589e+02, threshold=4.678e+02, percent-clipped=5.0 +2022-12-01 23:40:39,038 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.16 vs. limit=2.0 +2022-12-01 23:40:39,452 INFO [train.py:876] Epoch 12, batch 750, loss[loss=0.1444, simple_loss=0.1832, pruned_loss=0.05281, over 3456.00 frames. ], tot_loss[loss=0.1858, simple_loss=0.2372, pruned_loss=0.06714, over 930128.58 frames. ], batch size: 13, lr: 1.48e-02, +2022-12-01 23:40:39,511 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=16505.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:41:12,596 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=16539.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:41:15,117 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. limit=2.0 +2022-12-01 23:41:28,033 INFO [train.py:876] Epoch 12, batch 800, loss[loss=0.2136, simple_loss=0.2635, pruned_loss=0.08185, over 4057.00 frames. ], tot_loss[loss=0.1854, simple_loss=0.237, pruned_loss=0.06686, over 934913.80 frames. ], batch size: 72, lr: 1.48e-02, +2022-12-01 23:41:53,487 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3563, 1.3283, 1.3130, 0.9899, 1.0007, 0.9990, 1.2114, 1.3644], + device='cuda:0'), covar=tensor([0.0226, 0.0160, 0.0501, 0.0194, 0.0286, 0.0193, 0.0261, 0.0161], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0029, 0.0029, 0.0031, 0.0035, 0.0032, 0.0038, 0.0029], + device='cuda:0'), out_proj_covar=tensor([2.9302e-05, 2.2739e-05, 2.4348e-05, 2.4630e-05, 2.9473e-05, 2.6614e-05, + 3.1693e-05, 2.3998e-05], device='cuda:0') +2022-12-01 23:41:58,928 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=16587.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:42:12,596 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.266e+02 2.062e+02 2.491e+02 3.029e+02 1.072e+03, threshold=4.982e+02, percent-clipped=7.0 +2022-12-01 23:42:16,595 INFO [train.py:876] Epoch 12, batch 850, loss[loss=0.1936, simple_loss=0.2447, pruned_loss=0.07127, over 4875.00 frames. ], tot_loss[loss=0.1857, simple_loss=0.2379, pruned_loss=0.06668, over 940806.78 frames. ], batch size: 38, lr: 1.48e-02, +2022-12-01 23:42:31,032 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.06 vs. limit=2.0 +2022-12-01 23:43:05,700 INFO [train.py:876] Epoch 12, batch 900, loss[loss=0.2054, simple_loss=0.2562, pruned_loss=0.0773, over 4831.00 frames. ], tot_loss[loss=0.1837, simple_loss=0.2357, pruned_loss=0.06578, over 942900.48 frames. 
], batch size: 49, lr: 1.48e-02, +2022-12-01 23:43:07,732 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=16657.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:43:50,621 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.369e+02 2.067e+02 2.412e+02 2.734e+02 7.605e+02, threshold=4.823e+02, percent-clipped=1.0 +2022-12-01 23:43:54,354 INFO [train.py:876] Epoch 12, batch 950, loss[loss=0.176, simple_loss=0.2362, pruned_loss=0.05784, over 4880.00 frames. ], tot_loss[loss=0.1847, simple_loss=0.2373, pruned_loss=0.06602, over 945500.04 frames. ], batch size: 37, lr: 1.47e-02, +2022-12-01 23:43:54,403 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=16705.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:43:57,336 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7491, 2.2487, 2.9568, 2.7110, 2.8219, 2.9991, 2.6296, 2.8821], + device='cuda:0'), covar=tensor([0.0158, 0.0953, 0.0359, 0.0983, 0.0303, 0.0214, 0.0792, 0.0399], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0085, 0.0062, 0.0090, 0.0068, 0.0062, 0.0083, 0.0067], + device='cuda:0'), out_proj_covar=tensor([6.1755e-05, 9.8757e-05, 7.2631e-05, 1.0608e-04, 7.7168e-05, 7.2744e-05, + 9.5247e-05, 7.3858e-05], device='cuda:0') +2022-12-01 23:44:15,638 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=16727.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:44:29,068 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=16741.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:44:42,386 INFO [train.py:876] Epoch 12, batch 1000, loss[loss=0.2259, simple_loss=0.2814, pruned_loss=0.08521, over 4819.00 frames. ], tot_loss[loss=0.1855, simple_loss=0.2385, pruned_loss=0.06626, over 949811.28 frames. ], batch size: 42, lr: 1.47e-02, +2022-12-01 23:45:04,969 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5483, 1.0606, 1.5662, 0.9874, 0.9482, 0.9971, 1.1595, 1.4952], + device='cuda:0'), covar=tensor([0.0166, 0.0191, 0.0204, 0.0243, 0.0321, 0.0254, 0.0268, 0.0159], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0030, 0.0030, 0.0031, 0.0037, 0.0033, 0.0038, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.9382e-05, 2.3758e-05, 2.5056e-05, 2.5157e-05, 3.0805e-05, 2.7251e-05, + 3.2100e-05, 2.4520e-05], device='cuda:0') +2022-12-01 23:45:15,707 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=16789.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:45:27,301 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.430e+02 2.149e+02 2.562e+02 3.314e+02 5.891e+02, threshold=5.125e+02, percent-clipped=2.0 +2022-12-01 23:45:27,881 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. limit=2.0 +2022-12-01 23:45:28,474 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4948, 1.8667, 2.0639, 2.6202, 1.8583, 1.8822, 1.4396, 2.4009], + device='cuda:0'), covar=tensor([0.0856, 0.2098, 0.0928, 0.0673, 0.1282, 0.2029, 0.1437, 0.0519], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0064, 0.0081, 0.0069, 0.0089, 0.0074, 0.0088, 0.0069], + device='cuda:0'), out_proj_covar=tensor([6.4701e-05, 6.4012e-05, 7.8698e-05, 6.7788e-05, 8.6467e-05, 7.3091e-05, + 8.6411e-05, 6.7477e-05], device='cuda:0') +2022-12-01 23:45:31,152 INFO [train.py:876] Epoch 12, batch 1050, loss[loss=0.2426, simple_loss=0.2913, pruned_loss=0.09694, over 4805.00 frames. ], tot_loss[loss=0.1866, simple_loss=0.239, pruned_loss=0.06711, over 951274.21 frames. 
], batch size: 54, lr: 1.47e-02, +2022-12-01 23:45:31,357 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=16805.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:45:35,281 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4301, 3.4733, 3.9252, 3.6502, 3.9096, 3.3407, 3.7413, 3.9142], + device='cuda:0'), covar=tensor([0.0289, 0.0355, 0.0147, 0.0253, 0.0191, 0.0463, 0.0174, 0.0234], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0106, 0.0079, 0.0092, 0.0078, 0.0123, 0.0074, 0.0075], + device='cuda:0'), out_proj_covar=tensor([7.5415e-05, 9.0179e-05, 6.6498e-05, 7.8771e-05, 6.5159e-05, 1.0607e-04, + 6.5831e-05, 6.4238e-05], device='cuda:0') +2022-12-01 23:45:44,863 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4675, 3.2776, 3.4992, 3.2311, 3.1133, 2.2908, 3.6096, 1.8455], + device='cuda:0'), covar=tensor([0.0386, 0.0231, 0.0184, 0.0532, 0.0776, 0.2720, 0.0198, 0.2360], + device='cuda:0'), in_proj_covar=tensor([0.0096, 0.0080, 0.0071, 0.0111, 0.0120, 0.0145, 0.0068, 0.0158], + device='cuda:0'), out_proj_covar=tensor([1.0650e-04, 9.3693e-05, 8.6244e-05, 1.2008e-04, 1.3096e-04, 1.5902e-04, + 7.6110e-05, 1.6927e-04], device='cuda:0') +2022-12-01 23:46:06,837 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1776, 3.3648, 3.4904, 3.3949, 2.9334, 3.4099, 3.3736, 3.4694], + device='cuda:0'), covar=tensor([0.1125, 0.0340, 0.0351, 0.0356, 0.0400, 0.0406, 0.0278, 0.0330], + device='cuda:0'), in_proj_covar=tensor([0.0200, 0.0128, 0.0128, 0.0136, 0.0132, 0.0131, 0.0119, 0.0126], + device='cuda:0'), out_proj_covar=tensor([1.3337e-04, 8.3038e-05, 8.3370e-05, 8.9412e-05, 8.7018e-05, 8.5902e-05, + 7.8036e-05, 8.5239e-05], device='cuda:0') +2022-12-01 23:46:11,761 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7519, 2.2830, 2.2165, 2.7752, 2.0875, 2.4139, 1.5670, 2.5046], + device='cuda:0'), covar=tensor([0.0417, 0.1144, 0.0752, 0.0493, 0.0981, 0.0979, 0.1140, 0.0386], + device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0063, 0.0081, 0.0068, 0.0087, 0.0072, 0.0086, 0.0068], + device='cuda:0'), out_proj_covar=tensor([6.3509e-05, 6.3475e-05, 7.8186e-05, 6.6375e-05, 8.4059e-05, 7.1035e-05, + 8.4482e-05, 6.6569e-05], device='cuda:0') +2022-12-01 23:46:17,012 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.86 vs. limit=2.0 +2022-12-01 23:46:17,304 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=16853.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:46:19,193 INFO [train.py:876] Epoch 12, batch 1100, loss[loss=0.1877, simple_loss=0.24, pruned_loss=0.06771, over 4828.00 frames. ], tot_loss[loss=0.186, simple_loss=0.2383, pruned_loss=0.06685, over 953561.69 frames. ], batch size: 45, lr: 1.47e-02, +2022-12-01 23:46:31,918 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16868.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 23:47:02,624 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.291e+02 2.048e+02 2.539e+02 3.273e+02 7.413e+02, threshold=5.077e+02, percent-clipped=3.0 +2022-12-01 23:47:06,827 INFO [train.py:876] Epoch 12, batch 1150, loss[loss=0.1273, simple_loss=0.1717, pruned_loss=0.04149, over 4692.00 frames. ], tot_loss[loss=0.186, simple_loss=0.2382, pruned_loss=0.0669, over 950964.74 frames. 
], batch size: 23, lr: 1.47e-02, +2022-12-01 23:47:08,037 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=16906.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:47:17,297 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.84 vs. limit=2.0 +2022-12-01 23:47:30,554 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16929.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 23:47:55,373 INFO [train.py:876] Epoch 12, batch 1200, loss[loss=0.2026, simple_loss=0.2561, pruned_loss=0.07461, over 4856.00 frames. ], tot_loss[loss=0.1851, simple_loss=0.2372, pruned_loss=0.06656, over 951031.92 frames. ], batch size: 40, lr: 1.46e-02, +2022-12-01 23:48:07,719 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=16967.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 23:48:40,334 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.388e+02 2.096e+02 2.590e+02 3.275e+02 1.308e+03, threshold=5.180e+02, percent-clipped=2.0 +2022-12-01 23:48:41,797 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-01 23:48:44,361 INFO [train.py:876] Epoch 12, batch 1250, loss[loss=0.1581, simple_loss=0.2291, pruned_loss=0.04353, over 4823.00 frames. ], tot_loss[loss=0.1841, simple_loss=0.236, pruned_loss=0.06609, over 952722.75 frames. ], batch size: 45, lr: 1.46e-02, +2022-12-01 23:49:05,865 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=17027.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:49:22,570 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.22 vs. limit=5.0 +2022-12-01 23:49:32,873 INFO [train.py:876] Epoch 12, batch 1300, loss[loss=0.1515, simple_loss=0.2051, pruned_loss=0.04894, over 4741.00 frames. ], tot_loss[loss=0.1837, simple_loss=0.2356, pruned_loss=0.06593, over 950744.31 frames. ], batch size: 27, lr: 1.46e-02, +2022-12-01 23:49:43,301 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.80 vs. limit=2.0 +2022-12-01 23:49:51,994 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=17075.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:49:52,030 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1710, 4.1972, 4.5276, 4.0771, 4.3631, 4.4084, 4.2572, 4.0721], + device='cuda:0'), covar=tensor([0.0566, 0.0340, 0.0472, 0.0461, 0.0674, 0.0368, 0.0579, 0.0540], + device='cuda:0'), in_proj_covar=tensor([0.0132, 0.0098, 0.0111, 0.0110, 0.0088, 0.0111, 0.0131, 0.0092], + device='cuda:0'), out_proj_covar=tensor([1.1128e-04, 7.3098e-05, 9.3661e-05, 8.8719e-05, 7.3851e-05, 9.1561e-05, + 1.0721e-04, 7.3912e-05], device='cuda:0') +2022-12-01 23:50:17,063 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.429e+02 2.038e+02 2.683e+02 3.257e+02 1.137e+03, threshold=5.367e+02, percent-clipped=2.0 +2022-12-01 23:50:20,940 INFO [train.py:876] Epoch 12, batch 1350, loss[loss=0.1156, simple_loss=0.1716, pruned_loss=0.02985, over 4765.00 frames. ], tot_loss[loss=0.1834, simple_loss=0.2353, pruned_loss=0.06578, over 945832.22 frames. ], batch size: 26, lr: 1.46e-02, +2022-12-01 23:51:00,194 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.78 vs. limit=5.0 +2022-12-01 23:51:09,194 INFO [train.py:876] Epoch 12, batch 1400, loss[loss=0.2464, simple_loss=0.2994, pruned_loss=0.0967, over 4698.00 frames. ], tot_loss[loss=0.1841, simple_loss=0.2358, pruned_loss=0.06617, over 948424.95 frames. 
], batch size: 63, lr: 1.46e-02, +2022-12-01 23:51:38,608 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-12.pt +2022-12-01 23:51:54,908 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-01 23:51:55,840 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-01 23:51:56,139 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:51:56,171 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-01 23:51:57,290 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-01 23:51:57,611 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-01 23:51:59,044 INFO [train.py:876] Epoch 13, batch 0, loss[loss=0.1973, simple_loss=0.2398, pruned_loss=0.07735, over 4854.00 frames. ], tot_loss[loss=0.1973, simple_loss=0.2398, pruned_loss=0.07735, over 4854.00 frames. ], batch size: 36, lr: 1.40e-02, +2022-12-01 23:51:59,045 INFO [train.py:901] Computing validation loss +2022-12-01 23:52:14,587 INFO [train.py:910] Epoch 13, validation: loss=0.232, simple_loss=0.2791, pruned_loss=0.09244, over 253132.00 frames. +2022-12-01 23:52:14,588 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-01 23:52:16,560 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17189.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:52:26,863 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7543, 3.7318, 3.6254, 3.8368, 3.5374, 3.0239, 4.1199, 1.7317], + device='cuda:0'), covar=tensor([0.0417, 0.0329, 0.0321, 0.0305, 0.0621, 0.1961, 0.0182, 0.3003], + device='cuda:0'), in_proj_covar=tensor([0.0093, 0.0079, 0.0073, 0.0108, 0.0116, 0.0144, 0.0067, 0.0153], + device='cuda:0'), out_proj_covar=tensor([1.0367e-04, 9.2859e-05, 8.8151e-05, 1.1662e-04, 1.2754e-04, 1.5719e-04, + 7.5417e-05, 1.6451e-04], device='cuda:0') +2022-12-01 23:52:27,576 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.03 vs. limit=2.0 +2022-12-01 23:52:28,947 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.292e+02 2.330e+02 2.862e+02 3.528e+02 5.665e+02, threshold=5.724e+02, percent-clipped=2.0 +2022-12-01 23:52:29,216 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6835, 2.5428, 2.4197, 2.9123, 2.1201, 2.7315, 2.8674, 2.7498], + device='cuda:0'), covar=tensor([0.0197, 0.0329, 0.0246, 0.0153, 0.0461, 0.0267, 0.0167, 0.0145], + device='cuda:0'), in_proj_covar=tensor([0.0044, 0.0048, 0.0040, 0.0039, 0.0054, 0.0047, 0.0036, 0.0039], + device='cuda:0'), out_proj_covar=tensor([3.2165e-05, 3.6843e-05, 2.9539e-05, 2.7799e-05, 4.0724e-05, 3.4669e-05, + 2.6617e-05, 2.8155e-05], device='cuda:0') +2022-12-01 23:52:51,636 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.13 vs. 
limit=5.0 +2022-12-01 23:52:52,058 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=17224.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 23:53:04,560 INFO [train.py:876] Epoch 13, batch 50, loss[loss=0.1858, simple_loss=0.236, pruned_loss=0.0678, over 4836.00 frames. ], tot_loss[loss=0.1783, simple_loss=0.2304, pruned_loss=0.06314, over 214420.28 frames. ], batch size: 35, lr: 1.40e-02, +2022-12-01 23:53:17,103 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=17250.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:53:23,182 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3852, 3.5554, 3.7533, 3.6740, 3.2533, 3.4562, 3.7278, 3.7483], + device='cuda:0'), covar=tensor([0.1022, 0.0254, 0.0219, 0.0239, 0.0299, 0.0358, 0.0203, 0.0325], + device='cuda:0'), in_proj_covar=tensor([0.0203, 0.0132, 0.0129, 0.0138, 0.0136, 0.0137, 0.0120, 0.0126], + device='cuda:0'), out_proj_covar=tensor([1.3454e-04, 8.5712e-05, 8.3888e-05, 9.0502e-05, 8.9885e-05, 9.0067e-05, + 7.8622e-05, 8.5582e-05], device='cuda:0') +2022-12-01 23:53:24,704 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-01 23:53:28,487 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=17262.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 23:53:52,643 INFO [train.py:876] Epoch 13, batch 100, loss[loss=0.1695, simple_loss=0.2362, pruned_loss=0.05145, over 4821.00 frames. ], tot_loss[loss=0.1775, simple_loss=0.2309, pruned_loss=0.06204, over 380470.99 frames. ], batch size: 45, lr: 1.39e-02, +2022-12-01 23:54:06,133 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.383e+02 2.072e+02 2.493e+02 2.882e+02 7.334e+02, threshold=4.986e+02, percent-clipped=2.0 +2022-12-01 23:54:06,181 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:54:07,231 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7952, 4.5912, 5.0637, 4.6284, 5.0222, 4.8919, 4.7773, 4.4767], + device='cuda:0'), covar=tensor([0.0421, 0.0333, 0.0460, 0.0449, 0.0389, 0.0322, 0.0603, 0.0458], + device='cuda:0'), in_proj_covar=tensor([0.0127, 0.0098, 0.0111, 0.0108, 0.0088, 0.0110, 0.0128, 0.0091], + device='cuda:0'), out_proj_covar=tensor([1.0578e-04, 7.2789e-05, 9.3814e-05, 8.6685e-05, 7.3150e-05, 8.9697e-05, + 1.0493e-04, 7.2507e-05], device='cuda:0') +2022-12-01 23:54:28,491 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-01 23:54:40,934 INFO [train.py:876] Epoch 13, batch 150, loss[loss=0.175, simple_loss=0.2254, pruned_loss=0.06229, over 4856.00 frames. ], tot_loss[loss=0.1794, simple_loss=0.2335, pruned_loss=0.06267, over 508896.09 frames. 
], batch size: 36, lr: 1.39e-02, +2022-12-01 23:55:05,213 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17362.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:55:09,190 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.1789, 1.3568, 1.5652, 1.0294, 0.8697, 0.9398, 1.1580, 1.4466], + device='cuda:0'), covar=tensor([0.0334, 0.0213, 0.0248, 0.0290, 0.0417, 0.0279, 0.0301, 0.0187], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0031, 0.0031, 0.0032, 0.0037, 0.0033, 0.0038, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.9980e-05, 2.4474e-05, 2.5552e-05, 2.5442e-05, 3.0919e-05, 2.7839e-05, + 3.1318e-05, 2.4909e-05], device='cuda:0') +2022-12-01 23:55:25,671 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17383.0, num_to_drop=1, layers_to_drop={1} +2022-12-01 23:55:29,298 INFO [train.py:876] Epoch 13, batch 200, loss[loss=0.1619, simple_loss=0.2187, pruned_loss=0.05255, over 4860.00 frames. ], tot_loss[loss=0.18, simple_loss=0.2333, pruned_loss=0.06339, over 605438.96 frames. ], batch size: 36, lr: 1.39e-02, +2022-12-01 23:55:43,168 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.194e+02 2.166e+02 2.523e+02 3.178e+02 9.182e+02, threshold=5.047e+02, percent-clipped=3.0 +2022-12-01 23:55:50,282 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3175, 1.2965, 1.3969, 0.9507, 0.9473, 1.0692, 1.2661, 1.4584], + device='cuda:0'), covar=tensor([0.0379, 0.0186, 0.0312, 0.0426, 0.0477, 0.0251, 0.0269, 0.0217], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0031, 0.0031, 0.0031, 0.0037, 0.0033, 0.0037, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.9928e-05, 2.4409e-05, 2.5416e-05, 2.5410e-05, 3.0844e-05, 2.7628e-05, + 3.1182e-05, 2.4870e-05], device='cuda:0') +2022-12-01 23:55:55,848 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4667, 4.4049, 4.8104, 4.2974, 4.6627, 4.6127, 4.3841, 4.3351], + device='cuda:0'), covar=tensor([0.0526, 0.0448, 0.0523, 0.0432, 0.0631, 0.0404, 0.0786, 0.0423], + device='cuda:0'), in_proj_covar=tensor([0.0129, 0.0099, 0.0114, 0.0111, 0.0089, 0.0113, 0.0131, 0.0093], + device='cuda:0'), out_proj_covar=tensor([1.0819e-04, 7.3152e-05, 9.6329e-05, 8.9325e-05, 7.4596e-05, 9.2124e-05, + 1.0786e-04, 7.4310e-05], device='cuda:0') +2022-12-01 23:56:04,935 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=17423.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:56:12,566 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6484, 3.2668, 3.6576, 3.6078, 3.8373, 3.2057, 3.4155, 4.0601], + device='cuda:0'), covar=tensor([0.0223, 0.0338, 0.0188, 0.0232, 0.0184, 0.0459, 0.0265, 0.0171], + device='cuda:0'), in_proj_covar=tensor([0.0087, 0.0106, 0.0081, 0.0095, 0.0079, 0.0121, 0.0072, 0.0075], + device='cuda:0'), out_proj_covar=tensor([7.4508e-05, 9.0074e-05, 6.7802e-05, 8.0692e-05, 6.6720e-05, 1.0489e-04, + 6.4540e-05, 6.4353e-05], device='cuda:0') +2022-12-01 23:56:17,113 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-01 23:56:18,143 INFO [train.py:876] Epoch 13, batch 250, loss[loss=0.157, simple_loss=0.1853, pruned_loss=0.06435, over 4178.00 frames. ], tot_loss[loss=0.1798, simple_loss=0.2331, pruned_loss=0.06321, over 681726.49 frames. 
], batch size: 16, lr: 1.39e-02, +2022-12-01 23:56:25,243 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=17444.0, num_to_drop=1, layers_to_drop={3} +2022-12-01 23:56:48,865 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9110, 1.7565, 2.2062, 1.4707, 1.8171, 1.9903, 1.8000, 1.7126], + device='cuda:0'), covar=tensor([0.0829, 0.0515, 0.0601, 0.0585, 0.0509, 0.0815, 0.0469, 0.0417], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0054, 0.0052, 0.0055, 0.0048, 0.0042, 0.0047, 0.0049], + device='cuda:0'), out_proj_covar=tensor([4.6062e-05, 4.7512e-05, 4.6276e-05, 4.8528e-05, 4.3513e-05, 3.7726e-05, + 4.1576e-05, 4.3387e-05], device='cuda:0') +2022-12-01 23:57:01,489 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8089, 4.6073, 4.2167, 4.4910, 4.5375, 4.3701, 4.4478, 4.8693], + device='cuda:0'), covar=tensor([0.0919, 0.0121, 0.0335, 0.0179, 0.0132, 0.0226, 0.0141, 0.0171], + device='cuda:0'), in_proj_covar=tensor([0.0199, 0.0131, 0.0129, 0.0139, 0.0137, 0.0136, 0.0121, 0.0125], + device='cuda:0'), out_proj_covar=tensor([1.3257e-04, 8.5391e-05, 8.3960e-05, 9.1257e-05, 8.9755e-05, 8.9890e-05, + 7.8913e-05, 8.4877e-05], device='cuda:0') +2022-12-01 23:57:06,674 INFO [train.py:876] Epoch 13, batch 300, loss[loss=0.1686, simple_loss=0.2324, pruned_loss=0.05237, over 4857.00 frames. ], tot_loss[loss=0.1788, simple_loss=0.2319, pruned_loss=0.06283, over 741086.61 frames. ], batch size: 36, lr: 1.39e-02, +2022-12-01 23:57:08,912 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9978, 3.2342, 3.4575, 3.1408, 3.3670, 3.3848, 3.2973, 3.4911], + device='cuda:0'), covar=tensor([0.1380, 0.0457, 0.0406, 0.0449, 0.0450, 0.0370, 0.0381, 0.0274], + device='cuda:0'), in_proj_covar=tensor([0.0169, 0.0135, 0.0137, 0.0131, 0.0136, 0.0131, 0.0133, 0.0123], + device='cuda:0'), out_proj_covar=tensor([1.1229e-04, 8.9637e-05, 9.0706e-05, 8.8234e-05, 8.8903e-05, 8.5626e-05, + 8.7034e-05, 8.1458e-05], device='cuda:0') +2022-12-01 23:57:17,158 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-01 23:57:18,673 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.89 vs. limit=5.0 +2022-12-01 23:57:20,117 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.171e+02 2.068e+02 2.576e+02 3.306e+02 1.262e+03, threshold=5.152e+02, percent-clipped=9.0 +2022-12-01 23:57:26,374 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2945, 2.8217, 3.4224, 2.8493, 3.4150, 2.7185, 3.0506, 3.7601], + device='cuda:0'), covar=tensor([0.0201, 0.0449, 0.0166, 0.0338, 0.0190, 0.0536, 0.0294, 0.0161], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0103, 0.0077, 0.0092, 0.0077, 0.0118, 0.0070, 0.0073], + device='cuda:0'), out_proj_covar=tensor([7.1718e-05, 8.7170e-05, 6.4539e-05, 7.8684e-05, 6.4617e-05, 1.0188e-04, + 6.3193e-05, 6.2222e-05], device='cuda:0') +2022-12-01 23:57:42,721 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=17524.0, num_to_drop=1, layers_to_drop={2} +2022-12-01 23:57:54,939 INFO [train.py:876] Epoch 13, batch 350, loss[loss=0.2468, simple_loss=0.2889, pruned_loss=0.1024, over 4823.00 frames. ], tot_loss[loss=0.18, simple_loss=0.2336, pruned_loss=0.0632, over 789865.43 frames. 
], batch size: 45, lr: 1.39e-02, +2022-12-01 23:58:02,738 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=17545.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:58:14,037 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4083, 1.4357, 1.5900, 0.8590, 0.7432, 1.0054, 1.3223, 1.4270], + device='cuda:0'), covar=tensor([0.0317, 0.0165, 0.0212, 0.0265, 0.0475, 0.0274, 0.0187, 0.0160], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0031, 0.0030, 0.0032, 0.0037, 0.0034, 0.0037, 0.0030], + device='cuda:0'), out_proj_covar=tensor([3.0507e-05, 2.4505e-05, 2.5365e-05, 2.5677e-05, 3.1126e-05, 2.8066e-05, + 3.1018e-05, 2.4912e-05], device='cuda:0') +2022-12-01 23:58:16,039 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3418, 1.8771, 2.1154, 1.4998, 2.1079, 2.1446, 2.2877, 1.9759], + device='cuda:0'), covar=tensor([0.0578, 0.0452, 0.1395, 0.0643, 0.0321, 0.0606, 0.0393, 0.0224], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0054, 0.0052, 0.0055, 0.0048, 0.0041, 0.0047, 0.0049], + device='cuda:0'), out_proj_covar=tensor([4.6477e-05, 4.7887e-05, 4.6167e-05, 4.8247e-05, 4.3071e-05, 3.7345e-05, + 4.1053e-05, 4.3294e-05], device='cuda:0') +2022-12-01 23:58:19,854 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=17562.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:58:29,559 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=17572.0, num_to_drop=1, layers_to_drop={0} +2022-12-01 23:58:43,729 INFO [train.py:876] Epoch 13, batch 400, loss[loss=0.2052, simple_loss=0.2571, pruned_loss=0.07669, over 4821.00 frames. ], tot_loss[loss=0.1778, simple_loss=0.232, pruned_loss=0.06186, over 828026.84 frames. ], batch size: 42, lr: 1.38e-02, +2022-12-01 23:58:52,440 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-01 23:58:57,484 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.288e+02 1.976e+02 2.375e+02 2.812e+02 9.037e+02, threshold=4.750e+02, percent-clipped=3.0 +2022-12-01 23:59:06,431 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=17610.0, num_to_drop=0, layers_to_drop=set() +2022-12-01 23:59:19,988 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-01 23:59:27,105 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4895, 4.0521, 3.8604, 3.8153, 3.7193, 3.7015, 3.9740, 4.0037], + device='cuda:0'), covar=tensor([0.1071, 0.0223, 0.0313, 0.0303, 0.0316, 0.0326, 0.0230, 0.0357], + device='cuda:0'), in_proj_covar=tensor([0.0210, 0.0137, 0.0136, 0.0144, 0.0144, 0.0141, 0.0127, 0.0133], + device='cuda:0'), out_proj_covar=tensor([1.3911e-04, 8.8878e-05, 8.8448e-05, 9.4576e-05, 9.5002e-05, 9.2847e-05, + 8.2923e-05, 8.9957e-05], device='cuda:0') +2022-12-01 23:59:32,684 INFO [train.py:876] Epoch 13, batch 450, loss[loss=0.1405, simple_loss=0.2011, pruned_loss=0.03993, over 4802.00 frames. ], tot_loss[loss=0.1759, simple_loss=0.2307, pruned_loss=0.06054, over 857181.17 frames. 
], batch size: 32, lr: 1.38e-02, +2022-12-02 00:00:19,467 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1011, 2.7037, 3.3460, 2.9260, 3.3002, 2.5584, 3.1335, 3.5498], + device='cuda:0'), covar=tensor([0.0248, 0.0504, 0.0194, 0.0369, 0.0267, 0.0612, 0.0265, 0.0198], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0103, 0.0078, 0.0092, 0.0078, 0.0116, 0.0071, 0.0073], + device='cuda:0'), out_proj_covar=tensor([7.2280e-05, 8.7329e-05, 6.5103e-05, 7.8749e-05, 6.5083e-05, 1.0022e-04, + 6.4070e-05, 6.2852e-05], device='cuda:0') +2022-12-02 00:00:21,948 INFO [train.py:876] Epoch 13, batch 500, loss[loss=0.1427, simple_loss=0.1941, pruned_loss=0.04565, over 4780.00 frames. ], tot_loss[loss=0.1735, simple_loss=0.2279, pruned_loss=0.05955, over 876489.24 frames. ], batch size: 26, lr: 1.38e-02, +2022-12-02 00:00:34,157 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17700.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:00:34,907 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.186e+02 1.837e+02 2.396e+02 2.892e+02 5.268e+02, threshold=4.792e+02, percent-clipped=3.0 +2022-12-02 00:00:51,390 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=17718.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:01:03,889 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17730.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:01:10,614 INFO [train.py:876] Epoch 13, batch 550, loss[loss=0.1025, simple_loss=0.1376, pruned_loss=0.0337, over 4241.00 frames. ], tot_loss[loss=0.1741, simple_loss=0.2279, pruned_loss=0.06011, over 890001.83 frames. ], batch size: 17, lr: 1.38e-02, +2022-12-02 00:01:12,616 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=17739.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 00:01:33,836 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=17761.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:01:44,565 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7365, 2.4793, 3.3101, 2.9968, 2.8047, 2.8476, 2.7871, 3.4451], + device='cuda:0'), covar=tensor([0.0145, 0.0883, 0.0333, 0.0876, 0.0312, 0.0381, 0.0882, 0.0303], + device='cuda:0'), in_proj_covar=tensor([0.0056, 0.0087, 0.0066, 0.0095, 0.0070, 0.0066, 0.0087, 0.0071], + device='cuda:0'), out_proj_covar=tensor([6.5949e-05, 1.0117e-04, 7.7234e-05, 1.1199e-04, 7.9980e-05, 7.8835e-05, + 1.0090e-04, 7.7537e-05], device='cuda:0') +2022-12-02 00:01:47,669 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0680, 4.0901, 4.0956, 3.8588, 3.7164, 3.8476, 3.7525, 4.1905], + device='cuda:0'), covar=tensor([0.0936, 0.0271, 0.0265, 0.0315, 0.0363, 0.0271, 0.0273, 0.0238], + device='cuda:0'), in_proj_covar=tensor([0.0172, 0.0137, 0.0137, 0.0132, 0.0139, 0.0134, 0.0136, 0.0124], + device='cuda:0'), out_proj_covar=tensor([1.1435e-04, 9.1381e-05, 9.0857e-05, 8.8847e-05, 9.0958e-05, 8.7495e-05, + 8.9300e-05, 8.1944e-05], device='cuda:0') +2022-12-02 00:01:59,087 INFO [train.py:876] Epoch 13, batch 600, loss[loss=0.2258, simple_loss=0.27, pruned_loss=0.09076, over 4841.00 frames. ], tot_loss[loss=0.1755, simple_loss=0.229, pruned_loss=0.06101, over 902877.57 frames. 
], batch size: 47, lr: 1.38e-02, +2022-12-02 00:02:03,055 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=17791.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:02:11,058 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9884, 3.0956, 3.5925, 3.7793, 3.2248, 3.7800, 3.5476, 3.2715], + device='cuda:0'), covar=tensor([0.0135, 0.0250, 0.0173, 0.0136, 0.0174, 0.0121, 0.0158, 0.0237], + device='cuda:0'), in_proj_covar=tensor([0.0080, 0.0098, 0.0085, 0.0087, 0.0082, 0.0079, 0.0089, 0.0102], + device='cuda:0'), out_proj_covar=tensor([5.4393e-05, 6.7563e-05, 5.8889e-05, 5.8325e-05, 5.5139e-05, 5.3250e-05, + 5.9665e-05, 7.1419e-05], device='cuda:0') +2022-12-02 00:02:12,842 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.404e+02 2.187e+02 2.543e+02 3.217e+02 9.527e+02, threshold=5.086e+02, percent-clipped=6.0 +2022-12-02 00:02:47,188 INFO [train.py:876] Epoch 13, batch 650, loss[loss=0.2283, simple_loss=0.2748, pruned_loss=0.09088, over 4849.00 frames. ], tot_loss[loss=0.1767, simple_loss=0.2301, pruned_loss=0.0616, over 914872.69 frames. ], batch size: 47, lr: 1.37e-02, +2022-12-02 00:02:55,076 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=17845.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:03:07,997 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4608, 1.4076, 1.7091, 1.0070, 0.7721, 1.0616, 1.3656, 1.3113], + device='cuda:0'), covar=tensor([0.0227, 0.0191, 0.0172, 0.0221, 0.0422, 0.0261, 0.0207, 0.0248], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0030, 0.0030, 0.0031, 0.0037, 0.0033, 0.0038, 0.0029], + device='cuda:0'), out_proj_covar=tensor([2.9488e-05, 2.3441e-05, 2.4591e-05, 2.5385e-05, 3.0976e-05, 2.7885e-05, + 3.1484e-05, 2.4117e-05], device='cuda:0') +2022-12-02 00:03:18,206 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.88 vs. limit=2.0 +2022-12-02 00:03:25,237 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2250, 1.8030, 1.8442, 1.5335, 1.4073, 2.0600, 2.1015, 1.7404], + device='cuda:0'), covar=tensor([0.0657, 0.0357, 0.0924, 0.0642, 0.0573, 0.0690, 0.0419, 0.0486], + device='cuda:0'), in_proj_covar=tensor([0.0052, 0.0053, 0.0051, 0.0054, 0.0048, 0.0042, 0.0046, 0.0048], + device='cuda:0'), out_proj_covar=tensor([4.6095e-05, 4.6173e-05, 4.5661e-05, 4.7578e-05, 4.3447e-05, 3.7514e-05, + 4.0860e-05, 4.2835e-05], device='cuda:0') +2022-12-02 00:03:26,549 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 00:03:35,224 INFO [train.py:876] Epoch 13, batch 700, loss[loss=0.1751, simple_loss=0.2471, pruned_loss=0.05157, over 4840.00 frames. ], tot_loss[loss=0.1764, simple_loss=0.2309, pruned_loss=0.06095, over 926847.17 frames. 
], batch size: 47, lr: 1.37e-02, +2022-12-02 00:03:41,048 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=17893.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:03:48,658 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.283e+02 2.010e+02 2.551e+02 3.112e+02 8.166e+02, threshold=5.102e+02, percent-clipped=3.0 +2022-12-02 00:03:48,883 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7822, 2.2430, 2.9748, 2.5519, 2.6708, 2.4896, 2.5938, 3.0410], + device='cuda:0'), covar=tensor([0.0140, 0.1037, 0.0384, 0.1146, 0.0238, 0.0365, 0.0927, 0.0383], + device='cuda:0'), in_proj_covar=tensor([0.0056, 0.0087, 0.0067, 0.0095, 0.0069, 0.0065, 0.0088, 0.0071], + device='cuda:0'), out_proj_covar=tensor([6.5341e-05, 1.0167e-04, 7.8485e-05, 1.1143e-04, 7.9245e-05, 7.8255e-05, + 1.0110e-04, 7.7870e-05], device='cuda:0') +2022-12-02 00:04:15,417 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.67 vs. limit=5.0 +2022-12-02 00:04:23,504 INFO [train.py:876] Epoch 13, batch 750, loss[loss=0.1503, simple_loss=0.2009, pruned_loss=0.04983, over 4892.00 frames. ], tot_loss[loss=0.1762, simple_loss=0.2307, pruned_loss=0.06083, over 935274.46 frames. ], batch size: 29, lr: 1.37e-02, +2022-12-02 00:04:45,088 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4347, 4.9642, 4.6439, 5.1204, 4.4962, 3.9712, 5.0633, 4.6122], + device='cuda:0'), covar=tensor([0.0200, 0.0087, 0.0121, 0.0103, 0.0169, 0.0168, 0.0069, 0.0092], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0042, 0.0043, 0.0035, 0.0045, 0.0045, 0.0039, 0.0041], + device='cuda:0'), out_proj_covar=tensor([4.7919e-05, 3.8836e-05, 4.2093e-05, 3.3062e-05, 4.4818e-05, 4.4702e-05, + 3.4744e-05, 3.8726e-05], device='cuda:0') +2022-12-02 00:04:46,209 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=17960.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:05:12,314 INFO [train.py:876] Epoch 13, batch 800, loss[loss=0.1698, simple_loss=0.239, pruned_loss=0.05025, over 4836.00 frames. ], tot_loss[loss=0.1755, simple_loss=0.23, pruned_loss=0.06053, over 939392.02 frames. ], batch size: 49, lr: 1.37e-02, +2022-12-02 00:05:24,362 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-18000.pt +2022-12-02 00:05:27,501 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.216e+02 1.911e+02 2.333e+02 3.325e+02 6.453e+02, threshold=4.666e+02, percent-clipped=1.0 +2022-12-02 00:05:44,065 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18018.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:05:47,361 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18021.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:06:02,641 INFO [train.py:876] Epoch 13, batch 850, loss[loss=0.2211, simple_loss=0.2816, pruned_loss=0.08031, over 4697.00 frames. ], tot_loss[loss=0.1753, simple_loss=0.2299, pruned_loss=0.06029, over 940899.09 frames. 
], batch size: 63, lr: 1.37e-02, +2022-12-02 00:06:02,913 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2376, 1.7960, 2.2973, 1.6865, 1.8043, 1.9003, 1.8288, 1.7402], + device='cuda:0'), covar=tensor([0.1556, 0.0364, 0.0790, 0.0760, 0.0622, 0.2005, 0.0725, 0.0475], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0054, 0.0052, 0.0055, 0.0049, 0.0043, 0.0047, 0.0050], + device='cuda:0'), out_proj_covar=tensor([4.7296e-05, 4.7227e-05, 4.6324e-05, 4.7917e-05, 4.4146e-05, 3.8921e-05, + 4.1558e-05, 4.4416e-05], device='cuda:0') +2022-12-02 00:06:04,752 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18039.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:06:06,340 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3552, 4.2381, 4.6305, 4.2742, 4.3928, 4.4787, 4.3927, 4.1613], + device='cuda:0'), covar=tensor([0.0481, 0.0383, 0.0434, 0.0354, 0.0452, 0.0366, 0.0536, 0.0369], + device='cuda:0'), in_proj_covar=tensor([0.0131, 0.0098, 0.0114, 0.0108, 0.0093, 0.0114, 0.0133, 0.0094], + device='cuda:0'), out_proj_covar=tensor([1.0845e-04, 7.2025e-05, 9.6073e-05, 8.5891e-05, 7.6323e-05, 9.2376e-05, + 1.0861e-04, 7.3776e-05], device='cuda:0') +2022-12-02 00:06:20,725 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18056.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:06:30,220 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18066.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:06:49,675 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18086.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:06:50,610 INFO [train.py:876] Epoch 13, batch 900, loss[loss=0.1568, simple_loss=0.2172, pruned_loss=0.04826, over 4841.00 frames. ], tot_loss[loss=0.176, simple_loss=0.2303, pruned_loss=0.06084, over 941584.94 frames. ], batch size: 45, lr: 1.37e-02, +2022-12-02 00:06:50,650 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18087.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:07:03,962 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.224e+02 2.054e+02 2.511e+02 3.244e+02 7.375e+02, threshold=5.021e+02, percent-clipped=5.0 +2022-12-02 00:07:36,094 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7219, 3.4703, 3.6935, 3.8977, 3.6094, 2.6090, 4.0358, 2.0430], + device='cuda:0'), covar=tensor([0.0366, 0.0333, 0.0251, 0.0362, 0.0591, 0.2354, 0.0169, 0.3292], + device='cuda:0'), in_proj_covar=tensor([0.0096, 0.0083, 0.0077, 0.0113, 0.0123, 0.0147, 0.0069, 0.0163], + device='cuda:0'), out_proj_covar=tensor([1.0631e-04, 9.7443e-05, 9.3464e-05, 1.2328e-04, 1.3502e-04, 1.6234e-04, + 7.9118e-05, 1.7448e-04], device='cuda:0') +2022-12-02 00:07:39,842 INFO [train.py:876] Epoch 13, batch 950, loss[loss=0.1644, simple_loss=0.2194, pruned_loss=0.05468, over 4861.00 frames. ], tot_loss[loss=0.1765, simple_loss=0.2306, pruned_loss=0.06119, over 944857.80 frames. 
], batch size: 36, lr: 1.36e-02, +2022-12-02 00:07:57,494 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.3990, 5.2454, 5.7371, 5.4048, 5.5337, 5.5054, 5.4491, 5.2088], + device='cuda:0'), covar=tensor([0.0417, 0.0319, 0.0305, 0.0229, 0.0454, 0.0261, 0.0459, 0.0281], + device='cuda:0'), in_proj_covar=tensor([0.0133, 0.0100, 0.0118, 0.0109, 0.0095, 0.0118, 0.0137, 0.0094], + device='cuda:0'), out_proj_covar=tensor([1.0985e-04, 7.3652e-05, 9.8689e-05, 8.6126e-05, 7.7919e-05, 9.5268e-05, + 1.1160e-04, 7.3942e-05], device='cuda:0') +2022-12-02 00:08:15,893 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18174.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:08:16,962 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18175.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:08:28,103 INFO [train.py:876] Epoch 13, batch 1000, loss[loss=0.1816, simple_loss=0.2292, pruned_loss=0.06704, over 4816.00 frames. ], tot_loss[loss=0.1765, simple_loss=0.2304, pruned_loss=0.06126, over 944406.12 frames. ], batch size: 45, lr: 1.36e-02, +2022-12-02 00:08:36,995 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1650, 4.2767, 4.5706, 4.1435, 4.3186, 4.3576, 4.1910, 4.0565], + device='cuda:0'), covar=tensor([0.0818, 0.0468, 0.0719, 0.0464, 0.1070, 0.0559, 0.0874, 0.0640], + device='cuda:0'), in_proj_covar=tensor([0.0134, 0.0101, 0.0120, 0.0110, 0.0095, 0.0119, 0.0137, 0.0096], + device='cuda:0'), out_proj_covar=tensor([1.1028e-04, 7.4048e-05, 1.0073e-04, 8.6865e-05, 7.8102e-05, 9.6688e-05, + 1.1159e-04, 7.4800e-05], device='cuda:0') +2022-12-02 00:08:41,941 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.281e+02 2.097e+02 2.543e+02 3.210e+02 6.830e+02, threshold=5.087e+02, percent-clipped=3.0 +2022-12-02 00:09:15,551 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18235.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:09:16,451 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18236.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:09:17,251 INFO [train.py:876] Epoch 13, batch 1050, loss[loss=0.1876, simple_loss=0.2275, pruned_loss=0.07388, over 4832.00 frames. ], tot_loss[loss=0.1758, simple_loss=0.2295, pruned_loss=0.06108, over 946480.31 frames. ], batch size: 34, lr: 1.36e-02, +2022-12-02 00:10:05,492 INFO [train.py:876] Epoch 13, batch 1100, loss[loss=0.184, simple_loss=0.231, pruned_loss=0.06852, over 4903.00 frames. ], tot_loss[loss=0.1757, simple_loss=0.2302, pruned_loss=0.06059, over 951394.69 frames. 
], batch size: 31, lr: 1.36e-02, +2022-12-02 00:10:19,011 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.379e+02 1.942e+02 2.424e+02 3.108e+02 5.431e+02, threshold=4.848e+02, percent-clipped=1.0 +2022-12-02 00:10:33,524 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18316.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:10:52,886 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9170, 3.1563, 3.7568, 3.5971, 2.6541, 3.9596, 3.6879, 3.1108], + device='cuda:0'), covar=tensor([0.4762, 0.1236, 0.0652, 0.0357, 0.0736, 0.0650, 0.0298, 0.0851], + device='cuda:0'), in_proj_covar=tensor([0.0189, 0.0103, 0.0134, 0.0100, 0.0117, 0.0104, 0.0089, 0.0113], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:10:53,655 INFO [train.py:876] Epoch 13, batch 1150, loss[loss=0.1756, simple_loss=0.2306, pruned_loss=0.06026, over 4893.00 frames. ], tot_loss[loss=0.1762, simple_loss=0.2307, pruned_loss=0.06086, over 952917.15 frames. ], batch size: 30, lr: 1.36e-02, +2022-12-02 00:11:11,652 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18356.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:11:40,596 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18386.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:11:41,358 INFO [train.py:876] Epoch 13, batch 1200, loss[loss=0.2058, simple_loss=0.2633, pruned_loss=0.07414, over 4846.00 frames. ], tot_loss[loss=0.177, simple_loss=0.2313, pruned_loss=0.06136, over 950230.01 frames. ], batch size: 40, lr: 1.36e-02, +2022-12-02 00:11:54,964 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.386e+02 1.914e+02 2.381e+02 3.129e+02 7.565e+02, threshold=4.762e+02, percent-clipped=3.0 +2022-12-02 00:11:57,982 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18404.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:12:27,470 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18434.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:12:30,393 INFO [train.py:876] Epoch 13, batch 1250, loss[loss=0.1962, simple_loss=0.2629, pruned_loss=0.06472, over 4844.00 frames. ], tot_loss[loss=0.1752, simple_loss=0.2302, pruned_loss=0.0601, over 951124.84 frames. ], batch size: 49, lr: 1.35e-02, +2022-12-02 00:12:48,896 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2621, 1.5533, 0.8988, 1.5102, 1.5551, 1.1665, 1.3425, 1.4429], + device='cuda:0'), covar=tensor([0.0326, 0.0318, 0.0379, 0.0217, 0.0228, 0.0332, 0.0321, 0.0458], + device='cuda:0'), in_proj_covar=tensor([0.0042, 0.0041, 0.0045, 0.0033, 0.0037, 0.0039, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.9679e-05, 3.8381e-05, 4.3557e-05, 3.0923e-05, 3.4440e-05, 3.6674e-05, + 3.7216e-05, 3.4456e-05], device='cuda:0') +2022-12-02 00:13:04,955 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.62 vs. limit=2.0 +2022-12-02 00:13:19,023 INFO [train.py:876] Epoch 13, batch 1300, loss[loss=0.2155, simple_loss=0.268, pruned_loss=0.08152, over 4105.00 frames. ], tot_loss[loss=0.177, simple_loss=0.2319, pruned_loss=0.06108, over 949669.87 frames. 
], batch size: 72, lr: 1.35e-02, +2022-12-02 00:13:22,068 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18490.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:13:32,065 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.322e+02 2.188e+02 2.615e+02 3.397e+02 1.289e+03, threshold=5.230e+02, percent-clipped=4.0 +2022-12-02 00:13:39,145 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18508.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 00:14:00,294 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18530.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:14:01,411 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18531.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:14:01,585 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2031, 3.4354, 4.0056, 3.6343, 3.0303, 3.7734, 3.7671, 3.0335], + device='cuda:0'), covar=tensor([0.4947, 0.0852, 0.0554, 0.0294, 0.0634, 0.0618, 0.0396, 0.1193], + device='cuda:0'), in_proj_covar=tensor([0.0197, 0.0108, 0.0139, 0.0104, 0.0120, 0.0108, 0.0093, 0.0116], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:14:07,231 INFO [train.py:876] Epoch 13, batch 1350, loss[loss=0.2478, simple_loss=0.3018, pruned_loss=0.0969, over 4679.00 frames. ], tot_loss[loss=0.1773, simple_loss=0.2318, pruned_loss=0.06142, over 948929.88 frames. ], batch size: 63, lr: 1.35e-02, +2022-12-02 00:14:11,248 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5736, 1.9842, 1.1488, 1.7035, 1.9426, 1.4498, 1.6462, 1.8862], + device='cuda:0'), covar=tensor([0.0290, 0.0312, 0.0366, 0.0260, 0.0210, 0.0355, 0.0290, 0.0487], + device='cuda:0'), in_proj_covar=tensor([0.0042, 0.0041, 0.0045, 0.0033, 0.0037, 0.0039, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.9345e-05, 3.8169e-05, 4.3615e-05, 3.0826e-05, 3.4079e-05, 3.6627e-05, + 3.7036e-05, 3.4659e-05], device='cuda:0') +2022-12-02 00:14:20,552 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18551.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:14:37,747 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 00:14:38,336 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18569.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 00:14:55,067 INFO [train.py:876] Epoch 13, batch 1400, loss[loss=0.19, simple_loss=0.2482, pruned_loss=0.06592, over 4878.00 frames. ], tot_loss[loss=0.1759, simple_loss=0.23, pruned_loss=0.06091, over 947802.95 frames. ], batch size: 38, lr: 1.35e-02, +2022-12-02 00:15:09,142 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.289e+02 2.043e+02 2.558e+02 3.510e+02 7.960e+02, threshold=5.117e+02, percent-clipped=9.0 +2022-12-02 00:15:23,615 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18616.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:15:25,452 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-13.pt +2022-12-02 00:15:42,320 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 00:15:42,882 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. 
Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 00:15:43,178 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 00:15:43,210 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 00:15:44,320 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 00:15:44,643 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 00:15:46,633 INFO [train.py:876] Epoch 14, batch 0, loss[loss=0.1884, simple_loss=0.2531, pruned_loss=0.06184, over 4889.00 frames. ], tot_loss[loss=0.1884, simple_loss=0.2531, pruned_loss=0.06184, over 4889.00 frames. ], batch size: 44, lr: 1.30e-02, +2022-12-02 00:15:46,634 INFO [train.py:901] Computing validation loss +2022-12-02 00:15:56,937 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6536, 3.1190, 3.1011, 3.4888, 3.3054, 3.6126, 3.2277, 3.1130], + device='cuda:0'), covar=tensor([0.0178, 0.0310, 0.0268, 0.0204, 0.0200, 0.0153, 0.0236, 0.0301], + device='cuda:0'), in_proj_covar=tensor([0.0086, 0.0100, 0.0092, 0.0090, 0.0085, 0.0085, 0.0092, 0.0109], + device='cuda:0'), out_proj_covar=tensor([5.8809e-05, 6.9081e-05, 6.3458e-05, 6.0395e-05, 5.6726e-05, 5.6987e-05, + 6.2022e-05, 7.6162e-05], device='cuda:0') +2022-12-02 00:15:58,432 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4738, 3.2866, 2.9530, 3.0409, 2.4596, 2.4689, 3.4782, 1.8769], + device='cuda:0'), covar=tensor([0.0400, 0.0214, 0.0460, 0.0550, 0.1180, 0.2654, 0.0221, 0.2701], + device='cuda:0'), in_proj_covar=tensor([0.0099, 0.0082, 0.0079, 0.0118, 0.0124, 0.0151, 0.0070, 0.0163], + device='cuda:0'), out_proj_covar=tensor([1.0979e-04, 9.6899e-05, 9.5669e-05, 1.2808e-04, 1.3638e-04, 1.6599e-04, + 8.0092e-05, 1.7520e-04], device='cuda:0') +2022-12-02 00:16:02,123 INFO [train.py:910] Epoch 14, validation: loss=0.2269, simple_loss=0.2749, pruned_loss=0.08942, over 253132.00 frames. +2022-12-02 00:16:02,418 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 00:16:30,552 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2433, 2.8412, 2.9202, 3.3152, 3.1096, 3.3547, 3.1430, 2.6433], + device='cuda:0'), covar=tensor([0.0335, 0.0420, 0.0335, 0.0244, 0.0260, 0.0236, 0.0259, 0.0498], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0102, 0.0094, 0.0092, 0.0086, 0.0087, 0.0094, 0.0111], + device='cuda:0'), out_proj_covar=tensor([6.0054e-05, 7.0453e-05, 6.4908e-05, 6.1714e-05, 5.7644e-05, 5.8269e-05, + 6.3355e-05, 7.7575e-05], device='cuda:0') +2022-12-02 00:16:45,639 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18664.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:16:50,583 INFO [train.py:876] Epoch 14, batch 50, loss[loss=0.1572, simple_loss=0.216, pruned_loss=0.04924, over 4847.00 frames. ], tot_loss[loss=0.1582, simple_loss=0.2134, pruned_loss=0.05147, over 215385.77 frames. 
], batch size: 40, lr: 1.30e-02, +2022-12-02 00:16:55,739 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18674.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:17:17,541 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 00:17:21,398 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.535e+01 2.087e+02 2.512e+02 3.228e+02 6.286e+02, threshold=5.025e+02, percent-clipped=4.0 +2022-12-02 00:17:38,615 INFO [train.py:876] Epoch 14, batch 100, loss[loss=0.163, simple_loss=0.2254, pruned_loss=0.05024, over 4861.00 frames. ], tot_loss[loss=0.1634, simple_loss=0.2187, pruned_loss=0.05408, over 382274.25 frames. ], batch size: 36, lr: 1.30e-02, +2022-12-02 00:17:54,114 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18735.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:17:58,869 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 00:18:26,294 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 00:18:27,304 INFO [train.py:876] Epoch 14, batch 150, loss[loss=0.1037, simple_loss=0.1493, pruned_loss=0.02906, over 4683.00 frames. ], tot_loss[loss=0.1618, simple_loss=0.2174, pruned_loss=0.05309, over 510252.32 frames. ], batch size: 21, lr: 1.29e-02, +2022-12-02 00:18:28,474 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2329, 4.6487, 4.5554, 5.0077, 4.3622, 3.8708, 4.9050, 4.5602], + device='cuda:0'), covar=tensor([0.0192, 0.0091, 0.0109, 0.0129, 0.0137, 0.0171, 0.0067, 0.0101], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0042, 0.0043, 0.0036, 0.0045, 0.0046, 0.0039, 0.0042], + device='cuda:0'), out_proj_covar=tensor([4.8092e-05, 3.8331e-05, 4.1242e-05, 3.4079e-05, 4.4325e-05, 4.5345e-05, + 3.4091e-05, 3.9335e-05], device='cuda:0') +2022-12-02 00:18:34,998 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 00:18:45,145 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5121, 3.2475, 3.0794, 2.9312, 2.6470, 2.1893, 3.6515, 1.7245], + device='cuda:0'), covar=tensor([0.0331, 0.0205, 0.0287, 0.0654, 0.0970, 0.2545, 0.0180, 0.2559], + device='cuda:0'), in_proj_covar=tensor([0.0100, 0.0083, 0.0079, 0.0118, 0.0123, 0.0151, 0.0070, 0.0161], + device='cuda:0'), out_proj_covar=tensor([1.1043e-04, 9.7654e-05, 9.5729e-05, 1.2876e-04, 1.3520e-04, 1.6657e-04, + 8.0129e-05, 1.7318e-04], device='cuda:0') +2022-12-02 00:18:59,033 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.253e+02 1.810e+02 2.241e+02 2.972e+02 4.657e+02, threshold=4.483e+02, percent-clipped=0.0 +2022-12-02 00:19:16,889 INFO [train.py:876] Epoch 14, batch 200, loss[loss=0.1834, simple_loss=0.2369, pruned_loss=0.065, over 4805.00 frames. ], tot_loss[loss=0.1651, simple_loss=0.2208, pruned_loss=0.05472, over 610201.52 frames. 
], batch size: 42, lr: 1.29e-02, +2022-12-02 00:19:28,475 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18830.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:19:29,379 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=18831.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:19:43,701 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18846.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:20:01,442 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=18864.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:20:06,267 INFO [train.py:876] Epoch 14, batch 250, loss[loss=0.1751, simple_loss=0.2412, pruned_loss=0.0545, over 4803.00 frames. ], tot_loss[loss=0.1667, simple_loss=0.2223, pruned_loss=0.05552, over 687450.17 frames. ], batch size: 42, lr: 1.29e-02, +2022-12-02 00:20:13,955 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 00:20:15,003 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18878.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:20:15,877 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=18879.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:20:36,972 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.057e+02 2.049e+02 2.499e+02 3.093e+02 6.490e+02, threshold=4.998e+02, percent-clipped=6.0 +2022-12-02 00:20:54,178 INFO [train.py:876] Epoch 14, batch 300, loss[loss=0.1515, simple_loss=0.2109, pruned_loss=0.04605, over 4880.00 frames. ], tot_loss[loss=0.1698, simple_loss=0.2258, pruned_loss=0.05685, over 747244.15 frames. ], batch size: 37, lr: 1.29e-02, +2022-12-02 00:21:04,932 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=18930.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:21:08,766 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 00:21:43,343 INFO [train.py:876] Epoch 14, batch 350, loss[loss=0.1592, simple_loss=0.2249, pruned_loss=0.04675, over 4873.00 frames. ], tot_loss[loss=0.1691, simple_loss=0.2247, pruned_loss=0.05673, over 789611.76 frames. ], batch size: 39, lr: 1.29e-02, +2022-12-02 00:22:00,688 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.45 vs. limit=5.0 +2022-12-02 00:22:05,198 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=18991.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:22:14,916 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.032e+02 2.003e+02 2.326e+02 3.066e+02 6.351e+02, threshold=4.652e+02, percent-clipped=2.0 +2022-12-02 00:22:32,465 INFO [train.py:876] Epoch 14, batch 400, loss[loss=0.1721, simple_loss=0.2382, pruned_loss=0.05299, over 4812.00 frames. ], tot_loss[loss=0.1692, simple_loss=0.2253, pruned_loss=0.0566, over 825238.35 frames. ], batch size: 42, lr: 1.29e-02, +2022-12-02 00:22:43,105 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 00:22:43,155 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=19030.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:23:03,069 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.85 vs. limit=2.0 +2022-12-02 00:23:11,973 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. 
Duration: 0.9818125 +2022-12-02 00:23:20,887 INFO [train.py:876] Epoch 14, batch 450, loss[loss=0.1122, simple_loss=0.1658, pruned_loss=0.02934, over 4710.00 frames. ], tot_loss[loss=0.1702, simple_loss=0.2258, pruned_loss=0.05733, over 850581.79 frames. ], batch size: 23, lr: 1.28e-02, +2022-12-02 00:23:24,493 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4742, 4.2514, 4.0907, 4.5304, 3.8502, 3.5964, 4.4083, 4.0909], + device='cuda:0'), covar=tensor([0.0286, 0.0102, 0.0138, 0.0124, 0.0161, 0.0186, 0.0082, 0.0147], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0044, 0.0044, 0.0037, 0.0046, 0.0048, 0.0041, 0.0043], + device='cuda:0'), out_proj_covar=tensor([5.0425e-05, 4.0225e-05, 4.2316e-05, 3.5015e-05, 4.4701e-05, 4.6999e-05, + 3.6092e-05, 4.0191e-05], device='cuda:0') +2022-12-02 00:23:32,955 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.18 vs. limit=5.0 +2022-12-02 00:23:53,090 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.239e+02 2.118e+02 2.598e+02 3.138e+02 5.833e+02, threshold=5.195e+02, percent-clipped=3.0 +2022-12-02 00:24:10,733 INFO [train.py:876] Epoch 14, batch 500, loss[loss=0.187, simple_loss=0.2357, pruned_loss=0.06914, over 4848.00 frames. ], tot_loss[loss=0.1696, simple_loss=0.2253, pruned_loss=0.05697, over 870196.32 frames. ], batch size: 35, lr: 1.28e-02, +2022-12-02 00:24:20,621 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=19129.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:24:37,191 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=19146.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:24:54,951 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=19164.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:24:59,547 INFO [train.py:876] Epoch 14, batch 550, loss[loss=0.1738, simple_loss=0.2411, pruned_loss=0.05323, over 4799.00 frames. ], tot_loss[loss=0.1689, simple_loss=0.2249, pruned_loss=0.05651, over 887406.08 frames. ], batch size: 51, lr: 1.28e-02, +2022-12-02 00:25:20,376 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=19190.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 00:25:24,087 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=19194.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:25:30,859 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.051e+02 1.771e+02 2.344e+02 2.979e+02 4.881e+02, threshold=4.688e+02, percent-clipped=0.0 +2022-12-02 00:25:41,950 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=19212.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:25:48,651 INFO [train.py:876] Epoch 14, batch 600, loss[loss=0.218, simple_loss=0.2828, pruned_loss=0.07657, over 4701.00 frames. ], tot_loss[loss=0.1674, simple_loss=0.2232, pruned_loss=0.05579, over 900568.05 frames. ], batch size: 63, lr: 1.28e-02, +2022-12-02 00:25:51,173 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.76 vs. 
limit=2.0 +2022-12-02 00:26:22,930 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8036, 1.4836, 1.8929, 1.6938, 1.6373, 2.1672, 1.5334, 1.8378], + device='cuda:0'), covar=tensor([0.0851, 0.0627, 0.0685, 0.0513, 0.0612, 0.0394, 0.0759, 0.0501], + device='cuda:0'), in_proj_covar=tensor([0.0057, 0.0056, 0.0054, 0.0059, 0.0052, 0.0046, 0.0049, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.0247e-05, 4.9283e-05, 4.8218e-05, 5.1620e-05, 4.7027e-05, 4.1116e-05, + 4.3987e-05, 4.6844e-05], device='cuda:0') +2022-12-02 00:26:35,338 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.85 vs. limit=2.0 +2022-12-02 00:26:37,611 INFO [train.py:876] Epoch 14, batch 650, loss[loss=0.1187, simple_loss=0.1674, pruned_loss=0.03498, over 4655.00 frames. ], tot_loss[loss=0.1681, simple_loss=0.2241, pruned_loss=0.0561, over 912009.85 frames. ], batch size: 21, lr: 1.28e-02, +2022-12-02 00:26:54,047 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=19286.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:27:08,147 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.387e+02 1.942e+02 2.531e+02 3.187e+02 1.024e+03, threshold=5.062e+02, percent-clipped=6.0 +2022-12-02 00:27:20,507 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 00:27:25,683 INFO [train.py:876] Epoch 14, batch 700, loss[loss=0.1718, simple_loss=0.2378, pruned_loss=0.0529, over 4800.00 frames. ], tot_loss[loss=0.168, simple_loss=0.2244, pruned_loss=0.05586, over 923396.05 frames. ], batch size: 54, lr: 1.28e-02, +2022-12-02 00:27:36,619 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=19330.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:28:14,449 INFO [train.py:876] Epoch 14, batch 750, loss[loss=0.1929, simple_loss=0.2457, pruned_loss=0.07009, over 4872.00 frames. ], tot_loss[loss=0.1675, simple_loss=0.2239, pruned_loss=0.05558, over 927597.32 frames. ], batch size: 39, lr: 1.27e-02, +2022-12-02 00:28:23,853 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=19378.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:28:28,809 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0271, 3.9786, 4.3881, 4.0296, 4.2564, 4.1863, 4.0247, 3.9293], + device='cuda:0'), covar=tensor([0.0680, 0.0459, 0.0570, 0.0441, 0.0557, 0.0491, 0.0740, 0.0446], + device='cuda:0'), in_proj_covar=tensor([0.0136, 0.0104, 0.0123, 0.0113, 0.0097, 0.0121, 0.0139, 0.0098], + device='cuda:0'), out_proj_covar=tensor([1.1037e-04, 7.6331e-05, 1.0254e-04, 8.7151e-05, 7.8647e-05, 9.7162e-05, + 1.1192e-04, 7.5970e-05], device='cuda:0') +2022-12-02 00:28:39,359 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=19394.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:28:46,283 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.106e+02 2.002e+02 2.409e+02 3.181e+02 6.638e+02, threshold=4.818e+02, percent-clipped=4.0 +2022-12-02 00:29:03,599 INFO [train.py:876] Epoch 14, batch 800, loss[loss=0.1457, simple_loss=0.2093, pruned_loss=0.04102, over 4890.00 frames. ], tot_loss[loss=0.1676, simple_loss=0.2238, pruned_loss=0.05571, over 934451.85 frames. 
], batch size: 37, lr: 1.27e-02, +2022-12-02 00:29:21,407 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6554, 2.4237, 2.3329, 2.3273, 1.9400, 2.0602, 1.2862, 2.4518], + device='cuda:0'), covar=tensor([0.0547, 0.0763, 0.0664, 0.0643, 0.1018, 0.1217, 0.1303, 0.0613], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0068, 0.0086, 0.0072, 0.0095, 0.0077, 0.0092, 0.0075], + device='cuda:0'), out_proj_covar=tensor([6.5753e-05, 6.9257e-05, 8.4457e-05, 7.1757e-05, 9.2049e-05, 7.7159e-05, + 9.0732e-05, 7.3705e-05], device='cuda:0') +2022-12-02 00:29:38,879 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=19455.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:29:52,406 INFO [train.py:876] Epoch 14, batch 850, loss[loss=0.1425, simple_loss=0.2064, pruned_loss=0.03926, over 4920.00 frames. ], tot_loss[loss=0.1669, simple_loss=0.223, pruned_loss=0.05538, over 939690.46 frames. ], batch size: 32, lr: 1.27e-02, +2022-12-02 00:30:08,031 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=19485.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:30:09,409 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.58 vs. limit=5.0 +2022-12-02 00:30:23,027 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.262e+02 1.950e+02 2.208e+02 2.997e+02 9.023e+02, threshold=4.416e+02, percent-clipped=4.0 +2022-12-02 00:30:27,145 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4186, 3.0917, 3.2008, 3.1443, 2.6237, 2.1126, 3.6837, 1.7504], + device='cuda:0'), covar=tensor([0.0440, 0.0329, 0.0271, 0.0695, 0.1257, 0.3028, 0.0161, 0.2717], + device='cuda:0'), in_proj_covar=tensor([0.0102, 0.0085, 0.0080, 0.0119, 0.0127, 0.0153, 0.0071, 0.0164], + device='cuda:0'), out_proj_covar=tensor([1.1284e-04, 1.0108e-04, 9.7888e-05, 1.3003e-04, 1.3970e-04, 1.6926e-04, + 8.1460e-05, 1.7688e-04], device='cuda:0') +2022-12-02 00:30:30,206 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3380, 3.1152, 3.3781, 3.2671, 3.4365, 2.7785, 3.2990, 3.6114], + device='cuda:0'), covar=tensor([0.0254, 0.0360, 0.0177, 0.0289, 0.0199, 0.0556, 0.0193, 0.0283], + device='cuda:0'), in_proj_covar=tensor([0.0092, 0.0109, 0.0087, 0.0098, 0.0082, 0.0123, 0.0076, 0.0080], + device='cuda:0'), out_proj_covar=tensor([7.7804e-05, 9.1531e-05, 7.3259e-05, 8.4256e-05, 6.8891e-05, 1.0568e-04, + 6.7638e-05, 6.9666e-05], device='cuda:0') +2022-12-02 00:30:40,659 INFO [train.py:876] Epoch 14, batch 900, loss[loss=0.2008, simple_loss=0.2665, pruned_loss=0.06753, over 4658.00 frames. ], tot_loss[loss=0.1666, simple_loss=0.2227, pruned_loss=0.05526, over 941330.41 frames. ], batch size: 63, lr: 1.27e-02, +2022-12-02 00:31:19,159 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9663, 2.8453, 2.6915, 2.5210, 2.3969, 2.3543, 1.7259, 2.8169], + device='cuda:0'), covar=tensor([0.0424, 0.0734, 0.0670, 0.0675, 0.0759, 0.0907, 0.1087, 0.0391], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0067, 0.0085, 0.0071, 0.0092, 0.0077, 0.0090, 0.0072], + device='cuda:0'), out_proj_covar=tensor([6.4668e-05, 6.8481e-05, 8.3671e-05, 7.0377e-05, 9.0006e-05, 7.7046e-05, + 8.8978e-05, 7.1235e-05], device='cuda:0') +2022-12-02 00:31:28,657 INFO [train.py:876] Epoch 14, batch 950, loss[loss=0.1685, simple_loss=0.236, pruned_loss=0.0505, over 4843.00 frames. ], tot_loss[loss=0.1673, simple_loss=0.2236, pruned_loss=0.05549, over 943454.24 frames. 
], batch size: 47, lr: 1.27e-02, +2022-12-02 00:31:40,131 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.91 vs. limit=2.0 +2022-12-02 00:31:45,422 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=19586.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:31:59,936 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.344e+02 1.889e+02 2.318e+02 2.985e+02 6.681e+02, threshold=4.637e+02, percent-clipped=3.0 +2022-12-02 00:32:16,892 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=19618.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:32:17,764 INFO [train.py:876] Epoch 14, batch 1000, loss[loss=0.1064, simple_loss=0.164, pruned_loss=0.02444, over 4704.00 frames. ], tot_loss[loss=0.1648, simple_loss=0.2213, pruned_loss=0.05421, over 946892.32 frames. ], batch size: 23, lr: 1.27e-02, +2022-12-02 00:32:32,775 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=19634.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:33:06,618 INFO [train.py:876] Epoch 14, batch 1050, loss[loss=0.1459, simple_loss=0.1982, pruned_loss=0.04677, over 4910.00 frames. ], tot_loss[loss=0.1653, simple_loss=0.2214, pruned_loss=0.05457, over 945707.33 frames. ], batch size: 29, lr: 1.27e-02, +2022-12-02 00:33:16,511 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=19679.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:33:37,690 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.159e+02 1.863e+02 2.267e+02 2.777e+02 5.272e+02, threshold=4.533e+02, percent-clipped=3.0 +2022-12-02 00:33:55,435 INFO [train.py:876] Epoch 14, batch 1100, loss[loss=0.1323, simple_loss=0.1945, pruned_loss=0.03501, over 4788.00 frames. ], tot_loss[loss=0.166, simple_loss=0.222, pruned_loss=0.05496, over 946324.30 frames. ], batch size: 33, lr: 1.26e-02, +2022-12-02 00:34:20,029 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6054, 3.2230, 3.3419, 3.1214, 2.8152, 2.2708, 3.7473, 1.8245], + device='cuda:0'), covar=tensor([0.0358, 0.0332, 0.0323, 0.0618, 0.1142, 0.2845, 0.0184, 0.2914], + device='cuda:0'), in_proj_covar=tensor([0.0105, 0.0088, 0.0083, 0.0122, 0.0130, 0.0154, 0.0073, 0.0168], + device='cuda:0'), out_proj_covar=tensor([1.1627e-04, 1.0426e-04, 1.0104e-04, 1.3317e-04, 1.4310e-04, 1.7078e-04, + 8.3988e-05, 1.8071e-04], device='cuda:0') +2022-12-02 00:34:25,630 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=19750.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:34:43,816 INFO [train.py:876] Epoch 14, batch 1150, loss[loss=0.1758, simple_loss=0.2462, pruned_loss=0.05276, over 4783.00 frames. ], tot_loss[loss=0.168, simple_loss=0.2242, pruned_loss=0.05592, over 947161.92 frames. 
], batch size: 51, lr: 1.26e-02, +2022-12-02 00:34:59,535 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=19785.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 00:35:02,343 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9122, 3.0753, 3.5330, 3.4512, 2.3411, 3.3008, 3.4279, 2.6425], + device='cuda:0'), covar=tensor([0.3960, 0.0931, 0.0667, 0.0347, 0.0825, 0.0666, 0.0425, 0.1085], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0105, 0.0136, 0.0100, 0.0114, 0.0106, 0.0095, 0.0114], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:35:14,989 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.452e+02 1.981e+02 2.324e+02 2.988e+02 8.123e+02, threshold=4.648e+02, percent-clipped=1.0 +2022-12-02 00:35:21,843 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9988, 2.5423, 3.0454, 2.6251, 2.7977, 2.9537, 2.8214, 3.1361], + device='cuda:0'), covar=tensor([0.0148, 0.1002, 0.0426, 0.1207, 0.0292, 0.0294, 0.1048, 0.0382], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0090, 0.0070, 0.0097, 0.0072, 0.0071, 0.0093, 0.0075], + device='cuda:0'), out_proj_covar=tensor([6.8236e-05, 1.0434e-04, 8.2343e-05, 1.1394e-04, 8.2303e-05, 8.4483e-05, + 1.0679e-04, 8.3545e-05], device='cuda:0') +2022-12-02 00:35:32,104 INFO [train.py:876] Epoch 14, batch 1200, loss[loss=0.1719, simple_loss=0.2298, pruned_loss=0.05693, over 4814.00 frames. ], tot_loss[loss=0.1697, simple_loss=0.2254, pruned_loss=0.057, over 945331.55 frames. ], batch size: 42, lr: 1.26e-02, +2022-12-02 00:35:45,585 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=19833.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 00:35:45,781 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2925, 1.3836, 1.0758, 0.8528, 1.0505, 1.1351, 1.2074, 1.2381], + device='cuda:0'), covar=tensor([0.0252, 0.0143, 0.0238, 0.0220, 0.0200, 0.0145, 0.0215, 0.0151], + device='cuda:0'), in_proj_covar=tensor([0.0035, 0.0030, 0.0030, 0.0033, 0.0037, 0.0032, 0.0038, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.9076e-05, 2.3213e-05, 2.5047e-05, 2.6338e-05, 3.0701e-05, 2.6230e-05, + 3.1667e-05, 2.5555e-05], device='cuda:0') +2022-12-02 00:36:17,567 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.65 vs. limit=2.0 +2022-12-02 00:36:20,030 INFO [train.py:876] Epoch 14, batch 1250, loss[loss=0.1369, simple_loss=0.2081, pruned_loss=0.03283, over 4858.00 frames. ], tot_loss[loss=0.1697, simple_loss=0.2261, pruned_loss=0.05667, over 949520.17 frames. 
], batch size: 35, lr: 1.26e-02, +2022-12-02 00:36:48,176 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.2012, 5.5353, 5.3575, 5.7636, 5.4811, 4.6551, 5.7085, 5.2157], + device='cuda:0'), covar=tensor([0.0094, 0.0059, 0.0055, 0.0063, 0.0060, 0.0081, 0.0039, 0.0094], + device='cuda:0'), in_proj_covar=tensor([0.0047, 0.0042, 0.0043, 0.0035, 0.0044, 0.0046, 0.0040, 0.0040], + device='cuda:0'), out_proj_covar=tensor([4.6780e-05, 3.8746e-05, 4.1131e-05, 3.2843e-05, 4.2537e-05, 4.5377e-05, + 3.4418e-05, 3.6879e-05], device='cuda:0') +2022-12-02 00:36:51,105 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.197e+02 1.926e+02 2.457e+02 2.992e+02 5.732e+02, threshold=4.913e+02, percent-clipped=3.0 +2022-12-02 00:37:06,309 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2793, 1.7560, 2.0881, 1.9492, 1.6493, 2.1172, 1.9610, 1.9970], + device='cuda:0'), covar=tensor([0.0560, 0.0537, 0.0819, 0.0564, 0.0669, 0.0490, 0.0688, 0.0423], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0055, 0.0054, 0.0057, 0.0050, 0.0045, 0.0049, 0.0051], + device='cuda:0'), out_proj_covar=tensor([4.8529e-05, 4.8979e-05, 4.8710e-05, 5.0509e-05, 4.5859e-05, 4.1048e-05, + 4.4208e-05, 4.5758e-05], device='cuda:0') +2022-12-02 00:37:09,060 INFO [train.py:876] Epoch 14, batch 1300, loss[loss=0.1808, simple_loss=0.2457, pruned_loss=0.05801, over 4864.00 frames. ], tot_loss[loss=0.1694, simple_loss=0.2255, pruned_loss=0.05669, over 948668.83 frames. ], batch size: 39, lr: 1.26e-02, +2022-12-02 00:37:12,262 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1832, 2.5719, 3.2271, 2.6612, 3.2462, 2.2975, 3.1019, 3.4836], + device='cuda:0'), covar=tensor([0.0272, 0.0547, 0.0215, 0.0424, 0.0293, 0.0710, 0.0192, 0.0233], + device='cuda:0'), in_proj_covar=tensor([0.0094, 0.0108, 0.0088, 0.0098, 0.0084, 0.0122, 0.0078, 0.0082], + device='cuda:0'), out_proj_covar=tensor([7.9424e-05, 9.1256e-05, 7.3460e-05, 8.4084e-05, 7.0139e-05, 1.0485e-04, + 6.9442e-05, 7.0674e-05], device='cuda:0') +2022-12-02 00:37:18,751 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2369, 4.7522, 4.5099, 4.9939, 4.5054, 3.7901, 4.9900, 4.8751], + device='cuda:0'), covar=tensor([0.0399, 0.0325, 0.0289, 0.0264, 0.0279, 0.0295, 0.0213, 0.0222], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0042, 0.0043, 0.0035, 0.0043, 0.0046, 0.0040, 0.0040], + device='cuda:0'), out_proj_covar=tensor([4.6321e-05, 3.8178e-05, 4.0751e-05, 3.2533e-05, 4.2078e-05, 4.5151e-05, + 3.4370e-05, 3.6658e-05], device='cuda:0') +2022-12-02 00:37:57,796 INFO [train.py:876] Epoch 14, batch 1350, loss[loss=0.1921, simple_loss=0.2449, pruned_loss=0.06961, over 4864.00 frames. ], tot_loss[loss=0.1694, simple_loss=0.2255, pruned_loss=0.05665, over 948352.49 frames. 
], batch size: 44, lr: 1.26e-02, +2022-12-02 00:38:02,691 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=19974.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:38:08,720 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3790, 3.6510, 3.5836, 3.3871, 3.6474, 3.6565, 3.6345, 3.6451], + device='cuda:0'), covar=tensor([0.1432, 0.0332, 0.0581, 0.0452, 0.0489, 0.0304, 0.0384, 0.0420], + device='cuda:0'), in_proj_covar=tensor([0.0174, 0.0139, 0.0149, 0.0137, 0.0143, 0.0139, 0.0141, 0.0132], + device='cuda:0'), out_proj_covar=tensor([1.1598e-04, 9.2582e-05, 9.8835e-05, 9.2111e-05, 9.3699e-05, 9.1248e-05, + 9.3513e-05, 8.7291e-05], device='cuda:0') +2022-12-02 00:38:28,076 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-20000.pt +2022-12-02 00:38:31,283 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.257e+02 2.005e+02 2.497e+02 2.966e+02 1.215e+03, threshold=4.994e+02, percent-clipped=5.0 +2022-12-02 00:38:32,542 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8548, 2.4407, 2.4713, 2.6324, 2.0647, 2.3351, 1.4900, 2.5936], + device='cuda:0'), covar=tensor([0.0481, 0.1070, 0.0866, 0.0573, 0.1209, 0.1477, 0.1337, 0.0477], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0067, 0.0086, 0.0071, 0.0095, 0.0078, 0.0091, 0.0074], + device='cuda:0'), out_proj_covar=tensor([6.6743e-05, 6.8488e-05, 8.4603e-05, 7.1053e-05, 9.2243e-05, 7.8250e-05, + 9.0091e-05, 7.3199e-05], device='cuda:0') +2022-12-02 00:38:48,920 INFO [train.py:876] Epoch 14, batch 1400, loss[loss=0.1708, simple_loss=0.2326, pruned_loss=0.05451, over 4867.00 frames. ], tot_loss[loss=0.1678, simple_loss=0.2235, pruned_loss=0.05603, over 949451.52 frames. ], batch size: 39, lr: 1.25e-02, +2022-12-02 00:38:50,141 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20020.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:39:18,917 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-14.pt +2022-12-02 00:39:35,055 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 00:39:35,632 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 00:39:36,303 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 00:39:36,335 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 00:39:37,139 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 00:39:37,841 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 00:39:38,763 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20050.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:39:39,507 INFO [train.py:876] Epoch 15, batch 0, loss[loss=0.2028, simple_loss=0.2609, pruned_loss=0.07235, over 4800.00 frames. 
], tot_loss[loss=0.2028, simple_loss=0.2609, pruned_loss=0.07235, over 4800.00 frames. ], batch size: 54, lr: 1.21e-02, +2022-12-02 00:39:39,508 INFO [train.py:901] Computing validation loss +2022-12-02 00:39:41,337 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1774, 2.0154, 2.3310, 2.3361, 2.1162, 2.5709, 2.5349, 2.0913], + device='cuda:0'), covar=tensor([0.0827, 0.0523, 0.1094, 0.0504, 0.0465, 0.0542, 0.0369, 0.0665], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0055, 0.0055, 0.0056, 0.0051, 0.0045, 0.0049, 0.0052], + device='cuda:0'), out_proj_covar=tensor([4.8498e-05, 4.8675e-05, 4.9158e-05, 4.9904e-05, 4.6123e-05, 4.0873e-05, + 4.3734e-05, 4.6033e-05], device='cuda:0') +2022-12-02 00:39:55,065 INFO [train.py:910] Epoch 15, validation: loss=0.227, simple_loss=0.2728, pruned_loss=0.0906, over 253132.00 frames. +2022-12-02 00:39:55,066 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 00:40:24,484 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20081.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:40:32,163 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20089.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:40:40,603 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20098.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:40:43,458 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.165e+02 1.968e+02 2.388e+02 2.937e+02 9.329e+02, threshold=4.777e+02, percent-clipped=3.0 +2022-12-02 00:40:43,491 INFO [train.py:876] Epoch 15, batch 50, loss[loss=0.2021, simple_loss=0.252, pruned_loss=0.07607, over 4078.00 frames. ], tot_loss[loss=0.1584, simple_loss=0.2152, pruned_loss=0.05081, over 216131.91 frames. ], batch size: 72, lr: 1.21e-02, +2022-12-02 00:40:54,316 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20112.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:41:06,989 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 00:41:30,898 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20150.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:41:31,726 INFO [train.py:876] Epoch 15, batch 100, loss[loss=0.1049, simple_loss=0.1685, pruned_loss=0.02063, over 4906.00 frames. ], tot_loss[loss=0.1592, simple_loss=0.2161, pruned_loss=0.0511, over 381219.57 frames. ], batch size: 29, lr: 1.21e-02, +2022-12-02 00:41:47,852 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 00:41:53,825 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20173.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:42:09,965 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 00:42:20,677 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.199e+02 1.894e+02 2.285e+02 2.781e+02 5.351e+02, threshold=4.570e+02, percent-clipped=3.0 +2022-12-02 00:42:20,710 INFO [train.py:876] Epoch 15, batch 150, loss[loss=0.153, simple_loss=0.2149, pruned_loss=0.04559, over 4859.00 frames. ], tot_loss[loss=0.1607, simple_loss=0.2182, pruned_loss=0.05154, over 510137.75 frames. 
], batch size: 39, lr: 1.21e-02, +2022-12-02 00:42:23,748 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20204.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:42:35,676 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9459, 2.3246, 2.8418, 2.2480, 2.6592, 1.7540, 2.5750, 2.8293], + device='cuda:0'), covar=tensor([0.0132, 0.0769, 0.0345, 0.0954, 0.0276, 0.0600, 0.0839, 0.0407], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0090, 0.0070, 0.0096, 0.0072, 0.0071, 0.0094, 0.0075], + device='cuda:0'), out_proj_covar=tensor([6.8448e-05, 1.0486e-04, 8.2849e-05, 1.1255e-04, 8.2411e-05, 8.4832e-05, + 1.0815e-04, 8.3815e-05], device='cuda:0') +2022-12-02 00:43:02,788 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4377, 2.6952, 3.2430, 2.9973, 3.1864, 3.3907, 3.0196, 3.7899], + device='cuda:0'), covar=tensor([0.0119, 0.0751, 0.0364, 0.0883, 0.0138, 0.0289, 0.0733, 0.0237], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0090, 0.0070, 0.0096, 0.0072, 0.0071, 0.0094, 0.0075], + device='cuda:0'), out_proj_covar=tensor([6.8388e-05, 1.0431e-04, 8.2825e-05, 1.1265e-04, 8.1930e-05, 8.4606e-05, + 1.0806e-04, 8.3622e-05], device='cuda:0') +2022-12-02 00:43:09,412 INFO [train.py:876] Epoch 15, batch 200, loss[loss=0.1304, simple_loss=0.2, pruned_loss=0.03041, over 4727.00 frames. ], tot_loss[loss=0.1619, simple_loss=0.2201, pruned_loss=0.05189, over 606908.94 frames. ], batch size: 27, lr: 1.21e-02, +2022-12-02 00:43:22,965 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20265.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:43:31,627 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20274.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:43:31,865 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6096, 3.3324, 3.6564, 3.5074, 3.8336, 3.0505, 3.5172, 3.9124], + device='cuda:0'), covar=tensor([0.0287, 0.0391, 0.0186, 0.0266, 0.0284, 0.0463, 0.0203, 0.0269], + device='cuda:0'), in_proj_covar=tensor([0.0096, 0.0108, 0.0088, 0.0098, 0.0086, 0.0125, 0.0079, 0.0083], + device='cuda:0'), out_proj_covar=tensor([8.1206e-05, 9.1573e-05, 7.3987e-05, 8.3960e-05, 7.1952e-05, 1.0740e-04, + 7.0418e-05, 7.1924e-05], device='cuda:0') +2022-12-02 00:43:55,859 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 00:43:57,808 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.220e+02 1.910e+02 2.228e+02 2.813e+02 4.422e+02, threshold=4.457e+02, percent-clipped=0.0 +2022-12-02 00:43:57,841 INFO [train.py:876] Epoch 15, batch 250, loss[loss=0.1402, simple_loss=0.2076, pruned_loss=0.03639, over 4801.00 frames. ], tot_loss[loss=0.1618, simple_loss=0.2203, pruned_loss=0.05168, over 683935.78 frames. ], batch size: 33, lr: 1.20e-02, +2022-12-02 00:44:14,174 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 00:44:18,431 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20322.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:44:47,044 INFO [train.py:876] Epoch 15, batch 300, loss[loss=0.1337, simple_loss=0.1649, pruned_loss=0.05124, over 4661.00 frames. ], tot_loss[loss=0.1594, simple_loss=0.2174, pruned_loss=0.0507, over 742947.65 frames. 
], batch size: 21, lr: 1.20e-02, +2022-12-02 00:44:52,971 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8954, 3.6928, 3.6087, 3.7515, 3.1821, 2.7812, 4.0258, 1.8578], + device='cuda:0'), covar=tensor([0.0393, 0.0359, 0.0286, 0.0390, 0.0907, 0.2407, 0.0169, 0.3593], + device='cuda:0'), in_proj_covar=tensor([0.0107, 0.0090, 0.0085, 0.0128, 0.0130, 0.0159, 0.0075, 0.0175], + device='cuda:0'), out_proj_covar=tensor([1.1877e-04, 1.0730e-04, 1.0370e-04, 1.3963e-04, 1.4410e-04, 1.7788e-04, + 8.6572e-05, 1.8896e-04], device='cuda:0') +2022-12-02 00:44:54,821 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 00:45:11,257 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20376.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:45:18,692 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 00:45:23,017 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20388.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:45:35,333 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.288e+02 1.869e+02 2.182e+02 2.739e+02 5.809e+02, threshold=4.365e+02, percent-clipped=3.0 +2022-12-02 00:45:35,366 INFO [train.py:876] Epoch 15, batch 350, loss[loss=0.1655, simple_loss=0.2261, pruned_loss=0.05243, over 4857.00 frames. ], tot_loss[loss=0.161, simple_loss=0.2191, pruned_loss=0.0515, over 787217.03 frames. ], batch size: 40, lr: 1.20e-02, +2022-12-02 00:45:52,788 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20418.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:46:18,858 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20445.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:46:22,406 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 00:46:23,030 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20449.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:46:24,654 INFO [train.py:876] Epoch 15, batch 400, loss[loss=0.207, simple_loss=0.2591, pruned_loss=0.07749, over 4841.00 frames. ], tot_loss[loss=0.1602, simple_loss=0.2179, pruned_loss=0.0512, over 823620.96 frames. ], batch size: 47, lr: 1.20e-02, +2022-12-02 00:46:29,040 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 00:46:29,807 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4977, 4.1488, 4.1851, 4.0442, 3.8922, 4.0205, 4.0206, 4.2861], + device='cuda:0'), covar=tensor([0.1149, 0.0157, 0.0229, 0.0234, 0.0228, 0.0259, 0.0188, 0.0263], + device='cuda:0'), in_proj_covar=tensor([0.0219, 0.0142, 0.0150, 0.0147, 0.0147, 0.0146, 0.0139, 0.0149], + device='cuda:0'), out_proj_covar=tensor([1.4398e-04, 9.2469e-05, 9.8170e-05, 9.5856e-05, 9.5909e-05, 9.5650e-05, + 9.1922e-05, 1.0108e-04], device='cuda:0') +2022-12-02 00:46:32,463 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 00:46:41,440 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20468.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:46:52,356 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20479.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:46:58,007 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. 
Duration: 0.9818125 +2022-12-02 00:47:13,511 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.386e+02 1.898e+02 2.297e+02 2.909e+02 6.890e+02, threshold=4.594e+02, percent-clipped=5.0 +2022-12-02 00:47:13,544 INFO [train.py:876] Epoch 15, batch 450, loss[loss=0.1354, simple_loss=0.1916, pruned_loss=0.03956, over 4821.00 frames. ], tot_loss[loss=0.1599, simple_loss=0.2174, pruned_loss=0.05119, over 854680.03 frames. ], batch size: 25, lr: 1.20e-02, +2022-12-02 00:47:48,596 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 00:47:52,147 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0948, 3.0297, 3.6256, 3.4951, 2.9558, 3.8860, 3.6164, 2.7847], + device='cuda:0'), covar=tensor([0.3958, 0.0849, 0.0607, 0.0430, 0.0622, 0.0513, 0.0260, 0.1041], + device='cuda:0'), in_proj_covar=tensor([0.0190, 0.0106, 0.0139, 0.0109, 0.0118, 0.0108, 0.0098, 0.0121], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:48:01,014 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.63 vs. limit=2.0 +2022-12-02 00:48:02,572 INFO [train.py:876] Epoch 15, batch 500, loss[loss=0.1738, simple_loss=0.2257, pruned_loss=0.06093, over 4927.00 frames. ], tot_loss[loss=0.1603, simple_loss=0.2176, pruned_loss=0.0515, over 876056.34 frames. ], batch size: 32, lr: 1.20e-02, +2022-12-02 00:48:08,045 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.76 vs. limit=2.0 +2022-12-02 00:48:08,862 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6877, 4.2894, 4.3755, 4.1528, 3.6706, 4.2241, 3.9135, 4.3821], + device='cuda:0'), covar=tensor([0.0920, 0.0176, 0.0175, 0.0228, 0.0273, 0.0306, 0.0221, 0.0254], + device='cuda:0'), in_proj_covar=tensor([0.0222, 0.0145, 0.0151, 0.0150, 0.0149, 0.0148, 0.0141, 0.0152], + device='cuda:0'), out_proj_covar=tensor([1.4614e-04, 9.4196e-05, 9.9096e-05, 9.7204e-05, 9.7620e-05, 9.7525e-05, + 9.3338e-05, 1.0269e-04], device='cuda:0') +2022-12-02 00:48:10,769 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7872, 2.4400, 2.3499, 2.8851, 2.0235, 2.4201, 1.4321, 2.6122], + device='cuda:0'), covar=tensor([0.0641, 0.0917, 0.0879, 0.0495, 0.1171, 0.1231, 0.1418, 0.0528], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0069, 0.0086, 0.0071, 0.0093, 0.0076, 0.0089, 0.0075], + device='cuda:0'), out_proj_covar=tensor([6.8124e-05, 7.0479e-05, 8.4525e-05, 7.0810e-05, 9.0952e-05, 7.6940e-05, + 8.8281e-05, 7.4273e-05], device='cuda:0') +2022-12-02 00:48:11,603 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20560.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:48:33,064 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9409, 2.9139, 3.4674, 3.4007, 2.4999, 3.6807, 3.4459, 2.7833], + device='cuda:0'), covar=tensor([0.4113, 0.0822, 0.0657, 0.0390, 0.0767, 0.0545, 0.0253, 0.0806], + device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0106, 0.0138, 0.0109, 0.0117, 0.0107, 0.0097, 0.0120], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:48:36,039 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6224, 2.0255, 2.2796, 3.3375, 2.5265, 3.1814, 3.3143, 3.4611], + device='cuda:0'), covar=tensor([0.0252, 0.1550, 0.1732, 0.0326, 0.0610, 0.0382, 0.0496, 0.0403], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0083, 0.0101, 0.0059, 0.0067, 0.0058, 0.0069, 
0.0076], + device='cuda:0'), out_proj_covar=tensor([6.0727e-05, 9.3863e-05, 1.0919e-04, 6.6527e-05, 6.6974e-05, 6.5716e-05, + 7.6295e-05, 7.3583e-05], device='cuda:0') +2022-12-02 00:48:36,573 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0 +2022-12-02 00:48:39,034 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=20588.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:48:51,262 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.268e+02 1.779e+02 2.130e+02 2.722e+02 5.336e+02, threshold=4.259e+02, percent-clipped=1.0 +2022-12-02 00:48:51,295 INFO [train.py:876] Epoch 15, batch 550, loss[loss=0.1697, simple_loss=0.2215, pruned_loss=0.0589, over 4830.00 frames. ], tot_loss[loss=0.1591, simple_loss=0.2165, pruned_loss=0.05086, over 893747.01 frames. ], batch size: 34, lr: 1.20e-02, +2022-12-02 00:49:14,863 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0697, 3.0362, 3.5788, 3.6529, 3.3710, 4.0261, 3.8460, 2.6594], + device='cuda:0'), covar=tensor([0.4513, 0.1483, 0.0739, 0.0339, 0.0548, 0.0728, 0.0281, 0.1242], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0105, 0.0137, 0.0106, 0.0116, 0.0106, 0.0097, 0.0118], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:49:37,858 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=20649.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:49:39,663 INFO [train.py:876] Epoch 15, batch 600, loss[loss=0.1682, simple_loss=0.2192, pruned_loss=0.05859, over 4860.00 frames. ], tot_loss[loss=0.1603, simple_loss=0.2175, pruned_loss=0.05156, over 908070.33 frames. ], batch size: 36, lr: 1.19e-02, +2022-12-02 00:50:03,606 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20676.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:50:27,679 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.331e+02 1.985e+02 2.418e+02 3.183e+02 5.539e+02, threshold=4.836e+02, percent-clipped=7.0 +2022-12-02 00:50:27,712 INFO [train.py:876] Epoch 15, batch 650, loss[loss=0.192, simple_loss=0.2526, pruned_loss=0.06572, over 4820.00 frames. ], tot_loss[loss=0.1603, simple_loss=0.2175, pruned_loss=0.05158, over 918945.10 frames. ], batch size: 45, lr: 1.19e-02, +2022-12-02 00:50:39,915 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 00:50:49,728 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20724.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:51:08,984 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20744.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:51:10,199 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20745.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:51:15,812 INFO [train.py:876] Epoch 15, batch 700, loss[loss=0.1826, simple_loss=0.2342, pruned_loss=0.06546, over 4832.00 frames. ], tot_loss[loss=0.1614, simple_loss=0.2186, pruned_loss=0.05213, over 926606.02 frames. ], batch size: 34, lr: 1.19e-02, +2022-12-02 00:51:20,175 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.82 vs. 
limit=5.0 +2022-12-02 00:51:32,558 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20768.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:51:37,526 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2900, 3.1757, 3.5866, 3.2792, 3.5784, 2.7944, 3.4664, 3.6669], + device='cuda:0'), covar=tensor([0.0381, 0.0473, 0.0233, 0.0352, 0.0270, 0.0652, 0.0220, 0.0342], + device='cuda:0'), in_proj_covar=tensor([0.0094, 0.0108, 0.0087, 0.0097, 0.0085, 0.0123, 0.0078, 0.0084], + device='cuda:0'), out_proj_covar=tensor([7.9675e-05, 9.1617e-05, 7.3220e-05, 8.3502e-05, 7.1719e-05, 1.0551e-04, + 6.9774e-05, 7.2594e-05], device='cuda:0') +2022-12-02 00:51:38,254 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20774.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:51:56,449 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20793.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:52:04,220 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.041e+02 1.862e+02 2.258e+02 2.779e+02 7.676e+02, threshold=4.516e+02, percent-clipped=3.0 +2022-12-02 00:52:04,253 INFO [train.py:876] Epoch 15, batch 750, loss[loss=0.1019, simple_loss=0.1674, pruned_loss=0.01821, over 4778.00 frames. ], tot_loss[loss=0.1608, simple_loss=0.2185, pruned_loss=0.05161, over 933731.16 frames. ], batch size: 26, lr: 1.19e-02, +2022-12-02 00:52:19,190 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20816.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:52:53,062 INFO [train.py:876] Epoch 15, batch 800, loss[loss=0.1455, simple_loss=0.2042, pruned_loss=0.04342, over 4905.00 frames. ], tot_loss[loss=0.1597, simple_loss=0.2172, pruned_loss=0.0511, over 937596.40 frames. ], batch size: 30, lr: 1.19e-02, +2022-12-02 00:53:01,794 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=20860.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:53:04,152 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.50 vs. limit=2.0 +2022-12-02 00:53:40,273 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.188e+02 1.916e+02 2.315e+02 3.004e+02 5.618e+02, threshold=4.629e+02, percent-clipped=4.0 +2022-12-02 00:53:40,306 INFO [train.py:876] Epoch 15, batch 850, loss[loss=0.2, simple_loss=0.2457, pruned_loss=0.07715, over 4918.00 frames. ], tot_loss[loss=0.1613, simple_loss=0.2186, pruned_loss=0.05196, over 937507.61 frames. ], batch size: 32, lr: 1.19e-02, +2022-12-02 00:53:41,591 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2889, 3.4558, 3.7453, 3.4003, 3.2337, 3.4242, 3.5204, 3.7834], + device='cuda:0'), covar=tensor([0.1031, 0.0295, 0.0279, 0.0328, 0.0337, 0.0359, 0.0297, 0.0324], + device='cuda:0'), in_proj_covar=tensor([0.0219, 0.0143, 0.0148, 0.0149, 0.0148, 0.0149, 0.0140, 0.0150], + device='cuda:0'), out_proj_covar=tensor([1.4348e-04, 9.3247e-05, 9.6725e-05, 9.7031e-05, 9.7125e-05, 9.7660e-05, + 9.2262e-05, 1.0168e-04], device='cuda:0') +2022-12-02 00:53:47,157 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=20908.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:54:11,026 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. 
limit=2.0 +2022-12-02 00:54:22,424 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=20944.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:54:29,214 INFO [train.py:876] Epoch 15, batch 900, loss[loss=0.1284, simple_loss=0.1822, pruned_loss=0.03734, over 4736.00 frames. ], tot_loss[loss=0.1616, simple_loss=0.219, pruned_loss=0.05205, over 939307.71 frames. ], batch size: 27, lr: 1.19e-02, +2022-12-02 00:54:31,754 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.04 vs. limit=2.0 +2022-12-02 00:54:36,119 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2784, 3.5655, 3.6214, 3.2753, 3.7039, 3.5898, 3.3801, 3.6399], + device='cuda:0'), covar=tensor([0.1305, 0.0387, 0.0398, 0.0441, 0.0320, 0.0322, 0.0481, 0.0410], + device='cuda:0'), in_proj_covar=tensor([0.0175, 0.0138, 0.0147, 0.0134, 0.0141, 0.0138, 0.0141, 0.0135], + device='cuda:0'), out_proj_covar=tensor([1.1675e-04, 9.2738e-05, 9.7540e-05, 9.0358e-05, 9.2471e-05, 9.0628e-05, + 9.3050e-05, 8.9208e-05], device='cuda:0') +2022-12-02 00:54:38,452 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 00:54:44,409 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2550, 2.5936, 2.9815, 2.9603, 2.7956, 3.0035, 2.7398, 3.3587], + device='cuda:0'), covar=tensor([0.0147, 0.1090, 0.0517, 0.1082, 0.0300, 0.0416, 0.1031, 0.0379], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0096, 0.0076, 0.0103, 0.0075, 0.0076, 0.0099, 0.0081], + device='cuda:0'), out_proj_covar=tensor([6.9286e-05, 1.1129e-04, 8.9475e-05, 1.2103e-04, 8.6037e-05, 9.1413e-05, + 1.1388e-04, 9.0245e-05], device='cuda:0') +2022-12-02 00:55:17,701 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.225e+02 1.879e+02 2.219e+02 2.749e+02 7.809e+02, threshold=4.437e+02, percent-clipped=4.0 +2022-12-02 00:55:17,735 INFO [train.py:876] Epoch 15, batch 950, loss[loss=0.1334, simple_loss=0.1926, pruned_loss=0.03705, over 4791.00 frames. ], tot_loss[loss=0.1622, simple_loss=0.2197, pruned_loss=0.0524, over 941270.30 frames. ], batch size: 26, lr: 1.19e-02, +2022-12-02 00:55:21,220 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.87 vs. limit=2.0 +2022-12-02 00:55:58,836 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21044.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:56:05,135 INFO [train.py:876] Epoch 15, batch 1000, loss[loss=0.1688, simple_loss=0.2287, pruned_loss=0.05445, over 4841.00 frames. ], tot_loss[loss=0.1623, simple_loss=0.2196, pruned_loss=0.05245, over 944644.11 frames. 
], batch size: 41, lr: 1.18e-02, +2022-12-02 00:56:06,279 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21052.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:56:15,960 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0082, 3.3324, 3.8308, 3.4950, 3.3553, 3.7511, 4.0269, 3.1125], + device='cuda:0'), covar=tensor([0.4768, 0.1155, 0.0759, 0.0295, 0.0543, 0.0812, 0.0310, 0.1183], + device='cuda:0'), in_proj_covar=tensor([0.0187, 0.0106, 0.0142, 0.0109, 0.0118, 0.0109, 0.0102, 0.0120], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 00:56:27,121 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21074.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:56:44,253 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=21092.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:56:52,833 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.099e+02 1.916e+02 2.150e+02 2.867e+02 1.073e+03, threshold=4.299e+02, percent-clipped=2.0 +2022-12-02 00:56:52,866 INFO [train.py:876] Epoch 15, batch 1050, loss[loss=0.1572, simple_loss=0.2248, pruned_loss=0.04479, over 4787.00 frames. ], tot_loss[loss=0.1621, simple_loss=0.2201, pruned_loss=0.05206, over 946716.34 frames. ], batch size: 51, lr: 1.18e-02, +2022-12-02 00:56:56,104 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4091, 1.6090, 1.5427, 1.2151, 1.0429, 1.1896, 1.4254, 1.3470], + device='cuda:0'), covar=tensor([0.0309, 0.0164, 0.0223, 0.0237, 0.0287, 0.0178, 0.0228, 0.0214], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0030, 0.0030, 0.0033, 0.0037, 0.0032, 0.0037, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.7793e-05, 2.3013e-05, 2.4838e-05, 2.6180e-05, 3.0598e-05, 2.6481e-05, + 3.0592e-05, 2.5757e-05], device='cuda:0') +2022-12-02 00:57:05,022 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21113.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:57:09,909 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21118.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:57:13,674 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=21122.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:57:41,923 INFO [train.py:876] Epoch 15, batch 1100, loss[loss=0.1604, simple_loss=0.2246, pruned_loss=0.04806, over 4832.00 frames. ], tot_loss[loss=0.1606, simple_loss=0.2188, pruned_loss=0.05119, over 948346.03 frames. 
], batch size: 41, lr: 1.18e-02, +2022-12-02 00:58:09,148 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2514, 3.0063, 3.2880, 3.2797, 2.7006, 2.2931, 3.3309, 1.7054], + device='cuda:0'), covar=tensor([0.0499, 0.0344, 0.0381, 0.0650, 0.1288, 0.2768, 0.0250, 0.3207], + device='cuda:0'), in_proj_covar=tensor([0.0108, 0.0088, 0.0084, 0.0126, 0.0128, 0.0154, 0.0076, 0.0169], + device='cuda:0'), out_proj_covar=tensor([1.1929e-04, 1.0650e-04, 1.0348e-04, 1.3741e-04, 1.4219e-04, 1.7296e-04, + 8.7502e-05, 1.8226e-04], device='cuda:0') +2022-12-02 00:58:09,206 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21179.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:58:30,944 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.176e+02 1.877e+02 2.231e+02 3.021e+02 5.873e+02, threshold=4.461e+02, percent-clipped=8.0 +2022-12-02 00:58:30,977 INFO [train.py:876] Epoch 15, batch 1150, loss[loss=0.1373, simple_loss=0.1852, pruned_loss=0.04465, over 4730.00 frames. ], tot_loss[loss=0.161, simple_loss=0.2188, pruned_loss=0.05165, over 950261.95 frames. ], batch size: 27, lr: 1.18e-02, +2022-12-02 00:58:55,221 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7767, 2.0352, 1.7164, 1.8279, 1.7874, 2.3011, 2.0595, 1.8914], + device='cuda:0'), covar=tensor([0.1004, 0.0427, 0.1210, 0.0476, 0.0695, 0.0522, 0.0585, 0.0447], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0058, 0.0055, 0.0060, 0.0053, 0.0047, 0.0050, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.1913e-05, 5.1667e-05, 5.0149e-05, 5.3502e-05, 4.8079e-05, 4.3191e-05, + 4.5749e-05, 4.6979e-05], device='cuda:0') +2022-12-02 00:59:12,338 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21244.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 00:59:19,183 INFO [train.py:876] Epoch 15, batch 1200, loss[loss=0.1474, simple_loss=0.1972, pruned_loss=0.04886, over 4900.00 frames. ], tot_loss[loss=0.1615, simple_loss=0.2193, pruned_loss=0.05188, over 950713.71 frames. ], batch size: 31, lr: 1.18e-02, +2022-12-02 00:59:43,079 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7389, 2.6573, 3.2074, 3.3977, 2.8561, 3.6346, 3.2687, 2.7239], + device='cuda:0'), covar=tensor([0.0229, 0.0462, 0.0285, 0.0259, 0.0293, 0.0220, 0.0244, 0.0535], + device='cuda:0'), in_proj_covar=tensor([0.0101, 0.0112, 0.0110, 0.0102, 0.0095, 0.0104, 0.0107, 0.0124], + device='cuda:0'), out_proj_covar=tensor([6.8764e-05, 7.6839e-05, 7.5339e-05, 6.8395e-05, 6.3337e-05, 6.9663e-05, + 7.2434e-05, 8.7014e-05], device='cuda:0') +2022-12-02 00:59:59,386 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=21292.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:00:07,877 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.154e+02 1.955e+02 2.354e+02 2.946e+02 5.792e+02, threshold=4.708e+02, percent-clipped=5.0 +2022-12-02 01:00:07,910 INFO [train.py:876] Epoch 15, batch 1250, loss[loss=0.1693, simple_loss=0.2307, pruned_loss=0.05392, over 4845.00 frames. ], tot_loss[loss=0.1619, simple_loss=0.22, pruned_loss=0.0519, over 948090.25 frames. 
], batch size: 49, lr: 1.18e-02, +2022-12-02 01:00:18,403 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21312.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:00:33,763 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21328.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:00:56,338 INFO [train.py:876] Epoch 15, batch 1300, loss[loss=0.1604, simple_loss=0.2266, pruned_loss=0.04706, over 4887.00 frames. ], tot_loss[loss=0.162, simple_loss=0.2196, pruned_loss=0.05216, over 947876.91 frames. ], batch size: 38, lr: 1.18e-02, +2022-12-02 01:01:17,541 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21373.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:01:33,212 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21389.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:01:44,501 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.264e+02 1.906e+02 2.352e+02 2.851e+02 1.172e+03, threshold=4.704e+02, percent-clipped=0.0 +2022-12-02 01:01:44,536 INFO [train.py:876] Epoch 15, batch 1350, loss[loss=0.111, simple_loss=0.1601, pruned_loss=0.03095, over 3758.00 frames. ], tot_loss[loss=0.1616, simple_loss=0.2194, pruned_loss=0.05191, over 947211.61 frames. ], batch size: 14, lr: 1.17e-02, +2022-12-02 01:01:51,558 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21408.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:02:18,329 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8336, 3.1683, 3.6943, 3.9289, 3.7571, 3.9963, 3.7729, 3.4512], + device='cuda:0'), covar=tensor([0.0217, 0.0326, 0.0221, 0.0188, 0.0157, 0.0162, 0.0187, 0.0315], + device='cuda:0'), in_proj_covar=tensor([0.0099, 0.0110, 0.0108, 0.0100, 0.0092, 0.0102, 0.0106, 0.0121], + device='cuda:0'), out_proj_covar=tensor([6.7486e-05, 7.5627e-05, 7.4212e-05, 6.6918e-05, 6.1825e-05, 6.8538e-05, + 7.1830e-05, 8.4724e-05], device='cuda:0') +2022-12-02 01:02:31,806 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 01:02:34,025 INFO [train.py:876] Epoch 15, batch 1400, loss[loss=0.1219, simple_loss=0.1877, pruned_loss=0.02805, over 4884.00 frames. ], tot_loss[loss=0.1598, simple_loss=0.2168, pruned_loss=0.05137, over 944659.73 frames. ], batch size: 29, lr: 1.17e-02, +2022-12-02 01:02:38,523 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.11 vs. limit=5.0 +2022-12-02 01:02:56,992 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21474.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:03:04,058 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.70 vs. limit=5.0 +2022-12-02 01:03:05,694 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-15.pt +2022-12-02 01:03:14,804 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 01:03:15,756 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. 
Number of tokens: 29 +2022-12-02 01:03:16,055 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:03:16,086 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 01:03:17,246 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 01:03:17,567 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 01:03:19,222 INFO [train.py:876] Epoch 16, batch 0, loss[loss=0.1832, simple_loss=0.2406, pruned_loss=0.06293, over 4862.00 frames. ], tot_loss[loss=0.1832, simple_loss=0.2406, pruned_loss=0.06293, over 4862.00 frames. ], batch size: 47, lr: 1.13e-02, +2022-12-02 01:03:19,223 INFO [train.py:901] Computing validation loss +2022-12-02 01:03:32,488 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4722, 3.3513, 3.4824, 3.3219, 2.5126, 2.3071, 3.5647, 2.0329], + device='cuda:0'), covar=tensor([0.0442, 0.0360, 0.0494, 0.0651, 0.1471, 0.3507, 0.0211, 0.2760], + device='cuda:0'), in_proj_covar=tensor([0.0106, 0.0088, 0.0084, 0.0126, 0.0128, 0.0154, 0.0077, 0.0168], + device='cuda:0'), out_proj_covar=tensor([1.1757e-04, 1.0657e-04, 1.0363e-04, 1.3830e-04, 1.4173e-04, 1.7244e-04, + 8.8800e-05, 1.8203e-04], device='cuda:0') +2022-12-02 01:03:34,850 INFO [train.py:910] Epoch 16, validation: loss=0.2301, simple_loss=0.2754, pruned_loss=0.09241, over 253132.00 frames. +2022-12-02 01:03:34,851 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 01:03:51,318 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.127e+02 1.864e+02 2.224e+02 2.706e+02 5.023e+02, threshold=4.448e+02, percent-clipped=3.0 +2022-12-02 01:04:14,357 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9169, 2.9406, 3.4866, 3.1740, 2.6274, 3.3894, 3.5062, 2.7550], + device='cuda:0'), covar=tensor([0.4372, 0.0943, 0.0680, 0.0525, 0.0855, 0.0780, 0.0465, 0.1107], + device='cuda:0'), in_proj_covar=tensor([0.0189, 0.0110, 0.0142, 0.0112, 0.0121, 0.0110, 0.0104, 0.0120], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 01:04:23,589 INFO [train.py:876] Epoch 16, batch 50, loss[loss=0.09463, simple_loss=0.1559, pruned_loss=0.01667, over 4784.00 frames. ], tot_loss[loss=0.1526, simple_loss=0.21, pruned_loss=0.04766, over 214415.63 frames. ], batch size: 26, lr: 1.13e-02, +2022-12-02 01:04:28,779 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4359, 4.8314, 4.6432, 5.0947, 4.6059, 3.9765, 5.0590, 4.7324], + device='cuda:0'), covar=tensor([0.0179, 0.0079, 0.0091, 0.0105, 0.0100, 0.0141, 0.0067, 0.0092], + device='cuda:0'), in_proj_covar=tensor([0.0051, 0.0046, 0.0047, 0.0038, 0.0048, 0.0049, 0.0045, 0.0044], + device='cuda:0'), out_proj_covar=tensor([5.0860e-05, 4.2008e-05, 4.3178e-05, 3.4561e-05, 4.5629e-05, 4.7432e-05, + 3.8704e-05, 3.9373e-05], device='cuda:0') +2022-12-02 01:04:45,893 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 01:05:13,050 INFO [train.py:876] Epoch 16, batch 100, loss[loss=0.1408, simple_loss=0.2116, pruned_loss=0.03506, over 4830.00 frames. ], tot_loss[loss=0.1506, simple_loss=0.2084, pruned_loss=0.04639, over 376492.39 frames. 
], batch size: 34, lr: 1.13e-02, +2022-12-02 01:05:23,883 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21595.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:05:24,318 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.54 vs. limit=2.0 +2022-12-02 01:05:29,914 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.046e+02 2.044e+02 2.357e+02 2.801e+02 7.423e+02, threshold=4.715e+02, percent-clipped=1.0 +2022-12-02 01:05:33,001 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:05:33,536 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.75 vs. limit=2.0 +2022-12-02 01:05:34,345 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5354, 3.9376, 4.1286, 3.6950, 3.6389, 3.8166, 3.9180, 4.1583], + device='cuda:0'), covar=tensor([0.0913, 0.0255, 0.0239, 0.0341, 0.0271, 0.0539, 0.0262, 0.0306], + device='cuda:0'), in_proj_covar=tensor([0.0228, 0.0147, 0.0154, 0.0154, 0.0155, 0.0153, 0.0145, 0.0158], + device='cuda:0'), out_proj_covar=tensor([1.4916e-04, 9.5443e-05, 1.0065e-04, 1.0002e-04, 1.0155e-04, 1.0031e-04, + 9.5584e-05, 1.0606e-04], device='cuda:0') +2022-12-02 01:05:42,167 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21613.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:05:54,777 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 01:06:02,590 INFO [train.py:876] Epoch 16, batch 150, loss[loss=0.1582, simple_loss=0.2157, pruned_loss=0.05037, over 4867.00 frames. ], tot_loss[loss=0.1511, simple_loss=0.209, pruned_loss=0.04659, over 504502.59 frames. ], batch size: 36, lr: 1.13e-02, +2022-12-02 01:06:24,198 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21656.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:06:35,698 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21668.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:06:41,573 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21674.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:06:51,225 INFO [train.py:876] Epoch 16, batch 200, loss[loss=0.1705, simple_loss=0.2385, pruned_loss=0.05125, over 4861.00 frames. ], tot_loss[loss=0.1522, simple_loss=0.2094, pruned_loss=0.04745, over 601806.28 frames. 
], batch size: 40, lr: 1.13e-02, +2022-12-02 01:06:51,288 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21684.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:07:07,446 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.569e+01 1.764e+02 2.167e+02 2.810e+02 8.678e+02, threshold=4.333e+02, percent-clipped=1.0 +2022-12-02 01:07:14,416 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21708.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:07:18,510 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3772, 1.1063, 1.3797, 1.1768, 0.8315, 1.2970, 1.2144, 1.3635], + device='cuda:0'), covar=tensor([0.0253, 0.0168, 0.0198, 0.0177, 0.0270, 0.0146, 0.0213, 0.0146], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0030, 0.0030, 0.0032, 0.0037, 0.0032, 0.0037, 0.0032], + device='cuda:0'), out_proj_covar=tensor([2.7700e-05, 2.3437e-05, 2.4618e-05, 2.5997e-05, 3.0239e-05, 2.5606e-05, + 3.0887e-05, 2.5910e-05], device='cuda:0') +2022-12-02 01:07:26,528 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 01:07:39,651 INFO [train.py:876] Epoch 16, batch 250, loss[loss=0.1366, simple_loss=0.1947, pruned_loss=0.03925, over 4907.00 frames. ], tot_loss[loss=0.1517, simple_loss=0.2095, pruned_loss=0.04699, over 677584.72 frames. ], batch size: 32, lr: 1.13e-02, +2022-12-02 01:07:46,339 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 01:08:01,033 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=21756.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:08:06,021 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4743, 4.4550, 4.9704, 4.4559, 4.7739, 4.6965, 4.3890, 4.4529], + device='cuda:0'), covar=tensor([0.0748, 0.0475, 0.0553, 0.0450, 0.0564, 0.0489, 0.1133, 0.0516], + device='cuda:0'), in_proj_covar=tensor([0.0151, 0.0111, 0.0131, 0.0123, 0.0107, 0.0132, 0.0155, 0.0107], + device='cuda:0'), out_proj_covar=tensor([1.1881e-04, 7.9324e-05, 1.0769e-04, 9.2884e-05, 8.5006e-05, 1.0357e-04, + 1.2361e-04, 7.9979e-05], device='cuda:0') +2022-12-02 01:08:08,144 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21763.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:08:18,850 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21774.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:08:28,582 INFO [train.py:876] Epoch 16, batch 300, loss[loss=0.1259, simple_loss=0.1877, pruned_loss=0.03199, over 4899.00 frames. ], tot_loss[loss=0.1509, simple_loss=0.209, pruned_loss=0.0464, over 740562.56 frames. ], batch size: 30, lr: 1.13e-02, +2022-12-02 01:08:39,543 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7857, 4.7623, 5.3653, 4.6525, 5.1342, 5.0418, 4.7228, 4.6806], + device='cuda:0'), covar=tensor([0.0934, 0.0422, 0.0477, 0.0489, 0.0536, 0.0426, 0.0959, 0.0457], + device='cuda:0'), in_proj_covar=tensor([0.0151, 0.0111, 0.0130, 0.0123, 0.0106, 0.0132, 0.0154, 0.0108], + device='cuda:0'), out_proj_covar=tensor([1.1844e-04, 7.9264e-05, 1.0695e-04, 9.2597e-05, 8.4493e-05, 1.0320e-04, + 1.2294e-04, 8.0500e-05], device='cuda:0') +2022-12-02 01:08:45,589 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.268e+02 1.701e+02 2.069e+02 2.529e+02 5.521e+02, threshold=4.139e+02, percent-clipped=5.0 +2022-12-02 01:08:47,476 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 01:08:50,061 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.96 vs. limit=2.0 +2022-12-02 01:08:55,343 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 01:09:03,742 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0150, 2.0953, 1.2014, 2.2165, 1.8675, 1.9058, 2.1839, 1.8908], + device='cuda:0'), covar=tensor([0.0196, 0.0283, 0.0428, 0.0204, 0.0254, 0.0305, 0.0210, 0.0494], + device='cuda:0'), in_proj_covar=tensor([0.0044, 0.0045, 0.0048, 0.0035, 0.0040, 0.0040, 0.0041, 0.0038], + device='cuda:0'), out_proj_covar=tensor([4.0828e-05, 4.2161e-05, 4.6550e-05, 3.2437e-05, 3.6617e-05, 3.8236e-05, + 3.8067e-05, 3.7301e-05], device='cuda:0') +2022-12-02 01:09:04,608 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.3660, 5.2538, 5.8275, 5.2340, 5.5758, 5.5652, 5.3444, 5.3885], + device='cuda:0'), covar=tensor([0.0589, 0.0404, 0.0333, 0.0449, 0.0477, 0.0336, 0.0565, 0.0291], + device='cuda:0'), in_proj_covar=tensor([0.0150, 0.0110, 0.0129, 0.0122, 0.0106, 0.0130, 0.0152, 0.0107], + device='cuda:0'), out_proj_covar=tensor([1.1760e-04, 7.8447e-05, 1.0576e-04, 9.2454e-05, 8.3846e-05, 1.0166e-04, + 1.2128e-04, 7.9826e-05], device='cuda:0') +2022-12-02 01:09:06,546 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=21822.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:09:09,052 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21824.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:09:18,393 INFO [train.py:876] Epoch 16, batch 350, loss[loss=0.1423, simple_loss=0.2162, pruned_loss=0.03416, over 4825.00 frames. ], tot_loss[loss=0.1515, simple_loss=0.2102, pruned_loss=0.04644, over 787048.58 frames. ], batch size: 34, lr: 1.13e-02, +2022-12-02 01:09:34,289 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21850.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:09:35,182 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9101, 3.0963, 3.4074, 3.2765, 2.5545, 3.4724, 3.4999, 2.6655], + device='cuda:0'), covar=tensor([0.4136, 0.0754, 0.0872, 0.0482, 0.0835, 0.0774, 0.0442, 0.1300], + device='cuda:0'), in_proj_covar=tensor([0.0190, 0.0109, 0.0144, 0.0113, 0.0121, 0.0112, 0.0105, 0.0123], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 01:10:07,251 INFO [train.py:876] Epoch 16, batch 400, loss[loss=0.1267, simple_loss=0.1862, pruned_loss=0.03354, over 4853.00 frames. ], tot_loss[loss=0.152, simple_loss=0.211, pruned_loss=0.04646, over 825444.64 frames. ], batch size: 36, lr: 1.12e-02, +2022-12-02 01:10:23,625 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.313e+02 1.814e+02 2.277e+02 2.759e+02 5.219e+02, threshold=4.555e+02, percent-clipped=7.0 +2022-12-02 01:10:27,547 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7685, 1.7400, 2.2196, 3.4907, 2.7024, 3.1516, 3.0312, 3.7734], + device='cuda:0'), covar=tensor([0.0225, 0.1935, 0.2132, 0.0385, 0.0489, 0.0418, 0.0552, 0.0256], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0090, 0.0106, 0.0062, 0.0071, 0.0059, 0.0071, 0.0076], + device='cuda:0'), out_proj_covar=tensor([6.3238e-05, 1.0078e-04, 1.1440e-04, 6.9706e-05, 7.1066e-05, 6.6396e-05, + 7.8616e-05, 7.4495e-05], device='cuda:0') +2022-12-02 01:10:28,399 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. 
Duration: 0.97775 +2022-12-02 01:10:33,549 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=21911.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:10:41,306 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4014, 1.5248, 0.9415, 1.6007, 1.4850, 1.4839, 1.3696, 1.5782], + device='cuda:0'), covar=tensor([0.0276, 0.0353, 0.0383, 0.0219, 0.0280, 0.0303, 0.0313, 0.0372], + device='cuda:0'), in_proj_covar=tensor([0.0044, 0.0045, 0.0048, 0.0035, 0.0040, 0.0040, 0.0041, 0.0038], + device='cuda:0'), out_proj_covar=tensor([4.1045e-05, 4.2604e-05, 4.6881e-05, 3.2809e-05, 3.6738e-05, 3.8316e-05, + 3.8315e-05, 3.7600e-05], device='cuda:0') +2022-12-02 01:10:50,420 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 01:10:54,695 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:10:55,714 INFO [train.py:876] Epoch 16, batch 450, loss[loss=0.1366, simple_loss=0.1943, pruned_loss=0.03947, over 4895.00 frames. ], tot_loss[loss=0.1533, simple_loss=0.2124, pruned_loss=0.04707, over 852991.21 frames. ], batch size: 30, lr: 1.12e-02, +2022-12-02 01:11:12,512 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21951.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:11:29,029 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21968.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:11:29,985 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=21969.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:11:31,130 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=21970.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:11:33,137 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2149, 2.5861, 3.4057, 3.2159, 3.1427, 3.2098, 3.0830, 3.7131], + device='cuda:0'), covar=tensor([0.0157, 0.1000, 0.0424, 0.1003, 0.0188, 0.0376, 0.0911, 0.0275], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0098, 0.0080, 0.0108, 0.0077, 0.0077, 0.0101, 0.0083], + device='cuda:0'), out_proj_covar=tensor([7.4682e-05, 1.1428e-04, 9.3969e-05, 1.2605e-04, 8.8181e-05, 9.2894e-05, + 1.1655e-04, 9.2338e-05], device='cuda:0') +2022-12-02 01:11:44,663 INFO [train.py:876] Epoch 16, batch 500, loss[loss=0.166, simple_loss=0.2245, pruned_loss=0.05378, over 4807.00 frames. ], tot_loss[loss=0.1521, simple_loss=0.2115, pruned_loss=0.04637, over 878183.25 frames. 
], batch size: 42, lr: 1.12e-02, +2022-12-02 01:11:44,740 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=21984.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:12:00,600 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-22000.pt +2022-12-02 01:12:03,812 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.234e+02 1.865e+02 2.190e+02 2.792e+02 6.849e+02, threshold=4.379e+02, percent-clipped=7.0 +2022-12-02 01:12:18,657 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22016.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:12:33,286 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22031.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:12:34,074 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22032.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:12:35,930 INFO [train.py:876] Epoch 16, batch 550, loss[loss=0.1447, simple_loss=0.2058, pruned_loss=0.04183, over 4863.00 frames. ], tot_loss[loss=0.1513, simple_loss=0.2107, pruned_loss=0.04601, over 895942.00 frames. ], batch size: 36, lr: 1.12e-02, +2022-12-02 01:13:25,612 INFO [train.py:876] Epoch 16, batch 600, loss[loss=0.2185, simple_loss=0.2715, pruned_loss=0.08271, over 4677.00 frames. ], tot_loss[loss=0.1512, simple_loss=0.2105, pruned_loss=0.04595, over 908538.39 frames. ], batch size: 63, lr: 1.12e-02, +2022-12-02 01:13:42,343 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.139e+02 1.816e+02 2.140e+02 2.677e+02 5.575e+02, threshold=4.280e+02, percent-clipped=8.0 +2022-12-02 01:13:53,618 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9327, 1.6866, 2.1488, 1.5239, 1.6797, 2.1977, 1.8769, 1.8228], + device='cuda:0'), covar=tensor([0.0667, 0.0694, 0.0599, 0.0820, 0.0715, 0.0633, 0.0732, 0.0528], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0061, 0.0058, 0.0064, 0.0054, 0.0050, 0.0054, 0.0056], + device='cuda:0'), out_proj_covar=tensor([5.4522e-05, 5.4430e-05, 5.2829e-05, 5.6523e-05, 4.9477e-05, 4.5776e-05, + 4.8821e-05, 5.0301e-05], device='cuda:0') +2022-12-02 01:14:00,265 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22119.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:14:10,781 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22130.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:14:14,630 INFO [train.py:876] Epoch 16, batch 650, loss[loss=0.1905, simple_loss=0.241, pruned_loss=0.07004, over 4812.00 frames. ], tot_loss[loss=0.1516, simple_loss=0.2106, pruned_loss=0.04631, over 917846.94 frames. ], batch size: 42, lr: 1.12e-02, +2022-12-02 01:14:16,121 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.56 vs. limit=5.0 +2022-12-02 01:14:55,634 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22176.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:15:03,630 INFO [train.py:876] Epoch 16, batch 700, loss[loss=0.1131, simple_loss=0.1669, pruned_loss=0.02962, over 4673.00 frames. ], tot_loss[loss=0.1514, simple_loss=0.2107, pruned_loss=0.04602, over 925315.59 frames. 
], batch size: 23, lr: 1.12e-02, +2022-12-02 01:15:10,574 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22191.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:15:20,220 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.091e+02 1.912e+02 2.273e+02 2.774e+02 6.239e+02, threshold=4.547e+02, percent-clipped=2.0 +2022-12-02 01:15:25,039 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22206.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:15:30,774 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4534, 3.4850, 3.4104, 3.0679, 2.9436, 2.2365, 3.7468, 1.5414], + device='cuda:0'), covar=tensor([0.0406, 0.0298, 0.0312, 0.0818, 0.1063, 0.2702, 0.0193, 0.3612], + device='cuda:0'), in_proj_covar=tensor([0.0107, 0.0087, 0.0082, 0.0126, 0.0129, 0.0153, 0.0077, 0.0167], + device='cuda:0'), out_proj_covar=tensor([1.1938e-04, 1.0660e-04, 1.0178e-04, 1.3956e-04, 1.4401e-04, 1.7282e-04, + 8.8616e-05, 1.8079e-04], device='cuda:0') +2022-12-02 01:15:33,664 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22215.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:15:44,628 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9308, 1.9482, 2.2054, 1.9797, 2.1571, 2.2440, 2.1419, 1.7280], + device='cuda:0'), covar=tensor([0.1074, 0.0559, 0.1016, 0.0643, 0.1196, 0.1187, 0.0851, 0.0910], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0062, 0.0059, 0.0064, 0.0054, 0.0050, 0.0054, 0.0056], + device='cuda:0'), out_proj_covar=tensor([5.4661e-05, 5.4913e-05, 5.3378e-05, 5.6554e-05, 4.9555e-05, 4.6017e-05, + 4.8896e-05, 5.0321e-05], device='cuda:0') +2022-12-02 01:15:52,159 INFO [train.py:876] Epoch 16, batch 750, loss[loss=0.1637, simple_loss=0.2317, pruned_loss=0.04787, over 4875.00 frames. ], tot_loss[loss=0.1515, simple_loss=0.211, pruned_loss=0.046, over 934183.39 frames. ], batch size: 38, lr: 1.12e-02, +2022-12-02 01:15:55,200 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22237.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:15:55,210 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22237.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:08,529 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22251.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:26,323 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22269.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:30,104 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4783, 3.5402, 3.4923, 3.7853, 3.0331, 2.3971, 3.9107, 1.8451], + device='cuda:0'), covar=tensor([0.0502, 0.0324, 0.0450, 0.0483, 0.1323, 0.3199, 0.0217, 0.3582], + device='cuda:0'), in_proj_covar=tensor([0.0109, 0.0089, 0.0085, 0.0129, 0.0133, 0.0158, 0.0078, 0.0171], + device='cuda:0'), out_proj_covar=tensor([1.2206e-04, 1.0917e-04, 1.0485e-04, 1.4234e-04, 1.4797e-04, 1.7731e-04, + 9.0108e-05, 1.8498e-04], device='cuda:0') +2022-12-02 01:16:33,128 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22276.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:40,255 INFO [train.py:876] Epoch 16, batch 800, loss[loss=0.1905, simple_loss=0.2413, pruned_loss=0.06991, over 4806.00 frames. ], tot_loss[loss=0.1535, simple_loss=0.2124, pruned_loss=0.0473, over 937726.92 frames. 
], batch size: 42, lr: 1.12e-02, +2022-12-02 01:16:51,740 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1146, 3.2196, 3.2539, 3.2511, 2.7626, 2.2712, 3.5176, 1.7168], + device='cuda:0'), covar=tensor([0.0562, 0.0324, 0.0593, 0.0809, 0.1443, 0.2982, 0.0255, 0.3726], + device='cuda:0'), in_proj_covar=tensor([0.0109, 0.0089, 0.0085, 0.0128, 0.0132, 0.0157, 0.0078, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2195e-04, 1.0904e-04, 1.0501e-04, 1.4160e-04, 1.4749e-04, 1.7630e-04, + 8.9764e-05, 1.8416e-04], device='cuda:0') +2022-12-02 01:16:53,602 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22298.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:54,444 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22299.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:16:56,437 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.297e+02 1.825e+02 2.323e+02 3.173e+02 5.525e+02, threshold=4.647e+02, percent-clipped=4.0 +2022-12-02 01:17:01,290 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.83 vs. limit=2.0 +2022-12-02 01:17:02,559 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9965, 4.4778, 4.3832, 4.7720, 4.1564, 3.6986, 4.5966, 4.3527], + device='cuda:0'), covar=tensor([0.0224, 0.0081, 0.0098, 0.0109, 0.0115, 0.0200, 0.0081, 0.0111], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0043, 0.0043, 0.0034, 0.0045, 0.0046, 0.0042, 0.0042], + device='cuda:0'), out_proj_covar=tensor([4.7295e-05, 3.7830e-05, 3.9676e-05, 3.0765e-05, 4.1677e-05, 4.4115e-05, + 3.5817e-05, 3.8033e-05], device='cuda:0') +2022-12-02 01:17:08,455 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6407, 2.4093, 2.3145, 2.6121, 2.0159, 2.0833, 1.4956, 2.5715], + device='cuda:0'), covar=tensor([0.0624, 0.1118, 0.0876, 0.0578, 0.1142, 0.1264, 0.1356, 0.0712], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0071, 0.0088, 0.0074, 0.0096, 0.0077, 0.0089, 0.0077], + device='cuda:0'), out_proj_covar=tensor([7.0298e-05, 7.2443e-05, 8.7263e-05, 7.4118e-05, 9.4015e-05, 7.8802e-05, + 8.8895e-05, 7.7454e-05], device='cuda:0') +2022-12-02 01:17:12,060 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22317.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:17:20,692 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22326.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:17:28,462 INFO [train.py:876] Epoch 16, batch 850, loss[loss=0.1578, simple_loss=0.2247, pruned_loss=0.04546, over 4810.00 frames. ], tot_loss[loss=0.1543, simple_loss=0.2132, pruned_loss=0.04769, over 940528.70 frames. ], batch size: 45, lr: 1.11e-02, +2022-12-02 01:18:17,382 INFO [train.py:876] Epoch 16, batch 900, loss[loss=0.1835, simple_loss=0.2447, pruned_loss=0.06111, over 4833.00 frames. ], tot_loss[loss=0.1526, simple_loss=0.2112, pruned_loss=0.04702, over 939082.12 frames. 
], batch size: 47, lr: 1.11e-02, +2022-12-02 01:18:18,530 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22385.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:18:31,907 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9658, 2.8084, 3.0882, 2.9531, 2.2664, 1.8715, 3.4107, 1.7040], + device='cuda:0'), covar=tensor([0.0579, 0.0333, 0.0374, 0.0740, 0.1642, 0.3480, 0.0204, 0.2732], + device='cuda:0'), in_proj_covar=tensor([0.0109, 0.0091, 0.0086, 0.0128, 0.0133, 0.0158, 0.0078, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2195e-04, 1.1051e-04, 1.0583e-04, 1.4157e-04, 1.4813e-04, 1.7706e-04, + 8.9767e-05, 1.8349e-04], device='cuda:0') +2022-12-02 01:18:32,628 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. limit=2.0 +2022-12-02 01:18:33,858 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.158e+02 1.904e+02 2.279e+02 2.944e+02 8.484e+02, threshold=4.558e+02, percent-clipped=3.0 +2022-12-02 01:18:35,011 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4808, 3.3827, 3.3329, 3.5236, 2.7031, 2.2603, 3.7586, 1.5067], + device='cuda:0'), covar=tensor([0.0467, 0.0361, 0.0399, 0.0526, 0.1492, 0.2921, 0.0209, 0.4008], + device='cuda:0'), in_proj_covar=tensor([0.0109, 0.0091, 0.0086, 0.0128, 0.0133, 0.0157, 0.0078, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2192e-04, 1.1048e-04, 1.0576e-04, 1.4159e-04, 1.4807e-04, 1.7692e-04, + 8.9713e-05, 1.8343e-04], device='cuda:0') +2022-12-02 01:18:36,132 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6252, 2.7721, 3.2107, 3.4939, 2.9562, 3.6012, 3.2247, 3.0658], + device='cuda:0'), covar=tensor([0.0160, 0.0268, 0.0241, 0.0161, 0.0189, 0.0159, 0.0200, 0.0281], + device='cuda:0'), in_proj_covar=tensor([0.0100, 0.0111, 0.0112, 0.0103, 0.0094, 0.0103, 0.0109, 0.0122], + device='cuda:0'), out_proj_covar=tensor([6.7918e-05, 7.6099e-05, 7.6054e-05, 6.9491e-05, 6.2898e-05, 6.9258e-05, + 7.3992e-05, 8.4920e-05], device='cuda:0') +2022-12-02 01:18:51,538 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22419.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:19:06,111 INFO [train.py:876] Epoch 16, batch 950, loss[loss=0.1424, simple_loss=0.209, pruned_loss=0.03794, over 4831.00 frames. ], tot_loss[loss=0.1525, simple_loss=0.2109, pruned_loss=0.04702, over 941068.71 frames. 
], batch size: 34, lr: 1.11e-02, +2022-12-02 01:19:13,051 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1720, 3.0839, 3.2452, 2.9931, 2.3208, 2.0869, 3.3568, 1.6500], + device='cuda:0'), covar=tensor([0.0613, 0.0360, 0.0467, 0.0941, 0.1787, 0.3489, 0.0288, 0.3409], + device='cuda:0'), in_proj_covar=tensor([0.0108, 0.0090, 0.0085, 0.0128, 0.0133, 0.0157, 0.0078, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2160e-04, 1.1013e-04, 1.0483e-04, 1.4163e-04, 1.4821e-04, 1.7654e-04, + 9.0230e-05, 1.8341e-04], device='cuda:0') +2022-12-02 01:19:17,930 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22446.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:19:31,472 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9758, 2.3419, 2.4047, 3.7445, 3.4563, 3.2004, 2.7349, 3.9950], + device='cuda:0'), covar=tensor([0.0210, 0.1454, 0.1789, 0.0354, 0.0277, 0.0553, 0.0662, 0.0280], + device='cuda:0'), in_proj_covar=tensor([0.0062, 0.0094, 0.0112, 0.0066, 0.0074, 0.0061, 0.0075, 0.0079], + device='cuda:0'), out_proj_covar=tensor([6.6427e-05, 1.0491e-04, 1.2037e-04, 7.3855e-05, 7.4523e-05, 6.9070e-05, + 8.2746e-05, 7.8315e-05], device='cuda:0') +2022-12-02 01:19:38,009 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22467.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:19:54,675 INFO [train.py:876] Epoch 16, batch 1000, loss[loss=0.145, simple_loss=0.2017, pruned_loss=0.0441, over 4859.00 frames. ], tot_loss[loss=0.1535, simple_loss=0.212, pruned_loss=0.0475, over 946296.63 frames. ], batch size: 35, lr: 1.11e-02, +2022-12-02 01:19:56,656 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22486.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:20:11,267 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.234e+02 1.769e+02 2.165e+02 2.563e+02 4.206e+02, threshold=4.331e+02, percent-clipped=0.0 +2022-12-02 01:20:16,153 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22506.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:20:41,296 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22532.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:20:43,065 INFO [train.py:876] Epoch 16, batch 1050, loss[loss=0.13, simple_loss=0.1666, pruned_loss=0.04676, over 4677.00 frames. ], tot_loss[loss=0.1529, simple_loss=0.2116, pruned_loss=0.04707, over 948763.09 frames. 
], batch size: 21, lr: 1.11e-02, +2022-12-02 01:20:45,311 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8895, 3.3666, 3.5458, 3.1113, 3.3480, 3.5199, 3.4684, 3.4491], + device='cuda:0'), covar=tensor([0.1681, 0.0545, 0.0432, 0.0536, 0.0461, 0.0357, 0.0384, 0.0501], + device='cuda:0'), in_proj_covar=tensor([0.0176, 0.0148, 0.0156, 0.0143, 0.0152, 0.0146, 0.0144, 0.0143], + device='cuda:0'), out_proj_covar=tensor([1.1752e-04, 9.8594e-05, 1.0362e-04, 9.6702e-05, 9.9898e-05, 9.5183e-05, + 9.5098e-05, 9.4329e-05], device='cuda:0') +2022-12-02 01:20:48,227 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6519, 3.8154, 3.8765, 3.6221, 3.5237, 3.5958, 3.8425, 3.7263], + device='cuda:0'), covar=tensor([0.0787, 0.0222, 0.0247, 0.0309, 0.0326, 0.0325, 0.0210, 0.0400], + device='cuda:0'), in_proj_covar=tensor([0.0233, 0.0151, 0.0159, 0.0160, 0.0156, 0.0157, 0.0146, 0.0162], + device='cuda:0'), out_proj_covar=tensor([1.5268e-04, 9.8001e-05, 1.0321e-04, 1.0383e-04, 1.0222e-04, 1.0321e-04, + 9.6465e-05, 1.0884e-04], device='cuda:0') +2022-12-02 01:21:02,721 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22554.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:21:20,037 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22571.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:21:32,368 INFO [train.py:876] Epoch 16, batch 1100, loss[loss=0.1765, simple_loss=0.2371, pruned_loss=0.058, over 4883.00 frames. ], tot_loss[loss=0.1514, simple_loss=0.2106, pruned_loss=0.04609, over 949324.53 frames. ], batch size: 37, lr: 1.11e-02, +2022-12-02 01:21:41,107 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22593.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:21:49,195 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.049e+02 1.835e+02 2.177e+02 2.634e+02 5.238e+02, threshold=4.355e+02, percent-clipped=6.0 +2022-12-02 01:22:13,616 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22626.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:22:20,676 INFO [train.py:876] Epoch 16, batch 1150, loss[loss=0.145, simple_loss=0.2052, pruned_loss=0.04246, over 4852.00 frames. ], tot_loss[loss=0.1527, simple_loss=0.2118, pruned_loss=0.04686, over 948443.98 frames. ], batch size: 35, lr: 1.11e-02, +2022-12-02 01:22:25,658 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22639.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:22:33,112 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.03 vs. limit=5.0 +2022-12-02 01:22:59,614 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22674.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:23:09,358 INFO [train.py:876] Epoch 16, batch 1200, loss[loss=0.1909, simple_loss=0.2466, pruned_loss=0.06756, over 4852.00 frames. ], tot_loss[loss=0.1518, simple_loss=0.2106, pruned_loss=0.04649, over 948403.71 frames. ], batch size: 39, lr: 1.11e-02, +2022-12-02 01:23:25,260 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22700.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 01:23:25,808 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.107e+02 1.909e+02 2.346e+02 2.694e+02 1.692e+03, threshold=4.692e+02, percent-clipped=6.0 +2022-12-02 01:23:57,712 INFO [train.py:876] Epoch 16, batch 1250, loss[loss=0.1482, simple_loss=0.2039, pruned_loss=0.04624, over 4888.00 frames. 
], tot_loss[loss=0.152, simple_loss=0.2114, pruned_loss=0.04632, over 950534.97 frames. ], batch size: 37, lr: 1.10e-02, +2022-12-02 01:24:04,379 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22741.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:24:13,239 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3575, 1.9975, 2.1219, 2.2856, 1.7637, 2.2102, 2.1975, 2.2969], + device='cuda:0'), covar=tensor([0.0381, 0.0505, 0.0335, 0.0337, 0.0677, 0.0506, 0.0350, 0.0321], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0066, 0.0052, 0.0054, 0.0068, 0.0061, 0.0052, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.2963e-05, 4.9654e-05, 3.8284e-05, 3.8662e-05, 5.0507e-05, 4.5264e-05, + 3.8465e-05, 3.8392e-05], device='cuda:0') +2022-12-02 01:24:45,740 INFO [train.py:876] Epoch 16, batch 1300, loss[loss=0.151, simple_loss=0.2198, pruned_loss=0.04107, over 4812.00 frames. ], tot_loss[loss=0.1544, simple_loss=0.2136, pruned_loss=0.04762, over 949436.56 frames. ], batch size: 42, lr: 1.10e-02, +2022-12-02 01:24:47,914 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22786.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:24:55,770 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22794.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:25:02,583 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.176e+02 2.017e+02 2.452e+02 3.159e+02 5.625e+02, threshold=4.905e+02, percent-clipped=3.0 +2022-12-02 01:25:26,062 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=22825.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:25:32,625 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22832.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:25:34,387 INFO [train.py:876] Epoch 16, batch 1350, loss[loss=0.1025, simple_loss=0.1638, pruned_loss=0.0206, over 4759.00 frames. ], tot_loss[loss=0.1539, simple_loss=0.2132, pruned_loss=0.04731, over 950092.36 frames. ], batch size: 26, lr: 1.10e-02, +2022-12-02 01:25:34,429 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22834.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:25:55,276 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9812, 1.9620, 1.9654, 1.5619, 1.8014, 1.9021, 2.0493, 2.0043], + device='cuda:0'), covar=tensor([0.0586, 0.0541, 0.0990, 0.0788, 0.0538, 0.0659, 0.0731, 0.0380], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0059, 0.0056, 0.0061, 0.0053, 0.0047, 0.0052, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.2413e-05, 5.2603e-05, 5.1020e-05, 5.4328e-05, 4.8270e-05, 4.3335e-05, + 4.7515e-05, 4.7744e-05], device='cuda:0') +2022-12-02 01:25:55,298 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22855.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:26:10,700 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22871.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:26:19,078 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22880.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:26:22,870 INFO [train.py:876] Epoch 16, batch 1400, loss[loss=0.2154, simple_loss=0.2839, pruned_loss=0.07346, over 4822.00 frames. ], tot_loss[loss=0.1548, simple_loss=0.2145, pruned_loss=0.04761, over 949747.56 frames. 
], batch size: 54, lr: 1.10e-02, +2022-12-02 01:26:25,160 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=22886.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:26:31,755 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=22893.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:26:39,323 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.759e+01 1.957e+02 2.499e+02 2.960e+02 5.459e+02, threshold=4.998e+02, percent-clipped=3.0 +2022-12-02 01:26:39,575 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7440, 3.1460, 3.4922, 3.3273, 2.6213, 3.3011, 3.6022, 2.8602], + device='cuda:0'), covar=tensor([0.4453, 0.0682, 0.0594, 0.0422, 0.0720, 0.0890, 0.0371, 0.1010], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0109, 0.0140, 0.0113, 0.0119, 0.0111, 0.0104, 0.0119], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 01:26:45,491 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5800, 2.8331, 3.1485, 3.6410, 3.1410, 3.5493, 3.2908, 3.2293], + device='cuda:0'), covar=tensor([0.0238, 0.0323, 0.0256, 0.0148, 0.0180, 0.0178, 0.0203, 0.0242], + device='cuda:0'), in_proj_covar=tensor([0.0105, 0.0113, 0.0115, 0.0106, 0.0096, 0.0107, 0.0112, 0.0127], + device='cuda:0'), out_proj_covar=tensor([7.1644e-05, 7.7764e-05, 7.7930e-05, 7.1118e-05, 6.3841e-05, 7.1786e-05, + 7.6042e-05, 8.8362e-05], device='cuda:0') +2022-12-02 01:26:52,913 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-16.pt +2022-12-02 01:26:55,457 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 01:26:56,359 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 01:26:56,654 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:26:56,686 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 01:26:57,809 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 01:26:58,131 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 01:26:59,696 INFO [train.py:876] Epoch 17, batch 0, loss[loss=0.1847, simple_loss=0.229, pruned_loss=0.0702, over 4849.00 frames. ], tot_loss[loss=0.1847, simple_loss=0.229, pruned_loss=0.0702, over 4849.00 frames. 
], batch size: 40, lr: 1.07e-02, +2022-12-02 01:26:59,698 INFO [train.py:901] Computing validation loss +2022-12-02 01:27:01,061 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6211, 2.9899, 3.3047, 3.7956, 3.3144, 3.6602, 3.3767, 3.3017], + device='cuda:0'), covar=tensor([0.0245, 0.0304, 0.0312, 0.0150, 0.0192, 0.0243, 0.0271, 0.0285], + device='cuda:0'), in_proj_covar=tensor([0.0106, 0.0114, 0.0115, 0.0106, 0.0096, 0.0107, 0.0112, 0.0128], + device='cuda:0'), out_proj_covar=tensor([7.1984e-05, 7.8060e-05, 7.8249e-05, 7.1162e-05, 6.4096e-05, 7.1973e-05, + 7.6126e-05, 8.8664e-05], device='cuda:0') +2022-12-02 01:27:01,174 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4538, 5.1348, 4.3931, 5.3631, 4.4318, 4.1502, 4.9683, 4.6565], + device='cuda:0'), covar=tensor([0.0288, 0.0074, 0.0158, 0.0075, 0.0141, 0.0139, 0.0107, 0.0122], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0044, 0.0045, 0.0036, 0.0046, 0.0047, 0.0043, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.8508e-05, 3.9179e-05, 4.1323e-05, 3.1939e-05, 4.3032e-05, 4.4991e-05, + 3.6632e-05, 3.8866e-05], device='cuda:0') +2022-12-02 01:27:02,392 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8113, 1.6833, 0.9915, 1.9306, 1.6649, 1.3954, 1.7885, 1.6383], + device='cuda:0'), covar=tensor([0.0247, 0.0412, 0.0455, 0.0216, 0.0310, 0.0471, 0.0248, 0.0637], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0046, 0.0050, 0.0037, 0.0041, 0.0043, 0.0044, 0.0040], + device='cuda:0'), out_proj_covar=tensor([4.3113e-05, 4.3306e-05, 4.8703e-05, 3.4854e-05, 3.7773e-05, 4.1156e-05, + 4.1061e-05, 3.9135e-05], device='cuda:0') +2022-12-02 01:27:15,395 INFO [train.py:910] Epoch 17, validation: loss=0.2294, simple_loss=0.2751, pruned_loss=0.09184, over 253132.00 frames. +2022-12-02 01:27:15,395 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 01:27:18,410 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22919.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:27:30,861 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.35 vs. limit=5.0 +2022-12-02 01:27:39,442 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=22941.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:28:03,678 INFO [train.py:876] Epoch 17, batch 50, loss[loss=0.1443, simple_loss=0.2137, pruned_loss=0.03744, over 4875.00 frames. ], tot_loss[loss=0.1528, simple_loss=0.2121, pruned_loss=0.04674, over 214993.00 frames. ], batch size: 38, lr: 1.07e-02, +2022-12-02 01:28:17,476 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.82 vs. limit=2.0 +2022-12-02 01:28:20,863 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 01:28:31,109 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=22995.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 01:28:37,190 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.101e+02 1.783e+02 2.349e+02 2.751e+02 7.583e+02, threshold=4.699e+02, percent-clipped=3.0 +2022-12-02 01:28:51,754 INFO [train.py:876] Epoch 17, batch 100, loss[loss=0.1465, simple_loss=0.2042, pruned_loss=0.04439, over 4854.00 frames. ], tot_loss[loss=0.1494, simple_loss=0.209, pruned_loss=0.04488, over 378662.70 frames. ], batch size: 36, lr: 1.06e-02, +2022-12-02 01:29:03,311 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. 
Duration: 0.9818125 +2022-12-02 01:29:16,239 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23041.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:29:26,412 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 01:29:40,054 INFO [train.py:876] Epoch 17, batch 150, loss[loss=0.121, simple_loss=0.1656, pruned_loss=0.03824, over 4650.00 frames. ], tot_loss[loss=0.1462, simple_loss=0.2056, pruned_loss=0.04341, over 504039.44 frames. ], batch size: 21, lr: 1.06e-02, +2022-12-02 01:30:02,383 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23089.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:30:14,045 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.706e+02 2.087e+02 2.794e+02 7.167e+02, threshold=4.175e+02, percent-clipped=2.0 +2022-12-02 01:30:21,235 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=23108.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:30:28,830 INFO [train.py:876] Epoch 17, batch 200, loss[loss=0.1302, simple_loss=0.1825, pruned_loss=0.03897, over 4823.00 frames. ], tot_loss[loss=0.1495, simple_loss=0.2093, pruned_loss=0.04481, over 603872.23 frames. ], batch size: 25, lr: 1.06e-02, +2022-12-02 01:31:01,932 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23150.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:31:12,764 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 01:31:17,572 INFO [train.py:876] Epoch 17, batch 250, loss[loss=0.1246, simple_loss=0.1864, pruned_loss=0.03144, over 4895.00 frames. ], tot_loss[loss=0.1475, simple_loss=0.2074, pruned_loss=0.04381, over 683137.13 frames. ], batch size: 31, lr: 1.06e-02, +2022-12-02 01:31:20,664 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=23169.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:31:32,505 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23181.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:31:52,208 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.030e+02 1.643e+02 2.050e+02 2.547e+02 4.809e+02, threshold=4.100e+02, percent-clipped=1.0 +2022-12-02 01:32:06,857 INFO [train.py:876] Epoch 17, batch 300, loss[loss=0.1481, simple_loss=0.1999, pruned_loss=0.04818, over 4916.00 frames. ], tot_loss[loss=0.149, simple_loss=0.2094, pruned_loss=0.04428, over 743487.94 frames. ], batch size: 30, lr: 1.06e-02, +2022-12-02 01:32:07,675 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 01:32:07,829 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=23217.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:32:14,310 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.53 vs. 
limit=2.0 +2022-12-02 01:32:19,709 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=23229.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:32:31,325 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6117, 2.3137, 2.4390, 2.5144, 2.1317, 2.5435, 2.3382, 2.6083], + device='cuda:0'), covar=tensor([0.0330, 0.0469, 0.0301, 0.0275, 0.0527, 0.0431, 0.0324, 0.0256], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0064, 0.0052, 0.0054, 0.0068, 0.0060, 0.0052, 0.0052], + device='cuda:0'), out_proj_covar=tensor([4.2433e-05, 4.8149e-05, 3.7797e-05, 3.8238e-05, 5.0208e-05, 4.4500e-05, + 3.8280e-05, 3.7592e-05], device='cuda:0') +2022-12-02 01:32:55,725 INFO [train.py:876] Epoch 17, batch 350, loss[loss=0.1249, simple_loss=0.1724, pruned_loss=0.03874, over 3406.00 frames. ], tot_loss[loss=0.148, simple_loss=0.2077, pruned_loss=0.04412, over 787567.50 frames. ], batch size: 13, lr: 1.06e-02, +2022-12-02 01:33:07,523 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=23278.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:33:18,934 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=23290.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:33:23,750 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23295.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:33:29,231 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.149e+02 1.809e+02 2.174e+02 2.644e+02 7.546e+02, threshold=4.348e+02, percent-clipped=2.0 +2022-12-02 01:33:37,784 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6490, 2.2458, 2.3651, 2.3526, 2.0381, 2.5092, 2.2346, 2.4972], + device='cuda:0'), covar=tensor([0.0332, 0.0471, 0.0302, 0.0287, 0.0581, 0.0401, 0.0317, 0.0257], + device='cuda:0'), in_proj_covar=tensor([0.0060, 0.0067, 0.0054, 0.0055, 0.0070, 0.0062, 0.0053, 0.0054], + device='cuda:0'), out_proj_covar=tensor([4.4015e-05, 5.0002e-05, 3.9513e-05, 3.9711e-05, 5.1911e-05, 4.5813e-05, + 3.9480e-05, 3.9026e-05], device='cuda:0') +2022-12-02 01:33:39,226 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-02 01:33:43,233 INFO [train.py:876] Epoch 17, batch 400, loss[loss=0.192, simple_loss=0.2557, pruned_loss=0.0641, over 4060.00 frames. ], tot_loss[loss=0.1489, simple_loss=0.2088, pruned_loss=0.04448, over 825580.72 frames. ], batch size: 72, lr: 1.06e-02, +2022-12-02 01:33:43,241 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 01:34:07,245 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:34:09,176 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23343.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:34:31,649 INFO [train.py:876] Epoch 17, batch 450, loss[loss=0.1127, simple_loss=0.1859, pruned_loss=0.01974, over 4925.00 frames. ], tot_loss[loss=0.1481, simple_loss=0.2084, pruned_loss=0.04391, over 853640.10 frames. ], batch size: 31, lr: 1.06e-02, +2022-12-02 01:34:40,180 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. 
limit=2.0 +2022-12-02 01:34:55,205 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5788, 4.4974, 5.0620, 4.4037, 4.8844, 4.6824, 4.4525, 4.4122], + device='cuda:0'), covar=tensor([0.0758, 0.0434, 0.0477, 0.0462, 0.0460, 0.0485, 0.0936, 0.0436], + device='cuda:0'), in_proj_covar=tensor([0.0149, 0.0112, 0.0137, 0.0124, 0.0104, 0.0136, 0.0156, 0.0111], + device='cuda:0'), out_proj_covar=tensor([1.1471e-04, 7.8449e-05, 1.1113e-04, 9.1642e-05, 8.2290e-05, 1.0512e-04, + 1.2285e-04, 8.1624e-05], device='cuda:0') +2022-12-02 01:35:05,529 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.847e+02 2.202e+02 2.807e+02 5.338e+02, threshold=4.404e+02, percent-clipped=3.0 +2022-12-02 01:35:20,345 INFO [train.py:876] Epoch 17, batch 500, loss[loss=0.138, simple_loss=0.1912, pruned_loss=0.04239, over 4905.00 frames. ], tot_loss[loss=0.1474, simple_loss=0.2075, pruned_loss=0.04367, over 875540.90 frames. ], batch size: 29, lr: 1.06e-02, +2022-12-02 01:35:37,236 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7991, 3.1013, 3.3504, 3.3188, 2.5419, 3.4774, 3.4554, 2.8411], + device='cuda:0'), covar=tensor([0.3905, 0.0671, 0.0700, 0.0399, 0.0682, 0.0512, 0.0307, 0.1018], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0110, 0.0142, 0.0114, 0.0119, 0.0111, 0.0106, 0.0119], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 01:35:53,893 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23450.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:36:07,617 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23464.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:36:09,605 INFO [train.py:876] Epoch 17, batch 550, loss[loss=0.1058, simple_loss=0.1621, pruned_loss=0.02476, over 4838.00 frames. ], tot_loss[loss=0.1468, simple_loss=0.2068, pruned_loss=0.04336, over 892228.63 frames. 
], batch size: 25, lr: 1.06e-02, +2022-12-02 01:36:14,110 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6877, 3.9356, 3.9536, 3.8196, 3.7558, 3.8926, 3.9445, 4.0857], + device='cuda:0'), covar=tensor([0.0988, 0.0228, 0.0292, 0.0267, 0.0262, 0.0322, 0.0214, 0.0268], + device='cuda:0'), in_proj_covar=tensor([0.0229, 0.0149, 0.0159, 0.0160, 0.0155, 0.0158, 0.0144, 0.0163], + device='cuda:0'), out_proj_covar=tensor([1.4984e-04, 9.6521e-05, 1.0389e-04, 1.0363e-04, 1.0090e-04, 1.0393e-04, + 9.5208e-05, 1.0917e-04], device='cuda:0') +2022-12-02 01:36:22,359 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4991, 1.9144, 1.4536, 0.9931, 1.4629, 1.3353, 1.3810, 1.5676], + device='cuda:0'), covar=tensor([0.0214, 0.0131, 0.0317, 0.0471, 0.0231, 0.0211, 0.0217, 0.0196], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0032, 0.0033, 0.0036, 0.0040, 0.0034, 0.0039, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.7928e-05, 2.4741e-05, 2.7093e-05, 2.8480e-05, 3.2845e-05, 2.7516e-05, + 3.1932e-05, 2.7369e-05], device='cuda:0') +2022-12-02 01:36:24,140 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23481.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:36:31,074 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=23488.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:36:33,118 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8733, 3.1903, 3.5764, 3.5164, 2.4643, 3.7021, 3.6186, 2.9985], + device='cuda:0'), covar=tensor([0.4000, 0.0701, 0.0674, 0.0375, 0.0881, 0.0512, 0.0355, 0.0859], + device='cuda:0'), in_proj_covar=tensor([0.0187, 0.0112, 0.0143, 0.0115, 0.0121, 0.0112, 0.0107, 0.0120], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 01:36:36,481 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.81 vs. limit=2.0 +2022-12-02 01:36:40,796 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23498.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:36:43,614 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.202e+02 1.681e+02 2.092e+02 2.588e+02 4.285e+02, threshold=4.184e+02, percent-clipped=0.0 +2022-12-02 01:36:58,087 INFO [train.py:876] Epoch 17, batch 600, loss[loss=0.1726, simple_loss=0.2375, pruned_loss=0.05386, over 4798.00 frames. ], tot_loss[loss=0.1479, simple_loss=0.2078, pruned_loss=0.04403, over 904487.64 frames. ], batch size: 54, lr: 1.05e-02, +2022-12-02 01:37:10,819 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23529.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:37:30,249 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=23549.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 01:37:38,277 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0257, 2.0454, 2.9877, 2.3464, 2.6553, 2.9983, 2.3996, 2.9447], + device='cuda:0'), covar=tensor([0.0248, 0.1472, 0.0625, 0.1551, 0.0361, 0.0263, 0.1340, 0.0553], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0100, 0.0082, 0.0110, 0.0078, 0.0078, 0.0106, 0.0087], + device='cuda:0'), out_proj_covar=tensor([7.6060e-05, 1.1589e-04, 9.7067e-05, 1.2834e-04, 9.0538e-05, 9.4675e-05, + 1.2122e-04, 9.7120e-05], device='cuda:0') +2022-12-02 01:37:46,504 INFO [train.py:876] Epoch 17, batch 650, loss[loss=0.1453, simple_loss=0.2048, pruned_loss=0.04291, over 4821.00 frames. 
], tot_loss[loss=0.1497, simple_loss=0.2092, pruned_loss=0.04507, over 914624.32 frames. ], batch size: 34, lr: 1.05e-02, +2022-12-02 01:37:53,291 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23573.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:38:04,817 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23585.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:38:14,726 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 01:38:20,216 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.174e+02 1.891e+02 2.225e+02 2.960e+02 7.298e+02, threshold=4.451e+02, percent-clipped=5.0 +2022-12-02 01:38:34,165 INFO [train.py:876] Epoch 17, batch 700, loss[loss=0.1368, simple_loss=0.1946, pruned_loss=0.0395, over 4916.00 frames. ], tot_loss[loss=0.1505, simple_loss=0.2101, pruned_loss=0.04547, over 922024.01 frames. ], batch size: 31, lr: 1.05e-02, +2022-12-02 01:38:37,218 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5430, 1.6393, 0.7663, 1.6170, 1.6066, 1.3087, 1.2998, 1.5390], + device='cuda:0'), covar=tensor([0.0197, 0.0276, 0.0357, 0.0240, 0.0224, 0.0362, 0.0240, 0.0348], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0045, 0.0051, 0.0036, 0.0041, 0.0043, 0.0043, 0.0039], + device='cuda:0'), out_proj_covar=tensor([4.3127e-05, 4.2440e-05, 4.9008e-05, 3.4155e-05, 3.7948e-05, 4.1164e-05, + 4.0246e-05, 3.8719e-05], device='cuda:0') +2022-12-02 01:39:10,506 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3809, 1.3556, 1.2896, 0.9391, 0.9364, 1.0792, 1.3070, 1.4364], + device='cuda:0'), covar=tensor([0.0333, 0.0260, 0.0258, 0.0316, 0.0530, 0.0252, 0.0226, 0.0159], + device='cuda:0'), in_proj_covar=tensor([0.0033, 0.0032, 0.0032, 0.0035, 0.0039, 0.0034, 0.0038, 0.0033], + device='cuda:0'), out_proj_covar=tensor([2.6837e-05, 2.4887e-05, 2.6472e-05, 2.7892e-05, 3.2466e-05, 2.7264e-05, + 3.1349e-05, 2.6392e-05], device='cuda:0') +2022-12-02 01:39:21,819 INFO [train.py:876] Epoch 17, batch 750, loss[loss=0.1564, simple_loss=0.2164, pruned_loss=0.04822, over 4822.00 frames. ], tot_loss[loss=0.1517, simple_loss=0.2113, pruned_loss=0.04609, over 929080.13 frames. 
], batch size: 45, lr: 1.05e-02, +2022-12-02 01:39:47,034 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7647, 4.3011, 4.2100, 4.6681, 4.0153, 3.6947, 4.4626, 4.1387], + device='cuda:0'), covar=tensor([0.0252, 0.0105, 0.0132, 0.0101, 0.0136, 0.0241, 0.0096, 0.0127], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0048, 0.0048, 0.0038, 0.0049, 0.0051, 0.0046, 0.0046], + device='cuda:0'), out_proj_covar=tensor([5.1653e-05, 4.3156e-05, 4.3649e-05, 3.4263e-05, 4.5513e-05, 4.8089e-05, + 3.9386e-05, 4.1067e-05], device='cuda:0') +2022-12-02 01:39:55,658 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.149e+02 1.860e+02 2.397e+02 2.904e+02 7.009e+02, threshold=4.795e+02, percent-clipped=5.0 +2022-12-02 01:40:05,525 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5877, 1.8199, 0.7869, 1.7763, 1.6588, 1.3525, 1.3811, 1.5668], + device='cuda:0'), covar=tensor([0.0280, 0.0366, 0.0483, 0.0464, 0.0363, 0.0390, 0.0346, 0.0500], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0045, 0.0050, 0.0036, 0.0041, 0.0043, 0.0043, 0.0040], + device='cuda:0'), out_proj_covar=tensor([4.3213e-05, 4.2217e-05, 4.8455e-05, 3.4065e-05, 3.7969e-05, 4.1164e-05, + 4.0565e-05, 3.9092e-05], device='cuda:0') +2022-12-02 01:40:10,101 INFO [train.py:876] Epoch 17, batch 800, loss[loss=0.1509, simple_loss=0.2096, pruned_loss=0.04607, over 4800.00 frames. ], tot_loss[loss=0.151, simple_loss=0.2104, pruned_loss=0.04575, over 932852.15 frames. ], batch size: 32, lr: 1.05e-02, +2022-12-02 01:40:39,267 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6100, 2.9424, 3.2818, 3.4272, 3.2406, 3.6306, 3.2797, 3.0926], + device='cuda:0'), covar=tensor([0.0242, 0.0364, 0.0301, 0.0247, 0.0204, 0.0215, 0.0262, 0.0371], + device='cuda:0'), in_proj_covar=tensor([0.0101, 0.0107, 0.0110, 0.0100, 0.0090, 0.0102, 0.0106, 0.0122], + device='cuda:0'), out_proj_covar=tensor([6.8960e-05, 7.3255e-05, 7.4671e-05, 6.7164e-05, 5.9866e-05, 6.8293e-05, + 7.1986e-05, 8.5056e-05], device='cuda:0') +2022-12-02 01:40:56,276 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23764.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:40:58,037 INFO [train.py:876] Epoch 17, batch 850, loss[loss=0.1672, simple_loss=0.2215, pruned_loss=0.0565, over 4884.00 frames. ], tot_loss[loss=0.1503, simple_loss=0.2098, pruned_loss=0.04536, over 938148.23 frames. ], batch size: 44, lr: 1.05e-02, +2022-12-02 01:41:17,888 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-02 01:41:28,036 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5982, 2.2856, 2.4159, 2.5987, 2.0910, 2.2096, 1.4046, 2.5694], + device='cuda:0'), covar=tensor([0.0663, 0.1301, 0.0753, 0.0623, 0.0909, 0.1574, 0.1245, 0.0569], + device='cuda:0'), in_proj_covar=tensor([0.0072, 0.0074, 0.0092, 0.0076, 0.0097, 0.0080, 0.0091, 0.0081], + device='cuda:0'), out_proj_covar=tensor([7.4604e-05, 7.5720e-05, 9.1420e-05, 7.6943e-05, 9.6308e-05, 8.1195e-05, + 9.1385e-05, 8.1967e-05], device='cuda:0') +2022-12-02 01:41:31,858 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.120e+02 1.768e+02 2.082e+02 2.562e+02 7.340e+02, threshold=4.163e+02, percent-clipped=3.0 +2022-12-02 01:41:42,549 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23812.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:41:46,394 INFO [train.py:876] Epoch 17, batch 900, loss[loss=0.1505, simple_loss=0.212, pruned_loss=0.04455, over 4879.00 frames. 
], tot_loss[loss=0.1491, simple_loss=0.2091, pruned_loss=0.04456, over 944571.35 frames. ], batch size: 38, lr: 1.05e-02, +2022-12-02 01:42:14,263 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=23844.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 01:42:35,019 INFO [train.py:876] Epoch 17, batch 950, loss[loss=0.1732, simple_loss=0.236, pruned_loss=0.05516, over 4805.00 frames. ], tot_loss[loss=0.149, simple_loss=0.2093, pruned_loss=0.04435, over 948263.68 frames. ], batch size: 42, lr: 1.05e-02, +2022-12-02 01:42:41,955 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23873.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:42:53,568 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=23885.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:43:08,681 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.735e+01 1.777e+02 2.277e+02 2.915e+02 6.356e+02, threshold=4.554e+02, percent-clipped=11.0 +2022-12-02 01:43:23,497 INFO [train.py:876] Epoch 17, batch 1000, loss[loss=0.105, simple_loss=0.1618, pruned_loss=0.02416, over 4696.00 frames. ], tot_loss[loss=0.1479, simple_loss=0.208, pruned_loss=0.04391, over 949457.08 frames. ], batch size: 23, lr: 1.05e-02, +2022-12-02 01:43:28,539 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23921.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:43:40,254 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=23933.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:43:55,045 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6859, 3.6575, 3.9044, 3.9259, 4.1058, 3.5999, 3.8374, 3.9777], + device='cuda:0'), covar=tensor([0.0353, 0.0343, 0.0185, 0.0189, 0.0210, 0.0353, 0.0197, 0.0425], + device='cuda:0'), in_proj_covar=tensor([0.0103, 0.0114, 0.0094, 0.0101, 0.0091, 0.0125, 0.0084, 0.0090], + device='cuda:0'), out_proj_covar=tensor([8.8273e-05, 9.6576e-05, 7.8987e-05, 8.5970e-05, 7.7468e-05, 1.0741e-04, + 7.4650e-05, 7.8116e-05], device='cuda:0') +2022-12-02 01:44:12,166 INFO [train.py:876] Epoch 17, batch 1050, loss[loss=0.1661, simple_loss=0.2267, pruned_loss=0.05274, over 4863.00 frames. ], tot_loss[loss=0.1468, simple_loss=0.2066, pruned_loss=0.04347, over 949746.37 frames. ], batch size: 39, lr: 1.04e-02, +2022-12-02 01:44:28,123 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 01:44:38,265 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5170, 4.5534, 4.9556, 4.4208, 4.7932, 4.6410, 4.5511, 4.3717], + device='cuda:0'), covar=tensor([0.0866, 0.0537, 0.0670, 0.0514, 0.0788, 0.0575, 0.1010, 0.0576], + device='cuda:0'), in_proj_covar=tensor([0.0157, 0.0115, 0.0145, 0.0127, 0.0110, 0.0140, 0.0160, 0.0115], + device='cuda:0'), out_proj_covar=tensor([1.2071e-04, 7.9755e-05, 1.1722e-04, 9.3010e-05, 8.6234e-05, 1.0689e-04, + 1.2571e-04, 8.4702e-05], device='cuda:0') +2022-12-02 01:44:45,136 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-24000.pt +2022-12-02 01:44:48,309 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.101e+02 1.910e+02 2.275e+02 2.596e+02 4.800e+02, threshold=4.549e+02, percent-clipped=2.0 +2022-12-02 01:45:02,697 INFO [train.py:876] Epoch 17, batch 1100, loss[loss=0.1554, simple_loss=0.2204, pruned_loss=0.04522, over 4880.00 frames. ], tot_loss[loss=0.1471, simple_loss=0.2072, pruned_loss=0.04351, over 950920.09 frames. 
], batch size: 37, lr: 1.04e-02, +2022-12-02 01:45:23,249 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24037.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:45:34,651 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2990, 1.9470, 2.1015, 2.2340, 1.9234, 2.0159, 1.4149, 2.4017], + device='cuda:0'), covar=tensor([0.0906, 0.0961, 0.0947, 0.0718, 0.1139, 0.1850, 0.1299, 0.0549], + device='cuda:0'), in_proj_covar=tensor([0.0074, 0.0075, 0.0094, 0.0078, 0.0099, 0.0080, 0.0091, 0.0082], + device='cuda:0'), out_proj_covar=tensor([7.6567e-05, 7.7223e-05, 9.3670e-05, 7.8603e-05, 9.7585e-05, 8.2411e-05, + 9.1353e-05, 8.3156e-05], device='cuda:0') +2022-12-02 01:45:50,749 INFO [train.py:876] Epoch 17, batch 1150, loss[loss=0.1348, simple_loss=0.1926, pruned_loss=0.0385, over 4734.00 frames. ], tot_loss[loss=0.1475, simple_loss=0.2078, pruned_loss=0.04365, over 952994.37 frames. ], batch size: 27, lr: 1.04e-02, +2022-12-02 01:46:21,373 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=24098.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:46:23,874 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.185e+02 1.928e+02 2.257e+02 2.621e+02 5.042e+02, threshold=4.514e+02, percent-clipped=2.0 +2022-12-02 01:46:38,134 INFO [train.py:876] Epoch 17, batch 1200, loss[loss=0.1881, simple_loss=0.2481, pruned_loss=0.06406, over 4792.00 frames. ], tot_loss[loss=0.1487, simple_loss=0.2091, pruned_loss=0.04419, over 951522.05 frames. ], batch size: 54, lr: 1.04e-02, +2022-12-02 01:46:40,321 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4558, 4.9921, 4.6953, 5.1975, 4.5316, 4.1187, 4.9694, 4.6903], + device='cuda:0'), covar=tensor([0.0175, 0.0077, 0.0114, 0.0114, 0.0125, 0.0153, 0.0080, 0.0125], + device='cuda:0'), in_proj_covar=tensor([0.0054, 0.0050, 0.0050, 0.0040, 0.0051, 0.0052, 0.0048, 0.0048], + device='cuda:0'), out_proj_covar=tensor([5.2623e-05, 4.4783e-05, 4.5390e-05, 3.6071e-05, 4.7123e-05, 4.9057e-05, + 4.0426e-05, 4.2956e-05], device='cuda:0') +2022-12-02 01:46:42,722 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.12 vs. limit=2.0 +2022-12-02 01:47:05,578 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=24144.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 01:47:26,873 INFO [train.py:876] Epoch 17, batch 1250, loss[loss=0.2222, simple_loss=0.2669, pruned_loss=0.08873, over 4885.00 frames. ], tot_loss[loss=0.1492, simple_loss=0.2091, pruned_loss=0.04464, over 946837.00 frames. ], batch size: 44, lr: 1.04e-02, +2022-12-02 01:47:30,617 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. limit=2.0 +2022-12-02 01:47:52,561 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=24192.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:48:01,725 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.050e+02 1.760e+02 2.214e+02 2.857e+02 5.808e+02, threshold=4.428e+02, percent-clipped=2.0 +2022-12-02 01:48:16,559 INFO [train.py:876] Epoch 17, batch 1300, loss[loss=0.1583, simple_loss=0.2096, pruned_loss=0.05349, over 4859.00 frames. ], tot_loss[loss=0.1465, simple_loss=0.2058, pruned_loss=0.0436, over 947024.53 frames. ], batch size: 36, lr: 1.04e-02, +2022-12-02 01:49:06,169 INFO [train.py:876] Epoch 17, batch 1350, loss[loss=0.1463, simple_loss=0.2138, pruned_loss=0.03941, over 4846.00 frames. ], tot_loss[loss=0.1443, simple_loss=0.2034, pruned_loss=0.04258, over 945173.11 frames. 
], batch size: 47, lr: 1.04e-02, +2022-12-02 01:49:40,395 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.724e+01 1.643e+02 1.960e+02 2.579e+02 6.986e+02, threshold=3.919e+02, percent-clipped=6.0 +2022-12-02 01:49:54,701 INFO [train.py:876] Epoch 17, batch 1400, loss[loss=0.1621, simple_loss=0.2244, pruned_loss=0.0499, over 4892.00 frames. ], tot_loss[loss=0.1448, simple_loss=0.2032, pruned_loss=0.04319, over 944229.83 frames. ], batch size: 44, lr: 1.04e-02, +2022-12-02 01:49:56,902 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4364, 2.3711, 2.0168, 1.7427, 1.8798, 2.5476, 2.2640, 1.9495], + device='cuda:0'), covar=tensor([0.0805, 0.0368, 0.0950, 0.0560, 0.0740, 0.1008, 0.0526, 0.0537], + device='cuda:0'), in_proj_covar=tensor([0.0060, 0.0061, 0.0057, 0.0061, 0.0054, 0.0050, 0.0052, 0.0055], + device='cuda:0'), out_proj_covar=tensor([5.3912e-05, 5.3772e-05, 5.2004e-05, 5.4737e-05, 4.9842e-05, 4.5534e-05, + 4.8051e-05, 4.9224e-05], device='cuda:0') +2022-12-02 01:50:05,691 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4920, 4.4450, 5.0598, 4.4624, 4.7707, 4.6359, 4.4848, 4.3960], + device='cuda:0'), covar=tensor([0.0824, 0.0589, 0.0574, 0.0460, 0.0766, 0.0610, 0.1144, 0.0698], + device='cuda:0'), in_proj_covar=tensor([0.0156, 0.0114, 0.0144, 0.0124, 0.0109, 0.0140, 0.0160, 0.0114], + device='cuda:0'), out_proj_covar=tensor([1.1995e-04, 7.8509e-05, 1.1574e-04, 9.0301e-05, 8.5942e-05, 1.0716e-04, + 1.2623e-04, 8.3652e-05], device='cuda:0') +2022-12-02 01:50:25,884 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-17.pt +2022-12-02 01:50:35,359 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 01:50:35,923 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 01:50:36,220 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:50:36,251 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 01:50:37,355 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 01:50:37,674 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 01:50:39,578 INFO [train.py:876] Epoch 18, batch 0, loss[loss=0.1674, simple_loss=0.2259, pruned_loss=0.05449, over 4842.00 frames. ], tot_loss[loss=0.1674, simple_loss=0.2259, pruned_loss=0.05449, over 4842.00 frames. ], batch size: 35, lr: 1.01e-02, +2022-12-02 01:50:39,579 INFO [train.py:901] Computing validation loss +2022-12-02 01:50:55,177 INFO [train.py:910] Epoch 18, validation: loss=0.228, simple_loss=0.2739, pruned_loss=0.09103, over 253132.00 frames. 
+2022-12-02 01:50:55,178 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 01:51:16,568 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5838, 4.5602, 5.0657, 4.4213, 4.7190, 4.6417, 4.4081, 4.3964], + device='cuda:0'), covar=tensor([0.0752, 0.0563, 0.0569, 0.0532, 0.0950, 0.0603, 0.1206, 0.0625], + device='cuda:0'), in_proj_covar=tensor([0.0160, 0.0117, 0.0147, 0.0127, 0.0113, 0.0143, 0.0165, 0.0116], + device='cuda:0'), out_proj_covar=tensor([1.2244e-04, 8.0772e-05, 1.1821e-04, 9.2874e-05, 8.8488e-05, 1.0894e-04, + 1.2954e-04, 8.5479e-05], device='cuda:0') +2022-12-02 01:51:37,763 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=24393.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:51:40,374 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.86 vs. limit=2.0 +2022-12-02 01:51:43,544 INFO [train.py:876] Epoch 18, batch 50, loss[loss=0.09852, simple_loss=0.1492, pruned_loss=0.02391, over 4625.00 frames. ], tot_loss[loss=0.1387, simple_loss=0.198, pruned_loss=0.03969, over 215956.67 frames. ], batch size: 21, lr: 1.01e-02, +2022-12-02 01:51:45,891 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.069e+02 1.708e+02 2.090e+02 2.688e+02 4.463e+02, threshold=4.181e+02, percent-clipped=2.0 +2022-12-02 01:51:57,744 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5254, 1.7620, 0.8385, 1.5109, 1.6303, 1.4329, 1.2598, 1.4886], + device='cuda:0'), covar=tensor([0.0254, 0.0310, 0.0396, 0.0355, 0.0278, 0.0408, 0.0336, 0.0436], + device='cuda:0'), in_proj_covar=tensor([0.0046, 0.0044, 0.0050, 0.0036, 0.0041, 0.0042, 0.0042, 0.0039], + device='cuda:0'), out_proj_covar=tensor([4.3305e-05, 4.1630e-05, 4.8189e-05, 3.4223e-05, 3.7876e-05, 4.0024e-05, + 3.9798e-05, 3.8422e-05], device='cuda:0') +2022-12-02 01:51:58,913 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.60 vs. limit=5.0 +2022-12-02 01:52:03,350 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4524, 4.1701, 3.9251, 4.2578, 3.6728, 3.3590, 4.2450, 4.1380], + device='cuda:0'), covar=tensor([0.0694, 0.0460, 0.0486, 0.0639, 0.0560, 0.0508, 0.0477, 0.0366], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0049, 0.0050, 0.0040, 0.0051, 0.0052, 0.0048, 0.0048], + device='cuda:0'), out_proj_covar=tensor([5.2871e-05, 4.3625e-05, 4.5562e-05, 3.5486e-05, 4.6855e-05, 4.8993e-05, + 4.0606e-05, 4.2178e-05], device='cuda:0') +2022-12-02 01:52:08,907 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 01:52:32,072 INFO [train.py:876] Epoch 18, batch 100, loss[loss=0.1154, simple_loss=0.1728, pruned_loss=0.02907, over 4900.00 frames. ], tot_loss[loss=0.1403, simple_loss=0.2008, pruned_loss=0.03992, over 379026.04 frames. ], batch size: 29, lr: 1.01e-02, +2022-12-02 01:52:50,631 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:53:12,085 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 01:53:20,641 INFO [train.py:876] Epoch 18, batch 150, loss[loss=0.141, simple_loss=0.2092, pruned_loss=0.03643, over 4864.00 frames. ], tot_loss[loss=0.1417, simple_loss=0.2022, pruned_loss=0.04062, over 503890.35 frames. 
], batch size: 36, lr: 1.00e-02, +2022-12-02 01:53:22,605 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.086e+02 1.600e+02 2.106e+02 2.859e+02 1.275e+03, threshold=4.211e+02, percent-clipped=5.0 +2022-12-02 01:53:47,756 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. limit=2.0 +2022-12-02 01:54:07,953 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24547.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:54:09,749 INFO [train.py:876] Epoch 18, batch 200, loss[loss=0.09677, simple_loss=0.1618, pruned_loss=0.01588, over 4906.00 frames. ], tot_loss[loss=0.1418, simple_loss=0.2026, pruned_loss=0.04054, over 604175.38 frames. ], batch size: 29, lr: 1.00e-02, +2022-12-02 01:54:16,990 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6463, 2.5301, 2.5758, 2.6222, 2.4344, 2.8417, 2.2953, 2.6709], + device='cuda:0'), covar=tensor([0.0350, 0.0389, 0.0254, 0.0275, 0.0392, 0.0294, 0.0315, 0.0245], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0065, 0.0051, 0.0054, 0.0068, 0.0060, 0.0054, 0.0052], + device='cuda:0'), out_proj_covar=tensor([4.3029e-05, 4.8785e-05, 3.7408e-05, 3.8634e-05, 4.9859e-05, 4.4381e-05, + 3.9780e-05, 3.8032e-05], device='cuda:0') +2022-12-02 01:54:30,180 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24569.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:54:34,388 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 01:54:39,037 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9435, 3.1536, 3.2901, 2.8648, 3.2142, 3.2230, 3.2425, 3.3102], + device='cuda:0'), covar=tensor([0.1482, 0.0500, 0.0523, 0.0629, 0.0615, 0.0495, 0.0386, 0.0403], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0155, 0.0167, 0.0150, 0.0162, 0.0154, 0.0151, 0.0148], + device='cuda:0'), out_proj_covar=tensor([1.2117e-04, 1.0347e-04, 1.1068e-04, 1.0108e-04, 1.0647e-04, 1.0108e-04, + 1.0018e-04, 9.8027e-05], device='cuda:0') +2022-12-02 01:54:59,082 INFO [train.py:876] Epoch 18, batch 250, loss[loss=0.1448, simple_loss=0.2209, pruned_loss=0.03435, over 4802.00 frames. ], tot_loss[loss=0.1427, simple_loss=0.2037, pruned_loss=0.04085, over 682969.10 frames. ], batch size: 58, lr: 1.00e-02, +2022-12-02 01:55:01,206 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.108e+02 1.808e+02 2.008e+02 2.518e+02 6.480e+02, threshold=4.016e+02, percent-clipped=4.0 +2022-12-02 01:55:03,132 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 01:55:08,376 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=24608.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:55:29,546 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=24630.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:55:41,364 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 01:55:47,699 INFO [train.py:876] Epoch 18, batch 300, loss[loss=0.1729, simple_loss=0.2404, pruned_loss=0.05267, over 4797.00 frames. ], tot_loss[loss=0.1441, simple_loss=0.2051, pruned_loss=0.04156, over 739603.22 frames. ], batch size: 54, lr: 1.00e-02, +2022-12-02 01:56:00,059 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 01:56:10,308 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2562, 3.7428, 3.6288, 3.4294, 3.6791, 3.5686, 3.5254, 3.8315], + device='cuda:0'), covar=tensor([0.1367, 0.0446, 0.0603, 0.0496, 0.0386, 0.0451, 0.0537, 0.0438], + device='cuda:0'), in_proj_covar=tensor([0.0180, 0.0156, 0.0167, 0.0148, 0.0161, 0.0155, 0.0151, 0.0149], + device='cuda:0'), out_proj_covar=tensor([1.2081e-04, 1.0399e-04, 1.1093e-04, 9.9959e-05, 1.0610e-04, 1.0151e-04, + 1.0020e-04, 9.8224e-05], device='cuda:0') +2022-12-02 01:56:30,343 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=24693.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:56:36,016 INFO [train.py:876] Epoch 18, batch 350, loss[loss=0.1552, simple_loss=0.226, pruned_loss=0.04214, over 4808.00 frames. ], tot_loss[loss=0.1436, simple_loss=0.2038, pruned_loss=0.0417, over 785805.07 frames. ], batch size: 54, lr: 1.00e-02, +2022-12-02 01:56:37,912 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.221e+02 1.737e+02 2.176e+02 3.105e+02 6.535e+02, threshold=4.352e+02, percent-clipped=9.0 +2022-12-02 01:57:01,187 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.83 vs. limit=2.0 +2022-12-02 01:57:13,639 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 01:57:16,531 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.43 vs. limit=2.0 +2022-12-02 01:57:16,925 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=24741.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:57:24,860 INFO [train.py:876] Epoch 18, batch 400, loss[loss=0.1108, simple_loss=0.1524, pruned_loss=0.03457, over 3798.00 frames. ], tot_loss[loss=0.1426, simple_loss=0.203, pruned_loss=0.04114, over 823334.36 frames. ], batch size: 15, lr: 9.99e-03, +2022-12-02 01:57:34,732 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8115, 1.7710, 1.8885, 1.4483, 1.7140, 2.4066, 2.3204, 1.7802], + device='cuda:0'), covar=tensor([0.0987, 0.0593, 0.1221, 0.1008, 0.0872, 0.0842, 0.0667, 0.0908], + device='cuda:0'), in_proj_covar=tensor([0.0061, 0.0063, 0.0059, 0.0064, 0.0056, 0.0051, 0.0053, 0.0057], + device='cuda:0'), out_proj_covar=tensor([5.4793e-05, 5.6010e-05, 5.4474e-05, 5.7254e-05, 5.1299e-05, 4.7100e-05, + 4.8774e-05, 5.1424e-05], device='cuda:0') +2022-12-02 01:57:39,406 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 01:57:40,510 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24765.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:58:01,701 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 01:58:13,179 INFO [train.py:876] Epoch 18, batch 450, loss[loss=0.1028, simple_loss=0.1602, pruned_loss=0.02271, over 4749.00 frames. ], tot_loss[loss=0.1435, simple_loss=0.2045, pruned_loss=0.04126, over 854008.28 frames. 
], batch size: 27, lr: 9.98e-03, +2022-12-02 01:58:15,402 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.305e+02 1.725e+02 2.063e+02 2.627e+02 5.429e+02, threshold=4.126e+02, percent-clipped=3.0 +2022-12-02 01:58:40,821 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=24826.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:58:43,959 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5014, 2.6894, 3.1216, 3.3807, 2.8293, 3.4593, 3.2015, 3.1281], + device='cuda:0'), covar=tensor([0.0257, 0.0357, 0.0285, 0.0210, 0.0237, 0.0215, 0.0233, 0.0363], + device='cuda:0'), in_proj_covar=tensor([0.0107, 0.0114, 0.0118, 0.0106, 0.0095, 0.0106, 0.0112, 0.0128], + device='cuda:0'), out_proj_covar=tensor([7.3022e-05, 7.7940e-05, 7.9745e-05, 7.1204e-05, 6.3422e-05, 7.0791e-05, + 7.5574e-05, 8.8905e-05], device='cuda:0') +2022-12-02 01:59:02,040 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24848.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 01:59:02,838 INFO [train.py:876] Epoch 18, batch 500, loss[loss=0.1475, simple_loss=0.2187, pruned_loss=0.03814, over 4888.00 frames. ], tot_loss[loss=0.143, simple_loss=0.204, pruned_loss=0.04107, over 876785.32 frames. ], batch size: 44, lr: 9.97e-03, +2022-12-02 01:59:51,186 INFO [train.py:876] Epoch 18, batch 550, loss[loss=0.1372, simple_loss=0.2064, pruned_loss=0.03403, over 4841.00 frames. ], tot_loss[loss=0.1423, simple_loss=0.2031, pruned_loss=0.0407, over 896803.27 frames. ], batch size: 41, lr: 9.96e-03, +2022-12-02 01:59:53,003 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.240e+02 1.776e+02 2.103e+02 2.691e+02 8.095e+02, threshold=4.205e+02, percent-clipped=4.0 +2022-12-02 01:59:55,050 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=24903.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:00:00,994 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=24909.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:00:16,285 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=24925.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:00:39,348 INFO [train.py:876] Epoch 18, batch 600, loss[loss=0.1602, simple_loss=0.2173, pruned_loss=0.05149, over 4809.00 frames. ], tot_loss[loss=0.1432, simple_loss=0.2042, pruned_loss=0.04113, over 909223.57 frames. ], batch size: 33, lr: 9.95e-03, +2022-12-02 02:01:24,623 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=24995.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:01:25,795 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.33 vs. limit=5.0 +2022-12-02 02:01:28,272 INFO [train.py:876] Epoch 18, batch 650, loss[loss=0.1678, simple_loss=0.2475, pruned_loss=0.04405, over 4703.00 frames. ], tot_loss[loss=0.1442, simple_loss=0.2051, pruned_loss=0.04165, over 918279.31 frames. 
], batch size: 63, lr: 9.94e-03, +2022-12-02 02:01:30,398 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.263e+02 1.716e+02 2.166e+02 2.623e+02 5.319e+02, threshold=4.332e+02, percent-clipped=3.0 +2022-12-02 02:01:59,770 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4949, 3.8626, 3.9689, 3.5275, 3.4773, 3.8369, 3.7950, 3.8147], + device='cuda:0'), covar=tensor([0.0867, 0.0227, 0.0255, 0.0366, 0.0345, 0.0345, 0.0245, 0.0355], + device='cuda:0'), in_proj_covar=tensor([0.0231, 0.0149, 0.0167, 0.0161, 0.0159, 0.0162, 0.0149, 0.0168], + device='cuda:0'), out_proj_covar=tensor([1.5047e-04, 9.6017e-05, 1.0920e-04, 1.0446e-04, 1.0369e-04, 1.0565e-04, + 9.8773e-05, 1.1238e-04], device='cuda:0') +2022-12-02 02:02:16,837 INFO [train.py:876] Epoch 18, batch 700, loss[loss=0.1188, simple_loss=0.1671, pruned_loss=0.03528, over 4722.00 frames. ], tot_loss[loss=0.1452, simple_loss=0.2054, pruned_loss=0.04248, over 925474.50 frames. ], batch size: 27, lr: 9.93e-03, +2022-12-02 02:02:24,040 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=25056.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 02:02:39,282 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2233, 3.2335, 3.0074, 2.6374, 2.3202, 2.0142, 3.1832, 1.5591], + device='cuda:0'), covar=tensor([0.0627, 0.0406, 0.0574, 0.1048, 0.1539, 0.3632, 0.0336, 0.3442], + device='cuda:0'), in_proj_covar=tensor([0.0108, 0.0091, 0.0084, 0.0127, 0.0132, 0.0155, 0.0080, 0.0167], + device='cuda:0'), out_proj_covar=tensor([1.2385e-04, 1.1294e-04, 1.0643e-04, 1.4205e-04, 1.4833e-04, 1.7592e-04, + 9.3828e-05, 1.8174e-04], device='cuda:0') +2022-12-02 02:03:05,841 INFO [train.py:876] Epoch 18, batch 750, loss[loss=0.1329, simple_loss=0.192, pruned_loss=0.03693, over 4813.00 frames. ], tot_loss[loss=0.1439, simple_loss=0.204, pruned_loss=0.04189, over 932735.80 frames. ], batch size: 33, lr: 9.92e-03, +2022-12-02 02:03:07,753 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.045e+02 1.604e+02 2.128e+02 2.774e+02 6.059e+02, threshold=4.256e+02, percent-clipped=4.0 +2022-12-02 02:03:27,270 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=25121.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:03:54,943 INFO [train.py:876] Epoch 18, batch 800, loss[loss=0.1591, simple_loss=0.2033, pruned_loss=0.05749, over 4894.00 frames. ], tot_loss[loss=0.1439, simple_loss=0.204, pruned_loss=0.04193, over 938273.84 frames. ], batch size: 30, lr: 9.91e-03, +2022-12-02 02:04:43,163 INFO [train.py:876] Epoch 18, batch 850, loss[loss=0.1579, simple_loss=0.2252, pruned_loss=0.04535, over 4796.00 frames. ], tot_loss[loss=0.1439, simple_loss=0.204, pruned_loss=0.0419, over 939924.54 frames. ], batch size: 54, lr: 9.91e-03, +2022-12-02 02:04:45,134 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.169e+02 1.772e+02 2.127e+02 2.607e+02 4.959e+02, threshold=4.253e+02, percent-clipped=2.0 +2022-12-02 02:04:47,153 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=25203.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:04:48,008 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=25204.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:05:08,650 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=25225.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:05:31,451 INFO [train.py:876] Epoch 18, batch 900, loss[loss=0.2086, simple_loss=0.2664, pruned_loss=0.07535, over 4795.00 frames. 
], tot_loss[loss=0.1444, simple_loss=0.2055, pruned_loss=0.04165, over 942862.20 frames. ], batch size: 54, lr: 9.90e-03, +2022-12-02 02:05:33,372 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=25251.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:05:54,868 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=25273.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:06:20,144 INFO [train.py:876] Epoch 18, batch 950, loss[loss=0.1229, simple_loss=0.149, pruned_loss=0.04841, over 3908.00 frames. ], tot_loss[loss=0.144, simple_loss=0.2048, pruned_loss=0.04165, over 943467.46 frames. ], batch size: 15, lr: 9.89e-03, +2022-12-02 02:06:22,088 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.017e+02 1.794e+02 2.062e+02 2.327e+02 6.907e+02, threshold=4.124e+02, percent-clipped=1.0 +2022-12-02 02:06:53,727 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8352, 3.0343, 3.5158, 3.6950, 3.3481, 3.7186, 3.5279, 3.3880], + device='cuda:0'), covar=tensor([0.0192, 0.0309, 0.0228, 0.0173, 0.0186, 0.0170, 0.0243, 0.0307], + device='cuda:0'), in_proj_covar=tensor([0.0108, 0.0114, 0.0115, 0.0107, 0.0095, 0.0107, 0.0115, 0.0127], + device='cuda:0'), out_proj_covar=tensor([7.3781e-05, 7.8191e-05, 7.8112e-05, 7.1558e-05, 6.3206e-05, 7.2117e-05, + 7.7396e-05, 8.8396e-05], device='cuda:0') +2022-12-02 02:07:08,721 INFO [train.py:876] Epoch 18, batch 1000, loss[loss=0.1468, simple_loss=0.2094, pruned_loss=0.04212, over 4866.00 frames. ], tot_loss[loss=0.1434, simple_loss=0.2041, pruned_loss=0.04135, over 945948.00 frames. ], batch size: 39, lr: 9.88e-03, +2022-12-02 02:07:10,616 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=25351.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:07:56,951 INFO [train.py:876] Epoch 18, batch 1050, loss[loss=0.1581, simple_loss=0.2261, pruned_loss=0.04505, over 4778.00 frames. ], tot_loss[loss=0.1431, simple_loss=0.2036, pruned_loss=0.04129, over 948298.19 frames. ], batch size: 51, lr: 9.87e-03, +2022-12-02 02:07:59,047 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.053e+02 1.836e+02 2.200e+02 2.888e+02 5.391e+02, threshold=4.401e+02, percent-clipped=3.0 +2022-12-02 02:08:09,404 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4610, 3.5778, 3.7283, 3.4977, 3.2763, 3.5891, 3.6143, 3.7276], + device='cuda:0'), covar=tensor([0.0909, 0.0305, 0.0308, 0.0350, 0.0346, 0.0339, 0.0286, 0.0373], + device='cuda:0'), in_proj_covar=tensor([0.0235, 0.0155, 0.0170, 0.0165, 0.0163, 0.0165, 0.0153, 0.0171], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 02:08:18,629 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=25421.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:08:25,339 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=25428.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:08:45,534 INFO [train.py:876] Epoch 18, batch 1100, loss[loss=0.1196, simple_loss=0.1758, pruned_loss=0.03171, over 4826.00 frames. ], tot_loss[loss=0.1428, simple_loss=0.2036, pruned_loss=0.04098, over 949242.88 frames. ], batch size: 25, lr: 9.86e-03, +2022-12-02 02:08:50,840 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.12 vs. 
limit=2.0 +2022-12-02 02:09:05,185 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=25469.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:09:24,787 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=25489.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 02:09:33,902 INFO [train.py:876] Epoch 18, batch 1150, loss[loss=0.173, simple_loss=0.2177, pruned_loss=0.06416, over 4803.00 frames. ], tot_loss[loss=0.1425, simple_loss=0.2029, pruned_loss=0.04101, over 946474.08 frames. ], batch size: 32, lr: 9.85e-03, +2022-12-02 02:09:35,823 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.635e+02 2.025e+02 2.571e+02 8.226e+02, threshold=4.049e+02, percent-clipped=4.0 +2022-12-02 02:09:38,785 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=25504.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:10:01,389 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5174, 2.4019, 2.4586, 2.4766, 2.2576, 2.7583, 2.3716, 2.4401], + device='cuda:0'), covar=tensor([0.0295, 0.0380, 0.0251, 0.0299, 0.0527, 0.0302, 0.0371, 0.0293], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0066, 0.0052, 0.0055, 0.0069, 0.0060, 0.0057, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.2968e-05, 4.9398e-05, 3.8065e-05, 3.9652e-05, 5.0861e-05, 4.4879e-05, + 4.1569e-05, 3.8414e-05], device='cuda:0') +2022-12-02 02:10:22,749 INFO [train.py:876] Epoch 18, batch 1200, loss[loss=0.1827, simple_loss=0.2454, pruned_loss=0.06003, over 4790.00 frames. ], tot_loss[loss=0.1428, simple_loss=0.2029, pruned_loss=0.04134, over 947177.34 frames. ], batch size: 58, lr: 9.84e-03, +2022-12-02 02:10:25,788 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=25552.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:11:11,000 INFO [train.py:876] Epoch 18, batch 1250, loss[loss=0.1804, simple_loss=0.2451, pruned_loss=0.05781, over 4810.00 frames. ], tot_loss[loss=0.1437, simple_loss=0.2037, pruned_loss=0.0418, over 947624.04 frames. ], batch size: 54, lr: 9.83e-03, +2022-12-02 02:11:13,214 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.167e+02 1.766e+02 2.154e+02 2.726e+02 6.499e+02, threshold=4.308e+02, percent-clipped=4.0 +2022-12-02 02:11:59,549 INFO [train.py:876] Epoch 18, batch 1300, loss[loss=0.1506, simple_loss=0.2105, pruned_loss=0.04537, over 4852.00 frames. ], tot_loss[loss=0.1432, simple_loss=0.2031, pruned_loss=0.04165, over 950738.51 frames. ], batch size: 36, lr: 9.82e-03, +2022-12-02 02:12:01,673 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=25651.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:12:17,304 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-02 02:12:48,583 INFO [train.py:876] Epoch 18, batch 1350, loss[loss=0.1514, simple_loss=0.2118, pruned_loss=0.04545, over 4807.00 frames. ], tot_loss[loss=0.1432, simple_loss=0.2035, pruned_loss=0.04146, over 952345.86 frames. ], batch size: 33, lr: 9.81e-03, +2022-12-02 02:12:48,622 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=25699.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:12:50,554 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.098e+02 1.798e+02 2.164e+02 2.567e+02 5.290e+02, threshold=4.327e+02, percent-clipped=2.0 +2022-12-02 02:13:17,232 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. 
limit=2.0 +2022-12-02 02:13:38,030 INFO [train.py:876] Epoch 18, batch 1400, loss[loss=0.1446, simple_loss=0.2027, pruned_loss=0.04328, over 3958.00 frames. ], tot_loss[loss=0.1436, simple_loss=0.2041, pruned_loss=0.04157, over 949866.29 frames. ], batch size: 72, lr: 9.80e-03, +2022-12-02 02:14:09,618 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-18.pt +2022-12-02 02:14:18,734 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 02:14:19,302 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 02:14:19,953 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:14:19,985 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 02:14:20,787 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 02:14:21,501 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 02:14:23,063 INFO [train.py:876] Epoch 19, batch 0, loss[loss=0.1705, simple_loss=0.2307, pruned_loss=0.05513, over 4832.00 frames. ], tot_loss[loss=0.1705, simple_loss=0.2307, pruned_loss=0.05513, over 4832.00 frames. ], batch size: 49, lr: 9.53e-03, +2022-12-02 02:14:23,064 INFO [train.py:901] Computing validation loss +2022-12-02 02:14:37,997 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1832, 2.9786, 3.0504, 2.9438, 2.4675, 3.0198, 2.9142, 3.1183], + device='cuda:0'), covar=tensor([0.0280, 0.0370, 0.0235, 0.0368, 0.0626, 0.0318, 0.0400, 0.0233], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0066, 0.0053, 0.0055, 0.0069, 0.0062, 0.0058, 0.0053], + device='cuda:0'), out_proj_covar=tensor([4.3533e-05, 4.9484e-05, 3.8443e-05, 3.9816e-05, 5.0561e-05, 4.5851e-05, + 4.2259e-05, 3.8419e-05], device='cuda:0') +2022-12-02 02:14:38,764 INFO [train.py:910] Epoch 19, validation: loss=0.229, simple_loss=0.2737, pruned_loss=0.09215, over 253132.00 frames. +2022-12-02 02:14:38,765 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 02:14:41,805 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=25784.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 02:14:58,477 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.572e+02 1.972e+02 2.490e+02 1.211e+03, threshold=3.944e+02, percent-clipped=3.0 +2022-12-02 02:15:04,704 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0 +2022-12-02 02:15:27,646 INFO [train.py:876] Epoch 19, batch 50, loss[loss=0.1463, simple_loss=0.2222, pruned_loss=0.03525, over 4840.00 frames. ], tot_loss[loss=0.1393, simple_loss=0.1992, pruned_loss=0.03965, over 216987.31 frames. 
], batch size: 41, lr: 9.52e-03, +2022-12-02 02:15:39,620 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8302, 3.2848, 3.3685, 3.2491, 2.2980, 3.6981, 3.4523, 2.7808], + device='cuda:0'), covar=tensor([0.4852, 0.0945, 0.0952, 0.0618, 0.1092, 0.0659, 0.0504, 0.1401], + device='cuda:0'), in_proj_covar=tensor([0.0192, 0.0119, 0.0149, 0.0121, 0.0125, 0.0118, 0.0114, 0.0125], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 02:15:53,055 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 02:15:54,183 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=25858.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:16:08,872 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.06 vs. limit=2.0 +2022-12-02 02:16:16,335 INFO [train.py:876] Epoch 19, batch 100, loss[loss=0.1441, simple_loss=0.203, pruned_loss=0.04262, over 4841.00 frames. ], tot_loss[loss=0.1373, simple_loss=0.1984, pruned_loss=0.03811, over 382244.51 frames. ], batch size: 41, lr: 9.51e-03, +2022-12-02 02:16:35,886 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.164e+02 1.768e+02 2.050e+02 2.471e+02 3.771e+02, threshold=4.100e+02, percent-clipped=0.0 +2022-12-02 02:16:38,091 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:16:53,127 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.92 vs. limit=5.0 +2022-12-02 02:16:54,718 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=25919.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 02:17:03,197 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 02:17:06,102 INFO [train.py:876] Epoch 19, batch 150, loss[loss=0.1247, simple_loss=0.1865, pruned_loss=0.0315, over 4862.00 frames. ], tot_loss[loss=0.1356, simple_loss=0.1961, pruned_loss=0.03757, over 509354.26 frames. ], batch size: 39, lr: 9.50e-03, +2022-12-02 02:17:54,937 INFO [train.py:876] Epoch 19, batch 200, loss[loss=0.1166, simple_loss=0.1633, pruned_loss=0.03497, over 4671.00 frames. ], tot_loss[loss=0.1365, simple_loss=0.1967, pruned_loss=0.0382, over 607342.87 frames. 
], batch size: 21, lr: 9.50e-03, +2022-12-02 02:18:03,206 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4251, 1.0884, 1.4658, 1.0039, 1.0242, 1.0933, 1.2961, 1.3370], + device='cuda:0'), covar=tensor([0.0160, 0.0178, 0.0142, 0.0189, 0.0162, 0.0128, 0.0138, 0.0150], + device='cuda:0'), in_proj_covar=tensor([0.0033, 0.0031, 0.0031, 0.0033, 0.0036, 0.0033, 0.0036, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.6410e-05, 2.3952e-05, 2.4853e-05, 2.5871e-05, 2.9294e-05, 2.6534e-05, + 2.9729e-05, 2.4261e-05], device='cuda:0') +2022-12-02 02:18:09,971 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8822, 3.2077, 3.4246, 3.4035, 2.2944, 3.5560, 3.5824, 2.7671], + device='cuda:0'), covar=tensor([0.4871, 0.0638, 0.0867, 0.0504, 0.1052, 0.0838, 0.0374, 0.0912], + device='cuda:0'), in_proj_covar=tensor([0.0187, 0.0117, 0.0146, 0.0117, 0.0123, 0.0115, 0.0111, 0.0124], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 02:18:13,950 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-26000.pt +2022-12-02 02:18:17,229 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.092e+02 1.674e+02 2.018e+02 2.578e+02 5.889e+02, threshold=4.036e+02, percent-clipped=2.0 +2022-12-02 02:18:22,539 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.76 vs. limit=2.0 +2022-12-02 02:18:30,711 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.84 vs. limit=2.0 +2022-12-02 02:18:46,558 INFO [train.py:876] Epoch 19, batch 250, loss[loss=0.07923, simple_loss=0.134, pruned_loss=0.01222, over 3501.00 frames. ], tot_loss[loss=0.1364, simple_loss=0.1968, pruned_loss=0.03805, over 683213.63 frames. ], batch size: 13, lr: 9.49e-03, +2022-12-02 02:19:01,185 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 02:19:07,582 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.87 vs. limit=2.0 +2022-12-02 02:19:35,418 INFO [train.py:876] Epoch 19, batch 300, loss[loss=0.159, simple_loss=0.2206, pruned_loss=0.04868, over 4820.00 frames. ], tot_loss[loss=0.1379, simple_loss=0.1986, pruned_loss=0.03856, over 745202.04 frames. ], batch size: 45, lr: 9.48e-03, +2022-12-02 02:19:38,529 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=26084.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:19:55,123 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.026e+02 1.720e+02 2.016e+02 2.572e+02 7.917e+02, threshold=4.032e+02, percent-clipped=4.0 +2022-12-02 02:19:56,126 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26102.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:20:01,972 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 02:20:24,484 INFO [train.py:876] Epoch 19, batch 350, loss[loss=0.157, simple_loss=0.2319, pruned_loss=0.04103, over 4890.00 frames. ], tot_loss[loss=0.1369, simple_loss=0.1978, pruned_loss=0.03798, over 787457.50 frames. 
], batch size: 44, lr: 9.47e-03, +2022-12-02 02:20:25,535 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=26132.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:20:55,634 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=26163.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:21:00,448 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26168.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:21:13,317 INFO [train.py:876] Epoch 19, batch 400, loss[loss=0.2049, simple_loss=0.262, pruned_loss=0.07388, over 4842.00 frames. ], tot_loss[loss=0.1393, simple_loss=0.2002, pruned_loss=0.03923, over 824289.59 frames. ], batch size: 47, lr: 9.46e-03, +2022-12-02 02:21:15,257 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6303, 3.5443, 3.6428, 4.0438, 3.1804, 2.5947, 3.9996, 2.1566], + device='cuda:0'), covar=tensor([0.0566, 0.0701, 0.0254, 0.0417, 0.1271, 0.2942, 0.0246, 0.3261], + device='cuda:0'), in_proj_covar=tensor([0.0110, 0.0092, 0.0085, 0.0129, 0.0134, 0.0154, 0.0082, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2611e-04, 1.1545e-04, 1.0763e-04, 1.4515e-04, 1.5055e-04, 1.7647e-04, + 9.6300e-05, 1.8487e-04], device='cuda:0') +2022-12-02 02:21:33,087 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.107e+02 1.726e+02 2.110e+02 2.626e+02 7.269e+02, threshold=4.219e+02, percent-clipped=5.0 +2022-12-02 02:21:36,149 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 02:21:46,036 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=26214.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 02:22:00,693 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=26229.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:22:02,427 INFO [train.py:876] Epoch 19, batch 450, loss[loss=0.1371, simple_loss=0.1868, pruned_loss=0.04375, over 4908.00 frames. ], tot_loss[loss=0.1396, simple_loss=0.2004, pruned_loss=0.03941, over 854275.86 frames. ], batch size: 31, lr: 9.45e-03, +2022-12-02 02:22:04,405 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:22:21,196 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26250.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:22:29,200 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3652, 1.4198, 1.4727, 1.1391, 1.1544, 1.4053, 1.3394, 1.5351], + device='cuda:0'), covar=tensor([0.0183, 0.0182, 0.0198, 0.0158, 0.0303, 0.0170, 0.0150, 0.0136], + device='cuda:0'), in_proj_covar=tensor([0.0033, 0.0032, 0.0031, 0.0033, 0.0037, 0.0033, 0.0037, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.6900e-05, 2.4670e-05, 2.5131e-05, 2.6034e-05, 2.9846e-05, 2.6709e-05, + 3.0352e-05, 2.4417e-05], device='cuda:0') +2022-12-02 02:22:51,242 INFO [train.py:876] Epoch 19, batch 500, loss[loss=0.1414, simple_loss=0.2001, pruned_loss=0.04139, over 4825.00 frames. ], tot_loss[loss=0.1396, simple_loss=0.2011, pruned_loss=0.03903, over 875750.95 frames. 
], batch size: 34, lr: 9.44e-03, +2022-12-02 02:23:10,355 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.140e+02 1.753e+02 2.110e+02 2.571e+02 1.195e+03, threshold=4.220e+02, percent-clipped=2.0 +2022-12-02 02:23:20,451 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=26311.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:23:40,003 INFO [train.py:876] Epoch 19, batch 550, loss[loss=0.1169, simple_loss=0.1613, pruned_loss=0.03623, over 4818.00 frames. ], tot_loss[loss=0.1389, simple_loss=0.2005, pruned_loss=0.03863, over 893901.95 frames. ], batch size: 25, lr: 9.44e-03, +2022-12-02 02:24:27,247 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5800, 3.3570, 3.4391, 3.8645, 2.6419, 2.4266, 3.6460, 1.9688], + device='cuda:0'), covar=tensor([0.0415, 0.0565, 0.0275, 0.0459, 0.1650, 0.3298, 0.0229, 0.2807], + device='cuda:0'), in_proj_covar=tensor([0.0107, 0.0091, 0.0084, 0.0127, 0.0131, 0.0152, 0.0081, 0.0166], + device='cuda:0'), out_proj_covar=tensor([1.2339e-04, 1.1406e-04, 1.0632e-04, 1.4264e-04, 1.4812e-04, 1.7403e-04, + 9.5925e-05, 1.8087e-04], device='cuda:0') +2022-12-02 02:24:28,980 INFO [train.py:876] Epoch 19, batch 600, loss[loss=0.1992, simple_loss=0.2518, pruned_loss=0.07335, over 4843.00 frames. ], tot_loss[loss=0.1376, simple_loss=0.1984, pruned_loss=0.03834, over 906850.74 frames. ], batch size: 47, lr: 9.43e-03, +2022-12-02 02:24:36,204 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1731, 3.4692, 3.4994, 3.2313, 3.4374, 3.4785, 3.3962, 3.6691], + device='cuda:0'), covar=tensor([0.1531, 0.0570, 0.0598, 0.0566, 0.0574, 0.0525, 0.0496, 0.0466], + device='cuda:0'), in_proj_covar=tensor([0.0184, 0.0160, 0.0173, 0.0155, 0.0166, 0.0157, 0.0157, 0.0156], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 02:24:48,985 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.092e+02 1.619e+02 1.908e+02 2.270e+02 4.612e+02, threshold=3.817e+02, percent-clipped=1.0 +2022-12-02 02:25:01,888 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26414.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:25:18,891 INFO [train.py:876] Epoch 19, batch 650, loss[loss=0.1504, simple_loss=0.215, pruned_loss=0.04286, over 4799.00 frames. ], tot_loss[loss=0.1368, simple_loss=0.1976, pruned_loss=0.03803, over 917654.26 frames. ], batch size: 54, lr: 9.42e-03, +2022-12-02 02:25:30,029 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1401, 1.6598, 2.0601, 1.6088, 1.8953, 2.3218, 2.0814, 1.9831], + device='cuda:0'), covar=tensor([0.0626, 0.0834, 0.1267, 0.0753, 0.1205, 0.0751, 0.0855, 0.0507], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0065, 0.0061, 0.0067, 0.0059, 0.0052, 0.0055, 0.0061], + device='cuda:0'), out_proj_covar=tensor([5.6699e-05, 5.7795e-05, 5.6346e-05, 6.0053e-05, 5.4253e-05, 4.8346e-05, + 5.1187e-05, 5.4678e-05], device='cuda:0') +2022-12-02 02:25:45,484 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=26458.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:26:02,524 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=26475.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:26:07,966 INFO [train.py:876] Epoch 19, batch 700, loss[loss=0.1422, simple_loss=0.2017, pruned_loss=0.0413, over 4802.00 frames. ], tot_loss[loss=0.1368, simple_loss=0.1977, pruned_loss=0.03796, over 922493.32 frames. 
], batch size: 33, lr: 9.41e-03, +2022-12-02 02:26:27,157 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.053e+02 1.616e+02 1.934e+02 2.358e+02 5.310e+02, threshold=3.867e+02, percent-clipped=4.0 +2022-12-02 02:26:40,146 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=26514.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:26:49,590 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=26524.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:26:56,228 INFO [train.py:876] Epoch 19, batch 750, loss[loss=0.1958, simple_loss=0.2455, pruned_loss=0.07302, over 4173.00 frames. ], tot_loss[loss=0.1371, simple_loss=0.198, pruned_loss=0.03811, over 930564.99 frames. ], batch size: 73, lr: 9.40e-03, +2022-12-02 02:27:26,660 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=26562.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:27:28,140 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.91 vs. limit=5.0 +2022-12-02 02:27:28,886 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.91 vs. limit=2.0 +2022-12-02 02:27:42,557 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.85 vs. limit=2.0 +2022-12-02 02:27:44,737 INFO [train.py:876] Epoch 19, batch 800, loss[loss=0.132, simple_loss=0.1849, pruned_loss=0.03951, over 4736.00 frames. ], tot_loss[loss=0.1376, simple_loss=0.1984, pruned_loss=0.03836, over 935380.59 frames. ], batch size: 27, lr: 9.39e-03, +2022-12-02 02:27:45,278 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=2.00 vs. limit=2.0 +2022-12-02 02:27:46,847 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1506, 2.8251, 3.3067, 2.8273, 3.2727, 3.0391, 2.7276, 3.5717], + device='cuda:0'), covar=tensor([0.0171, 0.0940, 0.0473, 0.1267, 0.0265, 0.0401, 0.1225, 0.0334], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0106, 0.0086, 0.0119, 0.0085, 0.0083, 0.0114, 0.0095], + device='cuda:0'), out_proj_covar=tensor([8.0298e-05, 1.2405e-04, 1.0391e-04, 1.3873e-04, 9.8019e-05, 1.0263e-04, + 1.3126e-04, 1.0662e-04], device='cuda:0') +2022-12-02 02:27:46,962 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7166, 3.4582, 3.9651, 3.7816, 3.9877, 3.3578, 3.7593, 3.9598], + device='cuda:0'), covar=tensor([0.0351, 0.0404, 0.0226, 0.0272, 0.0217, 0.0489, 0.0239, 0.0360], + device='cuda:0'), in_proj_covar=tensor([0.0112, 0.0123, 0.0105, 0.0112, 0.0102, 0.0136, 0.0093, 0.0099], + device='cuda:0'), out_proj_covar=tensor([9.5277e-05, 1.0441e-04, 8.8421e-05, 9.6456e-05, 8.7394e-05, 1.1676e-04, + 8.1727e-05, 8.6749e-05], device='cuda:0') +2022-12-02 02:28:03,719 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.039e+02 1.725e+02 2.034e+02 2.500e+02 5.144e+02, threshold=4.068e+02, percent-clipped=5.0 +2022-12-02 02:28:08,655 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=26606.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:28:09,100 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 02:28:32,783 INFO [train.py:876] Epoch 19, batch 850, loss[loss=0.06855, simple_loss=0.1159, pruned_loss=0.01059, over 3938.00 frames. ], tot_loss[loss=0.1378, simple_loss=0.1989, pruned_loss=0.03838, over 941241.94 frames. 
], batch size: 15, lr: 9.38e-03, +2022-12-02 02:28:51,173 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4625, 1.7035, 1.9261, 3.2090, 2.3318, 2.8823, 2.9449, 3.5575], + device='cuda:0'), covar=tensor([0.0355, 0.1715, 0.2260, 0.0405, 0.0594, 0.0550, 0.0793, 0.0300], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0097, 0.0115, 0.0067, 0.0075, 0.0066, 0.0078, 0.0082], + device='cuda:0'), out_proj_covar=tensor([7.1470e-05, 1.0704e-04, 1.2364e-04, 7.6181e-05, 7.7319e-05, 7.4601e-05, + 8.6503e-05, 8.0732e-05], device='cuda:0') +2022-12-02 02:29:20,863 INFO [train.py:876] Epoch 19, batch 900, loss[loss=0.1486, simple_loss=0.2116, pruned_loss=0.04274, over 4789.00 frames. ], tot_loss[loss=0.1377, simple_loss=0.1992, pruned_loss=0.03811, over 943322.22 frames. ], batch size: 54, lr: 9.38e-03, +2022-12-02 02:29:23,445 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.97 vs. limit=2.0 +2022-12-02 02:29:31,984 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.59 vs. limit=2.0 +2022-12-02 02:29:34,506 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4754, 3.3920, 3.0506, 3.4308, 2.6395, 2.4494, 3.5658, 1.9865], + device='cuda:0'), covar=tensor([0.0351, 0.0350, 0.0435, 0.0489, 0.1315, 0.2560, 0.0271, 0.2543], + device='cuda:0'), in_proj_covar=tensor([0.0111, 0.0093, 0.0086, 0.0131, 0.0135, 0.0156, 0.0082, 0.0170], + device='cuda:0'), out_proj_covar=tensor([1.2694e-04, 1.1644e-04, 1.0926e-04, 1.4651e-04, 1.5195e-04, 1.7772e-04, + 9.7574e-05, 1.8563e-04], device='cuda:0') +2022-12-02 02:29:40,146 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.650e+02 1.896e+02 2.488e+02 6.357e+02, threshold=3.791e+02, percent-clipped=3.0 +2022-12-02 02:29:52,310 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.32 vs. limit=2.0 +2022-12-02 02:30:08,569 INFO [train.py:876] Epoch 19, batch 950, loss[loss=0.1576, simple_loss=0.2104, pruned_loss=0.05242, over 4825.00 frames. ], tot_loss[loss=0.1385, simple_loss=0.1999, pruned_loss=0.03857, over 947344.03 frames. ], batch size: 34, lr: 9.37e-03, +2022-12-02 02:30:35,537 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=26758.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:30:47,079 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=26770.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:30:56,215 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.87 vs. limit=2.0 +2022-12-02 02:30:57,407 INFO [train.py:876] Epoch 19, batch 1000, loss[loss=0.1804, simple_loss=0.2484, pruned_loss=0.05625, over 4786.00 frames. ], tot_loss[loss=0.1386, simple_loss=0.1997, pruned_loss=0.03878, over 948297.89 frames. ], batch size: 58, lr: 9.36e-03, +2022-12-02 02:31:02,619 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. 
limit=2.0 +2022-12-02 02:31:17,191 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.742e+01 1.822e+02 2.181e+02 2.757e+02 7.344e+02, threshold=4.362e+02, percent-clipped=4.0 +2022-12-02 02:31:22,146 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=26806.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:31:25,191 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4958, 1.6221, 1.9447, 3.2523, 2.2681, 3.0300, 2.9527, 3.6838], + device='cuda:0'), covar=tensor([0.0340, 0.1811, 0.2407, 0.0437, 0.0742, 0.0580, 0.0800, 0.0306], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0098, 0.0114, 0.0067, 0.0076, 0.0066, 0.0078, 0.0082], + device='cuda:0'), out_proj_covar=tensor([7.0931e-05, 1.0840e-04, 1.2325e-04, 7.6834e-05, 7.7789e-05, 7.5216e-05, + 8.6610e-05, 8.0024e-05], device='cuda:0') +2022-12-02 02:31:34,392 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.56 vs. limit=2.0 +2022-12-02 02:31:39,979 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=26824.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:31:46,718 INFO [train.py:876] Epoch 19, batch 1050, loss[loss=0.1613, simple_loss=0.2259, pruned_loss=0.04837, over 4884.00 frames. ], tot_loss[loss=0.138, simple_loss=0.1989, pruned_loss=0.03857, over 947094.15 frames. ], batch size: 37, lr: 9.35e-03, +2022-12-02 02:31:50,956 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0619, 3.4558, 3.3288, 2.9976, 3.3436, 3.3147, 3.3427, 3.5441], + device='cuda:0'), covar=tensor([0.1661, 0.0450, 0.0618, 0.0678, 0.0605, 0.0470, 0.0495, 0.0420], + device='cuda:0'), in_proj_covar=tensor([0.0177, 0.0155, 0.0170, 0.0151, 0.0163, 0.0153, 0.0152, 0.0154], + device='cuda:0'), out_proj_covar=tensor([1.1900e-04, 1.0312e-04, 1.1292e-04, 1.0166e-04, 1.0715e-04, 9.9831e-05, + 1.0092e-04, 1.0075e-04], device='cuda:0') +2022-12-02 02:31:58,309 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.79 vs. limit=5.0 +2022-12-02 02:32:26,692 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=26872.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:32:35,313 INFO [train.py:876] Epoch 19, batch 1100, loss[loss=0.1822, simple_loss=0.2302, pruned_loss=0.0671, over 4841.00 frames. ], tot_loss[loss=0.1387, simple_loss=0.1998, pruned_loss=0.0388, over 947945.99 frames. ], batch size: 47, lr: 9.34e-03, +2022-12-02 02:32:54,861 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.073e+02 1.521e+02 1.882e+02 2.557e+02 6.558e+02, threshold=3.764e+02, percent-clipped=4.0 +2022-12-02 02:32:57,959 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26904.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:32:59,697 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=26906.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:33:09,001 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.63 vs. limit=2.0 +2022-12-02 02:33:24,130 INFO [train.py:876] Epoch 19, batch 1150, loss[loss=0.1361, simple_loss=0.2039, pruned_loss=0.03416, over 4864.00 frames. ], tot_loss[loss=0.1375, simple_loss=0.1983, pruned_loss=0.03838, over 948474.83 frames. 
], batch size: 39, lr: 9.33e-03, +2022-12-02 02:33:42,188 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=26950.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:33:46,037 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=26954.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:33:57,370 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9960, 2.5873, 2.9813, 2.5817, 2.6975, 2.0699, 2.7666, 2.9222], + device='cuda:0'), covar=tensor([0.0186, 0.1031, 0.0509, 0.1147, 0.0333, 0.0502, 0.1097, 0.0545], + device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0104, 0.0085, 0.0115, 0.0081, 0.0082, 0.0111, 0.0094], + device='cuda:0'), out_proj_covar=tensor([7.8901e-05, 1.2155e-04, 1.0299e-04, 1.3331e-04, 9.4650e-05, 1.0090e-04, + 1.2804e-04, 1.0535e-04], device='cuda:0') +2022-12-02 02:33:57,450 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=26965.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 02:34:12,396 INFO [train.py:876] Epoch 19, batch 1200, loss[loss=0.1396, simple_loss=0.2083, pruned_loss=0.03551, over 4862.00 frames. ], tot_loss[loss=0.1394, simple_loss=0.2003, pruned_loss=0.03927, over 947093.40 frames. ], batch size: 39, lr: 9.32e-03, +2022-12-02 02:34:31,592 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.223e+01 1.668e+02 2.034e+02 2.450e+02 4.435e+02, threshold=4.069e+02, percent-clipped=2.0 +2022-12-02 02:34:41,351 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=27011.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:35:00,014 INFO [train.py:876] Epoch 19, batch 1250, loss[loss=0.09852, simple_loss=0.1521, pruned_loss=0.02248, over 4697.00 frames. ], tot_loss[loss=0.1398, simple_loss=0.2013, pruned_loss=0.03914, over 950053.02 frames. 
], batch size: 21, lr: 9.32e-03, +2022-12-02 02:35:04,393 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5209, 3.1792, 3.7770, 3.6528, 3.8298, 2.9988, 3.6991, 3.7122], + device='cuda:0'), covar=tensor([0.0375, 0.0427, 0.0280, 0.0294, 0.0237, 0.0535, 0.0240, 0.0311], + device='cuda:0'), in_proj_covar=tensor([0.0112, 0.0121, 0.0107, 0.0112, 0.0100, 0.0135, 0.0092, 0.0099], + device='cuda:0'), out_proj_covar=tensor([9.5050e-05, 1.0227e-04, 9.0364e-05, 9.5985e-05, 8.4963e-05, 1.1552e-04, + 8.1289e-05, 8.6002e-05], device='cuda:0') +2022-12-02 02:35:22,531 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2065, 3.8457, 3.6035, 4.6856, 3.7455, 2.8838, 4.4411, 1.9435], + device='cuda:0'), covar=tensor([0.0407, 0.0513, 0.0364, 0.0148, 0.0720, 0.2049, 0.0186, 0.3129], + device='cuda:0'), in_proj_covar=tensor([0.0108, 0.0093, 0.0085, 0.0126, 0.0132, 0.0153, 0.0080, 0.0166], + device='cuda:0'), out_proj_covar=tensor([1.2414e-04, 1.1554e-04, 1.0773e-04, 1.4147e-04, 1.4849e-04, 1.7420e-04, + 9.5204e-05, 1.8117e-04], device='cuda:0') +2022-12-02 02:35:29,619 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6540, 1.8799, 2.1519, 3.4373, 2.8988, 3.1444, 3.1024, 3.8663], + device='cuda:0'), covar=tensor([0.0248, 0.1581, 0.1988, 0.0377, 0.0407, 0.0426, 0.0469, 0.0231], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0096, 0.0111, 0.0066, 0.0074, 0.0064, 0.0076, 0.0079], + device='cuda:0'), out_proj_covar=tensor([6.9572e-05, 1.0594e-04, 1.1985e-04, 7.4776e-05, 7.5665e-05, 7.3071e-05, + 8.4398e-05, 7.7572e-05], device='cuda:0') +2022-12-02 02:35:38,370 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=27070.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:35:49,158 INFO [train.py:876] Epoch 19, batch 1300, loss[loss=0.1346, simple_loss=0.187, pruned_loss=0.04112, over 4773.00 frames. ], tot_loss[loss=0.14, simple_loss=0.2014, pruned_loss=0.03928, over 947887.86 frames. ], batch size: 26, lr: 9.31e-03, +2022-12-02 02:36:06,482 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.97 vs. limit=2.0 +2022-12-02 02:36:08,891 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.794e+02 2.065e+02 2.566e+02 6.547e+02, threshold=4.130e+02, percent-clipped=5.0 +2022-12-02 02:36:25,604 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=27118.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:36:37,948 INFO [train.py:876] Epoch 19, batch 1350, loss[loss=0.1707, simple_loss=0.2195, pruned_loss=0.06092, over 4796.00 frames. ], tot_loss[loss=0.1393, simple_loss=0.2002, pruned_loss=0.0392, over 946261.29 frames. ], batch size: 45, lr: 9.30e-03, +2022-12-02 02:36:54,812 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=27148.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:37:15,297 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5697, 1.6796, 0.9356, 1.7631, 1.6412, 1.6752, 1.6021, 1.5714], + device='cuda:0'), covar=tensor([0.0261, 0.0443, 0.0400, 0.0252, 0.0226, 0.0267, 0.0244, 0.0535], + device='cuda:0'), in_proj_covar=tensor([0.0045, 0.0044, 0.0049, 0.0036, 0.0041, 0.0042, 0.0041, 0.0038], + device='cuda:0'), out_proj_covar=tensor([4.2153e-05, 4.1243e-05, 4.7579e-05, 3.4210e-05, 3.7495e-05, 3.9921e-05, + 3.8981e-05, 3.7397e-05], device='cuda:0') +2022-12-02 02:37:27,006 INFO [train.py:876] Epoch 19, batch 1400, loss[loss=0.1732, simple_loss=0.2473, pruned_loss=0.04951, over 4799.00 frames. 
], tot_loss[loss=0.1388, simple_loss=0.1995, pruned_loss=0.039, over 947588.42 frames. ], batch size: 54, lr: 9.29e-03, +2022-12-02 02:37:46,272 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.004e+02 1.813e+02 2.274e+02 2.969e+02 7.503e+02, threshold=4.549e+02, percent-clipped=6.0 +2022-12-02 02:37:54,383 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=27209.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:37:55,421 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4633, 2.4237, 2.2688, 2.5988, 1.9668, 2.4418, 1.5504, 2.5590], + device='cuda:0'), covar=tensor([0.0825, 0.0883, 0.1057, 0.0666, 0.1184, 0.1296, 0.1341, 0.0631], + device='cuda:0'), in_proj_covar=tensor([0.0078, 0.0082, 0.0099, 0.0083, 0.0104, 0.0087, 0.0092, 0.0088], + device='cuda:0'), out_proj_covar=tensor([8.1308e-05, 8.4037e-05, 9.8894e-05, 8.4991e-05, 1.0368e-04, 8.9936e-05, + 9.4180e-05, 9.0023e-05], device='cuda:0') +2022-12-02 02:37:57,250 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-19.pt +2022-12-02 02:38:06,372 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 02:38:07,312 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 02:38:07,607 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:38:07,639 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 02:38:08,799 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 02:38:09,120 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 02:38:10,363 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.76 vs. limit=2.0 +2022-12-02 02:38:10,732 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. limit=2.0 +2022-12-02 02:38:10,829 INFO [train.py:876] Epoch 20, batch 0, loss[loss=0.1356, simple_loss=0.1867, pruned_loss=0.04223, over 4923.00 frames. ], tot_loss[loss=0.1356, simple_loss=0.1867, pruned_loss=0.04223, over 4923.00 frames. ], batch size: 31, lr: 9.05e-03, +2022-12-02 02:38:10,830 INFO [train.py:901] Computing validation loss +2022-12-02 02:38:26,412 INFO [train.py:910] Epoch 20, validation: loss=0.2307, simple_loss=0.2751, pruned_loss=0.09318, over 253132.00 frames. 
+2022-12-02 02:38:26,412 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 02:38:44,793 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3654, 2.3177, 2.3280, 2.6105, 2.0390, 2.3105, 2.4649, 2.5420], + device='cuda:0'), covar=tensor([0.0393, 0.0457, 0.0339, 0.0321, 0.0539, 0.0435, 0.0343, 0.0236], + device='cuda:0'), in_proj_covar=tensor([0.0060, 0.0065, 0.0053, 0.0055, 0.0069, 0.0062, 0.0057, 0.0051], + device='cuda:0'), out_proj_covar=tensor([4.4226e-05, 4.9157e-05, 3.8452e-05, 3.9828e-05, 5.0723e-05, 4.5558e-05, + 4.2443e-05, 3.7113e-05], device='cuda:0') +2022-12-02 02:39:11,979 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=27260.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:39:14,622 INFO [train.py:876] Epoch 20, batch 50, loss[loss=0.1105, simple_loss=0.1735, pruned_loss=0.02374, over 4753.00 frames. ], tot_loss[loss=0.1332, simple_loss=0.1949, pruned_loss=0.03572, over 214538.50 frames. ], batch size: 27, lr: 9.04e-03, +2022-12-02 02:39:36,409 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 02:39:43,426 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.56 vs. limit=2.0 +2022-12-02 02:39:51,760 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.161e+02 1.627e+02 1.885e+02 2.452e+02 4.761e+02, threshold=3.771e+02, percent-clipped=1.0 +2022-12-02 02:39:57,072 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=27306.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:40:03,783 INFO [train.py:876] Epoch 20, batch 100, loss[loss=0.1285, simple_loss=0.1961, pruned_loss=0.03042, over 3956.00 frames. ], tot_loss[loss=0.1297, simple_loss=0.1916, pruned_loss=0.03392, over 378722.57 frames. ], batch size: 72, lr: 9.03e-03, +2022-12-02 02:40:18,895 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:40:40,016 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 02:40:51,373 INFO [train.py:876] Epoch 20, batch 150, loss[loss=0.1502, simple_loss=0.2169, pruned_loss=0.04178, over 4883.00 frames. ], tot_loss[loss=0.1338, simple_loss=0.1967, pruned_loss=0.0355, over 507548.56 frames. ], batch size: 38, lr: 9.02e-03, +2022-12-02 02:41:20,388 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4913, 1.4506, 1.4580, 1.0742, 1.0212, 1.4789, 1.3556, 1.4086], + device='cuda:0'), covar=tensor([0.0156, 0.0176, 0.0155, 0.0236, 0.0244, 0.0137, 0.0150, 0.0129], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0032, 0.0031, 0.0033, 0.0036, 0.0033, 0.0037, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.7115e-05, 2.4587e-05, 2.5386e-05, 2.6437e-05, 2.9211e-05, 2.5976e-05, + 3.0058e-05, 2.3915e-05], device='cuda:0') +2022-12-02 02:41:28,206 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.545e+02 1.808e+02 2.339e+02 4.432e+02, threshold=3.616e+02, percent-clipped=2.0 +2022-12-02 02:41:39,743 INFO [train.py:876] Epoch 20, batch 200, loss[loss=0.1414, simple_loss=0.2023, pruned_loss=0.04029, over 4835.00 frames. ], tot_loss[loss=0.1352, simple_loss=0.1971, pruned_loss=0.03666, over 606453.48 frames. 
], batch size: 41, lr: 9.01e-03, +2022-12-02 02:42:13,123 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7483, 5.2661, 5.0029, 5.4448, 4.7667, 4.3155, 5.3817, 5.0240], + device='cuda:0'), covar=tensor([0.0127, 0.0066, 0.0103, 0.0112, 0.0112, 0.0147, 0.0065, 0.0105], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0050, 0.0054, 0.0042, 0.0053, 0.0054, 0.0049, 0.0049], + device='cuda:0'), out_proj_covar=tensor([5.0903e-05, 4.3500e-05, 4.7998e-05, 3.5871e-05, 4.7412e-05, 4.9452e-05, + 4.0759e-05, 4.1413e-05], device='cuda:0') +2022-12-02 02:42:23,907 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4064, 1.6295, 1.9465, 3.2058, 2.2499, 2.8680, 3.0099, 3.5316], + device='cuda:0'), covar=tensor([0.0302, 0.1785, 0.2309, 0.0395, 0.0649, 0.0528, 0.0628, 0.0330], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0099, 0.0116, 0.0068, 0.0076, 0.0067, 0.0078, 0.0083], + device='cuda:0'), out_proj_covar=tensor([7.1321e-05, 1.0942e-04, 1.2482e-04, 7.7173e-05, 7.8511e-05, 7.6533e-05, + 8.6467e-05, 8.1414e-05], device='cuda:0') +2022-12-02 02:42:28,571 INFO [train.py:876] Epoch 20, batch 250, loss[loss=0.1197, simple_loss=0.18, pruned_loss=0.02973, over 4833.00 frames. ], tot_loss[loss=0.1335, simple_loss=0.1951, pruned_loss=0.03598, over 684425.69 frames. ], batch size: 41, lr: 9.01e-03, +2022-12-02 02:42:30,498 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 02:43:05,534 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.039e+02 1.571e+02 1.907e+02 2.236e+02 3.831e+02, threshold=3.815e+02, percent-clipped=3.0 +2022-12-02 02:43:08,636 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=27504.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:43:17,243 INFO [train.py:876] Epoch 20, batch 300, loss[loss=0.1506, simple_loss=0.2149, pruned_loss=0.04316, over 4828.00 frames. ], tot_loss[loss=0.1351, simple_loss=0.1969, pruned_loss=0.03665, over 746558.32 frames. ], batch size: 45, lr: 9.00e-03, +2022-12-02 02:43:24,717 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 02:44:03,520 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=27560.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 02:44:06,165 INFO [train.py:876] Epoch 20, batch 350, loss[loss=0.1741, simple_loss=0.2371, pruned_loss=0.05559, over 4785.00 frames. ], tot_loss[loss=0.1366, simple_loss=0.1983, pruned_loss=0.03747, over 793862.08 frames. ], batch size: 51, lr: 8.99e-03, +2022-12-02 02:44:24,544 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.82 vs. limit=5.0 +2022-12-02 02:44:43,387 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.515e+01 1.669e+02 2.034e+02 2.575e+02 4.956e+02, threshold=4.067e+02, percent-clipped=3.0 +2022-12-02 02:44:48,254 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=27606.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:44:50,320 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=27608.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:44:54,740 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.88 vs. limit=2.0 +2022-12-02 02:44:54,904 INFO [train.py:876] Epoch 20, batch 400, loss[loss=0.1555, simple_loss=0.2147, pruned_loss=0.04817, over 4842.00 frames. ], tot_loss[loss=0.1373, simple_loss=0.1991, pruned_loss=0.03774, over 830644.83 frames. 
], batch size: 49, lr: 8.98e-03, +2022-12-02 02:44:58,576 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 02:45:20,614 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.84 vs. limit=2.0 +2022-12-02 02:45:20,829 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 02:45:27,051 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1233, 3.1737, 3.5507, 3.1024, 2.8334, 3.2182, 3.4003, 3.3798], + device='cuda:0'), covar=tensor([0.1002, 0.0359, 0.0413, 0.0388, 0.0494, 0.0492, 0.0322, 0.0441], + device='cuda:0'), in_proj_covar=tensor([0.0232, 0.0154, 0.0174, 0.0166, 0.0168, 0.0167, 0.0153, 0.0177], + device='cuda:0'), out_proj_covar=tensor([1.5107e-04, 9.9588e-05, 1.1265e-04, 1.0740e-04, 1.1004e-04, 1.0909e-04, + 1.0075e-04, 1.1734e-04], device='cuda:0') +2022-12-02 02:45:34,402 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=27654.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:45:43,074 INFO [train.py:876] Epoch 20, batch 450, loss[loss=0.1237, simple_loss=0.1836, pruned_loss=0.03188, over 4786.00 frames. ], tot_loss[loss=0.138, simple_loss=0.1999, pruned_loss=0.03802, over 858341.78 frames. ], batch size: 32, lr: 8.98e-03, +2022-12-02 02:46:02,207 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.09 vs. limit=2.0 +2022-12-02 02:46:19,763 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.096e+02 1.746e+02 2.019e+02 2.497e+02 6.031e+02, threshold=4.039e+02, percent-clipped=1.0 +2022-12-02 02:46:31,388 INFO [train.py:876] Epoch 20, batch 500, loss[loss=0.1344, simple_loss=0.2044, pruned_loss=0.0322, over 4841.00 frames. ], tot_loss[loss=0.1373, simple_loss=0.1993, pruned_loss=0.03766, over 880085.11 frames. ], batch size: 34, lr: 8.97e-03, +2022-12-02 02:46:32,507 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=27714.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:47:20,272 INFO [train.py:876] Epoch 20, batch 550, loss[loss=0.1272, simple_loss=0.1854, pruned_loss=0.03447, over 4839.00 frames. ], tot_loss[loss=0.1358, simple_loss=0.1977, pruned_loss=0.0369, over 898335.07 frames. ], batch size: 34, lr: 8.96e-03, +2022-12-02 02:47:32,395 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=27775.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 02:47:54,076 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0660, 3.1794, 3.6555, 3.5857, 2.7118, 3.5768, 3.7380, 2.8931], + device='cuda:0'), covar=tensor([0.5008, 0.0851, 0.0769, 0.0392, 0.1082, 0.0913, 0.0459, 0.1235], + device='cuda:0'), in_proj_covar=tensor([0.0182, 0.0114, 0.0144, 0.0117, 0.0126, 0.0115, 0.0116, 0.0121], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 02:47:58,034 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.173e+02 1.597e+02 1.886e+02 2.327e+02 5.792e+02, threshold=3.772e+02, percent-clipped=2.0 +2022-12-02 02:48:01,151 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=27804.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:48:10,044 INFO [train.py:876] Epoch 20, batch 600, loss[loss=0.1347, simple_loss=0.1965, pruned_loss=0.03647, over 4835.00 frames. ], tot_loss[loss=0.1341, simple_loss=0.1953, pruned_loss=0.03643, over 909025.18 frames. 
], batch size: 35, lr: 8.95e-03, +2022-12-02 02:48:18,943 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0715, 2.5963, 3.0988, 2.8095, 2.8121, 2.8852, 2.6569, 3.5200], + device='cuda:0'), covar=tensor([0.0160, 0.1198, 0.0551, 0.1178, 0.0366, 0.0345, 0.1396, 0.0386], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0108, 0.0089, 0.0119, 0.0083, 0.0084, 0.0117, 0.0097], + device='cuda:0'), out_proj_covar=tensor([8.2620e-05, 1.2584e-04, 1.0685e-04, 1.3936e-04, 9.6406e-05, 1.0414e-04, + 1.3417e-04, 1.0863e-04], device='cuda:0') +2022-12-02 02:48:35,196 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.0455, 5.0613, 5.5003, 4.8226, 5.2585, 5.1980, 4.9339, 4.9712], + device='cuda:0'), covar=tensor([0.0548, 0.0390, 0.0407, 0.0462, 0.0702, 0.0337, 0.1044, 0.0376], + device='cuda:0'), in_proj_covar=tensor([0.0160, 0.0118, 0.0153, 0.0130, 0.0118, 0.0141, 0.0166, 0.0118], + device='cuda:0'), out_proj_covar=tensor([1.1899e-04, 7.8923e-05, 1.2045e-04, 9.2444e-05, 9.0386e-05, 1.0524e-04, + 1.2837e-04, 8.3819e-05], device='cuda:0') +2022-12-02 02:48:39,413 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4617, 3.7642, 3.7034, 3.4709, 3.2489, 3.5490, 3.7608, 3.5430], + device='cuda:0'), covar=tensor([0.1143, 0.0296, 0.0427, 0.0383, 0.0434, 0.0433, 0.0298, 0.0566], + device='cuda:0'), in_proj_covar=tensor([0.0228, 0.0153, 0.0173, 0.0164, 0.0165, 0.0165, 0.0152, 0.0175], + device='cuda:0'), out_proj_covar=tensor([1.4841e-04, 9.9180e-05, 1.1195e-04, 1.0558e-04, 1.0795e-04, 1.0767e-04, + 9.9755e-05, 1.1631e-04], device='cuda:0') +2022-12-02 02:48:47,895 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=27852.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:48:52,454 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.68 vs. limit=2.0 +2022-12-02 02:48:58,690 INFO [train.py:876] Epoch 20, batch 650, loss[loss=0.1609, simple_loss=0.2239, pruned_loss=0.04896, over 4794.00 frames. ], tot_loss[loss=0.1348, simple_loss=0.1963, pruned_loss=0.03665, over 917564.98 frames. ], batch size: 51, lr: 8.94e-03, +2022-12-02 02:49:35,326 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.077e+02 1.546e+02 1.979e+02 2.575e+02 8.772e+02, threshold=3.957e+02, percent-clipped=3.0 +2022-12-02 02:49:45,520 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=8.79 vs. limit=5.0 +2022-12-02 02:49:47,005 INFO [train.py:876] Epoch 20, batch 700, loss[loss=0.1382, simple_loss=0.1937, pruned_loss=0.04135, over 4855.00 frames. ], tot_loss[loss=0.1354, simple_loss=0.1972, pruned_loss=0.03683, over 927046.15 frames. ], batch size: 35, lr: 8.94e-03, +2022-12-02 02:50:27,305 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5457, 1.7258, 2.0222, 3.2476, 2.3383, 2.9293, 2.8483, 3.6268], + device='cuda:0'), covar=tensor([0.0318, 0.2053, 0.2378, 0.0394, 0.0680, 0.0607, 0.0618, 0.0336], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0100, 0.0119, 0.0067, 0.0078, 0.0069, 0.0079, 0.0083], + device='cuda:0'), out_proj_covar=tensor([7.3289e-05, 1.1057e-04, 1.2812e-04, 7.7135e-05, 8.0350e-05, 7.8187e-05, + 8.8653e-05, 8.2205e-05], device='cuda:0') +2022-12-02 02:50:28,959 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.28 vs. limit=5.0 +2022-12-02 02:50:35,852 INFO [train.py:876] Epoch 20, batch 750, loss[loss=0.1428, simple_loss=0.2037, pruned_loss=0.04093, over 4838.00 frames. ], tot_loss[loss=0.1332, simple_loss=0.1946, pruned_loss=0.0359, over 931871.28 frames. 
], batch size: 41, lr: 8.93e-03, +2022-12-02 02:51:11,936 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-28000.pt +2022-12-02 02:51:15,165 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.102e+02 1.619e+02 1.882e+02 2.373e+02 5.620e+02, threshold=3.764e+02, percent-clipped=3.0 +2022-12-02 02:51:26,471 INFO [train.py:876] Epoch 20, batch 800, loss[loss=0.1458, simple_loss=0.2148, pruned_loss=0.03844, over 4867.00 frames. ], tot_loss[loss=0.1338, simple_loss=0.195, pruned_loss=0.03636, over 934499.73 frames. ], batch size: 36, lr: 8.92e-03, +2022-12-02 02:52:13,339 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8255, 3.1186, 3.5939, 3.4931, 2.3596, 3.7462, 3.6636, 2.8549], + device='cuda:0'), covar=tensor([0.5315, 0.1154, 0.0786, 0.0381, 0.1292, 0.0765, 0.0510, 0.1065], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0114, 0.0143, 0.0117, 0.0125, 0.0115, 0.0115, 0.0120], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 02:52:15,118 INFO [train.py:876] Epoch 20, batch 850, loss[loss=0.1603, simple_loss=0.2322, pruned_loss=0.04418, over 4802.00 frames. ], tot_loss[loss=0.1334, simple_loss=0.1947, pruned_loss=0.03603, over 940535.62 frames. ], batch size: 51, lr: 8.91e-03, +2022-12-02 02:52:22,228 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28070.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:52:52,623 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.035e+02 1.604e+02 1.992e+02 2.403e+02 3.369e+03, threshold=3.984e+02, percent-clipped=8.0 +2022-12-02 02:53:04,267 INFO [train.py:876] Epoch 20, batch 900, loss[loss=0.1303, simple_loss=0.2067, pruned_loss=0.02696, over 4845.00 frames. ], tot_loss[loss=0.1331, simple_loss=0.1942, pruned_loss=0.036, over 942899.35 frames. ], batch size: 47, lr: 8.91e-03, +2022-12-02 02:53:22,011 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6785, 3.5224, 3.8546, 3.6702, 3.8456, 3.2781, 3.7381, 3.3624], + device='cuda:0'), covar=tensor([0.0311, 0.0361, 0.0252, 0.0281, 0.0263, 0.0460, 0.0247, 0.0798], + device='cuda:0'), in_proj_covar=tensor([0.0113, 0.0119, 0.0106, 0.0110, 0.0099, 0.0135, 0.0090, 0.0105], + device='cuda:0'), out_proj_covar=tensor([9.5793e-05, 1.0100e-04, 9.0058e-05, 9.3983e-05, 8.3627e-05, 1.1523e-04, + 7.9865e-05, 9.0442e-05], device='cuda:0') +2022-12-02 02:53:29,738 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9021, 2.1139, 2.4525, 3.7108, 3.1814, 3.1099, 3.1826, 3.9192], + device='cuda:0'), covar=tensor([0.0214, 0.1642, 0.2038, 0.0309, 0.0380, 0.0611, 0.0558, 0.0249], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0100, 0.0119, 0.0067, 0.0077, 0.0069, 0.0078, 0.0083], + device='cuda:0'), out_proj_covar=tensor([7.3358e-05, 1.1048e-04, 1.2840e-04, 7.7099e-05, 7.9304e-05, 7.8281e-05, + 8.7944e-05, 8.1870e-05], device='cuda:0') +2022-12-02 02:53:32,994 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.21 vs. limit=5.0 +2022-12-02 02:53:52,585 INFO [train.py:876] Epoch 20, batch 950, loss[loss=0.1678, simple_loss=0.2347, pruned_loss=0.05048, over 4807.00 frames. ], tot_loss[loss=0.1338, simple_loss=0.1948, pruned_loss=0.03643, over 946193.13 frames. 
], batch size: 42, lr: 8.90e-03, +2022-12-02 02:54:29,777 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.183e+02 1.598e+02 1.904e+02 2.307e+02 6.858e+02, threshold=3.809e+02, percent-clipped=2.0 +2022-12-02 02:54:41,501 INFO [train.py:876] Epoch 20, batch 1000, loss[loss=0.09143, simple_loss=0.142, pruned_loss=0.02041, over 4001.00 frames. ], tot_loss[loss=0.1345, simple_loss=0.1953, pruned_loss=0.03683, over 947033.77 frames. ], batch size: 16, lr: 8.89e-03, +2022-12-02 02:54:57,528 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28229.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:55:16,107 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9370, 4.5881, 4.6296, 4.5238, 4.3059, 4.3185, 4.5003, 4.6224], + device='cuda:0'), covar=tensor([0.1110, 0.0202, 0.0256, 0.0222, 0.0207, 0.0269, 0.0214, 0.0378], + device='cuda:0'), in_proj_covar=tensor([0.0234, 0.0157, 0.0177, 0.0167, 0.0168, 0.0170, 0.0156, 0.0179], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 02:55:22,704 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28255.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:55:27,048 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.85 vs. limit=2.0 +2022-12-02 02:55:30,037 INFO [train.py:876] Epoch 20, batch 1050, loss[loss=0.07764, simple_loss=0.1397, pruned_loss=0.007803, over 4724.00 frames. ], tot_loss[loss=0.1349, simple_loss=0.1959, pruned_loss=0.03695, over 945516.53 frames. ], batch size: 23, lr: 8.88e-03, +2022-12-02 02:55:53,675 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28287.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:55:56,333 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28290.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:56:06,925 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.350e+01 1.689e+02 1.944e+02 2.522e+02 6.294e+02, threshold=3.889e+02, percent-clipped=8.0 +2022-12-02 02:56:18,563 INFO [train.py:876] Epoch 20, batch 1100, loss[loss=0.1949, simple_loss=0.2514, pruned_loss=0.06914, over 4786.00 frames. ], tot_loss[loss=0.1356, simple_loss=0.1966, pruned_loss=0.03725, over 949316.81 frames. ], batch size: 54, lr: 8.87e-03, +2022-12-02 02:56:21,746 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28316.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:56:46,478 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5767, 3.0164, 3.3850, 3.4998, 3.1479, 3.5373, 3.2990, 3.2316], + device='cuda:0'), covar=tensor([0.0227, 0.0317, 0.0295, 0.0246, 0.0211, 0.0271, 0.0272, 0.0307], + device='cuda:0'), in_proj_covar=tensor([0.0114, 0.0118, 0.0120, 0.0112, 0.0096, 0.0112, 0.0119, 0.0131], + device='cuda:0'), out_proj_covar=tensor([7.7220e-05, 8.0196e-05, 8.0935e-05, 7.4636e-05, 6.3586e-05, 7.5622e-05, + 7.9940e-05, 9.0020e-05], device='cuda:0') +2022-12-02 02:56:53,218 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28348.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:57:07,029 INFO [train.py:876] Epoch 20, batch 1150, loss[loss=0.1489, simple_loss=0.2148, pruned_loss=0.04154, over 4827.00 frames. ], tot_loss[loss=0.1354, simple_loss=0.1965, pruned_loss=0.03708, over 947586.57 frames. 
], batch size: 45, lr: 8.87e-03, +2022-12-02 02:57:12,159 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2850, 1.5049, 1.6048, 1.2970, 1.2084, 1.2861, 1.4835, 1.5358], + device='cuda:0'), covar=tensor([0.0215, 0.0189, 0.0167, 0.0228, 0.0274, 0.0183, 0.0174, 0.0136], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0032, 0.0031, 0.0033, 0.0037, 0.0033, 0.0036, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.7164e-05, 2.5130e-05, 2.4892e-05, 2.6393e-05, 3.0251e-05, 2.5978e-05, + 2.9332e-05, 2.3400e-05], device='cuda:0') +2022-12-02 02:57:13,935 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=28370.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 02:57:44,444 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.462e+01 1.624e+02 1.856e+02 2.266e+02 7.207e+02, threshold=3.712e+02, percent-clipped=3.0 +2022-12-02 02:57:56,312 INFO [train.py:876] Epoch 20, batch 1200, loss[loss=0.1374, simple_loss=0.1961, pruned_loss=0.03941, over 4803.00 frames. ], tot_loss[loss=0.1338, simple_loss=0.1953, pruned_loss=0.03616, over 949686.07 frames. ], batch size: 32, lr: 8.86e-03, +2022-12-02 02:58:01,415 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=28418.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 02:58:32,838 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3451, 3.1449, 3.5368, 3.3028, 3.6518, 2.7665, 3.3389, 3.3064], + device='cuda:0'), covar=tensor([0.0433, 0.0567, 0.0348, 0.0378, 0.0253, 0.0699, 0.0324, 0.0807], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0125, 0.0111, 0.0114, 0.0104, 0.0141, 0.0095, 0.0109], + device='cuda:0'), out_proj_covar=tensor([9.9359e-05, 1.0590e-04, 9.5190e-05, 9.7872e-05, 8.7923e-05, 1.2032e-04, + 8.3813e-05, 9.4684e-05], device='cuda:0') +2022-12-02 02:58:40,067 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 02:58:45,229 INFO [train.py:876] Epoch 20, batch 1250, loss[loss=0.1094, simple_loss=0.1744, pruned_loss=0.02221, over 4884.00 frames. ], tot_loss[loss=0.1332, simple_loss=0.1948, pruned_loss=0.03583, over 948350.20 frames. ], batch size: 29, lr: 8.85e-03, +2022-12-02 02:59:07,738 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28486.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:59:13,369 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28492.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 02:59:22,143 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.000e+01 1.646e+02 2.044e+02 2.398e+02 4.730e+02, threshold=4.088e+02, percent-clipped=7.0 +2022-12-02 02:59:33,697 INFO [train.py:876] Epoch 20, batch 1300, loss[loss=0.1457, simple_loss=0.2115, pruned_loss=0.03997, over 4852.00 frames. ], tot_loss[loss=0.1321, simple_loss=0.1932, pruned_loss=0.03548, over 948278.89 frames. ], batch size: 40, lr: 8.84e-03, +2022-12-02 02:59:49,905 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.87 vs. limit=2.0 +2022-12-02 03:00:06,866 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28547.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:00:12,867 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28553.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:00:22,236 INFO [train.py:876] Epoch 20, batch 1350, loss[loss=0.1232, simple_loss=0.1837, pruned_loss=0.03134, over 4730.00 frames. ], tot_loss[loss=0.1312, simple_loss=0.1921, pruned_loss=0.03516, over 948544.63 frames. 
], batch size: 27, lr: 8.84e-03, +2022-12-02 03:00:29,442 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0034, 2.3326, 2.9411, 2.3135, 2.7585, 2.2812, 2.7571, 3.0601], + device='cuda:0'), covar=tensor([0.0127, 0.0813, 0.0431, 0.1012, 0.0238, 0.0481, 0.0867, 0.0384], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0104, 0.0086, 0.0117, 0.0081, 0.0084, 0.0115, 0.0094], + device='cuda:0'), out_proj_covar=tensor([8.1433e-05, 1.2178e-04, 1.0434e-04, 1.3639e-04, 9.4398e-05, 1.0434e-04, + 1.3259e-04, 1.0578e-04], device='cuda:0') +2022-12-02 03:00:43,832 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28585.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:00:51,865 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7806, 3.3066, 3.5844, 3.3330, 2.5790, 3.6027, 3.5923, 2.9837], + device='cuda:0'), covar=tensor([0.4217, 0.0555, 0.0603, 0.0443, 0.0795, 0.0650, 0.0439, 0.0678], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0116, 0.0142, 0.0120, 0.0125, 0.0116, 0.0115, 0.0121], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 03:00:59,658 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.701e+01 1.634e+02 2.003e+02 2.530e+02 5.006e+02, threshold=4.007e+02, percent-clipped=7.0 +2022-12-02 03:01:09,389 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28611.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 03:01:11,235 INFO [train.py:876] Epoch 20, batch 1400, loss[loss=0.1569, simple_loss=0.2213, pruned_loss=0.04624, over 4831.00 frames. ], tot_loss[loss=0.1324, simple_loss=0.1936, pruned_loss=0.03557, over 944227.39 frames. ], batch size: 47, lr: 8.83e-03, +2022-12-02 03:01:21,112 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9565, 2.1801, 2.9405, 2.2723, 2.7159, 2.0688, 2.6489, 2.9969], + device='cuda:0'), covar=tensor([0.0100, 0.1015, 0.0430, 0.1082, 0.0210, 0.0507, 0.1021, 0.0406], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0105, 0.0086, 0.0117, 0.0081, 0.0084, 0.0115, 0.0095], + device='cuda:0'), out_proj_covar=tensor([8.1072e-05, 1.2269e-04, 1.0443e-04, 1.3623e-04, 9.3808e-05, 1.0417e-04, + 1.3262e-04, 1.0608e-04], device='cuda:0') +2022-12-02 03:01:31,954 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.03 vs. limit=2.0 +2022-12-02 03:01:40,086 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28643.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:01:42,067 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-20.pt +2022-12-02 03:01:50,853 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 03:01:51,754 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 03:01:52,050 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. 
Duration: 0.9818125 +2022-12-02 03:01:52,082 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 03:01:53,227 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 03:01:53,548 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 03:01:55,168 INFO [train.py:876] Epoch 21, batch 0, loss[loss=0.08762, simple_loss=0.1415, pruned_loss=0.01688, over 4663.00 frames. ], tot_loss[loss=0.08762, simple_loss=0.1415, pruned_loss=0.01688, over 4663.00 frames. ], batch size: 21, lr: 8.61e-03, +2022-12-02 03:01:55,169 INFO [train.py:901] Computing validation loss +2022-12-02 03:02:10,861 INFO [train.py:910] Epoch 21, validation: loss=0.2319, simple_loss=0.2774, pruned_loss=0.09321, over 253132.00 frames. +2022-12-02 03:02:10,862 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 03:02:22,188 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9557, 3.5931, 3.6381, 4.1006, 3.8634, 4.1181, 3.7811, 3.6071], + device='cuda:0'), covar=tensor([0.0336, 0.0272, 0.0329, 0.0177, 0.0149, 0.0171, 0.0215, 0.0269], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0120, 0.0124, 0.0115, 0.0098, 0.0116, 0.0123, 0.0133], + device='cuda:0'), out_proj_covar=tensor([7.9502e-05, 8.1945e-05, 8.3880e-05, 7.6559e-05, 6.5059e-05, 7.7819e-05, + 8.1896e-05, 9.1579e-05], device='cuda:0') +2022-12-02 03:02:59,242 INFO [train.py:876] Epoch 21, batch 50, loss[loss=0.181, simple_loss=0.2449, pruned_loss=0.05857, over 4641.00 frames. ], tot_loss[loss=0.1303, simple_loss=0.1913, pruned_loss=0.03471, over 216168.28 frames. ], batch size: 63, lr: 8.60e-03, +2022-12-02 03:03:04,807 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.868e+01 1.826e+02 2.202e+02 2.588e+02 5.936e+02, threshold=4.403e+02, percent-clipped=2.0 +2022-12-02 03:03:07,750 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8382, 2.2561, 3.0292, 2.4211, 2.6970, 2.5459, 2.5786, 3.3516], + device='cuda:0'), covar=tensor([0.0178, 0.1187, 0.0525, 0.1440, 0.0308, 0.0369, 0.1449, 0.0407], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0109, 0.0090, 0.0121, 0.0083, 0.0087, 0.0120, 0.0098], + device='cuda:0'), out_proj_covar=tensor([8.3617e-05, 1.2751e-04, 1.0823e-04, 1.4118e-04, 9.6175e-05, 1.0706e-04, + 1.3776e-04, 1.0943e-04], device='cuda:0') +2022-12-02 03:03:20,327 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 03:03:48,787 INFO [train.py:876] Epoch 21, batch 100, loss[loss=0.08303, simple_loss=0.1294, pruned_loss=0.01834, over 4361.00 frames. ], tot_loss[loss=0.1311, simple_loss=0.1924, pruned_loss=0.03487, over 378918.38 frames. ], batch size: 17, lr: 8.59e-03, +2022-12-02 03:04:04,428 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:04:10,746 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 03:04:28,644 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 03:04:37,289 INFO [train.py:876] Epoch 21, batch 150, loss[loss=0.1032, simple_loss=0.1727, pruned_loss=0.01685, over 4883.00 frames. ], tot_loss[loss=0.1303, simple_loss=0.1922, pruned_loss=0.03424, over 506506.30 frames. 
], batch size: 29, lr: 8.59e-03, +2022-12-02 03:04:42,474 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.001e+02 1.625e+02 1.971e+02 2.599e+02 5.873e+02, threshold=3.941e+02, percent-clipped=3.0 +2022-12-02 03:04:50,858 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.51 vs. limit=2.0 +2022-12-02 03:04:52,123 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.72 vs. limit=2.0 +2022-12-02 03:05:22,420 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28842.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:05:26,225 INFO [train.py:876] Epoch 21, batch 200, loss[loss=0.1693, simple_loss=0.2203, pruned_loss=0.05921, over 4787.00 frames. ], tot_loss[loss=0.132, simple_loss=0.1937, pruned_loss=0.03517, over 604790.29 frames. ], batch size: 58, lr: 8.58e-03, +2022-12-02 03:05:28,243 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=28848.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:05:41,287 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=28861.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:06:04,268 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=28885.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:06:12,178 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 03:06:14,989 INFO [train.py:876] Epoch 21, batch 250, loss[loss=0.1223, simple_loss=0.1748, pruned_loss=0.03487, over 4792.00 frames. ], tot_loss[loss=0.1318, simple_loss=0.1932, pruned_loss=0.03516, over 680036.30 frames. ], batch size: 32, lr: 8.57e-03, +2022-12-02 03:06:19,726 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.092e+02 1.707e+02 1.993e+02 2.530e+02 5.662e+02, threshold=3.986e+02, percent-clipped=7.0 +2022-12-02 03:06:29,407 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=28911.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 03:06:34,807 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.99 vs. limit=2.0 +2022-12-02 03:06:40,363 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=28922.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:06:50,884 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=28933.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:07:01,583 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=28943.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:07:04,461 INFO [train.py:876] Epoch 21, batch 300, loss[loss=0.09973, simple_loss=0.162, pruned_loss=0.01875, over 4757.00 frames. ], tot_loss[loss=0.1311, simple_loss=0.1925, pruned_loss=0.03484, over 739609.58 frames. ], batch size: 27, lr: 8.56e-03, +2022-12-02 03:07:10,306 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 03:07:17,028 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=28959.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 03:07:48,170 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=28991.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:07:50,721 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.95 vs. limit=2.0 +2022-12-02 03:07:53,215 INFO [train.py:876] Epoch 21, batch 350, loss[loss=0.1346, simple_loss=0.198, pruned_loss=0.03562, over 4925.00 frames. 
], tot_loss[loss=0.131, simple_loss=0.193, pruned_loss=0.0345, over 786868.98 frames. ], batch size: 32, lr: 8.56e-03, +2022-12-02 03:07:58,410 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.515e+01 1.612e+02 1.999e+02 2.438e+02 4.120e+02, threshold=3.998e+02, percent-clipped=1.0 +2022-12-02 03:07:59,520 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=29002.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:08:07,416 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8254, 1.9192, 1.1222, 2.1424, 1.8639, 1.8507, 1.6960, 1.7631], + device='cuda:0'), covar=tensor([0.0311, 0.0348, 0.0376, 0.0255, 0.0261, 0.0281, 0.0331, 0.0570], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0048, 0.0052, 0.0039, 0.0043, 0.0046, 0.0044, 0.0041], + device='cuda:0'), out_proj_covar=tensor([4.5596e-05, 4.5358e-05, 4.9753e-05, 3.6805e-05, 4.0326e-05, 4.3508e-05, + 4.1539e-05, 4.0981e-05], device='cuda:0') +2022-12-02 03:08:13,440 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9415, 3.4600, 3.6348, 3.8466, 3.4982, 3.9879, 3.6066, 3.4914], + device='cuda:0'), covar=tensor([0.0207, 0.0275, 0.0258, 0.0205, 0.0174, 0.0186, 0.0207, 0.0313], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0122, 0.0125, 0.0116, 0.0100, 0.0118, 0.0124, 0.0136], + device='cuda:0'), out_proj_covar=tensor([8.1274e-05, 8.3161e-05, 8.4622e-05, 7.7868e-05, 6.6496e-05, 7.8850e-05, + 8.2811e-05, 9.3227e-05], device='cuda:0') +2022-12-02 03:08:42,575 INFO [train.py:876] Epoch 21, batch 400, loss[loss=0.1287, simple_loss=0.1829, pruned_loss=0.03729, over 4897.00 frames. ], tot_loss[loss=0.1303, simple_loss=0.1916, pruned_loss=0.03446, over 826182.84 frames. ], batch size: 30, lr: 8.55e-03, +2022-12-02 03:08:50,399 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 03:08:59,751 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=29063.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:09:03,627 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9224, 2.0633, 2.5029, 3.8504, 3.3654, 3.3898, 3.0987, 3.8912], + device='cuda:0'), covar=tensor([0.0225, 0.1490, 0.1868, 0.0334, 0.0324, 0.0530, 0.0569, 0.0269], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0102, 0.0123, 0.0069, 0.0080, 0.0072, 0.0084, 0.0087], + device='cuda:0'), out_proj_covar=tensor([7.6053e-05, 1.1317e-04, 1.3237e-04, 7.9712e-05, 8.2842e-05, 8.1633e-05, + 9.3438e-05, 8.6083e-05], device='cuda:0') +2022-12-02 03:09:06,839 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0 +2022-12-02 03:09:13,102 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:09:17,332 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0 +2022-12-02 03:09:31,944 INFO [train.py:876] Epoch 21, batch 450, loss[loss=0.125, simple_loss=0.2012, pruned_loss=0.02438, over 4835.00 frames. ], tot_loss[loss=0.1298, simple_loss=0.1917, pruned_loss=0.03401, over 852799.35 frames. 
], batch size: 41, lr: 8.54e-03, +2022-12-02 03:09:36,758 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.559e+02 1.748e+02 2.361e+02 6.189e+02, threshold=3.496e+02, percent-clipped=3.0 +2022-12-02 03:10:17,245 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=29142.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:10:21,130 INFO [train.py:876] Epoch 21, batch 500, loss[loss=0.1158, simple_loss=0.1859, pruned_loss=0.02286, over 4857.00 frames. ], tot_loss[loss=0.1286, simple_loss=0.1903, pruned_loss=0.03349, over 875853.45 frames. ], batch size: 35, lr: 8.54e-03, +2022-12-02 03:10:23,273 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=29148.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:10:27,510 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.56 vs. limit=2.0 +2022-12-02 03:11:03,913 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=29190.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:11:09,821 INFO [train.py:876] Epoch 21, batch 550, loss[loss=0.1603, simple_loss=0.2219, pruned_loss=0.0494, over 4845.00 frames. ], tot_loss[loss=0.1298, simple_loss=0.1911, pruned_loss=0.03423, over 891506.85 frames. ], batch size: 49, lr: 8.53e-03, +2022-12-02 03:11:09,867 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=29196.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:11:15,149 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.354e+01 1.610e+02 1.937e+02 2.348e+02 5.354e+02, threshold=3.875e+02, percent-clipped=8.0 +2022-12-02 03:11:31,266 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=29217.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:11:49,617 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=29236.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:11:52,500 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5072, 3.1867, 3.2594, 3.0838, 2.4210, 2.1163, 3.5116, 1.7846], + device='cuda:0'), covar=tensor([0.0418, 0.0414, 0.0511, 0.0555, 0.1656, 0.3331, 0.0263, 0.2730], + device='cuda:0'), in_proj_covar=tensor([0.0116, 0.0099, 0.0090, 0.0134, 0.0139, 0.0162, 0.0083, 0.0177], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 03:11:59,356 INFO [train.py:876] Epoch 21, batch 600, loss[loss=0.1342, simple_loss=0.1872, pruned_loss=0.04061, over 4906.00 frames. ], tot_loss[loss=0.1301, simple_loss=0.1917, pruned_loss=0.03424, over 905477.76 frames. ], batch size: 30, lr: 8.52e-03, +2022-12-02 03:12:10,431 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 03:12:48,856 INFO [train.py:876] Epoch 21, batch 650, loss[loss=0.1105, simple_loss=0.1745, pruned_loss=0.02323, over 4924.00 frames. ], tot_loss[loss=0.1292, simple_loss=0.1907, pruned_loss=0.03388, over 916441.54 frames. ], batch size: 31, lr: 8.51e-03, +2022-12-02 03:12:49,992 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=29297.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:12:53,836 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.067e+02 1.603e+02 1.910e+02 2.391e+02 4.328e+02, threshold=3.820e+02, percent-clipped=3.0 +2022-12-02 03:13:10,537 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.63 vs. 
limit=5.0 +2022-12-02 03:13:32,127 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9906, 3.2052, 3.7957, 3.6556, 2.5667, 4.0476, 3.9529, 2.7033], + device='cuda:0'), covar=tensor([0.4373, 0.1087, 0.0812, 0.0329, 0.1222, 0.0688, 0.0425, 0.1941], + device='cuda:0'), in_proj_covar=tensor([0.0176, 0.0116, 0.0146, 0.0119, 0.0126, 0.0116, 0.0116, 0.0121], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 03:13:37,633 INFO [train.py:876] Epoch 21, batch 700, loss[loss=0.09373, simple_loss=0.1619, pruned_loss=0.01279, over 4731.00 frames. ], tot_loss[loss=0.1303, simple_loss=0.1919, pruned_loss=0.03436, over 923497.31 frames. ], batch size: 27, lr: 8.51e-03, +2022-12-02 03:13:49,278 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=29358.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:14:25,966 INFO [train.py:876] Epoch 21, batch 750, loss[loss=0.1452, simple_loss=0.2134, pruned_loss=0.03855, over 4888.00 frames. ], tot_loss[loss=0.1313, simple_loss=0.1931, pruned_loss=0.0348, over 927480.33 frames. ], batch size: 44, lr: 8.50e-03, +2022-12-02 03:14:31,023 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.094e+02 1.682e+02 2.120e+02 2.385e+02 6.945e+02, threshold=4.240e+02, percent-clipped=2.0 +2022-12-02 03:14:35,635 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.44 vs. limit=2.0 +2022-12-02 03:15:06,263 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6122, 3.9341, 3.8984, 3.7631, 3.6879, 3.8523, 3.8617, 3.8391], + device='cuda:0'), covar=tensor([0.1150, 0.0247, 0.0383, 0.0330, 0.0317, 0.0369, 0.0286, 0.0495], + device='cuda:0'), in_proj_covar=tensor([0.0246, 0.0165, 0.0185, 0.0177, 0.0175, 0.0180, 0.0163, 0.0189], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 03:15:14,693 INFO [train.py:876] Epoch 21, batch 800, loss[loss=0.1121, simple_loss=0.1768, pruned_loss=0.0237, over 4734.00 frames. ], tot_loss[loss=0.1313, simple_loss=0.1933, pruned_loss=0.03469, over 931002.35 frames. ], batch size: 27, lr: 8.49e-03, +2022-12-02 03:15:42,461 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4531, 1.7880, 1.8563, 3.2988, 2.4423, 3.0387, 3.0078, 3.4527], + device='cuda:0'), covar=tensor([0.0287, 0.1993, 0.2683, 0.0410, 0.0749, 0.0561, 0.0667, 0.0413], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0103, 0.0119, 0.0069, 0.0080, 0.0071, 0.0082, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.3427e-05, 1.1286e-04, 1.2834e-04, 7.9202e-05, 8.2430e-05, 7.9716e-05, + 9.0840e-05, 8.4299e-05], device='cuda:0') +2022-12-02 03:15:51,301 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3441, 4.8212, 4.7108, 5.0352, 4.5443, 4.1634, 4.9762, 4.6478], + device='cuda:0'), covar=tensor([0.0194, 0.0097, 0.0108, 0.0145, 0.0152, 0.0176, 0.0087, 0.0117], + device='cuda:0'), in_proj_covar=tensor([0.0055, 0.0051, 0.0053, 0.0043, 0.0054, 0.0055, 0.0049, 0.0050], + device='cuda:0'), out_proj_covar=tensor([5.0498e-05, 4.3654e-05, 4.6659e-05, 3.6311e-05, 4.8444e-05, 4.9851e-05, + 4.0111e-05, 4.1774e-05], device='cuda:0') +2022-12-02 03:16:03,791 INFO [train.py:876] Epoch 21, batch 850, loss[loss=0.0995, simple_loss=0.1425, pruned_loss=0.02824, over 4681.00 frames. ], tot_loss[loss=0.1307, simple_loss=0.1924, pruned_loss=0.03446, over 935178.11 frames. 
], batch size: 21, lr: 8.49e-03, +2022-12-02 03:16:08,575 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.081e+02 1.612e+02 2.049e+02 2.300e+02 6.071e+02, threshold=4.098e+02, percent-clipped=3.0 +2022-12-02 03:16:23,922 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=29517.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:16:32,904 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0980, 3.4985, 3.5718, 4.1157, 3.7380, 4.0249, 3.6921, 3.6376], + device='cuda:0'), covar=tensor([0.0178, 0.0272, 0.0258, 0.0164, 0.0151, 0.0162, 0.0231, 0.0256], + device='cuda:0'), in_proj_covar=tensor([0.0118, 0.0121, 0.0124, 0.0115, 0.0100, 0.0118, 0.0123, 0.0134], + device='cuda:0'), out_proj_covar=tensor([8.0019e-05, 8.2233e-05, 8.3511e-05, 7.6857e-05, 6.6274e-05, 7.9063e-05, + 8.2220e-05, 9.2246e-05], device='cuda:0') +2022-12-02 03:16:51,606 INFO [train.py:876] Epoch 21, batch 900, loss[loss=0.08048, simple_loss=0.1278, pruned_loss=0.01656, over 4612.00 frames. ], tot_loss[loss=0.1309, simple_loss=0.1926, pruned_loss=0.03466, over 939888.82 frames. ], batch size: 21, lr: 8.48e-03, +2022-12-02 03:16:56,654 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=29551.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:17:10,153 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=29565.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:17:36,123 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=29592.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:17:39,924 INFO [train.py:876] Epoch 21, batch 950, loss[loss=0.1332, simple_loss=0.2011, pruned_loss=0.03268, over 4808.00 frames. ], tot_loss[loss=0.1316, simple_loss=0.1939, pruned_loss=0.03466, over 942081.45 frames. ], batch size: 45, lr: 8.47e-03, +2022-12-02 03:17:44,235 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0582, 1.8427, 2.1391, 1.6900, 1.9917, 2.1625, 1.6506, 2.0680], + device='cuda:0'), covar=tensor([0.0770, 0.0568, 0.0932, 0.0727, 0.0640, 0.0659, 0.0977, 0.0455], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0069, 0.0065, 0.0070, 0.0063, 0.0058, 0.0058, 0.0064], + device='cuda:0'), out_proj_covar=tensor([5.9101e-05, 6.2088e-05, 6.0245e-05, 6.3245e-05, 5.8213e-05, 5.3083e-05, + 5.4052e-05, 5.8636e-05], device='cuda:0') +2022-12-02 03:17:44,882 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.096e+02 1.624e+02 1.922e+02 2.397e+02 6.755e+02, threshold=3.845e+02, percent-clipped=1.0 +2022-12-02 03:17:55,603 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=29612.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:18:02,313 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8945, 1.9842, 2.0614, 1.7861, 2.3701, 2.2849, 1.9023, 1.9037], + device='cuda:0'), covar=tensor([0.1911, 0.0690, 0.1813, 0.1142, 0.0478, 0.1217, 0.0958, 0.0665], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0070, 0.0065, 0.0071, 0.0063, 0.0058, 0.0058, 0.0065], + device='cuda:0'), out_proj_covar=tensor([5.9356e-05, 6.2327e-05, 6.0460e-05, 6.3667e-05, 5.8441e-05, 5.3393e-05, + 5.4319e-05, 5.8831e-05], device='cuda:0') +2022-12-02 03:18:09,551 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.46 vs. limit=5.0 +2022-12-02 03:18:28,629 INFO [train.py:876] Epoch 21, batch 1000, loss[loss=0.1427, simple_loss=0.2093, pruned_loss=0.03803, over 4880.00 frames. ], tot_loss[loss=0.1322, simple_loss=0.1943, pruned_loss=0.03498, over 943801.17 frames. 
], batch size: 44, lr: 8.47e-03, +2022-12-02 03:18:40,643 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=29658.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:18:44,370 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=29662.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:19:17,520 INFO [train.py:876] Epoch 21, batch 1050, loss[loss=0.1181, simple_loss=0.175, pruned_loss=0.03057, over 4912.00 frames. ], tot_loss[loss=0.1316, simple_loss=0.1936, pruned_loss=0.03486, over 947020.88 frames. ], batch size: 29, lr: 8.46e-03, +2022-12-02 03:19:22,482 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.046e+02 1.641e+02 2.025e+02 2.447e+02 4.538e+02, threshold=4.051e+02, percent-clipped=4.0 +2022-12-02 03:19:24,670 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2938, 1.6860, 1.9362, 1.6133, 1.2336, 1.5656, 1.5395, 1.7999], + device='cuda:0'), covar=tensor([0.0389, 0.0217, 0.0222, 0.0254, 0.0280, 0.0193, 0.0207, 0.0197], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0032, 0.0031, 0.0034, 0.0037, 0.0033, 0.0036, 0.0030], + device='cuda:0'), out_proj_covar=tensor([2.7141e-05, 2.5146e-05, 2.4530e-05, 2.6900e-05, 3.0447e-05, 2.6137e-05, + 2.9458e-05, 2.3409e-05], device='cuda:0') +2022-12-02 03:19:27,573 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=29706.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:19:43,752 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=29723.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 03:20:05,759 INFO [train.py:876] Epoch 21, batch 1100, loss[loss=0.1413, simple_loss=0.2123, pruned_loss=0.03518, over 4836.00 frames. ], tot_loss[loss=0.1308, simple_loss=0.1926, pruned_loss=0.03446, over 947959.71 frames. ], batch size: 45, lr: 8.45e-03, +2022-12-02 03:20:54,296 INFO [train.py:876] Epoch 21, batch 1150, loss[loss=0.136, simple_loss=0.2043, pruned_loss=0.03391, over 4840.00 frames. ], tot_loss[loss=0.1314, simple_loss=0.1933, pruned_loss=0.03472, over 949381.98 frames. ], batch size: 49, lr: 8.45e-03, +2022-12-02 03:20:59,440 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.038e+02 1.694e+02 2.142e+02 2.794e+02 1.285e+03, threshold=4.284e+02, percent-clipped=6.0 +2022-12-02 03:21:00,522 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=29802.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:21:42,927 INFO [train.py:876] Epoch 21, batch 1200, loss[loss=0.1247, simple_loss=0.1916, pruned_loss=0.02894, over 4849.00 frames. ], tot_loss[loss=0.1309, simple_loss=0.1931, pruned_loss=0.03438, over 949621.09 frames. ], batch size: 47, lr: 8.44e-03, +2022-12-02 03:21:57,326 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.01 vs. limit=5.0 +2022-12-02 03:21:59,853 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=29863.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:22:27,676 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=29892.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:22:31,462 INFO [train.py:876] Epoch 21, batch 1250, loss[loss=0.1173, simple_loss=0.1805, pruned_loss=0.02705, over 4800.00 frames. ], tot_loss[loss=0.1307, simple_loss=0.1928, pruned_loss=0.03434, over 951537.43 frames. 
], batch size: 32, lr: 8.43e-03, +2022-12-02 03:22:36,246 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.179e+02 1.477e+02 1.732e+02 2.138e+02 4.922e+02, threshold=3.464e+02, percent-clipped=1.0 +2022-12-02 03:22:42,323 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=29907.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:23:14,934 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=29940.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:23:17,908 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5085, 4.5555, 4.9528, 4.3484, 4.7475, 4.6788, 4.4188, 4.2460], + device='cuda:0'), covar=tensor([0.0658, 0.0436, 0.0447, 0.0489, 0.0695, 0.0422, 0.1100, 0.0648], + device='cuda:0'), in_proj_covar=tensor([0.0168, 0.0124, 0.0162, 0.0139, 0.0124, 0.0149, 0.0178, 0.0123], + device='cuda:0'), out_proj_covar=tensor([1.2360e-04, 8.2005e-05, 1.2606e-04, 9.8040e-05, 9.4144e-05, 1.1012e-04, + 1.3582e-04, 8.6199e-05], device='cuda:0') +2022-12-02 03:23:20,716 INFO [train.py:876] Epoch 21, batch 1300, loss[loss=0.1312, simple_loss=0.1951, pruned_loss=0.03365, over 4859.00 frames. ], tot_loss[loss=0.1302, simple_loss=0.192, pruned_loss=0.03419, over 951233.90 frames. ], batch size: 35, lr: 8.42e-03, +2022-12-02 03:23:23,837 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3709, 2.8703, 3.3184, 3.2704, 3.2304, 3.5121, 2.7787, 4.1858], + device='cuda:0'), covar=tensor([0.0119, 0.0940, 0.0570, 0.0960, 0.0224, 0.0330, 0.1649, 0.0221], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0110, 0.0091, 0.0122, 0.0082, 0.0087, 0.0119, 0.0099], + device='cuda:0'), out_proj_covar=tensor([8.4772e-05, 1.2895e-04, 1.0956e-04, 1.4244e-04, 9.4597e-05, 1.0869e-04, + 1.3793e-04, 1.1069e-04], device='cuda:0') +2022-12-02 03:24:09,788 INFO [train.py:876] Epoch 21, batch 1350, loss[loss=0.1419, simple_loss=0.2168, pruned_loss=0.03345, over 4842.00 frames. ], tot_loss[loss=0.1302, simple_loss=0.1922, pruned_loss=0.03412, over 948943.02 frames. ], batch size: 49, lr: 8.42e-03, +2022-12-02 03:24:14,008 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-30000.pt +2022-12-02 03:24:17,272 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.009e+02 1.474e+02 1.789e+02 2.286e+02 6.386e+02, threshold=3.578e+02, percent-clipped=7.0 +2022-12-02 03:24:33,768 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=30018.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 03:25:00,877 INFO [train.py:876] Epoch 21, batch 1400, loss[loss=0.1346, simple_loss=0.1924, pruned_loss=0.03837, over 4887.00 frames. ], tot_loss[loss=0.1299, simple_loss=0.192, pruned_loss=0.03394, over 951173.14 frames. ], batch size: 38, lr: 8.41e-03, +2022-12-02 03:25:30,441 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-21.pt +2022-12-02 03:25:41,543 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 03:25:42,463 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. 
Number of tokens: 29 +2022-12-02 03:25:42,768 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:25:42,801 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 03:25:43,953 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 03:25:44,283 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 03:25:45,860 INFO [train.py:876] Epoch 22, batch 0, loss[loss=0.1559, simple_loss=0.2222, pruned_loss=0.0448, over 4843.00 frames. ], tot_loss[loss=0.1559, simple_loss=0.2222, pruned_loss=0.0448, over 4843.00 frames. ], batch size: 49, lr: 8.21e-03, +2022-12-02 03:25:45,861 INFO [train.py:901] Computing validation loss +2022-12-02 03:25:46,603 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5834, 2.6107, 2.6928, 2.7678, 2.2738, 2.5310, 2.3934, 2.6134], + device='cuda:0'), covar=tensor([0.0423, 0.0440, 0.0325, 0.0352, 0.0648, 0.0518, 0.0541, 0.0322], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0068, 0.0056, 0.0056, 0.0072, 0.0066, 0.0061, 0.0055], + device='cuda:0'), out_proj_covar=tensor([4.6434e-05, 5.0496e-05, 4.1070e-05, 4.0698e-05, 5.2723e-05, 4.8930e-05, + 4.5111e-05, 4.0005e-05], device='cuda:0') +2022-12-02 03:26:01,447 INFO [train.py:910] Epoch 22, validation: loss=0.2286, simple_loss=0.2718, pruned_loss=0.09266, over 253132.00 frames. +2022-12-02 03:26:01,447 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 03:26:24,251 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.992e+01 1.609e+02 1.966e+02 2.523e+02 5.123e+02, threshold=3.932e+02, percent-clipped=4.0 +2022-12-02 03:26:47,644 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30125.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:26:49,789 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.06 vs. limit=2.0 +2022-12-02 03:26:50,247 INFO [train.py:876] Epoch 22, batch 50, loss[loss=0.09356, simple_loss=0.1577, pruned_loss=0.01473, over 4905.00 frames. ], tot_loss[loss=0.13, simple_loss=0.1911, pruned_loss=0.03448, over 214373.31 frames. ], batch size: 29, lr: 8.20e-03, +2022-12-02 03:26:53,304 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.6323, 5.1549, 4.7380, 5.3376, 4.7143, 4.2305, 5.2992, 4.8175], + device='cuda:0'), covar=tensor([0.0139, 0.0077, 0.0117, 0.0141, 0.0172, 0.0140, 0.0071, 0.0127], + device='cuda:0'), in_proj_covar=tensor([0.0056, 0.0053, 0.0056, 0.0044, 0.0055, 0.0057, 0.0051, 0.0051], + device='cuda:0'), out_proj_covar=tensor([5.0967e-05, 4.4649e-05, 4.9286e-05, 3.7565e-05, 5.0079e-05, 5.1475e-05, + 4.1252e-05, 4.2604e-05], device='cuda:0') +2022-12-02 03:27:01,210 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5227, 3.3042, 3.3525, 3.1624, 2.5852, 2.6040, 3.6874, 1.8692], + device='cuda:0'), covar=tensor([0.0428, 0.0420, 0.0492, 0.0678, 0.1518, 0.2343, 0.0256, 0.2711], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0100, 0.0093, 0.0136, 0.0141, 0.0161, 0.0086, 0.0179], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 03:27:09,956 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. 
Duration: 0.836375 +2022-12-02 03:27:14,971 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6147, 3.4996, 3.5182, 3.4603, 2.8601, 2.6451, 3.9053, 1.8781], + device='cuda:0'), covar=tensor([0.0472, 0.0454, 0.0320, 0.0613, 0.1382, 0.2415, 0.0222, 0.3213], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0100, 0.0093, 0.0136, 0.0142, 0.0161, 0.0086, 0.0180], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 03:27:19,558 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=30158.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:27:25,649 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6523, 3.4263, 3.6336, 3.5349, 3.7315, 2.9692, 3.6573, 3.8834], + device='cuda:0'), covar=tensor([0.0371, 0.0444, 0.0355, 0.0394, 0.0293, 0.0587, 0.0329, 0.0412], + device='cuda:0'), in_proj_covar=tensor([0.0118, 0.0127, 0.0112, 0.0116, 0.0102, 0.0141, 0.0094, 0.0109], + device='cuda:0'), out_proj_covar=tensor([9.9955e-05, 1.0807e-04, 9.5672e-05, 9.9080e-05, 8.6216e-05, 1.2060e-04, + 8.3257e-05, 9.5135e-05], device='cuda:0') +2022-12-02 03:27:39,155 INFO [train.py:876] Epoch 22, batch 100, loss[loss=0.1158, simple_loss=0.1856, pruned_loss=0.02299, over 4857.00 frames. ], tot_loss[loss=0.1267, simple_loss=0.1875, pruned_loss=0.03295, over 378277.86 frames. ], batch size: 40, lr: 8.20e-03, +2022-12-02 03:27:41,763 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 03:27:46,995 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30186.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:27:53,515 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:28:01,549 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.641e+02 2.089e+02 2.633e+02 1.622e+03, threshold=4.178e+02, percent-clipped=7.0 +2022-12-02 03:28:07,345 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=30207.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:28:09,359 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30209.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:28:18,285 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 03:28:27,661 INFO [train.py:876] Epoch 22, batch 150, loss[loss=0.124, simple_loss=0.1898, pruned_loss=0.02911, over 4877.00 frames. ], tot_loss[loss=0.1274, simple_loss=0.1883, pruned_loss=0.03327, over 504710.31 frames. ], batch size: 38, lr: 8.19e-03, +2022-12-02 03:28:54,117 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=30255.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:29:09,031 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30270.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:29:16,565 INFO [train.py:876] Epoch 22, batch 200, loss[loss=0.1453, simple_loss=0.2137, pruned_loss=0.03841, over 4828.00 frames. ], tot_loss[loss=0.1268, simple_loss=0.1881, pruned_loss=0.03277, over 604430.41 frames. 
], batch size: 41, lr: 8.18e-03, +2022-12-02 03:29:16,907 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3826, 3.9100, 3.6085, 3.5075, 3.7737, 3.7264, 3.6618, 3.9098], + device='cuda:0'), covar=tensor([0.1668, 0.0492, 0.0715, 0.0595, 0.0554, 0.0485, 0.0680, 0.0462], + device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0162, 0.0183, 0.0159, 0.0172, 0.0161, 0.0165, 0.0167], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 03:29:28,525 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6043, 3.7939, 3.7356, 3.7627, 3.1861, 3.7977, 3.3474, 3.8151], + device='cuda:0'), covar=tensor([0.0189, 0.0210, 0.0200, 0.0187, 0.0490, 0.0292, 0.0320, 0.0155], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0069, 0.0057, 0.0058, 0.0074, 0.0068, 0.0062, 0.0056], + device='cuda:0'), out_proj_covar=tensor([4.8044e-05, 5.1482e-05, 4.1747e-05, 4.1910e-05, 5.4367e-05, 4.9814e-05, + 4.5794e-05, 4.0964e-05], device='cuda:0') +2022-12-02 03:29:39,241 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.570e+01 1.431e+02 1.798e+02 2.356e+02 3.568e+02, threshold=3.596e+02, percent-clipped=0.0 +2022-12-02 03:29:55,948 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=30318.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:30:05,508 INFO [train.py:876] Epoch 22, batch 250, loss[loss=0.1272, simple_loss=0.1789, pruned_loss=0.03776, over 4922.00 frames. ], tot_loss[loss=0.1271, simple_loss=0.1886, pruned_loss=0.03275, over 680031.19 frames. ], batch size: 32, lr: 8.18e-03, +2022-12-02 03:30:10,371 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 03:30:37,049 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30360.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:30:42,818 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=30366.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:30:54,246 INFO [train.py:876] Epoch 22, batch 300, loss[loss=0.1319, simple_loss=0.1943, pruned_loss=0.03474, over 4842.00 frames. ], tot_loss[loss=0.1279, simple_loss=0.1897, pruned_loss=0.03309, over 741626.09 frames. ], batch size: 35, lr: 8.17e-03, +2022-12-02 03:31:08,023 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 03:31:16,893 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.714e+01 1.498e+02 1.830e+02 2.442e+02 7.202e+02, threshold=3.661e+02, percent-clipped=5.0 +2022-12-02 03:31:28,980 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6006, 3.0457, 3.6826, 3.3808, 3.6668, 2.8860, 3.5067, 3.7343], + device='cuda:0'), covar=tensor([0.0404, 0.0522, 0.0301, 0.0427, 0.0300, 0.0644, 0.0405, 0.0618], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0129, 0.0113, 0.0119, 0.0106, 0.0146, 0.0098, 0.0113], + device='cuda:0'), out_proj_covar=tensor([1.0167e-04, 1.0988e-04, 9.7293e-05, 1.0170e-04, 8.9401e-05, 1.2453e-04, + 8.5873e-05, 9.8126e-05], device='cuda:0') +2022-12-02 03:31:36,713 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30421.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:31:43,413 INFO [train.py:876] Epoch 22, batch 350, loss[loss=0.135, simple_loss=0.1795, pruned_loss=0.04528, over 4800.00 frames. ], tot_loss[loss=0.1272, simple_loss=0.1891, pruned_loss=0.03264, over 788393.96 frames. 
], batch size: 32, lr: 8.16e-03, +2022-12-02 03:32:12,879 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=30458.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:32:30,266 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.71 vs. limit=2.0 +2022-12-02 03:32:32,436 INFO [train.py:876] Epoch 22, batch 400, loss[loss=0.126, simple_loss=0.1916, pruned_loss=0.03018, over 4858.00 frames. ], tot_loss[loss=0.1279, simple_loss=0.1902, pruned_loss=0.03281, over 826495.17 frames. ], batch size: 39, lr: 8.16e-03, +2022-12-02 03:32:35,126 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=30481.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:32:45,000 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 03:32:48,128 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0466, 2.3288, 3.0719, 2.2504, 2.6837, 2.4191, 2.5510, 3.2186], + device='cuda:0'), covar=tensor([0.0213, 0.1066, 0.0551, 0.1481, 0.0237, 0.0407, 0.1302, 0.0413], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0109, 0.0091, 0.0121, 0.0081, 0.0088, 0.0122, 0.0098], + device='cuda:0'), out_proj_covar=tensor([8.6055e-05, 1.2847e-04, 1.1006e-04, 1.4175e-04, 9.4451e-05, 1.0875e-04, + 1.4037e-04, 1.0964e-04], device='cuda:0') +2022-12-02 03:32:49,280 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8154, 2.9513, 3.2251, 3.6656, 2.9847, 3.6349, 3.2573, 3.2723], + device='cuda:0'), covar=tensor([0.0217, 0.0353, 0.0285, 0.0181, 0.0244, 0.0233, 0.0271, 0.0304], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0122, 0.0127, 0.0118, 0.0102, 0.0122, 0.0124, 0.0137], + device='cuda:0'), out_proj_covar=tensor([8.2247e-05, 8.3180e-05, 8.5669e-05, 7.9025e-05, 6.7207e-05, 8.1465e-05, + 8.2880e-05, 9.4044e-05], device='cuda:0') +2022-12-02 03:32:54,815 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.129e+02 1.540e+02 1.845e+02 2.288e+02 4.317e+02, threshold=3.689e+02, percent-clipped=2.0 +2022-12-02 03:32:59,907 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=30506.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:33:11,015 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3866, 2.2710, 2.3227, 2.4499, 1.8288, 2.5377, 2.1406, 2.5436], + device='cuda:0'), covar=tensor([0.0498, 0.0569, 0.0405, 0.0360, 0.0825, 0.0481, 0.0535, 0.0334], + device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0068, 0.0057, 0.0057, 0.0073, 0.0066, 0.0061, 0.0055], + device='cuda:0'), out_proj_covar=tensor([4.6840e-05, 5.0565e-05, 4.1313e-05, 4.1159e-05, 5.3219e-05, 4.8453e-05, + 4.4749e-05, 3.9929e-05], device='cuda:0') +2022-12-02 03:33:14,603 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:33:21,257 INFO [train.py:876] Epoch 22, batch 450, loss[loss=0.1365, simple_loss=0.1994, pruned_loss=0.03676, over 4841.00 frames. ], tot_loss[loss=0.127, simple_loss=0.1892, pruned_loss=0.0324, over 855756.77 frames. 
], batch size: 41, lr: 8.15e-03, +2022-12-02 03:33:57,522 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=30565.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:33:57,694 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5000, 3.8294, 3.2664, 3.7145, 3.2274, 3.9086, 3.1091, 3.6969], + device='cuda:0'), covar=tensor([0.0205, 0.0218, 0.0258, 0.0235, 0.0395, 0.0222, 0.0302, 0.0180], + device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0068, 0.0057, 0.0057, 0.0072, 0.0066, 0.0061, 0.0055], + device='cuda:0'), out_proj_covar=tensor([4.6913e-05, 5.0366e-05, 4.1306e-05, 4.1074e-05, 5.3032e-05, 4.8344e-05, + 4.4616e-05, 4.0108e-05], device='cuda:0') +2022-12-02 03:34:09,980 INFO [train.py:876] Epoch 22, batch 500, loss[loss=0.1397, simple_loss=0.1985, pruned_loss=0.04049, over 4867.00 frames. ], tot_loss[loss=0.1282, simple_loss=0.1906, pruned_loss=0.03288, over 877680.00 frames. ], batch size: 36, lr: 8.14e-03, +2022-12-02 03:34:19,326 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0 +2022-12-02 03:34:33,094 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.758e+01 1.524e+02 1.815e+02 2.285e+02 4.020e+02, threshold=3.630e+02, percent-clipped=1.0 +2022-12-02 03:34:59,157 INFO [train.py:876] Epoch 22, batch 550, loss[loss=0.09368, simple_loss=0.1417, pruned_loss=0.02283, over 4815.00 frames. ], tot_loss[loss=0.1283, simple_loss=0.1908, pruned_loss=0.03288, over 893914.85 frames. ], batch size: 25, lr: 8.14e-03, +2022-12-02 03:35:14,905 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4914, 2.3278, 2.4156, 2.5910, 1.8890, 2.5219, 2.3183, 2.5964], + device='cuda:0'), covar=tensor([0.0419, 0.0483, 0.0331, 0.0319, 0.0613, 0.0428, 0.0478, 0.0311], + device='cuda:0'), in_proj_covar=tensor([0.0063, 0.0067, 0.0056, 0.0057, 0.0072, 0.0065, 0.0060, 0.0054], + device='cuda:0'), out_proj_covar=tensor([4.6235e-05, 4.9501e-05, 4.0985e-05, 4.0780e-05, 5.2546e-05, 4.7785e-05, + 4.4030e-05, 3.9437e-05], device='cuda:0') +2022-12-02 03:35:25,560 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30655.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:35:48,132 INFO [train.py:876] Epoch 22, batch 600, loss[loss=0.1535, simple_loss=0.2178, pruned_loss=0.04459, over 4794.00 frames. ], tot_loss[loss=0.1274, simple_loss=0.1898, pruned_loss=0.0325, over 908115.13 frames. ], batch size: 51, lr: 8.13e-03, +2022-12-02 03:36:06,305 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.68 vs. 
limit=2.0 +2022-12-02 03:36:10,585 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.022e+02 1.559e+02 1.907e+02 2.352e+02 5.569e+02, threshold=3.815e+02, percent-clipped=1.0 +2022-12-02 03:36:25,445 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=30716.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:36:25,588 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30716.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:36:32,255 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6027, 2.0259, 2.1540, 3.4450, 2.9292, 3.1445, 2.9748, 3.5478], + device='cuda:0'), covar=tensor([0.0272, 0.1648, 0.2322, 0.0470, 0.0459, 0.0549, 0.0639, 0.0385], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0103, 0.0121, 0.0070, 0.0079, 0.0071, 0.0080, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.2951e-05, 1.1295e-04, 1.2980e-04, 8.0373e-05, 8.1919e-05, 7.9827e-05, + 8.9525e-05, 8.4188e-05], device='cuda:0') +2022-12-02 03:36:36,761 INFO [train.py:876] Epoch 22, batch 650, loss[loss=0.1193, simple_loss=0.1867, pruned_loss=0.02597, over 4852.00 frames. ], tot_loss[loss=0.1282, simple_loss=0.1909, pruned_loss=0.03272, over 919831.64 frames. ], batch size: 34, lr: 8.12e-03, +2022-12-02 03:37:11,679 INFO [zipformer.py:1414] attn_weights_entropy = tensor([5.2275, 5.5211, 5.2991, 5.7557, 5.3465, 4.5399, 5.7312, 5.4383], + device='cuda:0'), covar=tensor([0.0081, 0.0052, 0.0067, 0.0078, 0.0092, 0.0106, 0.0046, 0.0072], + device='cuda:0'), in_proj_covar=tensor([0.0057, 0.0053, 0.0057, 0.0045, 0.0055, 0.0058, 0.0052, 0.0052], + device='cuda:0'), out_proj_covar=tensor([5.1404e-05, 4.4106e-05, 5.0361e-05, 3.7703e-05, 4.9499e-05, 5.2563e-05, + 4.2114e-05, 4.3098e-05], device='cuda:0') +2022-12-02 03:37:25,234 INFO [train.py:876] Epoch 22, batch 700, loss[loss=0.1193, simple_loss=0.1857, pruned_loss=0.02648, over 4874.00 frames. ], tot_loss[loss=0.1282, simple_loss=0.1905, pruned_loss=0.03293, over 925209.17 frames. ], batch size: 39, lr: 8.12e-03, +2022-12-02 03:37:28,356 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=30781.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:37:47,604 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.008e+02 1.625e+02 1.974e+02 2.418e+02 5.568e+02, threshold=3.947e+02, percent-clipped=3.0 +2022-12-02 03:38:13,707 INFO [train.py:876] Epoch 22, batch 750, loss[loss=0.1344, simple_loss=0.2057, pruned_loss=0.03149, over 4841.00 frames. ], tot_loss[loss=0.1279, simple_loss=0.1898, pruned_loss=0.03302, over 929889.57 frames. ], batch size: 40, lr: 8.11e-03, +2022-12-02 03:38:14,698 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=30829.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:38:49,897 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=30865.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:38:51,593 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.89 vs. limit=2.0 +2022-12-02 03:39:02,727 INFO [train.py:876] Epoch 22, batch 800, loss[loss=0.1036, simple_loss=0.1633, pruned_loss=0.02189, over 4726.00 frames. ], tot_loss[loss=0.1283, simple_loss=0.1906, pruned_loss=0.03298, over 936778.21 frames. 
], batch size: 27, lr: 8.11e-03, +2022-12-02 03:39:25,250 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.151e+01 1.559e+02 1.960e+02 2.381e+02 4.774e+02, threshold=3.919e+02, percent-clipped=1.0 +2022-12-02 03:39:27,391 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30903.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:39:37,296 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=30913.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:39:49,059 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30925.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:39:51,955 INFO [train.py:876] Epoch 22, batch 850, loss[loss=0.1288, simple_loss=0.1845, pruned_loss=0.03652, over 4801.00 frames. ], tot_loss[loss=0.1275, simple_loss=0.1896, pruned_loss=0.03265, over 941260.77 frames. ], batch size: 32, lr: 8.10e-03, +2022-12-02 03:39:57,945 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=30934.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:40:27,422 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30964.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:40:29,360 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9215, 2.5165, 2.5181, 2.2933, 2.2840, 2.9004, 2.4698, 2.2684], + device='cuda:0'), covar=tensor([0.1716, 0.0655, 0.1452, 0.0620, 0.1312, 0.0822, 0.0693, 0.0742], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0070, 0.0066, 0.0071, 0.0064, 0.0060, 0.0059, 0.0066], + device='cuda:0'), out_proj_covar=tensor([6.0808e-05, 6.3042e-05, 6.1147e-05, 6.4043e-05, 5.9199e-05, 5.5042e-05, + 5.5617e-05, 6.0258e-05], device='cuda:0') +2022-12-02 03:40:35,169 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5318, 3.1517, 3.6016, 3.3947, 3.5337, 3.5435, 2.9364, 4.3312], + device='cuda:0'), covar=tensor([0.0125, 0.0787, 0.0442, 0.0819, 0.0212, 0.0348, 0.1770, 0.0171], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0106, 0.0088, 0.0118, 0.0080, 0.0085, 0.0118, 0.0097], + device='cuda:0'), out_proj_covar=tensor([8.3946e-05, 1.2484e-04, 1.0653e-04, 1.3801e-04, 9.3374e-05, 1.0557e-04, + 1.3649e-04, 1.0912e-04], device='cuda:0') +2022-12-02 03:40:40,743 INFO [train.py:876] Epoch 22, batch 900, loss[loss=0.08056, simple_loss=0.1248, pruned_loss=0.01814, over 4074.00 frames. ], tot_loss[loss=0.1276, simple_loss=0.1896, pruned_loss=0.03281, over 942539.82 frames. ], batch size: 16, lr: 8.09e-03, +2022-12-02 03:40:48,634 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30986.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:40:57,102 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=30995.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:41:02,985 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.059e+02 1.504e+02 1.828e+02 2.252e+02 4.431e+02, threshold=3.656e+02, percent-clipped=1.0 +2022-12-02 03:41:13,280 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31011.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:41:13,471 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31011.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 03:41:18,114 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31016.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:41:29,895 INFO [train.py:876] Epoch 22, batch 950, loss[loss=0.1548, simple_loss=0.2319, pruned_loss=0.03887, over 4777.00 frames. 
], tot_loss[loss=0.127, simple_loss=0.1886, pruned_loss=0.03269, over 943895.63 frames. ], batch size: 51, lr: 8.09e-03, +2022-12-02 03:41:58,700 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2594, 2.7528, 3.1815, 3.3709, 2.7528, 3.3928, 2.9905, 3.0022], + device='cuda:0'), covar=tensor([0.0374, 0.0361, 0.0281, 0.0240, 0.0254, 0.0238, 0.0307, 0.0380], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0122, 0.0129, 0.0118, 0.0102, 0.0123, 0.0125, 0.0136], + device='cuda:0'), out_proj_covar=tensor([8.2051e-05, 8.3473e-05, 8.7160e-05, 7.9084e-05, 6.7393e-05, 8.2194e-05, + 8.3386e-05, 9.3270e-05], device='cuda:0') +2022-12-02 03:42:04,223 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31064.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:42:12,113 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31072.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 03:42:17,830 INFO [train.py:876] Epoch 22, batch 1000, loss[loss=0.09102, simple_loss=0.1489, pruned_loss=0.01656, over 4819.00 frames. ], tot_loss[loss=0.1278, simple_loss=0.1901, pruned_loss=0.03281, over 946827.45 frames. ], batch size: 25, lr: 8.08e-03, +2022-12-02 03:42:40,359 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.256e+01 1.552e+02 1.972e+02 2.360e+02 3.687e+02, threshold=3.945e+02, percent-clipped=1.0 +2022-12-02 03:43:06,559 INFO [train.py:876] Epoch 22, batch 1050, loss[loss=0.1554, simple_loss=0.2152, pruned_loss=0.04779, over 4814.00 frames. ], tot_loss[loss=0.1277, simple_loss=0.1896, pruned_loss=0.03291, over 947283.68 frames. ], batch size: 42, lr: 8.07e-03, +2022-12-02 03:43:28,195 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6791, 2.9906, 3.2808, 3.1826, 2.1927, 3.1577, 3.2003, 2.6232], + device='cuda:0'), covar=tensor([0.4287, 0.0863, 0.0685, 0.0612, 0.0993, 0.0867, 0.0557, 0.1055], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0119, 0.0150, 0.0124, 0.0130, 0.0120, 0.0117, 0.0126], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 03:43:55,033 INFO [train.py:876] Epoch 22, batch 1100, loss[loss=0.1295, simple_loss=0.1923, pruned_loss=0.03329, over 4896.00 frames. ], tot_loss[loss=0.1277, simple_loss=0.1897, pruned_loss=0.03284, over 949010.44 frames. ], batch size: 38, lr: 8.07e-03, +2022-12-02 03:44:17,554 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.061e+02 1.606e+02 1.977e+02 2.324e+02 5.129e+02, threshold=3.954e+02, percent-clipped=1.0 +2022-12-02 03:44:44,594 INFO [train.py:876] Epoch 22, batch 1150, loss[loss=0.1301, simple_loss=0.1934, pruned_loss=0.03341, over 4814.00 frames. ], tot_loss[loss=0.1281, simple_loss=0.1898, pruned_loss=0.03322, over 948130.43 frames. ], batch size: 42, lr: 8.06e-03, +2022-12-02 03:45:04,604 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.02 vs. 
limit=2.0 +2022-12-02 03:45:14,783 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31259.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:45:31,020 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3573, 2.8280, 3.2303, 2.9293, 2.3654, 1.7962, 3.2552, 1.5194], + device='cuda:0'), covar=tensor([0.0455, 0.0374, 0.0436, 0.0812, 0.1670, 0.3638, 0.0297, 0.3196], + device='cuda:0'), in_proj_covar=tensor([0.0119, 0.0100, 0.0092, 0.0133, 0.0140, 0.0164, 0.0087, 0.0180], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 03:45:32,703 INFO [train.py:876] Epoch 22, batch 1200, loss[loss=0.1514, simple_loss=0.2138, pruned_loss=0.04447, over 4840.00 frames. ], tot_loss[loss=0.1277, simple_loss=0.1894, pruned_loss=0.03298, over 948751.34 frames. ], batch size: 49, lr: 8.05e-03, +2022-12-02 03:45:35,592 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31281.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:45:44,073 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31290.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:45:53,884 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6478, 4.4683, 4.3496, 4.6853, 4.1323, 3.8438, 4.4715, 4.2776], + device='cuda:0'), covar=tensor([0.0299, 0.0114, 0.0157, 0.0201, 0.0193, 0.0237, 0.0133, 0.0165], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0054, 0.0058, 0.0046, 0.0056, 0.0059, 0.0053, 0.0054], + device='cuda:0'), out_proj_covar=tensor([5.2726e-05, 4.4910e-05, 5.1027e-05, 3.8576e-05, 5.0091e-05, 5.3640e-05, + 4.3027e-05, 4.5537e-05], device='cuda:0') +2022-12-02 03:45:54,722 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.347e+01 1.621e+02 2.026e+02 2.519e+02 8.139e+02, threshold=4.052e+02, percent-clipped=4.0 +2022-12-02 03:46:04,352 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31311.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:46:20,593 INFO [train.py:876] Epoch 22, batch 1250, loss[loss=0.1437, simple_loss=0.209, pruned_loss=0.03923, over 4881.00 frames. ], tot_loss[loss=0.1282, simple_loss=0.1902, pruned_loss=0.03309, over 950060.90 frames. ], batch size: 44, lr: 8.05e-03, +2022-12-02 03:46:21,708 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4593, 1.4666, 0.8823, 1.7384, 1.6598, 1.6017, 1.5215, 1.5953], + device='cuda:0'), covar=tensor([0.0393, 0.0425, 0.0430, 0.0312, 0.0284, 0.0321, 0.0354, 0.0394], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0048, 0.0052, 0.0042, 0.0045, 0.0047, 0.0047, 0.0042], + device='cuda:0'), out_proj_covar=tensor([4.6109e-05, 4.5419e-05, 4.9824e-05, 3.9397e-05, 4.1784e-05, 4.4211e-05, + 4.4014e-05, 4.1676e-05], device='cuda:0') +2022-12-02 03:46:50,368 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31359.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:46:58,007 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31367.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:47:08,799 INFO [train.py:876] Epoch 22, batch 1300, loss[loss=0.09245, simple_loss=0.1537, pruned_loss=0.01561, over 4780.00 frames. ], tot_loss[loss=0.1279, simple_loss=0.1901, pruned_loss=0.03291, over 949472.84 frames. 
], batch size: 26, lr: 8.04e-03, +2022-12-02 03:47:26,967 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7632, 2.1754, 2.1475, 1.9271, 1.8625, 2.5224, 1.9711, 2.2356], + device='cuda:0'), covar=tensor([0.1724, 0.0499, 0.1010, 0.1111, 0.2181, 0.1111, 0.0896, 0.0663], + device='cuda:0'), in_proj_covar=tensor([0.0065, 0.0070, 0.0067, 0.0071, 0.0064, 0.0060, 0.0060, 0.0066], + device='cuda:0'), out_proj_covar=tensor([5.9782e-05, 6.2815e-05, 6.1838e-05, 6.4313e-05, 5.8695e-05, 5.5172e-05, + 5.5851e-05, 5.9767e-05], device='cuda:0') +2022-12-02 03:47:31,806 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.869e+01 1.520e+02 1.839e+02 2.202e+02 3.894e+02, threshold=3.677e+02, percent-clipped=0.0 +2022-12-02 03:47:39,620 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31409.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:47:50,922 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.79 vs. limit=5.0 +2022-12-02 03:47:51,434 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8044, 2.7610, 2.4556, 2.7544, 2.1715, 2.5975, 1.1998, 2.7770], + device='cuda:0'), covar=tensor([0.0719, 0.0836, 0.0873, 0.0665, 0.1115, 0.1152, 0.1403, 0.0570], + device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0087, 0.0104, 0.0088, 0.0109, 0.0092, 0.0095, 0.0093], + device='cuda:0'), out_proj_covar=tensor([8.5457e-05, 9.0276e-05, 1.0493e-04, 8.9693e-05, 1.0927e-04, 9.6129e-05, + 9.7258e-05, 9.5401e-05], device='cuda:0') +2022-12-02 03:47:53,508 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5875, 1.5927, 0.7865, 1.9651, 1.8031, 1.6269, 1.5286, 1.6442], + device='cuda:0'), covar=tensor([0.0284, 0.0341, 0.0440, 0.0212, 0.0279, 0.0366, 0.0337, 0.0529], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0049, 0.0052, 0.0041, 0.0045, 0.0047, 0.0047, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.6495e-05, 4.5673e-05, 5.0227e-05, 3.9333e-05, 4.2023e-05, 4.4788e-05, + 4.4213e-05, 4.2225e-05], device='cuda:0') +2022-12-02 03:47:58,225 INFO [train.py:876] Epoch 22, batch 1350, loss[loss=0.1377, simple_loss=0.2014, pruned_loss=0.03702, over 4880.00 frames. ], tot_loss[loss=0.1279, simple_loss=0.1899, pruned_loss=0.03292, over 948658.24 frames. ], batch size: 39, lr: 8.04e-03, +2022-12-02 03:48:39,438 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31470.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:48:41,237 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4973, 4.6404, 5.0532, 4.4271, 4.6276, 4.7244, 4.4513, 4.4128], + device='cuda:0'), covar=tensor([0.0880, 0.0543, 0.0567, 0.0565, 0.1023, 0.0547, 0.1444, 0.0496], + device='cuda:0'), in_proj_covar=tensor([0.0182, 0.0136, 0.0175, 0.0151, 0.0134, 0.0161, 0.0194, 0.0132], + device='cuda:0'), out_proj_covar=tensor([1.3316e-04, 8.9928e-05, 1.3641e-04, 1.0543e-04, 1.0048e-04, 1.1818e-04, + 1.4739e-04, 9.1596e-05], device='cuda:0') +2022-12-02 03:48:46,673 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.74 vs. limit=2.0 +2022-12-02 03:48:47,005 INFO [train.py:876] Epoch 22, batch 1400, loss[loss=0.132, simple_loss=0.1968, pruned_loss=0.03361, over 4847.00 frames. ], tot_loss[loss=0.1261, simple_loss=0.1881, pruned_loss=0.03208, over 948567.46 frames. 
], batch size: 35, lr: 8.03e-03, +2022-12-02 03:48:59,965 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31491.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 03:49:09,433 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.569e+01 1.616e+02 1.956e+02 2.395e+02 5.916e+02, threshold=3.912e+02, percent-clipped=5.0 +2022-12-02 03:49:16,024 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.92 vs. limit=2.0 +2022-12-02 03:49:17,372 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-22.pt +2022-12-02 03:49:27,981 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 03:49:28,542 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 03:49:28,837 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:49:28,869 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 03:49:29,969 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 03:49:30,296 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 03:49:32,190 INFO [train.py:876] Epoch 23, batch 0, loss[loss=0.08321, simple_loss=0.1292, pruned_loss=0.0186, over 3385.00 frames. ], tot_loss[loss=0.08321, simple_loss=0.1292, pruned_loss=0.0186, over 3385.00 frames. ], batch size: 13, lr: 7.85e-03, +2022-12-02 03:49:32,191 INFO [train.py:901] Computing validation loss +2022-12-02 03:49:47,808 INFO [train.py:910] Epoch 23, validation: loss=0.2301, simple_loss=0.2741, pruned_loss=0.09306, over 253132.00 frames. +2022-12-02 03:49:47,808 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 03:50:05,929 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.22 vs. limit=5.0 +2022-12-02 03:50:13,988 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5830, 1.4131, 1.5236, 1.3152, 0.9868, 1.1889, 1.4343, 1.6667], + device='cuda:0'), covar=tensor([0.0202, 0.0246, 0.0190, 0.0188, 0.0310, 0.0173, 0.0147, 0.0128], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0034, 0.0034, 0.0036, 0.0040, 0.0035, 0.0038, 0.0033], + device='cuda:0'), out_proj_covar=tensor([2.8670e-05, 2.6669e-05, 2.7226e-05, 2.8256e-05, 3.2230e-05, 2.7573e-05, + 3.0893e-05, 2.5901e-05], device='cuda:0') +2022-12-02 03:50:29,589 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31552.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:50:36,219 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31559.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:50:37,149 INFO [train.py:876] Epoch 23, batch 50, loss[loss=0.1187, simple_loss=0.1847, pruned_loss=0.0263, over 4792.00 frames. ], tot_loss[loss=0.1218, simple_loss=0.1833, pruned_loss=0.03017, over 212786.45 frames. 
], batch size: 32, lr: 7.84e-03, +2022-12-02 03:50:57,522 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31581.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:50:59,434 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 03:51:00,060 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 03:51:06,385 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31590.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:51:17,322 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.406e+01 1.543e+02 1.824e+02 2.246e+02 4.346e+02, threshold=3.647e+02, percent-clipped=2.0 +2022-12-02 03:51:23,017 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31607.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:51:25,826 INFO [train.py:876] Epoch 23, batch 100, loss[loss=0.08933, simple_loss=0.1555, pruned_loss=0.01155, over 4733.00 frames. ], tot_loss[loss=0.1238, simple_loss=0.1854, pruned_loss=0.03112, over 375542.29 frames. ], batch size: 27, lr: 7.83e-03, +2022-12-02 03:51:41,766 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.45 vs. limit=2.0 +2022-12-02 03:51:43,106 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:51:44,108 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31629.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:51:52,757 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31638.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:52:05,271 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 03:52:10,309 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8453, 3.2259, 3.4232, 3.8515, 3.4747, 3.7444, 3.4956, 3.4055], + device='cuda:0'), covar=tensor([0.0177, 0.0305, 0.0276, 0.0178, 0.0183, 0.0240, 0.0236, 0.0314], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0123, 0.0130, 0.0120, 0.0102, 0.0123, 0.0128, 0.0138], + device='cuda:0'), out_proj_covar=tensor([8.1317e-05, 8.3949e-05, 8.7034e-05, 7.9985e-05, 6.7523e-05, 8.2174e-05, + 8.5217e-05, 9.4216e-05], device='cuda:0') +2022-12-02 03:52:13,826 INFO [train.py:876] Epoch 23, batch 150, loss[loss=0.1436, simple_loss=0.2029, pruned_loss=0.04209, over 4841.00 frames. ], tot_loss[loss=0.1257, simple_loss=0.1878, pruned_loss=0.03178, over 503225.36 frames. 
], batch size: 47, lr: 7.83e-03, +2022-12-02 03:52:20,781 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=31667.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 03:52:23,924 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6902, 3.6724, 3.8597, 3.3586, 3.1765, 3.4722, 3.7944, 3.7259], + device='cuda:0'), covar=tensor([0.0807, 0.0336, 0.0382, 0.0478, 0.0500, 0.0553, 0.0396, 0.0588], + device='cuda:0'), in_proj_covar=tensor([0.0240, 0.0165, 0.0185, 0.0179, 0.0176, 0.0180, 0.0164, 0.0191], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 03:52:29,648 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8032, 3.3864, 3.4299, 3.7634, 3.5755, 3.6227, 3.4857, 3.4160], + device='cuda:0'), covar=tensor([0.0184, 0.0286, 0.0295, 0.0205, 0.0166, 0.0222, 0.0247, 0.0311], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0123, 0.0129, 0.0119, 0.0102, 0.0122, 0.0127, 0.0137], + device='cuda:0'), out_proj_covar=tensor([8.0986e-05, 8.3485e-05, 8.6494e-05, 7.9594e-05, 6.7073e-05, 8.1722e-05, + 8.4884e-05, 9.3817e-05], device='cuda:0') +2022-12-02 03:52:50,979 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4442, 1.4936, 0.9163, 1.6635, 1.5332, 1.4751, 1.3933, 1.4268], + device='cuda:0'), covar=tensor([0.0333, 0.0339, 0.0397, 0.0240, 0.0272, 0.0423, 0.0325, 0.0558], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0047, 0.0050, 0.0040, 0.0044, 0.0046, 0.0046, 0.0042], + device='cuda:0'), out_proj_covar=tensor([4.5775e-05, 4.4134e-05, 4.8584e-05, 3.8165e-05, 4.0646e-05, 4.3400e-05, + 4.2984e-05, 4.1817e-05], device='cuda:0') +2022-12-02 03:52:53,472 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.019e+01 1.644e+02 2.042e+02 2.522e+02 5.145e+02, threshold=4.083e+02, percent-clipped=8.0 +2022-12-02 03:52:56,680 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31704.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:53:02,231 INFO [train.py:876] Epoch 23, batch 200, loss[loss=0.1557, simple_loss=0.2245, pruned_loss=0.04345, over 4662.00 frames. ], tot_loss[loss=0.125, simple_loss=0.1875, pruned_loss=0.03128, over 604385.41 frames. ], batch size: 63, lr: 7.82e-03, +2022-12-02 03:53:05,258 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.86 vs. 
limit=2.0 +2022-12-02 03:53:07,718 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=31715.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 03:53:19,725 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7496, 2.9170, 3.4271, 3.6412, 3.0701, 3.5555, 3.4092, 3.2321], + device='cuda:0'), covar=tensor([0.0229, 0.0374, 0.0316, 0.0218, 0.0238, 0.0301, 0.0296, 0.0361], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0124, 0.0130, 0.0121, 0.0103, 0.0123, 0.0128, 0.0139], + device='cuda:0'), out_proj_covar=tensor([8.2111e-05, 8.4333e-05, 8.7096e-05, 8.0536e-05, 6.7641e-05, 8.2369e-05, + 8.5378e-05, 9.5051e-05], device='cuda:0') +2022-12-02 03:53:25,555 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9204, 3.4670, 3.7504, 3.4643, 2.6215, 3.9808, 3.6012, 2.8434], + device='cuda:0'), covar=tensor([0.4864, 0.0810, 0.0798, 0.0480, 0.1168, 0.0819, 0.0621, 0.1384], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0117, 0.0149, 0.0125, 0.0129, 0.0121, 0.0120, 0.0124], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 03:53:50,661 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 03:53:51,598 INFO [train.py:876] Epoch 23, batch 250, loss[loss=0.141, simple_loss=0.2033, pruned_loss=0.03938, over 4822.00 frames. ], tot_loss[loss=0.1254, simple_loss=0.188, pruned_loss=0.0314, over 682273.42 frames. ], batch size: 45, lr: 7.82e-03, +2022-12-02 03:53:56,523 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31765.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:53:56,660 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31765.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:54:19,259 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31788.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:54:21,408 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8706, 3.1928, 3.8316, 3.2050, 3.8803, 3.3033, 3.6947, 3.8693], + device='cuda:0'), covar=tensor([0.0273, 0.0441, 0.0307, 0.0418, 0.0239, 0.0461, 0.0267, 0.0428], + device='cuda:0'), in_proj_covar=tensor([0.0116, 0.0125, 0.0112, 0.0116, 0.0104, 0.0141, 0.0095, 0.0111], + device='cuda:0'), out_proj_covar=tensor([9.8104e-05, 1.0681e-04, 9.5684e-05, 9.9000e-05, 8.7907e-05, 1.2107e-04, + 8.3491e-05, 9.6225e-05], device='cuda:0') +2022-12-02 03:54:31,991 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.065e+02 1.494e+02 1.767e+02 2.470e+02 5.275e+02, threshold=3.534e+02, percent-clipped=1.0 +2022-12-02 03:54:40,950 INFO [train.py:876] Epoch 23, batch 300, loss[loss=0.1263, simple_loss=0.183, pruned_loss=0.03481, over 4784.00 frames. ], tot_loss[loss=0.1251, simple_loss=0.1875, pruned_loss=0.0313, over 740976.35 frames. ], batch size: 32, lr: 7.81e-03, +2022-12-02 03:54:49,703 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 03:54:53,772 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31823.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:55:17,393 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=31847.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 03:55:19,473 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31849.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:55:30,057 INFO [train.py:876] Epoch 23, batch 350, loss[loss=0.1389, simple_loss=0.2092, pruned_loss=0.03425, over 4845.00 frames. ], tot_loss[loss=0.1246, simple_loss=0.1871, pruned_loss=0.03112, over 789498.13 frames. ], batch size: 41, lr: 7.80e-03, +2022-12-02 03:55:47,708 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31878.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:55:53,690 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31884.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:56:09,835 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.027e+02 1.475e+02 1.852e+02 2.124e+02 6.484e+02, threshold=3.704e+02, percent-clipped=6.0 +2022-12-02 03:56:18,924 INFO [train.py:876] Epoch 23, batch 400, loss[loss=0.1467, simple_loss=0.214, pruned_loss=0.03968, over 4816.00 frames. ], tot_loss[loss=0.1249, simple_loss=0.1872, pruned_loss=0.03133, over 823772.63 frames. ], batch size: 45, lr: 7.80e-03, +2022-12-02 03:56:23,952 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31915.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:56:25,708 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 03:56:46,374 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3423, 1.3879, 1.5406, 1.1825, 1.1154, 1.1616, 1.5951, 1.6233], + device='cuda:0'), covar=tensor([0.0213, 0.0185, 0.0166, 0.0235, 0.0342, 0.0193, 0.0164, 0.0150], + device='cuda:0'), in_proj_covar=tensor([0.0034, 0.0031, 0.0032, 0.0034, 0.0037, 0.0033, 0.0036, 0.0031], + device='cuda:0'), out_proj_covar=tensor([2.7221e-05, 2.4565e-05, 2.5592e-05, 2.6467e-05, 2.9856e-05, 2.5713e-05, + 2.9061e-05, 2.4126e-05], device='cuda:0') +2022-12-02 03:56:47,362 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31939.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:56:51,153 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 03:56:57,850 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=31950.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:57:07,966 INFO [train.py:876] Epoch 23, batch 450, loss[loss=0.1402, simple_loss=0.2084, pruned_loss=0.03599, over 4795.00 frames. ], tot_loss[loss=0.1257, simple_loss=0.1884, pruned_loss=0.03147, over 851083.28 frames. ], batch size: 51, lr: 7.79e-03, +2022-12-02 03:57:23,457 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=31976.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:57:47,073 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-32000.pt +2022-12-02 03:57:50,274 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.130e+02 1.635e+02 1.938e+02 2.415e+02 4.439e+02, threshold=3.875e+02, percent-clipped=3.0 +2022-12-02 03:57:59,071 INFO [train.py:876] Epoch 23, batch 500, loss[loss=0.1512, simple_loss=0.2038, pruned_loss=0.04929, over 4775.00 frames. 
], tot_loss[loss=0.1258, simple_loss=0.1887, pruned_loss=0.03143, over 874101.55 frames. ], batch size: 51, lr: 7.79e-03, +2022-12-02 03:58:00,332 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=32011.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:58:12,665 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.86 vs. limit=2.0 +2022-12-02 03:58:32,622 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.9487, 4.9230, 5.4319, 4.7854, 5.1515, 5.1010, 4.7273, 4.7948], + device='cuda:0'), covar=tensor([0.0617, 0.0496, 0.0453, 0.0482, 0.0650, 0.0442, 0.1323, 0.0456], + device='cuda:0'), in_proj_covar=tensor([0.0174, 0.0130, 0.0170, 0.0144, 0.0129, 0.0156, 0.0190, 0.0126], + device='cuda:0'), out_proj_covar=tensor([1.2718e-04, 8.6102e-05, 1.3133e-04, 1.0050e-04, 9.6983e-05, 1.1453e-04, + 1.4405e-04, 8.6856e-05], device='cuda:0') +2022-12-02 03:58:48,468 INFO [train.py:876] Epoch 23, batch 550, loss[loss=0.1677, simple_loss=0.2361, pruned_loss=0.04961, over 4821.00 frames. ], tot_loss[loss=0.125, simple_loss=0.1875, pruned_loss=0.03126, over 891885.63 frames. ], batch size: 45, lr: 7.78e-03, +2022-12-02 03:58:48,528 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32060.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:58:53,518 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32065.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:59:03,670 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.89 vs. limit=5.0 +2022-12-02 03:59:28,554 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.008e+02 1.480e+02 1.829e+02 2.252e+02 6.262e+02, threshold=3.659e+02, percent-clipped=4.0 +2022-12-02 03:59:37,138 INFO [train.py:876] Epoch 23, batch 600, loss[loss=0.1067, simple_loss=0.174, pruned_loss=0.0197, over 4889.00 frames. ], tot_loss[loss=0.1249, simple_loss=0.1876, pruned_loss=0.03107, over 907013.32 frames. ], batch size: 29, lr: 7.77e-03, +2022-12-02 03:59:39,866 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32113.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 03:59:44,159 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 03:59:56,051 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 04:00:10,125 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32144.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:00:13,012 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32147.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 04:00:25,651 INFO [train.py:876] Epoch 23, batch 650, loss[loss=0.1453, simple_loss=0.2068, pruned_loss=0.04192, over 4829.00 frames. ], tot_loss[loss=0.1254, simple_loss=0.1882, pruned_loss=0.03134, over 918219.60 frames. 
], batch size: 34, lr: 7.77e-03, +2022-12-02 04:00:44,266 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32179.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:00:45,504 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7470, 3.5812, 3.8824, 3.5875, 4.0244, 3.0897, 3.7449, 3.9764], + device='cuda:0'), covar=tensor([0.0417, 0.0434, 0.0341, 0.0308, 0.0257, 0.0648, 0.0298, 0.0445], + device='cuda:0'), in_proj_covar=tensor([0.0120, 0.0130, 0.0117, 0.0117, 0.0107, 0.0144, 0.0100, 0.0114], + device='cuda:0'), out_proj_covar=tensor([1.0171e-04, 1.1066e-04, 9.9765e-05, 9.9737e-05, 9.0808e-05, 1.2359e-04, + 8.7718e-05, 9.8873e-05], device='cuda:0') +2022-12-02 04:00:59,979 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32195.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:01:06,002 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.934e+01 1.555e+02 1.876e+02 2.271e+02 4.343e+02, threshold=3.751e+02, percent-clipped=2.0 +2022-12-02 04:01:06,816 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.90 vs. limit=2.0 +2022-12-02 04:01:14,947 INFO [train.py:876] Epoch 23, batch 700, loss[loss=0.131, simple_loss=0.1948, pruned_loss=0.03363, over 4911.00 frames. ], tot_loss[loss=0.1255, simple_loss=0.1882, pruned_loss=0.0314, over 926444.15 frames. ], batch size: 31, lr: 7.76e-03, +2022-12-02 04:01:38,389 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32234.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:01:42,202 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.84 vs. limit=2.0 +2022-12-02 04:02:04,414 INFO [train.py:876] Epoch 23, batch 750, loss[loss=0.08314, simple_loss=0.1382, pruned_loss=0.01405, over 4828.00 frames. ], tot_loss[loss=0.1252, simple_loss=0.1881, pruned_loss=0.03119, over 933388.83 frames. ], batch size: 25, lr: 7.76e-03, +2022-12-02 04:02:15,174 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32271.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:02:44,260 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.388e+01 1.487e+02 1.835e+02 2.219e+02 4.131e+02, threshold=3.671e+02, percent-clipped=1.0 +2022-12-02 04:02:49,085 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=32306.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:02:53,020 INFO [train.py:876] Epoch 23, batch 800, loss[loss=0.1291, simple_loss=0.1945, pruned_loss=0.03189, over 4871.00 frames. ], tot_loss[loss=0.1242, simple_loss=0.187, pruned_loss=0.03071, over 936131.18 frames. ], batch size: 39, lr: 7.75e-03, +2022-12-02 04:03:41,371 INFO [train.py:876] Epoch 23, batch 850, loss[loss=0.1114, simple_loss=0.1719, pruned_loss=0.0255, over 4916.00 frames. ], tot_loss[loss=0.124, simple_loss=0.1871, pruned_loss=0.03042, over 939813.12 frames. 
], batch size: 32, lr: 7.74e-03, +2022-12-02 04:03:41,581 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32360.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:04:14,776 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5177, 3.4998, 3.8547, 3.3162, 3.1109, 3.5967, 3.7303, 3.7403], + device='cuda:0'), covar=tensor([0.1002, 0.0399, 0.0340, 0.0478, 0.0543, 0.0424, 0.0317, 0.0460], + device='cuda:0'), in_proj_covar=tensor([0.0239, 0.0165, 0.0185, 0.0176, 0.0175, 0.0179, 0.0165, 0.0191], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 04:04:19,584 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0952, 2.0042, 1.9795, 2.2222, 1.5993, 2.1694, 2.1156, 2.1764], + device='cuda:0'), covar=tensor([0.0450, 0.0587, 0.0425, 0.0354, 0.0754, 0.0482, 0.0444, 0.0360], + device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0069, 0.0057, 0.0058, 0.0072, 0.0066, 0.0061, 0.0056], + device='cuda:0'), out_proj_covar=tensor([4.6813e-05, 5.1292e-05, 4.1571e-05, 4.1677e-05, 5.2821e-05, 4.8488e-05, + 4.5303e-05, 4.0812e-05], device='cuda:0') +2022-12-02 04:04:21,353 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.366e+01 1.478e+02 1.768e+02 2.149e+02 4.328e+02, threshold=3.536e+02, percent-clipped=3.0 +2022-12-02 04:04:27,442 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7989, 1.6717, 2.1486, 3.4603, 2.8554, 3.1975, 3.0112, 3.8467], + device='cuda:0'), covar=tensor([0.0278, 0.1868, 0.2338, 0.0399, 0.0408, 0.0539, 0.0706, 0.0295], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0102, 0.0122, 0.0068, 0.0078, 0.0071, 0.0082, 0.0086], + device='cuda:0'), out_proj_covar=tensor([7.4419e-05, 1.1171e-04, 1.3139e-04, 8.0162e-05, 8.1619e-05, 8.0329e-05, + 9.1248e-05, 8.4323e-05], device='cuda:0') +2022-12-02 04:04:28,282 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32408.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:04:30,121 INFO [train.py:876] Epoch 23, batch 900, loss[loss=0.1188, simple_loss=0.1923, pruned_loss=0.02261, over 4786.00 frames. ], tot_loss[loss=0.1249, simple_loss=0.188, pruned_loss=0.03088, over 943577.39 frames. ], batch size: 54, lr: 7.74e-03, +2022-12-02 04:04:30,251 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0456, 4.6319, 4.5285, 4.7787, 4.2218, 4.1506, 4.7086, 4.3292], + device='cuda:0'), covar=tensor([0.0227, 0.0097, 0.0113, 0.0198, 0.0146, 0.0166, 0.0099, 0.0169], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0054, 0.0058, 0.0045, 0.0057, 0.0060, 0.0053, 0.0054], + device='cuda:0'), out_proj_covar=tensor([5.3305e-05, 4.4329e-05, 5.0987e-05, 3.7760e-05, 5.0363e-05, 5.3507e-05, + 4.3128e-05, 4.5680e-05], device='cuda:0') +2022-12-02 04:04:40,981 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5071, 4.5789, 5.0254, 4.3148, 4.8035, 4.7282, 4.3577, 4.3388], + device='cuda:0'), covar=tensor([0.0805, 0.0688, 0.0619, 0.0582, 0.0759, 0.0602, 0.1664, 0.0707], + device='cuda:0'), in_proj_covar=tensor([0.0174, 0.0132, 0.0171, 0.0145, 0.0128, 0.0159, 0.0191, 0.0129], + device='cuda:0'), out_proj_covar=tensor([1.2661e-04, 8.7215e-05, 1.3266e-04, 1.0062e-04, 9.6297e-05, 1.1659e-04, + 1.4499e-04, 8.8799e-05], device='cuda:0') +2022-12-02 04:05:02,997 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32444.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:05:05,633 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.27 vs. 
limit=5.0 +2022-12-02 04:05:16,905 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4863, 3.3234, 3.6794, 3.5565, 3.7907, 3.0133, 3.5481, 3.8416], + device='cuda:0'), covar=tensor([0.0411, 0.0449, 0.0342, 0.0310, 0.0333, 0.0648, 0.0322, 0.0329], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0128, 0.0116, 0.0116, 0.0106, 0.0143, 0.0099, 0.0112], + device='cuda:0'), out_proj_covar=tensor([9.9275e-05, 1.0860e-04, 9.8992e-05, 9.8297e-05, 9.0010e-05, 1.2203e-04, + 8.6570e-05, 9.6697e-05], device='cuda:0') +2022-12-02 04:05:18,555 INFO [train.py:876] Epoch 23, batch 950, loss[loss=0.13, simple_loss=0.189, pruned_loss=0.03552, over 4802.00 frames. ], tot_loss[loss=0.124, simple_loss=0.1869, pruned_loss=0.03059, over 947344.70 frames. ], batch size: 33, lr: 7.73e-03, +2022-12-02 04:05:37,427 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32479.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:05:49,698 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32492.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:05:58,553 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.340e+01 1.427e+02 1.730e+02 2.400e+02 3.871e+02, threshold=3.461e+02, percent-clipped=4.0 +2022-12-02 04:06:07,498 INFO [train.py:876] Epoch 23, batch 1000, loss[loss=0.1436, simple_loss=0.2016, pruned_loss=0.04283, over 4852.00 frames. ], tot_loss[loss=0.1236, simple_loss=0.186, pruned_loss=0.03059, over 950734.96 frames. ], batch size: 39, lr: 7.73e-03, +2022-12-02 04:06:23,583 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32527.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:06:28,754 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2533, 2.2359, 2.4982, 2.0104, 2.1547, 2.7740, 2.1927, 2.3243], + device='cuda:0'), covar=tensor([0.1243, 0.0545, 0.1075, 0.0901, 0.2104, 0.1056, 0.0717, 0.0621], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0072, 0.0067, 0.0075, 0.0064, 0.0060, 0.0061, 0.0067], + device='cuda:0'), out_proj_covar=tensor([6.1400e-05, 6.4164e-05, 6.2187e-05, 6.7560e-05, 5.9647e-05, 5.5888e-05, + 5.6933e-05, 6.1099e-05], device='cuda:0') +2022-12-02 04:06:30,619 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32534.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:06:55,566 INFO [train.py:876] Epoch 23, batch 1050, loss[loss=0.1395, simple_loss=0.2127, pruned_loss=0.03317, over 4840.00 frames. ], tot_loss[loss=0.1242, simple_loss=0.1864, pruned_loss=0.03096, over 949867.57 frames. ], batch size: 40, lr: 7.72e-03, +2022-12-02 04:07:05,909 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32571.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:07:16,359 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32582.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:07:35,015 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.012e+02 1.621e+02 1.860e+02 2.358e+02 4.120e+02, threshold=3.721e+02, percent-clipped=4.0 +2022-12-02 04:07:40,054 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=32606.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:07:43,662 INFO [train.py:876] Epoch 23, batch 1100, loss[loss=0.117, simple_loss=0.1893, pruned_loss=0.02237, over 4836.00 frames. ], tot_loss[loss=0.1247, simple_loss=0.1869, pruned_loss=0.03122, over 951504.41 frames. 
], batch size: 41, lr: 7.72e-03, +2022-12-02 04:07:52,079 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32619.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:08:11,713 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5745, 3.2272, 3.6984, 3.2159, 3.7384, 2.8743, 3.5652, 3.8349], + device='cuda:0'), covar=tensor([0.0411, 0.0465, 0.0364, 0.0448, 0.0304, 0.0728, 0.0410, 0.0381], + device='cuda:0'), in_proj_covar=tensor([0.0115, 0.0124, 0.0112, 0.0113, 0.0103, 0.0140, 0.0096, 0.0109], + device='cuda:0'), out_proj_covar=tensor([9.7825e-05, 1.0575e-04, 9.5966e-05, 9.5897e-05, 8.7770e-05, 1.1947e-04, + 8.4316e-05, 9.4442e-05], device='cuda:0') +2022-12-02 04:08:26,049 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=32654.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:08:31,843 INFO [train.py:876] Epoch 23, batch 1150, loss[loss=0.08437, simple_loss=0.1408, pruned_loss=0.01396, over 3588.00 frames. ], tot_loss[loss=0.1253, simple_loss=0.1878, pruned_loss=0.03141, over 952540.22 frames. ], batch size: 14, lr: 7.71e-03, +2022-12-02 04:09:11,454 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.383e+01 1.537e+02 1.796e+02 2.159e+02 5.480e+02, threshold=3.593e+02, percent-clipped=3.0 +2022-12-02 04:09:20,296 INFO [train.py:876] Epoch 23, batch 1200, loss[loss=0.1403, simple_loss=0.1889, pruned_loss=0.04581, over 4916.00 frames. ], tot_loss[loss=0.1264, simple_loss=0.1887, pruned_loss=0.03208, over 952182.23 frames. ], batch size: 31, lr: 7.70e-03, +2022-12-02 04:09:29,458 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5671, 5.1255, 4.8699, 5.4059, 4.8078, 4.4378, 5.2338, 5.0405], + device='cuda:0'), covar=tensor([0.0178, 0.0073, 0.0117, 0.0145, 0.0131, 0.0163, 0.0082, 0.0139], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0053, 0.0058, 0.0045, 0.0056, 0.0059, 0.0053, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.3185e-05, 4.3797e-05, 5.0941e-05, 3.7631e-05, 4.9462e-05, 5.3110e-05, + 4.3155e-05, 4.4654e-05], device='cuda:0') +2022-12-02 04:09:39,161 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0351, 2.8336, 2.7574, 3.0457, 2.3036, 2.8629, 1.5316, 3.0139], + device='cuda:0'), covar=tensor([0.0734, 0.0883, 0.0788, 0.0608, 0.1053, 0.1222, 0.1231, 0.0431], + device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0088, 0.0104, 0.0089, 0.0109, 0.0092, 0.0095, 0.0092], + device='cuda:0'), out_proj_covar=tensor([8.5653e-05, 9.0809e-05, 1.0467e-04, 9.0530e-05, 1.0964e-04, 9.6493e-05, + 9.7488e-05, 9.4759e-05], device='cuda:0') +2022-12-02 04:09:45,915 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9239, 1.7148, 1.0806, 1.9669, 1.8929, 1.6767, 1.5880, 1.7369], + device='cuda:0'), covar=tensor([0.0284, 0.0396, 0.0431, 0.0323, 0.0268, 0.0385, 0.0522, 0.0510], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0048, 0.0054, 0.0042, 0.0046, 0.0048, 0.0047, 0.0045], + device='cuda:0'), out_proj_covar=tensor([4.6844e-05, 4.5402e-05, 5.1607e-05, 3.9951e-05, 4.3041e-05, 4.5031e-05, + 4.3912e-05, 4.3838e-05], device='cuda:0') +2022-12-02 04:09:53,701 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.6609, 4.5069, 5.1063, 4.4362, 4.8479, 4.6800, 4.4213, 4.4759], + device='cuda:0'), covar=tensor([0.0622, 0.0671, 0.0547, 0.0481, 0.0779, 0.0551, 0.1162, 0.0500], + device='cuda:0'), in_proj_covar=tensor([0.0176, 0.0133, 0.0175, 0.0147, 0.0132, 0.0163, 0.0191, 0.0129], + device='cuda:0'), out_proj_covar=tensor([1.2783e-04, 8.7059e-05, 1.3506e-04, 1.0114e-04, 9.8387e-05, 1.1853e-04, + 
1.4530e-04, 8.8849e-05], device='cuda:0') +2022-12-02 04:10:09,294 INFO [train.py:876] Epoch 23, batch 1250, loss[loss=0.141, simple_loss=0.2198, pruned_loss=0.03114, over 4844.00 frames. ], tot_loss[loss=0.1247, simple_loss=0.1871, pruned_loss=0.03111, over 954848.38 frames. ], batch size: 41, lr: 7.70e-03, +2022-12-02 04:10:09,455 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1935, 2.3783, 2.6638, 4.0653, 3.6992, 3.4667, 3.1951, 4.2890], + device='cuda:0'), covar=tensor([0.0235, 0.1655, 0.2005, 0.0453, 0.0201, 0.0795, 0.0850, 0.0238], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0104, 0.0125, 0.0071, 0.0078, 0.0072, 0.0083, 0.0086], + device='cuda:0'), out_proj_covar=tensor([7.5651e-05, 1.1431e-04, 1.3379e-04, 8.2407e-05, 8.2051e-05, 8.1608e-05, + 9.2578e-05, 8.4869e-05], device='cuda:0') +2022-12-02 04:10:49,303 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.055e+02 1.541e+02 1.903e+02 2.297e+02 5.793e+02, threshold=3.806e+02, percent-clipped=5.0 +2022-12-02 04:10:57,803 INFO [train.py:876] Epoch 23, batch 1300, loss[loss=0.1377, simple_loss=0.196, pruned_loss=0.03975, over 4826.00 frames. ], tot_loss[loss=0.1247, simple_loss=0.1865, pruned_loss=0.03146, over 954765.96 frames. ], batch size: 45, lr: 7.69e-03, +2022-12-02 04:11:19,475 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3089, 1.9466, 2.2100, 1.6220, 1.8860, 2.5231, 1.7681, 2.2253], + device='cuda:0'), covar=tensor([0.1026, 0.0621, 0.1289, 0.0894, 0.1002, 0.0926, 0.1387, 0.0408], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0072, 0.0068, 0.0076, 0.0066, 0.0062, 0.0062, 0.0067], + device='cuda:0'), out_proj_covar=tensor([6.2754e-05, 6.4659e-05, 6.2984e-05, 6.8146e-05, 6.0679e-05, 5.6966e-05, + 5.7589e-05, 6.1176e-05], device='cuda:0') +2022-12-02 04:11:23,712 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.01 vs. limit=2.0 +2022-12-02 04:11:46,741 INFO [train.py:876] Epoch 23, batch 1350, loss[loss=0.1361, simple_loss=0.2, pruned_loss=0.03606, over 4832.00 frames. ], tot_loss[loss=0.124, simple_loss=0.1855, pruned_loss=0.03122, over 950735.43 frames. ], batch size: 34, lr: 7.69e-03, +2022-12-02 04:11:50,823 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1623, 2.2215, 2.7246, 2.2029, 2.5622, 2.2997, 2.5137, 2.6397], + device='cuda:0'), covar=tensor([0.0179, 0.1094, 0.0587, 0.1349, 0.0338, 0.0608, 0.1279, 0.0604], + device='cuda:0'), in_proj_covar=tensor([0.0073, 0.0110, 0.0091, 0.0123, 0.0084, 0.0089, 0.0124, 0.0102], + device='cuda:0'), out_proj_covar=tensor([8.9485e-05, 1.2950e-04, 1.0977e-04, 1.4345e-04, 9.7501e-05, 1.1056e-04, + 1.4302e-04, 1.1492e-04], device='cuda:0') +2022-12-02 04:12:12,498 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1996, 4.2314, 4.4660, 4.3455, 4.7213, 4.0637, 4.3933, 4.4330], + device='cuda:0'), covar=tensor([0.0308, 0.0266, 0.0207, 0.0182, 0.0202, 0.0309, 0.0291, 0.0381], + device='cuda:0'), in_proj_covar=tensor([0.0114, 0.0123, 0.0111, 0.0111, 0.0102, 0.0137, 0.0095, 0.0108], + device='cuda:0'), out_proj_covar=tensor([9.7101e-05, 1.0501e-04, 9.4644e-05, 9.4505e-05, 8.6668e-05, 1.1707e-04, + 8.3222e-05, 9.3624e-05], device='cuda:0') +2022-12-02 04:12:26,986 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.613e+01 1.579e+02 2.033e+02 2.438e+02 6.895e+02, threshold=4.065e+02, percent-clipped=8.0 +2022-12-02 04:12:35,873 INFO [train.py:876] Epoch 23, batch 1400, loss[loss=0.08453, simple_loss=0.1359, pruned_loss=0.0166, over 4288.00 frames. 
], tot_loss[loss=0.1237, simple_loss=0.185, pruned_loss=0.03118, over 944400.87 frames. ], batch size: 17, lr: 7.68e-03, +2022-12-02 04:12:48,660 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3408, 2.0895, 2.2895, 2.4451, 1.9298, 2.3382, 2.3345, 2.5068], + device='cuda:0'), covar=tensor([0.0383, 0.0506, 0.0328, 0.0324, 0.0655, 0.0466, 0.0511, 0.0288], + device='cuda:0'), in_proj_covar=tensor([0.0066, 0.0071, 0.0059, 0.0060, 0.0075, 0.0069, 0.0064, 0.0058], + device='cuda:0'), out_proj_covar=tensor([4.8619e-05, 5.2723e-05, 4.2829e-05, 4.3807e-05, 5.5283e-05, 5.0315e-05, + 4.7496e-05, 4.2411e-05], device='cuda:0') +2022-12-02 04:13:06,231 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-23.pt +2022-12-02 04:13:15,364 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 04:13:15,950 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 04:13:16,604 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:13:16,636 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 04:13:17,437 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 04:13:18,165 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 04:13:19,791 INFO [train.py:876] Epoch 24, batch 0, loss[loss=0.0714, simple_loss=0.1259, pruned_loss=0.008444, over 3835.00 frames. ], tot_loss[loss=0.0714, simple_loss=0.1259, pruned_loss=0.008444, over 3835.00 frames. ], batch size: 15, lr: 7.51e-03, +2022-12-02 04:13:19,793 INFO [train.py:901] Computing validation loss +2022-12-02 04:13:22,994 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3561, 2.9028, 3.2558, 3.4007, 2.8142, 2.9434, 1.6863, 3.3549], + device='cuda:0'), covar=tensor([0.0558, 0.1941, 0.0639, 0.0489, 0.0894, 0.1038, 0.1209, 0.0500], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0090, 0.0106, 0.0090, 0.0110, 0.0094, 0.0097, 0.0094], + device='cuda:0'), out_proj_covar=tensor([8.8156e-05, 9.3396e-05, 1.0723e-04, 9.1763e-05, 1.1119e-04, 9.8575e-05, + 9.9642e-05, 9.6795e-05], device='cuda:0') +2022-12-02 04:13:35,384 INFO [train.py:910] Epoch 24, validation: loss=0.234, simple_loss=0.2748, pruned_loss=0.09664, over 253132.00 frames. +2022-12-02 04:13:35,385 INFO [train.py:911] Maximum memory allocated so far is 7468MB +2022-12-02 04:13:41,984 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. limit=2.0 +2022-12-02 04:13:59,628 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=32966.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:14:02,910 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.57 vs. 
limit=5.0 +2022-12-02 04:14:11,447 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2980, 2.2314, 2.9334, 2.3674, 2.6343, 2.4304, 2.6835, 3.1430], + device='cuda:0'), covar=tensor([0.0184, 0.1220, 0.0619, 0.1407, 0.0347, 0.0460, 0.1503, 0.0479], + device='cuda:0'), in_proj_covar=tensor([0.0072, 0.0109, 0.0090, 0.0122, 0.0083, 0.0089, 0.0124, 0.0102], + device='cuda:0'), out_proj_covar=tensor([8.8744e-05, 1.2883e-04, 1.0944e-04, 1.4252e-04, 9.7458e-05, 1.1065e-04, + 1.4341e-04, 1.1492e-04], device='cuda:0') +2022-12-02 04:14:24,874 INFO [train.py:876] Epoch 24, batch 50, loss[loss=0.0816, simple_loss=0.135, pruned_loss=0.01411, over 4736.00 frames. ], tot_loss[loss=0.1225, simple_loss=0.1837, pruned_loss=0.03062, over 214162.58 frames. ], batch size: 23, lr: 7.51e-03, +2022-12-02 04:14:33,749 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.120e+01 1.426e+02 1.737e+02 2.442e+02 4.543e+02, threshold=3.474e+02, percent-clipped=5.0 +2022-12-02 04:14:45,494 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 04:14:59,081 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=33027.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:15:13,339 INFO [train.py:876] Epoch 24, batch 100, loss[loss=0.1154, simple_loss=0.1824, pruned_loss=0.02417, over 4826.00 frames. ], tot_loss[loss=0.1232, simple_loss=0.1849, pruned_loss=0.03072, over 380616.74 frames. ], batch size: 34, lr: 7.50e-03, +2022-12-02 04:15:27,905 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:15:39,792 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=33069.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:15:47,102 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9460, 1.5835, 1.0657, 1.9659, 1.9262, 1.6803, 1.7458, 1.6763], + device='cuda:0'), covar=tensor([0.0292, 0.0564, 0.0460, 0.0321, 0.0301, 0.0410, 0.0326, 0.0515], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0049, 0.0054, 0.0043, 0.0046, 0.0048, 0.0047, 0.0045], + device='cuda:0'), out_proj_covar=tensor([4.7335e-05, 4.5907e-05, 5.1629e-05, 4.0358e-05, 4.3259e-05, 4.5558e-05, + 4.4146e-05, 4.4288e-05], device='cuda:0') +2022-12-02 04:15:48,814 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 04:15:52,842 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1274, 1.9816, 2.1640, 2.3233, 1.7993, 2.2097, 2.2686, 2.2084], + device='cuda:0'), covar=tensor([0.0456, 0.0513, 0.0407, 0.0368, 0.0739, 0.0531, 0.0535, 0.0381], + device='cuda:0'), in_proj_covar=tensor([0.0067, 0.0072, 0.0060, 0.0061, 0.0077, 0.0071, 0.0066, 0.0059], + device='cuda:0'), out_proj_covar=tensor([4.9282e-05, 5.3573e-05, 4.3758e-05, 4.4422e-05, 5.6411e-05, 5.1987e-05, + 4.8967e-05, 4.3160e-05], device='cuda:0') +2022-12-02 04:16:02,382 INFO [train.py:876] Epoch 24, batch 150, loss[loss=0.1208, simple_loss=0.1937, pruned_loss=0.02393, over 4885.00 frames. ], tot_loss[loss=0.1242, simple_loss=0.1868, pruned_loss=0.0308, over 507425.42 frames. ], batch size: 44, lr: 7.50e-03, +2022-12-02 04:16:03,103 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.82 vs. limit=2.0 +2022-12-02 04:16:05,841 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. 
limit=2.0 +2022-12-02 04:16:11,069 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.743e+01 1.500e+02 1.694e+02 2.223e+02 3.962e+02, threshold=3.388e+02, percent-clipped=2.0 +2022-12-02 04:16:39,797 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=33130.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:16:51,188 INFO [train.py:876] Epoch 24, batch 200, loss[loss=0.1388, simple_loss=0.2026, pruned_loss=0.03749, over 4815.00 frames. ], tot_loss[loss=0.1225, simple_loss=0.1852, pruned_loss=0.02989, over 608012.86 frames. ], batch size: 42, lr: 7.49e-03, +2022-12-02 04:16:52,982 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 04:17:04,072 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=33155.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:17:27,044 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9182, 1.9454, 2.1128, 2.4953, 2.1018, 1.9902, 1.3303, 2.5470], + device='cuda:0'), covar=tensor([0.1165, 0.1511, 0.0994, 0.0602, 0.0904, 0.1690, 0.1224, 0.0828], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0091, 0.0107, 0.0090, 0.0111, 0.0094, 0.0097, 0.0095], + device='cuda:0'), out_proj_covar=tensor([8.8680e-05, 9.4435e-05, 1.0800e-04, 9.1681e-05, 1.1138e-04, 9.8709e-05, + 9.9884e-05, 9.7448e-05], device='cuda:0') +2022-12-02 04:17:35,537 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 04:17:39,343 INFO [train.py:876] Epoch 24, batch 250, loss[loss=0.144, simple_loss=0.2004, pruned_loss=0.04382, over 4880.00 frames. ], tot_loss[loss=0.1223, simple_loss=0.1846, pruned_loss=0.03004, over 684044.69 frames. ], batch size: 37, lr: 7.48e-03, +2022-12-02 04:17:48,318 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.419e+01 1.468e+02 1.773e+02 2.104e+02 3.458e+02, threshold=3.545e+02, percent-clipped=1.0 +2022-12-02 04:18:03,117 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=33216.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:18:12,998 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2516, 2.5077, 2.9313, 2.6233, 2.8551, 2.9097, 2.6553, 3.2382], + device='cuda:0'), covar=tensor([0.0184, 0.1063, 0.0617, 0.1283, 0.0276, 0.0343, 0.1472, 0.0450], + device='cuda:0'), in_proj_covar=tensor([0.0073, 0.0109, 0.0091, 0.0123, 0.0084, 0.0090, 0.0125, 0.0102], + device='cuda:0'), out_proj_covar=tensor([8.9249e-05, 1.2875e-04, 1.1031e-04, 1.4414e-04, 9.8002e-05, 1.1150e-04, + 1.4406e-04, 1.1571e-04], device='cuda:0') +2022-12-02 04:18:16,000 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8956, 3.6859, 3.6565, 4.0103, 3.2971, 2.5953, 4.1491, 1.9622], + device='cuda:0'), covar=tensor([0.0452, 0.0539, 0.0274, 0.0325, 0.1155, 0.2746, 0.0218, 0.3924], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0100, 0.0092, 0.0132, 0.0141, 0.0163, 0.0087, 0.0174], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:18:16,047 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2038, 3.4933, 3.9082, 3.7578, 2.6668, 3.8271, 3.7712, 2.9985], + device='cuda:0'), covar=tensor([0.4481, 0.0667, 0.0703, 0.0372, 0.1016, 0.1057, 0.0508, 0.1530], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0120, 0.0151, 0.0127, 0.0132, 0.0124, 0.0125, 0.0128], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + 
device='cuda:0') +2022-12-02 04:18:28,355 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.57 vs. limit=2.0 +2022-12-02 04:18:28,702 INFO [train.py:876] Epoch 24, batch 300, loss[loss=0.1238, simple_loss=0.1799, pruned_loss=0.03388, over 4857.00 frames. ], tot_loss[loss=0.1218, simple_loss=0.1838, pruned_loss=0.02987, over 742404.26 frames. ], batch size: 36, lr: 7.48e-03, +2022-12-02 04:18:38,545 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 04:18:42,668 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.2536, 4.7488, 4.6480, 4.9177, 4.2706, 4.2042, 4.9017, 4.4877], + device='cuda:0'), covar=tensor([0.0202, 0.0088, 0.0116, 0.0150, 0.0153, 0.0167, 0.0092, 0.0144], + device='cuda:0'), in_proj_covar=tensor([0.0057, 0.0052, 0.0057, 0.0044, 0.0055, 0.0057, 0.0052, 0.0052], + device='cuda:0'), out_proj_covar=tensor([5.1315e-05, 4.2356e-05, 4.9524e-05, 3.6461e-05, 4.7952e-05, 5.0832e-05, + 4.1321e-05, 4.3722e-05], device='cuda:0') +2022-12-02 04:18:51,966 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.37 vs. limit=2.0 +2022-12-02 04:19:17,866 INFO [train.py:876] Epoch 24, batch 350, loss[loss=0.1408, simple_loss=0.2067, pruned_loss=0.03745, over 4848.00 frames. ], tot_loss[loss=0.1219, simple_loss=0.1843, pruned_loss=0.02971, over 789636.39 frames. ], batch size: 49, lr: 7.47e-03, +2022-12-02 04:19:26,484 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.731e+01 1.520e+02 1.779e+02 2.217e+02 1.624e+03, threshold=3.558e+02, percent-clipped=4.0 +2022-12-02 04:19:47,304 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=33322.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:19:53,256 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2466, 2.9917, 3.2375, 2.7212, 2.1006, 1.8929, 3.3828, 1.5020], + device='cuda:0'), covar=tensor([0.0649, 0.0349, 0.0396, 0.0830, 0.1841, 0.3337, 0.0306, 0.3011], + device='cuda:0'), in_proj_covar=tensor([0.0118, 0.0098, 0.0092, 0.0132, 0.0141, 0.0164, 0.0086, 0.0175], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:19:58,369 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6603, 2.7730, 3.2278, 3.4704, 2.9889, 3.4327, 3.1677, 3.3041], + device='cuda:0'), covar=tensor([0.0208, 0.0389, 0.0277, 0.0218, 0.0226, 0.0275, 0.0308, 0.0318], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0121, 0.0128, 0.0121, 0.0101, 0.0125, 0.0126, 0.0138], + device='cuda:0'), out_proj_covar=tensor([8.2793e-05, 8.2302e-05, 8.5921e-05, 8.0571e-05, 6.6085e-05, 8.3644e-05, + 8.4377e-05, 9.4367e-05], device='cuda:0') +2022-12-02 04:20:07,148 INFO [train.py:876] Epoch 24, batch 400, loss[loss=0.177, simple_loss=0.2361, pruned_loss=0.05898, over 4836.00 frames. ], tot_loss[loss=0.1219, simple_loss=0.1842, pruned_loss=0.02979, over 824798.77 frames. 
], batch size: 47, lr: 7.47e-03, +2022-12-02 04:20:09,013 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7872, 3.6739, 4.0039, 3.7074, 4.0284, 3.1316, 3.7917, 3.9065], + device='cuda:0'), covar=tensor([0.0340, 0.0396, 0.0316, 0.0349, 0.0291, 0.0682, 0.0280, 0.0445], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0128, 0.0114, 0.0116, 0.0106, 0.0144, 0.0100, 0.0114], + device='cuda:0'), out_proj_covar=tensor([1.0011e-04, 1.0870e-04, 9.7498e-05, 9.8706e-05, 9.0435e-05, 1.2332e-04, + 8.7399e-05, 9.8884e-05], device='cuda:0') +2022-12-02 04:20:12,674 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 04:20:22,669 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8566, 3.0651, 3.3588, 3.3334, 2.7974, 3.2186, 1.9719, 3.3903], + device='cuda:0'), covar=tensor([0.1358, 0.1000, 0.0689, 0.0683, 0.0836, 0.0880, 0.0964, 0.0622], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0092, 0.0108, 0.0091, 0.0112, 0.0095, 0.0097, 0.0094], + device='cuda:0'), out_proj_covar=tensor([9.0015e-05, 9.5245e-05, 1.0871e-04, 9.2948e-05, 1.1258e-04, 9.9764e-05, + 1.0057e-04, 9.7239e-05], device='cuda:0') +2022-12-02 04:20:42,061 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:20:55,484 INFO [train.py:876] Epoch 24, batch 450, loss[loss=0.137, simple_loss=0.1972, pruned_loss=0.0384, over 4855.00 frames. ], tot_loss[loss=0.1223, simple_loss=0.1847, pruned_loss=0.02997, over 854762.75 frames. ], batch size: 36, lr: 7.46e-03, +2022-12-02 04:21:04,743 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.989e+01 1.492e+02 1.781e+02 2.203e+02 5.532e+02, threshold=3.563e+02, percent-clipped=6.0 +2022-12-02 04:21:28,265 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=33425.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:21:44,670 INFO [train.py:876] Epoch 24, batch 500, loss[loss=0.1183, simple_loss=0.1733, pruned_loss=0.03171, over 4892.00 frames. ], tot_loss[loss=0.1224, simple_loss=0.1847, pruned_loss=0.03005, over 876830.37 frames. ], batch size: 30, lr: 7.46e-03, +2022-12-02 04:22:14,168 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=33472.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:22:33,550 INFO [train.py:876] Epoch 24, batch 550, loss[loss=0.1379, simple_loss=0.2021, pruned_loss=0.03688, over 4804.00 frames. ], tot_loss[loss=0.1217, simple_loss=0.1835, pruned_loss=0.02993, over 894245.19 frames. 
], batch size: 42, lr: 7.45e-03, +2022-12-02 04:22:37,683 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5498, 2.3804, 2.3917, 1.8736, 2.3240, 2.7858, 2.4098, 2.0367], + device='cuda:0'), covar=tensor([0.0772, 0.0971, 0.1412, 0.1181, 0.1039, 0.0606, 0.0633, 0.1011], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0074, 0.0070, 0.0077, 0.0067, 0.0063, 0.0063, 0.0069], + device='cuda:0'), out_proj_covar=tensor([6.4438e-05, 6.6578e-05, 6.4210e-05, 6.9323e-05, 6.2390e-05, 5.8222e-05, + 5.8786e-05, 6.2716e-05], device='cuda:0') +2022-12-02 04:22:42,400 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.868e+01 1.527e+02 1.889e+02 2.202e+02 8.284e+02, threshold=3.779e+02, percent-clipped=6.0 +2022-12-02 04:22:52,243 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=33511.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:23:09,566 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8244, 1.6167, 0.9980, 1.8409, 1.7624, 1.5721, 1.5929, 1.5255], + device='cuda:0'), covar=tensor([0.0301, 0.0435, 0.0466, 0.0320, 0.0352, 0.0404, 0.0353, 0.0532], + device='cuda:0'), in_proj_covar=tensor([0.0048, 0.0046, 0.0051, 0.0040, 0.0044, 0.0046, 0.0045, 0.0042], + device='cuda:0'), out_proj_covar=tensor([4.5359e-05, 4.3094e-05, 4.9526e-05, 3.8094e-05, 4.1318e-05, 4.3264e-05, + 4.2229e-05, 4.1607e-05], device='cuda:0') +2022-12-02 04:23:13,506 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=33533.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:23:22,333 INFO [train.py:876] Epoch 24, batch 600, loss[loss=0.09567, simple_loss=0.1538, pruned_loss=0.01878, over 4913.00 frames. ], tot_loss[loss=0.1207, simple_loss=0.1823, pruned_loss=0.0296, over 905428.23 frames. ], batch size: 29, lr: 7.45e-03, +2022-12-02 04:24:11,471 INFO [train.py:876] Epoch 24, batch 650, loss[loss=0.1376, simple_loss=0.2084, pruned_loss=0.03339, over 4714.00 frames. ], tot_loss[loss=0.1215, simple_loss=0.1833, pruned_loss=0.02979, over 914317.98 frames. ], batch size: 63, lr: 7.44e-03, +2022-12-02 04:24:20,957 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.880e+01 1.652e+02 1.889e+02 2.407e+02 5.995e+02, threshold=3.777e+02, percent-clipped=2.0 +2022-12-02 04:24:27,216 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8400, 2.5354, 2.6496, 2.8444, 2.3028, 2.8276, 1.4556, 2.9161], + device='cuda:0'), covar=tensor([0.0696, 0.1366, 0.0994, 0.0785, 0.1156, 0.1107, 0.1524, 0.0551], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0092, 0.0109, 0.0091, 0.0112, 0.0095, 0.0098, 0.0095], + device='cuda:0'), out_proj_covar=tensor([9.0276e-05, 9.5508e-05, 1.1011e-04, 9.3315e-05, 1.1317e-04, 9.9784e-05, + 1.0095e-04, 9.8221e-05], device='cuda:0') +2022-12-02 04:24:35,026 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7962, 2.7686, 3.3566, 3.5539, 3.0305, 3.3928, 3.3106, 3.1876], + device='cuda:0'), covar=tensor([0.0226, 0.0432, 0.0292, 0.0223, 0.0254, 0.0417, 0.0281, 0.0381], + device='cuda:0'), in_proj_covar=tensor([0.0124, 0.0120, 0.0128, 0.0120, 0.0101, 0.0126, 0.0126, 0.0140], + device='cuda:0'), out_proj_covar=tensor([8.3204e-05, 8.1398e-05, 8.5555e-05, 8.0302e-05, 6.5862e-05, 8.4015e-05, + 8.4569e-05, 9.5418e-05], device='cuda:0') +2022-12-02 04:24:41,688 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=33622.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:25:01,135 INFO [train.py:876] Epoch 24, batch 700, loss[loss=0.1546, simple_loss=0.22, pruned_loss=0.04458, over 4797.00 frames. 
], tot_loss[loss=0.1208, simple_loss=0.1824, pruned_loss=0.02966, over 924448.06 frames. ], batch size: 58, lr: 7.44e-03, +2022-12-02 04:25:17,683 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6841, 2.5415, 2.5118, 2.7731, 2.2803, 2.5649, 1.4052, 2.8374], + device='cuda:0'), covar=tensor([0.1022, 0.1742, 0.1085, 0.0792, 0.1142, 0.1406, 0.1567, 0.0649], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0091, 0.0109, 0.0091, 0.0112, 0.0095, 0.0097, 0.0095], + device='cuda:0'), out_proj_covar=tensor([8.9873e-05, 9.4749e-05, 1.0948e-04, 9.3078e-05, 1.1266e-04, 9.9772e-05, + 1.0021e-04, 9.7543e-05], device='cuda:0') +2022-12-02 04:25:22,429 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2323, 2.0730, 2.0784, 2.3785, 2.0451, 2.0207, 1.3021, 2.6285], + device='cuda:0'), covar=tensor([0.0983, 0.1346, 0.1011, 0.0693, 0.1086, 0.1639, 0.1444, 0.0653], + device='cuda:0'), in_proj_covar=tensor([0.0085, 0.0091, 0.0109, 0.0091, 0.0112, 0.0095, 0.0097, 0.0095], + device='cuda:0'), out_proj_covar=tensor([8.9945e-05, 9.4885e-05, 1.0967e-04, 9.3256e-05, 1.1277e-04, 9.9987e-05, + 1.0039e-04, 9.7657e-05], device='cuda:0') +2022-12-02 04:25:27,821 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=33670.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:25:28,933 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3126, 3.9412, 3.7506, 4.7026, 3.6627, 3.0518, 4.7925, 2.0044], + device='cuda:0'), covar=tensor([0.0629, 0.0512, 0.0427, 0.0195, 0.0960, 0.1990, 0.0132, 0.3906], + device='cuda:0'), in_proj_covar=tensor([0.0121, 0.0100, 0.0094, 0.0136, 0.0145, 0.0168, 0.0088, 0.0181], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:25:34,692 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=33677.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:25:48,886 INFO [train.py:876] Epoch 24, batch 750, loss[loss=0.1248, simple_loss=0.1778, pruned_loss=0.03594, over 4920.00 frames. ], tot_loss[loss=0.1217, simple_loss=0.1835, pruned_loss=0.02999, over 930911.58 frames. ], batch size: 31, lr: 7.43e-03, +2022-12-02 04:25:57,614 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.100e+01 1.554e+02 1.800e+02 2.059e+02 4.459e+02, threshold=3.600e+02, percent-clipped=3.0 +2022-12-02 04:26:20,803 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=33725.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:26:33,604 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=33738.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 04:26:37,198 INFO [train.py:876] Epoch 24, batch 800, loss[loss=0.1154, simple_loss=0.1853, pruned_loss=0.02275, over 4836.00 frames. ], tot_loss[loss=0.1219, simple_loss=0.1839, pruned_loss=0.03, over 934900.50 frames. ], batch size: 34, lr: 7.42e-03, +2022-12-02 04:27:07,374 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=33773.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:27:13,859 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.31 vs. limit=2.0 +2022-12-02 04:27:26,353 INFO [train.py:876] Epoch 24, batch 850, loss[loss=0.07059, simple_loss=0.1206, pruned_loss=0.01028, over 4694.00 frames. ], tot_loss[loss=0.1222, simple_loss=0.1836, pruned_loss=0.03039, over 938816.07 frames. 
], batch size: 21, lr: 7.42e-03, +2022-12-02 04:27:35,396 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 6.552e+01 1.501e+02 1.915e+02 2.250e+02 4.043e+02, threshold=3.830e+02, percent-clipped=1.0 +2022-12-02 04:27:40,303 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.8933, 3.2654, 3.6588, 3.5639, 2.7467, 3.7627, 3.5353, 2.6919], + device='cuda:0'), covar=tensor([0.4751, 0.0930, 0.0764, 0.0416, 0.1089, 0.0744, 0.0552, 0.1317], + device='cuda:0'), in_proj_covar=tensor([0.0183, 0.0119, 0.0152, 0.0129, 0.0133, 0.0123, 0.0125, 0.0127], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 04:27:45,048 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=33811.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:28:01,196 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=33828.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:28:02,385 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7106, 2.4404, 2.4633, 2.7080, 2.1966, 2.5877, 1.4439, 2.7329], + device='cuda:0'), covar=tensor([0.0666, 0.1335, 0.1090, 0.0775, 0.1165, 0.2212, 0.1421, 0.0633], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0091, 0.0108, 0.0091, 0.0111, 0.0097, 0.0097, 0.0094], + device='cuda:0'), out_proj_covar=tensor([8.9529e-05, 9.4286e-05, 1.0918e-04, 9.3090e-05, 1.1228e-04, 1.0119e-04, + 9.9593e-05, 9.7254e-05], device='cuda:0') +2022-12-02 04:28:14,605 INFO [train.py:876] Epoch 24, batch 900, loss[loss=0.127, simple_loss=0.1974, pruned_loss=0.02829, over 4883.00 frames. ], tot_loss[loss=0.122, simple_loss=0.1841, pruned_loss=0.02995, over 943363.27 frames. ], batch size: 38, lr: 7.41e-03, +2022-12-02 04:28:31,620 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=33859.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:29:00,449 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1052, 3.1101, 2.9901, 2.5736, 2.1502, 2.1241, 3.1876, 1.4670], + device='cuda:0'), covar=tensor([0.0715, 0.0433, 0.0564, 0.0927, 0.1826, 0.2744, 0.0426, 0.3203], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0103, 0.0096, 0.0136, 0.0145, 0.0169, 0.0089, 0.0181], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:29:03,277 INFO [train.py:876] Epoch 24, batch 950, loss[loss=0.1543, simple_loss=0.2113, pruned_loss=0.04858, over 4857.00 frames. ], tot_loss[loss=0.1214, simple_loss=0.1837, pruned_loss=0.02954, over 946786.53 frames. ], batch size: 40, lr: 7.41e-03, +2022-12-02 04:29:12,357 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.058e+02 1.567e+02 1.864e+02 2.387e+02 6.096e+02, threshold=3.727e+02, percent-clipped=2.0 +2022-12-02 04:29:22,400 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9981, 1.9440, 2.3623, 1.7924, 1.9248, 2.5182, 1.8706, 2.0157], + device='cuda:0'), covar=tensor([0.1374, 0.0664, 0.0831, 0.0736, 0.0756, 0.0695, 0.0961, 0.0687], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0073, 0.0068, 0.0077, 0.0066, 0.0061, 0.0062, 0.0067], + device='cuda:0'), out_proj_covar=tensor([6.2781e-05, 6.5588e-05, 6.2838e-05, 6.8780e-05, 6.0699e-05, 5.7103e-05, + 5.7818e-05, 6.1110e-05], device='cuda:0') +2022-12-02 04:29:40,502 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. 
limit=2.0 +2022-12-02 04:29:49,829 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=33939.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:29:52,541 INFO [train.py:876] Epoch 24, batch 1000, loss[loss=0.09426, simple_loss=0.1579, pruned_loss=0.01529, over 4913.00 frames. ], tot_loss[loss=0.1203, simple_loss=0.1828, pruned_loss=0.02887, over 947230.27 frames. ], batch size: 29, lr: 7.40e-03, +2022-12-02 04:30:19,366 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.1895, 1.1673, 1.5557, 1.1605, 0.9278, 1.1508, 1.5451, 1.0055], + device='cuda:0'), covar=tensor([0.0208, 0.0160, 0.0125, 0.0169, 0.0247, 0.0161, 0.0129, 0.0187], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0034, 0.0034, 0.0036, 0.0039, 0.0036, 0.0038, 0.0034], + device='cuda:0'), out_proj_covar=tensor([3.0044e-05, 2.6905e-05, 2.7655e-05, 2.8330e-05, 3.1358e-05, 2.8119e-05, + 3.0842e-05, 2.6154e-05], device='cuda:0') +2022-12-02 04:30:39,983 INFO [train.py:876] Epoch 24, batch 1050, loss[loss=0.1546, simple_loss=0.2151, pruned_loss=0.04703, over 4678.00 frames. ], tot_loss[loss=0.122, simple_loss=0.1848, pruned_loss=0.02955, over 947907.27 frames. ], batch size: 63, lr: 7.40e-03, +2022-12-02 04:30:47,714 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-34000.pt +2022-12-02 04:30:50,154 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34000.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:30:50,900 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.170e+02 1.580e+02 1.940e+02 2.294e+02 5.261e+02, threshold=3.881e+02, percent-clipped=2.0 +2022-12-02 04:31:22,009 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34033.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 04:31:30,746 INFO [train.py:876] Epoch 24, batch 1100, loss[loss=0.1359, simple_loss=0.1965, pruned_loss=0.03768, over 4823.00 frames. ], tot_loss[loss=0.1215, simple_loss=0.1846, pruned_loss=0.02922, over 948715.21 frames. ], batch size: 34, lr: 7.39e-03, +2022-12-02 04:31:32,936 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34044.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:31:57,408 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3450, 3.3591, 3.5123, 3.4383, 2.7255, 2.2938, 3.7279, 1.8713], + device='cuda:0'), covar=tensor([0.0723, 0.0546, 0.0467, 0.0577, 0.1778, 0.3398, 0.0346, 0.3731], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0104, 0.0096, 0.0137, 0.0145, 0.0171, 0.0090, 0.0184], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:32:09,045 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2989, 1.3485, 1.5027, 1.2973, 1.1235, 1.2726, 1.7538, 1.4439], + device='cuda:0'), covar=tensor([0.0272, 0.0227, 0.0214, 0.0223, 0.0412, 0.0147, 0.0133, 0.0185], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0034, 0.0034, 0.0036, 0.0039, 0.0036, 0.0038, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.9838e-05, 2.6905e-05, 2.7275e-05, 2.8566e-05, 3.1528e-05, 2.8153e-05, + 3.0470e-05, 2.6158e-05], device='cuda:0') +2022-12-02 04:32:19,572 INFO [train.py:876] Epoch 24, batch 1150, loss[loss=0.1659, simple_loss=0.2179, pruned_loss=0.05692, over 4872.00 frames. ], tot_loss[loss=0.121, simple_loss=0.1839, pruned_loss=0.02903, over 949226.46 frames. 
], batch size: 37, lr: 7.39e-03, +2022-12-02 04:32:21,030 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.89 vs. limit=2.0 +2022-12-02 04:32:28,166 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.556e+01 1.529e+02 1.914e+02 2.357e+02 3.401e+02, threshold=3.828e+02, percent-clipped=0.0 +2022-12-02 04:32:31,994 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34105.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:32:51,886 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.47 vs. limit=2.0 +2022-12-02 04:32:54,251 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=34128.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:32:58,483 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.99 vs. limit=5.0 +2022-12-02 04:33:07,763 INFO [train.py:876] Epoch 24, batch 1200, loss[loss=0.1516, simple_loss=0.2175, pruned_loss=0.04289, over 3952.00 frames. ], tot_loss[loss=0.1215, simple_loss=0.1842, pruned_loss=0.02938, over 949119.35 frames. ], batch size: 72, lr: 7.38e-03, +2022-12-02 04:33:10,835 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34145.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:33:41,056 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=34176.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:33:56,378 INFO [train.py:876] Epoch 24, batch 1250, loss[loss=0.1083, simple_loss=0.1733, pruned_loss=0.0216, over 4883.00 frames. ], tot_loss[loss=0.1216, simple_loss=0.1844, pruned_loss=0.02946, over 949087.66 frames. ], batch size: 29, lr: 7.38e-03, +2022-12-02 04:34:05,673 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.100e+02 1.617e+02 1.909e+02 2.222e+02 4.360e+02, threshold=3.818e+02, percent-clipped=1.0 +2022-12-02 04:34:09,789 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5843, 1.7598, 1.9351, 3.4635, 2.5681, 3.0569, 3.2258, 3.9029], + device='cuda:0'), covar=tensor([0.0365, 0.2092, 0.2786, 0.0424, 0.0642, 0.0480, 0.0582, 0.0301], + device='cuda:0'), in_proj_covar=tensor([0.0072, 0.0105, 0.0125, 0.0071, 0.0080, 0.0070, 0.0084, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.6974e-05, 1.1539e-04, 1.3359e-04, 8.3069e-05, 8.3328e-05, 7.9774e-05, + 9.2942e-05, 8.3688e-05], device='cuda:0') +2022-12-02 04:34:10,854 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34206.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:34:45,132 INFO [train.py:876] Epoch 24, batch 1300, loss[loss=0.1411, simple_loss=0.2055, pruned_loss=0.0384, over 4819.00 frames. ], tot_loss[loss=0.1224, simple_loss=0.1857, pruned_loss=0.02959, over 952793.38 frames. ], batch size: 45, lr: 7.37e-03, +2022-12-02 04:35:32,909 INFO [train.py:876] Epoch 24, batch 1350, loss[loss=0.1186, simple_loss=0.1853, pruned_loss=0.026, over 4883.00 frames. ], tot_loss[loss=0.1238, simple_loss=0.1869, pruned_loss=0.03032, over 953057.24 frames. 
], batch size: 44, lr: 7.37e-03, +2022-12-02 04:35:35,887 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34295.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:35:41,877 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.415e+01 1.534e+02 1.879e+02 2.307e+02 4.322e+02, threshold=3.757e+02, percent-clipped=2.0 +2022-12-02 04:36:09,487 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9922, 3.4577, 3.3815, 3.0245, 3.4550, 3.4804, 3.2827, 3.6610], + device='cuda:0'), covar=tensor([0.1986, 0.0657, 0.0811, 0.0778, 0.0775, 0.0561, 0.0736, 0.0575], + device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0173, 0.0196, 0.0165, 0.0187, 0.0174, 0.0175, 0.0179], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 04:36:12,959 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=34333.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:36:21,970 INFO [train.py:876] Epoch 24, batch 1400, loss[loss=0.1647, simple_loss=0.2318, pruned_loss=0.04881, over 4116.00 frames. ], tot_loss[loss=0.1241, simple_loss=0.1873, pruned_loss=0.03044, over 946026.23 frames. ], batch size: 72, lr: 7.36e-03, +2022-12-02 04:36:36,979 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4727, 3.6546, 3.8110, 3.4269, 3.2853, 3.7460, 3.8285, 3.8096], + device='cuda:0'), covar=tensor([0.1064, 0.0372, 0.0385, 0.0455, 0.0412, 0.0356, 0.0291, 0.0449], + device='cuda:0'), in_proj_covar=tensor([0.0243, 0.0169, 0.0189, 0.0180, 0.0179, 0.0186, 0.0167, 0.0198], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 04:36:51,486 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-24.pt +2022-12-02 04:36:54,009 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 04:36:54,928 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 04:36:55,224 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:36:55,260 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 04:36:56,421 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 04:36:56,739 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 04:36:58,325 INFO [train.py:876] Epoch 25, batch 0, loss[loss=0.1001, simple_loss=0.163, pruned_loss=0.01854, over 4914.00 frames. ], tot_loss[loss=0.1001, simple_loss=0.163, pruned_loss=0.01854, over 4914.00 frames. 
], batch size: 30, lr: 7.21e-03, +2022-12-02 04:36:58,326 INFO [train.py:901] Computing validation loss +2022-12-02 04:37:08,287 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.9495, 4.7684, 5.3820, 4.8661, 4.9977, 4.9909, 4.8312, 4.6946], + device='cuda:0'), covar=tensor([0.0451, 0.0403, 0.0353, 0.0510, 0.0826, 0.0425, 0.0923, 0.0431], + device='cuda:0'), in_proj_covar=tensor([0.0182, 0.0130, 0.0177, 0.0152, 0.0134, 0.0164, 0.0194, 0.0133], + device='cuda:0'), out_proj_covar=tensor([1.3179e-04, 8.4371e-05, 1.3627e-04, 1.0513e-04, 9.9772e-05, 1.1926e-04, + 1.4670e-04, 9.0796e-05], device='cuda:0') +2022-12-02 04:37:14,020 INFO [train.py:910] Epoch 25, validation: loss=0.2341, simple_loss=0.276, pruned_loss=0.09608, over 253132.00 frames. +2022-12-02 04:37:14,020 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 04:37:14,110 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5980, 4.5110, 5.0362, 4.4495, 4.7360, 4.6349, 4.3898, 4.3667], + device='cuda:0'), covar=tensor([0.0707, 0.0616, 0.0528, 0.0562, 0.0795, 0.0532, 0.1272, 0.0486], + device='cuda:0'), in_proj_covar=tensor([0.0182, 0.0130, 0.0177, 0.0152, 0.0134, 0.0164, 0.0194, 0.0133], + device='cuda:0'), out_proj_covar=tensor([1.3179e-04, 8.4371e-05, 1.3627e-04, 1.0513e-04, 9.9772e-05, 1.1926e-04, + 1.4670e-04, 9.0796e-05], device='cuda:0') +2022-12-02 04:37:20,777 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=34381.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:37:35,155 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.69 vs. limit=2.0 +2022-12-02 04:37:39,976 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34400.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:37:40,733 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.002e+02 1.513e+02 1.833e+02 2.416e+02 6.362e+02, threshold=3.666e+02, percent-clipped=3.0 +2022-12-02 04:38:03,242 INFO [train.py:876] Epoch 25, batch 50, loss[loss=0.1132, simple_loss=0.1683, pruned_loss=0.02904, over 4935.00 frames. ], tot_loss[loss=0.1176, simple_loss=0.1805, pruned_loss=0.0273, over 216170.34 frames. ], batch size: 32, lr: 7.20e-03, +2022-12-02 04:38:28,268 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 04:38:53,039 INFO [train.py:876] Epoch 25, batch 100, loss[loss=0.09923, simple_loss=0.1656, pruned_loss=0.01643, over 4834.00 frames. ], tot_loss[loss=0.1174, simple_loss=0.1795, pruned_loss=0.0277, over 380741.32 frames. ], batch size: 34, lr: 7.20e-03, +2022-12-02 04:39:10,505 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:39:19,335 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.997e+01 1.362e+02 1.705e+02 2.085e+02 5.460e+02, threshold=3.410e+02, percent-clipped=2.0 +2022-12-02 04:39:19,427 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34501.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:39:35,222 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 04:39:42,078 INFO [train.py:876] Epoch 25, batch 150, loss[loss=0.1265, simple_loss=0.1955, pruned_loss=0.02879, over 4880.00 frames. ], tot_loss[loss=0.1169, simple_loss=0.1798, pruned_loss=0.02699, over 509886.33 frames. 
], batch size: 37, lr: 7.19e-03, +2022-12-02 04:40:26,392 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.53 vs. limit=5.0 +2022-12-02 04:40:30,635 INFO [train.py:876] Epoch 25, batch 200, loss[loss=0.08477, simple_loss=0.1383, pruned_loss=0.01564, over 4815.00 frames. ], tot_loss[loss=0.1176, simple_loss=0.1803, pruned_loss=0.02746, over 607907.24 frames. ], batch size: 25, lr: 7.19e-03, +2022-12-02 04:40:49,039 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34593.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:40:51,114 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=34595.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:40:57,058 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.311e+01 1.453e+02 1.762e+02 2.248e+02 4.685e+02, threshold=3.524e+02, percent-clipped=3.0 +2022-12-02 04:41:03,310 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9599, 3.9405, 4.0996, 3.9551, 4.3277, 3.8743, 4.2341, 3.7046], + device='cuda:0'), covar=tensor([0.0350, 0.0374, 0.0313, 0.0281, 0.0338, 0.0397, 0.0205, 0.0975], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0134, 0.0118, 0.0121, 0.0112, 0.0151, 0.0101, 0.0119], + device='cuda:0'), out_proj_covar=tensor([1.0460e-04, 1.1389e-04, 1.0092e-04, 1.0272e-04, 9.5458e-05, 1.2924e-04, + 8.9080e-05, 1.0320e-04], device='cuda:0') +2022-12-02 04:41:17,385 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34622.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:41:19,139 INFO [train.py:876] Epoch 25, batch 250, loss[loss=0.1001, simple_loss=0.1694, pruned_loss=0.01542, over 4902.00 frames. ], tot_loss[loss=0.1181, simple_loss=0.181, pruned_loss=0.02758, over 682776.31 frames. ], batch size: 30, lr: 7.18e-03, +2022-12-02 04:41:21,327 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34626.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:41:21,765 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.91 vs. limit=2.0 +2022-12-02 04:41:24,792 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 04:41:37,520 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=34643.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:41:48,443 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34654.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:42:07,830 INFO [train.py:876] Epoch 25, batch 300, loss[loss=0.1248, simple_loss=0.1866, pruned_loss=0.03143, over 4860.00 frames. ], tot_loss[loss=0.1193, simple_loss=0.1818, pruned_loss=0.02836, over 744991.88 frames. ], batch size: 36, lr: 7.18e-03, +2022-12-02 04:42:17,095 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34683.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 04:42:20,933 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34687.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:42:23,554 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 04:42:33,441 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=34700.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:42:34,330 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.646e+01 1.538e+02 1.777e+02 2.163e+02 5.282e+02, threshold=3.554e+02, percent-clipped=3.0 +2022-12-02 04:42:42,475 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0 +2022-12-02 04:42:43,103 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=34710.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:42:57,147 INFO [train.py:876] Epoch 25, batch 350, loss[loss=0.1215, simple_loss=0.1828, pruned_loss=0.03015, over 4864.00 frames. ], tot_loss[loss=0.1197, simple_loss=0.182, pruned_loss=0.02866, over 790359.80 frames. ], batch size: 39, lr: 7.17e-03, +2022-12-02 04:43:20,566 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=34748.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:43:43,184 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=34771.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 04:43:45,798 INFO [train.py:876] Epoch 25, batch 400, loss[loss=0.08872, simple_loss=0.1321, pruned_loss=0.02266, over 4707.00 frames. ], tot_loss[loss=0.1189, simple_loss=0.1811, pruned_loss=0.0284, over 827116.31 frames. ], batch size: 23, lr: 7.17e-03, +2022-12-02 04:44:03,296 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 04:44:12,056 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.195e+01 1.558e+02 1.847e+02 2.236e+02 4.843e+02, threshold=3.694e+02, percent-clipped=4.0 +2022-12-02 04:44:12,218 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=34801.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:44:30,589 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 04:44:34,429 INFO [train.py:876] Epoch 25, batch 450, loss[loss=0.1191, simple_loss=0.1876, pruned_loss=0.02533, over 4794.00 frames. ], tot_loss[loss=0.1189, simple_loss=0.1808, pruned_loss=0.02846, over 855758.57 frames. ], batch size: 32, lr: 7.16e-03, +2022-12-02 04:44:56,560 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.63 vs. limit=2.0 +2022-12-02 04:44:58,885 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=34849.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:45:01,216 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.93 vs. 
limit=2.0 +2022-12-02 04:45:01,812 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5753, 3.0527, 3.3514, 3.1801, 3.5415, 3.5823, 3.1538, 4.1172], + device='cuda:0'), covar=tensor([0.0166, 0.0998, 0.0521, 0.1203, 0.0195, 0.0448, 0.1379, 0.0274], + device='cuda:0'), in_proj_covar=tensor([0.0075, 0.0110, 0.0093, 0.0124, 0.0084, 0.0092, 0.0126, 0.0106], + device='cuda:0'), out_proj_covar=tensor([9.1771e-05, 1.3030e-04, 1.1265e-04, 1.4546e-04, 9.8329e-05, 1.1392e-04, + 1.4598e-04, 1.1976e-04], device='cuda:0') +2022-12-02 04:45:06,598 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5513, 2.3292, 2.3734, 2.5782, 1.9337, 2.5189, 2.2602, 2.7796], + device='cuda:0'), covar=tensor([0.0444, 0.0554, 0.0382, 0.0316, 0.0800, 0.0530, 0.0600, 0.0277], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0075, 0.0063, 0.0063, 0.0080, 0.0073, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.2151e-05, 5.5198e-05, 4.5394e-05, 4.5453e-05, 5.8304e-05, 5.3675e-05, + 5.1765e-05, 4.3577e-05], device='cuda:0') +2022-12-02 04:45:15,949 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.69 vs. limit=2.0 +2022-12-02 04:45:22,921 INFO [train.py:876] Epoch 25, batch 500, loss[loss=0.1253, simple_loss=0.1891, pruned_loss=0.03075, over 4870.00 frames. ], tot_loss[loss=0.1186, simple_loss=0.1807, pruned_loss=0.02826, over 878057.94 frames. ], batch size: 39, lr: 7.16e-03, +2022-12-02 04:45:49,139 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.559e+01 1.475e+02 1.749e+02 2.304e+02 3.817e+02, threshold=3.498e+02, percent-clipped=1.0 +2022-12-02 04:45:53,110 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.1119, 4.7660, 4.6187, 4.9533, 4.2134, 4.1612, 4.8379, 4.4831], + device='cuda:0'), covar=tensor([0.0262, 0.0102, 0.0150, 0.0207, 0.0189, 0.0178, 0.0122, 0.0165], + device='cuda:0'), in_proj_covar=tensor([0.0059, 0.0053, 0.0058, 0.0046, 0.0057, 0.0058, 0.0054, 0.0054], + device='cuda:0'), out_proj_covar=tensor([5.2697e-05, 4.2414e-05, 5.0795e-05, 3.7900e-05, 4.8925e-05, 5.1276e-05, + 4.2840e-05, 4.4834e-05], device='cuda:0') +2022-12-02 04:46:04,727 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0353, 3.2610, 3.3807, 2.9739, 2.9359, 2.8778, 3.2843, 2.9172], + device='cuda:0'), covar=tensor([0.1324, 0.0468, 0.0505, 0.0571, 0.0607, 0.0889, 0.0438, 0.1377], + device='cuda:0'), in_proj_covar=tensor([0.0248, 0.0174, 0.0196, 0.0186, 0.0185, 0.0190, 0.0172, 0.0202], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 04:46:11,138 INFO [train.py:876] Epoch 25, batch 550, loss[loss=0.1244, simple_loss=0.1854, pruned_loss=0.03166, over 4883.00 frames. ], tot_loss[loss=0.1188, simple_loss=0.181, pruned_loss=0.02831, over 896619.94 frames. ], batch size: 38, lr: 7.15e-03, +2022-12-02 04:46:35,540 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34949.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:46:59,761 INFO [train.py:876] Epoch 25, batch 600, loss[loss=0.1206, simple_loss=0.1885, pruned_loss=0.02638, over 4851.00 frames. ], tot_loss[loss=0.1189, simple_loss=0.181, pruned_loss=0.02837, over 907194.32 frames. 
], batch size: 40, lr: 7.15e-03, +2022-12-02 04:47:03,611 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34978.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 04:47:07,418 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=34982.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:47:27,115 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.684e+01 1.437e+02 1.798e+02 2.201e+02 5.416e+02, threshold=3.595e+02, percent-clipped=7.0 +2022-12-02 04:47:31,699 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.81 vs. limit=5.0 +2022-12-02 04:47:49,699 INFO [train.py:876] Epoch 25, batch 650, loss[loss=0.1037, simple_loss=0.1629, pruned_loss=0.02222, over 4722.00 frames. ], tot_loss[loss=0.1181, simple_loss=0.1802, pruned_loss=0.02803, over 916342.35 frames. ], batch size: 27, lr: 7.14e-03, +2022-12-02 04:47:57,247 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.94 vs. limit=2.0 +2022-12-02 04:48:30,512 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=35066.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:48:38,296 INFO [train.py:876] Epoch 25, batch 700, loss[loss=0.08762, simple_loss=0.1408, pruned_loss=0.01723, over 4823.00 frames. ], tot_loss[loss=0.1185, simple_loss=0.1808, pruned_loss=0.02815, over 925362.53 frames. ], batch size: 25, lr: 7.14e-03, +2022-12-02 04:48:41,529 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7368, 2.6420, 2.7167, 2.7067, 2.1382, 2.7321, 2.3201, 3.0088], + device='cuda:0'), covar=tensor([0.0350, 0.0356, 0.0313, 0.0319, 0.0640, 0.0470, 0.0619, 0.0219], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0074, 0.0063, 0.0063, 0.0080, 0.0074, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.2176e-05, 5.4881e-05, 4.5748e-05, 4.5581e-05, 5.8614e-05, 5.4037e-05, + 5.2049e-05, 4.3737e-05], device='cuda:0') +2022-12-02 04:48:44,564 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.2363, 1.1900, 1.5473, 1.1060, 1.0225, 1.2640, 1.4354, 1.2148], + device='cuda:0'), covar=tensor([0.0233, 0.0219, 0.0285, 0.0280, 0.0233, 0.0195, 0.0154, 0.0217], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0035, 0.0035, 0.0038, 0.0040, 0.0037, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.1166e-05, 2.7453e-05, 2.8279e-05, 3.0258e-05, 3.2502e-05, 2.8683e-05, + 3.1444e-05, 2.6986e-05], device='cuda:0') +2022-12-02 04:48:45,402 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5705, 1.3462, 0.8140, 1.5610, 1.5396, 1.4576, 1.4081, 1.5346], + device='cuda:0'), covar=tensor([0.0289, 0.0507, 0.0397, 0.0282, 0.0291, 0.0394, 0.0415, 0.0505], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0046, 0.0051, 0.0041, 0.0044, 0.0046, 0.0046, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.6910e-05, 4.3844e-05, 4.9190e-05, 3.8692e-05, 4.1483e-05, 4.3676e-05, + 4.3351e-05, 4.2080e-05], device='cuda:0') +2022-12-02 04:48:56,108 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35092.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:49:04,824 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.387e+02 1.635e+02 1.973e+02 9.530e+02, threshold=3.269e+02, percent-clipped=1.0 +2022-12-02 04:49:13,015 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1872, 3.2997, 3.3609, 2.7804, 2.6642, 3.4111, 2.9141, 3.5789], + device='cuda:0'), covar=tensor([0.0279, 0.0350, 0.0272, 0.0322, 0.0577, 0.0450, 0.0474, 0.0182], + device='cuda:0'), 
in_proj_covar=tensor([0.0071, 0.0074, 0.0063, 0.0063, 0.0080, 0.0074, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.2055e-05, 5.4743e-05, 4.5704e-05, 4.5580e-05, 5.8487e-05, 5.4033e-05, + 5.2068e-05, 4.3750e-05], device='cuda:0') +2022-12-02 04:49:14,067 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7477, 2.7749, 2.7379, 2.6551, 2.1765, 2.7681, 2.5116, 3.0724], + device='cuda:0'), covar=tensor([0.0386, 0.0396, 0.0323, 0.0318, 0.0648, 0.0498, 0.0571, 0.0234], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0074, 0.0063, 0.0063, 0.0080, 0.0074, 0.0071, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.2048e-05, 5.4737e-05, 4.5699e-05, 4.5580e-05, 5.8484e-05, 5.4028e-05, + 5.2072e-05, 4.3745e-05], device='cuda:0') +2022-12-02 04:49:27,290 INFO [train.py:876] Epoch 25, batch 750, loss[loss=0.1109, simple_loss=0.1787, pruned_loss=0.0216, over 4824.00 frames. ], tot_loss[loss=0.1184, simple_loss=0.1807, pruned_loss=0.02805, over 931194.44 frames. ], batch size: 45, lr: 7.13e-03, +2022-12-02 04:49:55,548 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35153.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:50:13,267 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4097, 2.9815, 3.2290, 2.9909, 2.1861, 1.9605, 3.3770, 1.6670], + device='cuda:0'), covar=tensor([0.0493, 0.0400, 0.0435, 0.0735, 0.1874, 0.3721, 0.0305, 0.2967], + device='cuda:0'), in_proj_covar=tensor([0.0117, 0.0100, 0.0095, 0.0131, 0.0140, 0.0165, 0.0085, 0.0177], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 04:50:15,034 INFO [train.py:876] Epoch 25, batch 800, loss[loss=0.1187, simple_loss=0.1826, pruned_loss=0.02737, over 4847.00 frames. ], tot_loss[loss=0.1199, simple_loss=0.1823, pruned_loss=0.0287, over 936284.89 frames. ], batch size: 41, lr: 7.13e-03, +2022-12-02 04:50:41,422 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.807e+01 1.510e+02 1.825e+02 2.332e+02 5.623e+02, threshold=3.650e+02, percent-clipped=3.0 +2022-12-02 04:50:43,601 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5717, 1.5883, 0.7813, 1.6680, 1.6312, 1.5498, 1.6260, 1.5891], + device='cuda:0'), covar=tensor([0.0311, 0.0328, 0.0429, 0.0369, 0.0264, 0.0357, 0.0268, 0.0546], + device='cuda:0'), in_proj_covar=tensor([0.0049, 0.0046, 0.0051, 0.0041, 0.0044, 0.0046, 0.0046, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.6630e-05, 4.3716e-05, 4.9098e-05, 3.8686e-05, 4.1545e-05, 4.3657e-05, + 4.3158e-05, 4.2037e-05], device='cuda:0') +2022-12-02 04:51:03,638 INFO [train.py:876] Epoch 25, batch 850, loss[loss=0.1194, simple_loss=0.1811, pruned_loss=0.02881, over 4859.00 frames. ], tot_loss[loss=0.1199, simple_loss=0.1824, pruned_loss=0.02871, over 940863.30 frames. 
], batch size: 36, lr: 7.12e-03, +2022-12-02 04:51:27,530 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=35249.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:51:42,968 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6708, 4.1112, 4.2809, 4.0280, 3.6925, 3.9861, 4.1249, 4.3435], + device='cuda:0'), covar=tensor([0.1188, 0.0268, 0.0299, 0.0324, 0.0408, 0.0381, 0.0276, 0.0330], + device='cuda:0'), in_proj_covar=tensor([0.0253, 0.0175, 0.0199, 0.0187, 0.0187, 0.0194, 0.0172, 0.0202], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 04:51:51,095 INFO [train.py:876] Epoch 25, batch 900, loss[loss=0.1409, simple_loss=0.1963, pruned_loss=0.04277, over 4895.00 frames. ], tot_loss[loss=0.1207, simple_loss=0.1826, pruned_loss=0.02935, over 940565.67 frames. ], batch size: 38, lr: 7.12e-03, +2022-12-02 04:51:54,945 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=35278.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:51:58,855 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=35282.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:52:13,773 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=35297.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:52:17,505 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.656e+01 1.534e+02 1.809e+02 2.318e+02 5.638e+02, threshold=3.618e+02, percent-clipped=6.0 +2022-12-02 04:52:22,174 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0 +2022-12-02 04:52:39,948 INFO [train.py:876] Epoch 25, batch 950, loss[loss=0.1052, simple_loss=0.1681, pruned_loss=0.02111, over 4883.00 frames. ], tot_loss[loss=0.1206, simple_loss=0.1829, pruned_loss=0.02916, over 943564.22 frames. ], batch size: 38, lr: 7.11e-03, +2022-12-02 04:52:41,924 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=35326.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:52:45,813 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=35330.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:53:00,491 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35345.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:53:20,808 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=35366.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 04:53:28,300 INFO [train.py:876] Epoch 25, batch 1000, loss[loss=0.1308, simple_loss=0.1882, pruned_loss=0.03674, over 4885.00 frames. ], tot_loss[loss=0.1203, simple_loss=0.1828, pruned_loss=0.0289, over 948965.80 frames. ], batch size: 37, lr: 7.11e-03, +2022-12-02 04:53:28,523 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35374.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:53:53,745 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.140e+01 1.514e+02 1.750e+02 2.112e+02 3.108e+02, threshold=3.500e+02, percent-clipped=0.0 +2022-12-02 04:53:58,847 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35406.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 04:54:06,302 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=35414.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 04:54:16,025 INFO [train.py:876] Epoch 25, batch 1050, loss[loss=0.1255, simple_loss=0.1844, pruned_loss=0.03326, over 4931.00 frames. 
], tot_loss[loss=0.1213, simple_loss=0.1839, pruned_loss=0.02941, over 950649.19 frames. ], batch size: 32, lr: 7.10e-03, +2022-12-02 04:54:26,709 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35435.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:54:38,765 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=35448.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:55:03,408 INFO [train.py:876] Epoch 25, batch 1100, loss[loss=0.1283, simple_loss=0.195, pruned_loss=0.03081, over 4802.00 frames. ], tot_loss[loss=0.1216, simple_loss=0.1839, pruned_loss=0.02967, over 950853.49 frames. ], batch size: 54, lr: 7.10e-03, +2022-12-02 04:55:29,385 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.226e+01 1.480e+02 1.847e+02 2.231e+02 5.084e+02, threshold=3.694e+02, percent-clipped=4.0 +2022-12-02 04:55:51,532 INFO [train.py:876] Epoch 25, batch 1150, loss[loss=0.1319, simple_loss=0.2018, pruned_loss=0.03101, over 4823.00 frames. ], tot_loss[loss=0.1213, simple_loss=0.1834, pruned_loss=0.02954, over 950989.07 frames. ], batch size: 42, lr: 7.09e-03, +2022-12-02 04:55:57,613 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35530.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:56:39,811 INFO [train.py:876] Epoch 25, batch 1200, loss[loss=0.1257, simple_loss=0.1837, pruned_loss=0.03383, over 4885.00 frames. ], tot_loss[loss=0.1211, simple_loss=0.1835, pruned_loss=0.02939, over 952020.05 frames. ], batch size: 37, lr: 7.09e-03, +2022-12-02 04:56:56,693 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35591.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:57:06,067 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.397e+01 1.532e+02 1.845e+02 2.260e+02 4.385e+02, threshold=3.689e+02, percent-clipped=3.0 +2022-12-02 04:57:14,600 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6495, 1.2695, 1.8670, 1.0537, 1.3107, 1.4681, 1.5838, 1.5388], + device='cuda:0'), covar=tensor([0.0239, 0.0256, 0.0232, 0.0318, 0.0284, 0.0236, 0.0162, 0.0138], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0036, 0.0035, 0.0038, 0.0040, 0.0037, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.0548e-05, 2.7728e-05, 2.8219e-05, 2.9761e-05, 3.2524e-05, 2.8838e-05, + 3.1457e-05, 2.6613e-05], device='cuda:0') +2022-12-02 04:57:28,939 INFO [train.py:876] Epoch 25, batch 1250, loss[loss=0.1045, simple_loss=0.1716, pruned_loss=0.01869, over 4741.00 frames. ], tot_loss[loss=0.1198, simple_loss=0.182, pruned_loss=0.02885, over 948056.31 frames. ], batch size: 27, lr: 7.08e-03, +2022-12-02 04:57:40,235 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8751, 3.7425, 3.9028, 3.5551, 4.0547, 3.3863, 3.8112, 4.0139], + device='cuda:0'), covar=tensor([0.0320, 0.0422, 0.0360, 0.0379, 0.0304, 0.0612, 0.0352, 0.0408], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0131, 0.0118, 0.0121, 0.0112, 0.0151, 0.0100, 0.0117], + device='cuda:0'), out_proj_covar=tensor([1.0420e-04, 1.1237e-04, 1.0089e-04, 1.0286e-04, 9.5173e-05, 1.2926e-04, + 8.7750e-05, 1.0128e-04], device='cuda:0') +2022-12-02 04:58:06,139 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. limit=2.0 +2022-12-02 04:58:10,289 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. limit=2.0 +2022-12-02 04:58:18,192 INFO [train.py:876] Epoch 25, batch 1300, loss[loss=0.1248, simple_loss=0.1995, pruned_loss=0.02509, over 4851.00 frames. 
], tot_loss[loss=0.1196, simple_loss=0.1817, pruned_loss=0.02879, over 950113.36 frames. ], batch size: 39, lr: 7.08e-03, +2022-12-02 04:58:44,584 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.551e+01 1.435e+02 1.704e+02 1.959e+02 3.347e+02, threshold=3.408e+02, percent-clipped=1.0 +2022-12-02 04:58:44,667 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=35701.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 04:58:55,261 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.90 vs. limit=5.0 +2022-12-02 04:58:56,603 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9479, 3.1716, 3.5492, 3.4740, 2.4912, 3.5732, 3.4883, 2.8539], + device='cuda:0'), covar=tensor([0.4912, 0.0881, 0.0881, 0.0521, 0.1167, 0.0765, 0.0616, 0.1577], + device='cuda:0'), in_proj_covar=tensor([0.0180, 0.0119, 0.0150, 0.0128, 0.0130, 0.0121, 0.0125, 0.0124], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 04:59:07,003 INFO [train.py:876] Epoch 25, batch 1350, loss[loss=0.1411, simple_loss=0.2103, pruned_loss=0.03597, over 4879.00 frames. ], tot_loss[loss=0.1197, simple_loss=0.1825, pruned_loss=0.02852, over 949040.69 frames. ], batch size: 44, lr: 7.07e-03, +2022-12-02 04:59:07,697 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.68 vs. limit=2.0 +2022-12-02 04:59:12,843 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=35730.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:59:27,191 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3381, 2.0641, 2.2804, 2.3674, 1.9074, 2.2446, 2.0544, 2.4361], + device='cuda:0'), covar=tensor([0.0466, 0.0579, 0.0450, 0.0440, 0.0817, 0.0723, 0.0554, 0.0382], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0073, 0.0063, 0.0063, 0.0080, 0.0074, 0.0070, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.0936e-05, 5.3587e-05, 4.5651e-05, 4.5541e-05, 5.8203e-05, 5.4348e-05, + 5.1539e-05, 4.3970e-05], device='cuda:0') +2022-12-02 04:59:29,950 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=35748.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:59:31,262 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.3243, 1.0358, 1.3966, 0.9946, 1.0364, 1.2487, 1.3200, 1.1243], + device='cuda:0'), covar=tensor([0.0200, 0.0265, 0.0235, 0.0306, 0.0251, 0.0162, 0.0171, 0.0155], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0036, 0.0036, 0.0038, 0.0040, 0.0037, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.0693e-05, 2.7969e-05, 2.8651e-05, 2.9872e-05, 3.2639e-05, 2.8993e-05, + 3.1598e-05, 2.6934e-05], device='cuda:0') +2022-12-02 04:59:41,200 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1028, 2.1252, 2.5425, 1.8988, 1.9989, 2.5861, 2.3795, 2.2335], + device='cuda:0'), covar=tensor([0.0981, 0.0944, 0.1021, 0.1053, 0.0921, 0.0845, 0.0609, 0.0645], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0075, 0.0070, 0.0077, 0.0067, 0.0063, 0.0063, 0.0069], + device='cuda:0'), out_proj_covar=tensor([6.2935e-05, 6.7290e-05, 6.4673e-05, 6.9155e-05, 6.1779e-05, 5.8482e-05, + 5.8556e-05, 6.3495e-05], device='cuda:0') +2022-12-02 04:59:52,634 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35771.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 04:59:55,240 INFO [train.py:876] Epoch 25, batch 1400, loss[loss=0.1007, simple_loss=0.1578, pruned_loss=0.0218, over 4799.00 frames. 
], tot_loss[loss=0.1209, simple_loss=0.1835, pruned_loss=0.02912, over 944893.58 frames. ], batch size: 32, lr: 7.07e-03, +2022-12-02 05:00:14,670 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4849, 1.2819, 1.4935, 0.9143, 1.1015, 1.3430, 1.3423, 1.1247], + device='cuda:0'), covar=tensor([0.0220, 0.0196, 0.0273, 0.0336, 0.0284, 0.0185, 0.0245, 0.0197], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0036, 0.0035, 0.0037, 0.0040, 0.0037, 0.0039, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.0666e-05, 2.7976e-05, 2.8452e-05, 2.9540e-05, 3.2416e-05, 2.8785e-05, + 3.1606e-05, 2.6912e-05], device='cuda:0') +2022-12-02 05:00:16,356 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=35796.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:00:21,450 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.000e+02 1.547e+02 1.862e+02 2.443e+02 4.924e+02, threshold=3.724e+02, percent-clipped=9.0 +2022-12-02 05:00:25,528 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-25.pt +2022-12-02 05:00:34,711 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 05:00:35,627 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 05:00:35,921 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:00:35,952 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 05:00:37,087 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 05:00:37,409 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 05:00:39,021 INFO [train.py:876] Epoch 26, batch 0, loss[loss=0.1256, simple_loss=0.1902, pruned_loss=0.03049, over 4870.00 frames. ], tot_loss[loss=0.1256, simple_loss=0.1902, pruned_loss=0.03049, over 4870.00 frames. 
], batch size: 39, lr: 6.92e-03, +2022-12-02 05:00:39,022 INFO [train.py:901] Computing validation loss +2022-12-02 05:00:41,671 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3195, 2.7436, 3.1296, 3.1456, 2.8533, 2.9013, 1.9719, 3.0366], + device='cuda:0'), covar=tensor([0.0499, 0.1558, 0.0750, 0.0613, 0.0900, 0.1254, 0.1052, 0.0599], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0089, 0.0105, 0.0089, 0.0109, 0.0093, 0.0094, 0.0094], + device='cuda:0'), out_proj_covar=tensor([8.8947e-05, 9.3094e-05, 1.0684e-04, 9.1537e-05, 1.1051e-04, 9.7631e-05, + 9.7248e-05, 9.6400e-05], device='cuda:0') +2022-12-02 05:00:46,350 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9103, 1.6476, 0.9379, 1.9327, 1.9144, 1.8670, 1.7332, 1.7797], + device='cuda:0'), covar=tensor([0.0332, 0.0438, 0.0439, 0.0340, 0.0266, 0.0355, 0.0397, 0.0729], + device='cuda:0'), in_proj_covar=tensor([0.0050, 0.0047, 0.0052, 0.0041, 0.0044, 0.0047, 0.0046, 0.0043], + device='cuda:0'), out_proj_covar=tensor([4.6973e-05, 4.4125e-05, 4.9661e-05, 3.8904e-05, 4.1446e-05, 4.4369e-05, + 4.3470e-05, 4.2676e-05], device='cuda:0') +2022-12-02 05:00:53,491 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5608, 3.7941, 3.9724, 3.6487, 3.4285, 3.6981, 3.8952, 3.8226], + device='cuda:0'), covar=tensor([0.1213, 0.0312, 0.0304, 0.0406, 0.0437, 0.0554, 0.0325, 0.0493], + device='cuda:0'), in_proj_covar=tensor([0.0247, 0.0172, 0.0193, 0.0183, 0.0185, 0.0189, 0.0170, 0.0198], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:00:54,691 INFO [train.py:910] Epoch 26, validation: loss=0.2338, simple_loss=0.2756, pruned_loss=0.09597, over 253132.00 frames. +2022-12-02 05:00:54,691 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 05:00:59,960 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=35811.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:01:20,716 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35832.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:01:43,475 INFO [train.py:876] Epoch 26, batch 50, loss[loss=0.1451, simple_loss=0.199, pruned_loss=0.04558, over 4892.00 frames. ], tot_loss[loss=0.1125, simple_loss=0.1743, pruned_loss=0.02533, over 214430.71 frames. ], batch size: 38, lr: 6.92e-03, +2022-12-02 05:01:44,923 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. limit=2.0 +2022-12-02 05:01:59,711 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=35872.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:02:07,999 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. 
Duration: 0.836375 +2022-12-02 05:02:12,657 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=35886.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:02:20,772 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7804, 2.8320, 2.9563, 2.9065, 2.3206, 2.9921, 2.4713, 2.9835], + device='cuda:0'), covar=tensor([0.0325, 0.0298, 0.0294, 0.0308, 0.0493, 0.0425, 0.0511, 0.0239], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0072, 0.0062, 0.0061, 0.0079, 0.0073, 0.0069, 0.0060], + device='cuda:0'), out_proj_covar=tensor([4.9991e-05, 5.2869e-05, 4.4679e-05, 4.4554e-05, 5.7678e-05, 5.3018e-05, + 5.0319e-05, 4.3353e-05], device='cuda:0') +2022-12-02 05:02:27,107 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.906e+01 1.459e+02 1.682e+02 2.056e+02 4.030e+02, threshold=3.363e+02, percent-clipped=1.0 +2022-12-02 05:02:31,986 INFO [train.py:876] Epoch 26, batch 100, loss[loss=0.1513, simple_loss=0.205, pruned_loss=0.04878, over 4012.00 frames. ], tot_loss[loss=0.1148, simple_loss=0.1772, pruned_loss=0.02616, over 380184.58 frames. ], batch size: 72, lr: 6.91e-03, +2022-12-02 05:02:50,090 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:03:08,508 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.36 vs. limit=2.0 +2022-12-02 05:03:15,449 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 05:03:20,277 INFO [train.py:876] Epoch 26, batch 150, loss[loss=0.09855, simple_loss=0.1508, pruned_loss=0.02317, over 4696.00 frames. ], tot_loss[loss=0.1156, simple_loss=0.1782, pruned_loss=0.02653, over 507203.58 frames. ], batch size: 23, lr: 6.91e-03, +2022-12-02 05:03:20,413 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5130, 5.0531, 4.9340, 5.3496, 4.6090, 4.3360, 5.0905, 4.7893], + device='cuda:0'), covar=tensor([0.0169, 0.0075, 0.0097, 0.0083, 0.0126, 0.0129, 0.0079, 0.0111], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0053, 0.0058, 0.0045, 0.0056, 0.0057, 0.0053, 0.0054], + device='cuda:0'), out_proj_covar=tensor([5.1481e-05, 4.2761e-05, 5.0709e-05, 3.7096e-05, 4.8253e-05, 5.0600e-05, + 4.2236e-05, 4.4142e-05], device='cuda:0') +2022-12-02 05:03:32,917 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.30 vs. 
limit=5.0 +2022-12-02 05:03:42,099 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3161, 1.9143, 2.3781, 1.8979, 2.0153, 2.4142, 2.0497, 2.0973], + device='cuda:0'), covar=tensor([0.0705, 0.0664, 0.1033, 0.0774, 0.0906, 0.0829, 0.0799, 0.0888], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0074, 0.0069, 0.0077, 0.0067, 0.0063, 0.0062, 0.0070], + device='cuda:0'), out_proj_covar=tensor([6.2500e-05, 6.6475e-05, 6.4260e-05, 6.8610e-05, 6.2182e-05, 5.8596e-05, + 5.8310e-05, 6.3785e-05], device='cuda:0') +2022-12-02 05:04:01,494 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.1183, 2.8642, 3.0022, 2.7535, 3.0841, 2.8543, 2.7319, 3.6875], + device='cuda:0'), covar=tensor([0.0139, 0.1024, 0.0684, 0.1246, 0.0276, 0.0416, 0.1648, 0.0398], + device='cuda:0'), in_proj_covar=tensor([0.0078, 0.0116, 0.0095, 0.0126, 0.0087, 0.0095, 0.0133, 0.0110], + device='cuda:0'), out_proj_covar=tensor([9.5781e-05, 1.3722e-04, 1.1546e-04, 1.4825e-04, 1.0107e-04, 1.1810e-04, + 1.5292e-04, 1.2483e-04], device='cuda:0') +2022-12-02 05:04:03,707 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-36000.pt +2022-12-02 05:04:06,931 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.357e+01 1.495e+02 1.769e+02 2.264e+02 4.681e+02, threshold=3.538e+02, percent-clipped=4.0 +2022-12-02 05:04:07,042 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36001.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 05:04:11,673 INFO [train.py:876] Epoch 26, batch 200, loss[loss=0.1068, simple_loss=0.1784, pruned_loss=0.01758, over 4886.00 frames. ], tot_loss[loss=0.1166, simple_loss=0.1791, pruned_loss=0.02699, over 606350.32 frames. ], batch size: 44, lr: 6.90e-03, +2022-12-02 05:04:34,412 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36030.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:04:52,453 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36049.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 05:04:59,156 INFO [train.py:876] Epoch 26, batch 250, loss[loss=0.1227, simple_loss=0.1887, pruned_loss=0.0284, over 4824.00 frames. ], tot_loss[loss=0.1173, simple_loss=0.1801, pruned_loss=0.02728, over 683327.65 frames. ], batch size: 42, lr: 6.90e-03, +2022-12-02 05:05:02,056 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 05:05:05,329 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2856, 3.3928, 3.5155, 3.1298, 2.9635, 3.2084, 3.4886, 3.3568], + device='cuda:0'), covar=tensor([0.1038, 0.0413, 0.0548, 0.0549, 0.0562, 0.0598, 0.0390, 0.0684], + device='cuda:0'), in_proj_covar=tensor([0.0246, 0.0172, 0.0191, 0.0183, 0.0184, 0.0189, 0.0170, 0.0198], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:05:16,843 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.68 vs. limit=2.0 +2022-12-02 05:05:19,982 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=6.29 vs. 
limit=5.0 +2022-12-02 05:05:20,367 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36078.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:05:20,565 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6272, 2.6792, 2.7151, 2.7574, 2.0878, 2.8102, 2.5299, 2.9598], + device='cuda:0'), covar=tensor([0.0380, 0.0362, 0.0363, 0.0320, 0.0688, 0.0522, 0.0551, 0.0274], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0072, 0.0063, 0.0061, 0.0080, 0.0073, 0.0069, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.0487e-05, 5.3396e-05, 4.5217e-05, 4.4547e-05, 5.8404e-05, 5.3350e-05, + 5.0805e-05, 4.3670e-05], device='cuda:0') +2022-12-02 05:05:37,976 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.37 vs. limit=5.0 +2022-12-02 05:05:42,452 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.890e+01 1.410e+02 1.814e+02 2.267e+02 4.851e+02, threshold=3.628e+02, percent-clipped=5.0 +2022-12-02 05:05:47,351 INFO [train.py:876] Epoch 26, batch 300, loss[loss=0.1308, simple_loss=0.1985, pruned_loss=0.03159, over 4838.00 frames. ], tot_loss[loss=0.1178, simple_loss=0.1808, pruned_loss=0.02745, over 741699.81 frames. ], batch size: 34, lr: 6.90e-03, +2022-12-02 05:05:58,504 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 05:06:02,562 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0003, 2.7323, 2.8172, 2.9695, 2.5810, 2.6996, 1.7339, 2.9226], + device='cuda:0'), covar=tensor([0.0729, 0.1494, 0.0853, 0.0727, 0.1010, 0.1183, 0.1142, 0.0549], + device='cuda:0'), in_proj_covar=tensor([0.0084, 0.0089, 0.0106, 0.0088, 0.0109, 0.0093, 0.0094, 0.0094], + device='cuda:0'), out_proj_covar=tensor([8.9284e-05, 9.3055e-05, 1.0684e-04, 9.1106e-05, 1.1020e-04, 9.7567e-05, + 9.7595e-05, 9.6439e-05], device='cuda:0') +2022-12-02 05:06:08,541 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36127.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:06:36,934 INFO [train.py:876] Epoch 26, batch 350, loss[loss=0.1409, simple_loss=0.2072, pruned_loss=0.03727, over 4825.00 frames. ], tot_loss[loss=0.1163, simple_loss=0.1795, pruned_loss=0.0266, over 788887.23 frames. ], batch size: 34, lr: 6.89e-03, +2022-12-02 05:06:47,544 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36167.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:07:01,141 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36181.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:07:06,152 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36186.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:07:06,249 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4804, 1.5863, 2.0518, 1.0771, 1.4619, 1.4722, 1.6432, 1.7644], + device='cuda:0'), covar=tensor([0.0264, 0.0319, 0.0210, 0.0325, 0.0258, 0.0272, 0.0273, 0.0142], + device='cuda:0'), in_proj_covar=tensor([0.0038, 0.0036, 0.0036, 0.0038, 0.0041, 0.0038, 0.0040, 0.0035], + device='cuda:0'), out_proj_covar=tensor([3.0420e-05, 2.7860e-05, 2.9243e-05, 3.0275e-05, 3.2962e-05, 2.9404e-05, + 3.1705e-05, 2.6809e-05], device='cuda:0') +2022-12-02 05:07:20,448 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.030e+02 1.411e+02 1.775e+02 2.148e+02 4.469e+02, threshold=3.550e+02, percent-clipped=1.0 +2022-12-02 05:07:25,226 INFO [train.py:876] Epoch 26, batch 400, loss[loss=0.09532, simple_loss=0.1538, pruned_loss=0.0184, over 4900.00 frames. 
], tot_loss[loss=0.1165, simple_loss=0.1794, pruned_loss=0.02681, over 826015.49 frames. ], batch size: 29, lr: 6.89e-03, +2022-12-02 05:07:37,456 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7074, 4.2120, 4.3739, 3.9405, 3.5871, 4.0114, 4.1449, 4.3328], + device='cuda:0'), covar=tensor([0.1148, 0.0269, 0.0268, 0.0352, 0.0422, 0.0376, 0.0256, 0.0355], + device='cuda:0'), in_proj_covar=tensor([0.0242, 0.0169, 0.0189, 0.0180, 0.0181, 0.0186, 0.0166, 0.0196], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:07:38,148 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 05:07:52,751 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36234.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:08:00,833 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36242.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:08:02,636 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:08:14,369 INFO [train.py:876] Epoch 26, batch 450, loss[loss=0.118, simple_loss=0.1711, pruned_loss=0.03242, over 4834.00 frames. ], tot_loss[loss=0.1168, simple_loss=0.1799, pruned_loss=0.02685, over 854152.02 frames. ], batch size: 34, lr: 6.88e-03, +2022-12-02 05:08:58,199 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.308e+01 1.432e+02 1.846e+02 2.216e+02 3.581e+02, threshold=3.692e+02, percent-clipped=1.0 +2022-12-02 05:09:02,882 INFO [train.py:876] Epoch 26, batch 500, loss[loss=0.1485, simple_loss=0.2017, pruned_loss=0.04768, over 4886.00 frames. ], tot_loss[loss=0.1175, simple_loss=0.1806, pruned_loss=0.02723, over 874753.87 frames. ], batch size: 38, lr: 6.88e-03, +2022-12-02 05:09:51,680 INFO [train.py:876] Epoch 26, batch 550, loss[loss=0.07926, simple_loss=0.1222, pruned_loss=0.01816, over 3820.00 frames. ], tot_loss[loss=0.1161, simple_loss=0.1789, pruned_loss=0.02669, over 889317.51 frames. ], batch size: 15, lr: 6.87e-03, +2022-12-02 05:10:36,073 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.040e+02 1.495e+02 1.822e+02 2.271e+02 4.648e+02, threshold=3.644e+02, percent-clipped=3.0 +2022-12-02 05:10:41,128 INFO [train.py:876] Epoch 26, batch 600, loss[loss=0.134, simple_loss=0.2108, pruned_loss=0.02864, over 4799.00 frames. ], tot_loss[loss=0.1167, simple_loss=0.1789, pruned_loss=0.02727, over 903694.45 frames. ], batch size: 58, lr: 6.87e-03, +2022-12-02 05:10:55,446 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.74 vs. limit=5.0 +2022-12-02 05:11:02,138 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36427.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:11:26,016 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.28 vs. limit=5.0 +2022-12-02 05:11:30,265 INFO [train.py:876] Epoch 26, batch 650, loss[loss=0.1202, simple_loss=0.1822, pruned_loss=0.02909, over 4858.00 frames. ], tot_loss[loss=0.1161, simple_loss=0.1781, pruned_loss=0.02708, over 912387.98 frames. 
], batch size: 36, lr: 6.86e-03, +2022-12-02 05:11:41,073 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36467.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:11:48,017 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2818, 2.6382, 3.2092, 3.2335, 2.7979, 3.0381, 2.9985, 2.9931], + device='cuda:0'), covar=tensor([0.0353, 0.0401, 0.0280, 0.0242, 0.0266, 0.0393, 0.0346, 0.0424], + device='cuda:0'), in_proj_covar=tensor([0.0137, 0.0128, 0.0143, 0.0129, 0.0109, 0.0139, 0.0139, 0.0154], + device='cuda:0'), out_proj_covar=tensor([9.1382e-05, 8.6769e-05, 9.5396e-05, 8.5716e-05, 7.1169e-05, 9.2140e-05, + 9.2313e-05, 1.0473e-04], device='cuda:0') +2022-12-02 05:11:48,675 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36475.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:11:59,131 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.69 vs. limit=2.0 +2022-12-02 05:12:14,058 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.337e+01 1.420e+02 1.792e+02 2.239e+02 4.424e+02, threshold=3.583e+02, percent-clipped=5.0 +2022-12-02 05:12:18,857 INFO [train.py:876] Epoch 26, batch 700, loss[loss=0.08253, simple_loss=0.1374, pruned_loss=0.01384, over 4818.00 frames. ], tot_loss[loss=0.1163, simple_loss=0.1785, pruned_loss=0.02704, over 922347.35 frames. ], batch size: 25, lr: 6.86e-03, +2022-12-02 05:12:27,682 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36515.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:12:48,653 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36537.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:12:52,044 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.97 vs. limit=2.0 +2022-12-02 05:13:06,870 INFO [train.py:876] Epoch 26, batch 750, loss[loss=0.07641, simple_loss=0.127, pruned_loss=0.01291, over 4671.00 frames. ], tot_loss[loss=0.1164, simple_loss=0.1789, pruned_loss=0.02695, over 930531.82 frames. ], batch size: 21, lr: 6.85e-03, +2022-12-02 05:13:50,489 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.708e+01 1.554e+02 1.815e+02 2.073e+02 4.856e+02, threshold=3.629e+02, percent-clipped=1.0 +2022-12-02 05:13:54,470 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36605.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:13:55,261 INFO [train.py:876] Epoch 26, batch 800, loss[loss=0.1143, simple_loss=0.1838, pruned_loss=0.02242, over 4887.00 frames. ], tot_loss[loss=0.1178, simple_loss=0.1807, pruned_loss=0.02744, over 937757.47 frames. ], batch size: 38, lr: 6.85e-03, +2022-12-02 05:13:57,326 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36608.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:14:01,335 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36612.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:14:43,431 INFO [train.py:876] Epoch 26, batch 850, loss[loss=0.08088, simple_loss=0.142, pruned_loss=0.009885, over 4898.00 frames. ], tot_loss[loss=0.1183, simple_loss=0.1813, pruned_loss=0.02763, over 942441.27 frames. 
], batch size: 29, lr: 6.84e-03, +2022-12-02 05:14:53,319 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36666.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:14:56,201 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36669.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:15:00,082 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36673.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:15:26,054 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36700.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 05:15:26,766 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.007e+02 1.471e+02 1.744e+02 2.159e+02 5.667e+02, threshold=3.487e+02, percent-clipped=1.0 +2022-12-02 05:15:28,010 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6533, 3.2294, 3.8620, 3.3102, 3.7555, 2.9108, 3.6214, 3.7310], + device='cuda:0'), covar=tensor([0.0420, 0.0535, 0.0359, 0.0447, 0.0365, 0.0778, 0.0329, 0.0439], + device='cuda:0'), in_proj_covar=tensor([0.0121, 0.0129, 0.0116, 0.0120, 0.0108, 0.0149, 0.0101, 0.0116], + device='cuda:0'), out_proj_covar=tensor([1.0285e-04, 1.1030e-04, 9.8633e-05, 1.0266e-04, 9.2220e-05, 1.2779e-04, + 8.8779e-05, 1.0083e-04], device='cuda:0') +2022-12-02 05:15:31,695 INFO [train.py:876] Epoch 26, batch 900, loss[loss=0.1018, simple_loss=0.1627, pruned_loss=0.02051, over 4804.00 frames. ], tot_loss[loss=0.1181, simple_loss=0.1811, pruned_loss=0.02759, over 944179.24 frames. ], batch size: 32, lr: 6.84e-03, +2022-12-02 05:15:42,604 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4927, 2.1781, 2.3697, 2.4251, 1.8903, 2.5032, 2.2332, 2.6769], + device='cuda:0'), covar=tensor([0.0438, 0.0506, 0.0388, 0.0387, 0.0757, 0.0479, 0.0580, 0.0303], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0073, 0.0063, 0.0062, 0.0080, 0.0073, 0.0069, 0.0060], + device='cuda:0'), out_proj_covar=tensor([5.1231e-05, 5.4232e-05, 4.5790e-05, 4.4834e-05, 5.8794e-05, 5.2861e-05, + 5.1063e-05, 4.3650e-05], device='cuda:0') +2022-12-02 05:16:19,757 INFO [train.py:876] Epoch 26, batch 950, loss[loss=0.1236, simple_loss=0.1954, pruned_loss=0.02591, over 4850.00 frames. ], tot_loss[loss=0.1183, simple_loss=0.1813, pruned_loss=0.02767, over 944566.94 frames. 
], batch size: 40, lr: 6.84e-03, +2022-12-02 05:16:24,913 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36761.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 05:16:49,265 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0687, 2.4362, 3.1411, 2.7664, 2.8898, 2.7864, 2.8665, 3.6888], + device='cuda:0'), covar=tensor([0.0158, 0.1341, 0.0654, 0.1308, 0.0319, 0.0450, 0.1600, 0.0387], + device='cuda:0'), in_proj_covar=tensor([0.0078, 0.0114, 0.0095, 0.0126, 0.0087, 0.0094, 0.0132, 0.0109], + device='cuda:0'), out_proj_covar=tensor([9.5157e-05, 1.3483e-04, 1.1533e-04, 1.4805e-04, 1.0088e-04, 1.1727e-04, + 1.5253e-04, 1.2379e-04], device='cuda:0') +2022-12-02 05:16:52,304 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0544, 2.4592, 3.0430, 3.0403, 2.4430, 3.0022, 2.9012, 2.6857], + device='cuda:0'), covar=tensor([0.0572, 0.0542, 0.0389, 0.0377, 0.0414, 0.0505, 0.0434, 0.0687], + device='cuda:0'), in_proj_covar=tensor([0.0139, 0.0128, 0.0143, 0.0130, 0.0110, 0.0139, 0.0138, 0.0154], + device='cuda:0'), out_proj_covar=tensor([9.2577e-05, 8.6622e-05, 9.5537e-05, 8.6675e-05, 7.1590e-05, 9.2286e-05, + 9.2093e-05, 1.0443e-04], device='cuda:0') +2022-12-02 05:17:03,567 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.088e+02 1.503e+02 1.816e+02 2.265e+02 4.434e+02, threshold=3.632e+02, percent-clipped=2.0 +2022-12-02 05:17:07,752 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36805.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 05:17:08,617 INFO [train.py:876] Epoch 26, batch 1000, loss[loss=0.1079, simple_loss=0.1676, pruned_loss=0.02415, over 4898.00 frames. ], tot_loss[loss=0.1189, simple_loss=0.1822, pruned_loss=0.02782, over 945851.39 frames. ], batch size: 30, lr: 6.83e-03, +2022-12-02 05:17:26,907 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.50 vs. limit=2.0 +2022-12-02 05:17:39,163 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=36837.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:17:48,700 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.92 vs. limit=2.0 +2022-12-02 05:17:57,744 INFO [train.py:876] Epoch 26, batch 1050, loss[loss=0.1392, simple_loss=0.2035, pruned_loss=0.03745, over 4790.00 frames. ], tot_loss[loss=0.1197, simple_loss=0.183, pruned_loss=0.02816, over 946646.12 frames. ], batch size: 58, lr: 6.83e-03, +2022-12-02 05:18:07,471 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36866.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 05:18:25,813 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=36885.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:18:33,323 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.23 vs. limit=5.0 +2022-12-02 05:18:41,691 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.026e+02 1.385e+02 1.621e+02 1.960e+02 3.596e+02, threshold=3.243e+02, percent-clipped=0.0 +2022-12-02 05:18:46,439 INFO [train.py:876] Epoch 26, batch 1100, loss[loss=0.1156, simple_loss=0.1942, pruned_loss=0.0185, over 4853.00 frames. ], tot_loss[loss=0.1181, simple_loss=0.181, pruned_loss=0.02756, over 950164.16 frames. ], batch size: 49, lr: 6.82e-03, +2022-12-02 05:18:51,403 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36911.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:19:29,920 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.30 vs. 
limit=2.0 +2022-12-02 05:19:35,159 INFO [train.py:876] Epoch 26, batch 1150, loss[loss=0.1139, simple_loss=0.1862, pruned_loss=0.02081, over 4861.00 frames. ], tot_loss[loss=0.1179, simple_loss=0.1809, pruned_loss=0.02742, over 950648.92 frames. ], batch size: 36, lr: 6.82e-03, +2022-12-02 05:19:37,120 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=36958.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:19:40,056 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36961.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:19:42,924 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36964.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:19:46,797 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=36968.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:19:51,032 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=36972.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:20:19,396 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.495e+01 1.542e+02 1.862e+02 2.468e+02 4.442e+02, threshold=3.725e+02, percent-clipped=10.0 +2022-12-02 05:20:22,570 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=37004.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:20:24,235 INFO [train.py:876] Epoch 26, batch 1200, loss[loss=0.1105, simple_loss=0.1728, pruned_loss=0.02414, over 4799.00 frames. ], tot_loss[loss=0.1179, simple_loss=0.1806, pruned_loss=0.0276, over 949806.95 frames. ], batch size: 32, lr: 6.81e-03, +2022-12-02 05:20:36,899 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=37019.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:21:12,631 INFO [train.py:876] Epoch 26, batch 1250, loss[loss=0.0985, simple_loss=0.1574, pruned_loss=0.01981, over 4824.00 frames. ], tot_loss[loss=0.1173, simple_loss=0.1802, pruned_loss=0.02723, over 950537.47 frames. ], batch size: 25, lr: 6.81e-03, +2022-12-02 05:21:12,687 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37056.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 05:21:21,449 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=37065.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:21:26,622 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.80 vs. limit=2.0 +2022-12-02 05:21:30,362 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.34 vs. limit=2.0 +2022-12-02 05:21:56,041 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.641e+01 1.491e+02 1.761e+02 2.235e+02 3.799e+02, threshold=3.521e+02, percent-clipped=2.0 +2022-12-02 05:22:00,958 INFO [train.py:876] Epoch 26, batch 1300, loss[loss=0.1317, simple_loss=0.195, pruned_loss=0.03418, over 4832.00 frames. ], tot_loss[loss=0.1168, simple_loss=0.1796, pruned_loss=0.02701, over 950908.72 frames. 
], batch size: 47, lr: 6.80e-03, +2022-12-02 05:22:05,006 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7740, 3.0776, 3.3835, 3.2464, 2.0445, 3.4576, 3.3891, 2.7399], + device='cuda:0'), covar=tensor([0.4085, 0.0944, 0.0867, 0.0538, 0.1225, 0.0768, 0.0570, 0.1191], + device='cuda:0'), in_proj_covar=tensor([0.0179, 0.0119, 0.0148, 0.0130, 0.0130, 0.0120, 0.0126, 0.0125], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 05:22:21,758 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3252, 4.9312, 4.9245, 5.2355, 4.5094, 4.0632, 5.1127, 4.8236], + device='cuda:0'), covar=tensor([0.0186, 0.0079, 0.0099, 0.0121, 0.0135, 0.0182, 0.0065, 0.0116], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0053, 0.0058, 0.0046, 0.0057, 0.0058, 0.0053, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.1099e-05, 4.2107e-05, 4.9710e-05, 3.7839e-05, 4.8328e-05, 5.0972e-05, + 4.1742e-05, 4.3436e-05], device='cuda:0') +2022-12-02 05:22:29,668 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3498, 4.3147, 4.8811, 4.2327, 4.4878, 4.4374, 4.1025, 4.2659], + device='cuda:0'), covar=tensor([0.0742, 0.0522, 0.0555, 0.0475, 0.0922, 0.0618, 0.1271, 0.0490], + device='cuda:0'), in_proj_covar=tensor([0.0183, 0.0132, 0.0183, 0.0154, 0.0139, 0.0169, 0.0196, 0.0139], + device='cuda:0'), out_proj_covar=tensor([1.3170e-04, 8.4784e-05, 1.3921e-04, 1.0558e-04, 1.0203e-04, 1.2214e-04, + 1.4796e-04, 9.4150e-05], device='cuda:0') +2022-12-02 05:22:50,393 INFO [train.py:876] Epoch 26, batch 1350, loss[loss=0.1024, simple_loss=0.168, pruned_loss=0.01841, over 4716.00 frames. ], tot_loss[loss=0.1169, simple_loss=0.1801, pruned_loss=0.02685, over 949454.93 frames. ], batch size: 27, lr: 6.80e-03, +2022-12-02 05:22:55,181 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37161.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 05:23:34,261 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.153e+02 1.490e+02 1.746e+02 2.180e+02 4.168e+02, threshold=3.492e+02, percent-clipped=2.0 +2022-12-02 05:23:39,242 INFO [train.py:876] Epoch 26, batch 1400, loss[loss=0.1191, simple_loss=0.1794, pruned_loss=0.02946, over 4862.00 frames. ], tot_loss[loss=0.1171, simple_loss=0.1799, pruned_loss=0.02713, over 948938.24 frames. ], batch size: 35, lr: 6.79e-03, +2022-12-02 05:24:09,127 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-26.pt +2022-12-02 05:24:18,269 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 05:24:19,181 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 05:24:19,476 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:24:19,507 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. 
Duration: 0.8 +2022-12-02 05:24:20,610 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 05:24:20,930 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 05:24:22,546 INFO [train.py:876] Epoch 27, batch 0, loss[loss=0.08303, simple_loss=0.1446, pruned_loss=0.01073, over 4881.00 frames. ], tot_loss[loss=0.08303, simple_loss=0.1446, pruned_loss=0.01073, over 4881.00 frames. ], batch size: 29, lr: 6.66e-03, +2022-12-02 05:24:22,547 INFO [train.py:901] Computing validation loss +2022-12-02 05:24:29,172 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2679, 1.9573, 1.0717, 2.2652, 2.1488, 2.0779, 2.2527, 2.0229], + device='cuda:0'), covar=tensor([0.0327, 0.0473, 0.0518, 0.0221, 0.0325, 0.0404, 0.0280, 0.0980], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0049, 0.0055, 0.0043, 0.0047, 0.0050, 0.0047, 0.0045], + device='cuda:0'), out_proj_covar=tensor([4.9900e-05, 4.6435e-05, 5.3477e-05, 4.1170e-05, 4.3897e-05, 4.7025e-05, + 4.5002e-05, 4.4128e-05], device='cuda:0') +2022-12-02 05:24:33,665 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.7062, 4.7635, 5.2119, 4.7070, 4.7409, 4.8545, 4.6465, 4.6666], + device='cuda:0'), covar=tensor([0.0503, 0.0387, 0.0372, 0.0502, 0.1060, 0.0391, 0.1010, 0.0443], + device='cuda:0'), in_proj_covar=tensor([0.0181, 0.0131, 0.0180, 0.0153, 0.0139, 0.0167, 0.0193, 0.0138], + device='cuda:0'), out_proj_covar=tensor([1.2957e-04, 8.4096e-05, 1.3754e-04, 1.0497e-04, 1.0182e-04, 1.2083e-04, + 1.4568e-04, 9.3878e-05], device='cuda:0') +2022-12-02 05:24:38,827 INFO [train.py:910] Epoch 27, validation: loss=0.2342, simple_loss=0.2752, pruned_loss=0.09657, over 253132.00 frames. +2022-12-02 05:24:38,827 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 05:25:01,387 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37261.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:04,170 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37264.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:07,005 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37267.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:07,926 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37268.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:08,133 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=37268.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:22,788 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=7.69 vs. limit=5.0 +2022-12-02 05:25:27,041 INFO [train.py:876] Epoch 27, batch 50, loss[loss=0.08134, simple_loss=0.1333, pruned_loss=0.0147, over 4723.00 frames. ], tot_loss[loss=0.1122, simple_loss=0.1719, pruned_loss=0.02623, over 214592.80 frames. ], batch size: 23, lr: 6.66e-03, +2022-12-02 05:25:39,283 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.692e+01 1.441e+02 1.767e+02 2.268e+02 3.848e+02, threshold=3.535e+02, percent-clipped=2.0 +2022-12-02 05:25:47,015 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37309.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:48,971 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. 
Duration: 0.836375 +2022-12-02 05:25:50,005 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37312.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:52,010 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37314.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:25:53,941 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37316.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:26:06,621 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=37329.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:26:14,948 INFO [train.py:876] Epoch 27, batch 100, loss[loss=0.08738, simple_loss=0.1318, pruned_loss=0.02147, over 4736.00 frames. ], tot_loss[loss=0.1165, simple_loss=0.1769, pruned_loss=0.0281, over 378910.38 frames. ], batch size: 23, lr: 6.65e-03, +2022-12-02 05:26:27,774 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:26:31,914 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37356.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 05:26:35,637 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37360.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:26:39,755 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=37364.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:26:52,282 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 05:27:02,978 INFO [train.py:876] Epoch 27, batch 150, loss[loss=0.1425, simple_loss=0.2181, pruned_loss=0.0334, over 4638.00 frames. ], tot_loss[loss=0.1169, simple_loss=0.1787, pruned_loss=0.02756, over 506415.57 frames. ], batch size: 63, lr: 6.65e-03, +2022-12-02 05:27:15,396 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.435e+01 1.429e+02 1.728e+02 2.284e+02 6.041e+02, threshold=3.455e+02, percent-clipped=3.0 +2022-12-02 05:27:18,271 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37404.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 05:27:35,970 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3721, 4.9642, 4.8201, 5.1768, 4.4810, 4.1507, 5.0077, 4.6976], + device='cuda:0'), covar=tensor([0.0164, 0.0066, 0.0109, 0.0107, 0.0156, 0.0170, 0.0083, 0.0138], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0053, 0.0058, 0.0045, 0.0057, 0.0057, 0.0054, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.0850e-05, 4.1710e-05, 4.9709e-05, 3.7100e-05, 4.8635e-05, 5.0628e-05, + 4.2120e-05, 4.3191e-05], device='cuda:0') +2022-12-02 05:27:39,090 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=37425.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:27:51,290 INFO [train.py:876] Epoch 27, batch 200, loss[loss=0.1052, simple_loss=0.1683, pruned_loss=0.021, over 4793.00 frames. ], tot_loss[loss=0.1159, simple_loss=0.1783, pruned_loss=0.02677, over 603400.77 frames. 
], batch size: 32, lr: 6.64e-03, +2022-12-02 05:28:10,079 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.5544, 2.0531, 2.3249, 2.4446, 1.7987, 2.2850, 2.2091, 2.6382], + device='cuda:0'), covar=tensor([0.0420, 0.0612, 0.0399, 0.0353, 0.0825, 0.0608, 0.0514, 0.0316], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0074, 0.0064, 0.0063, 0.0082, 0.0075, 0.0071, 0.0061], + device='cuda:0'), out_proj_covar=tensor([5.1753e-05, 5.4422e-05, 4.6209e-05, 4.6105e-05, 6.0255e-05, 5.4046e-05, + 5.2166e-05, 4.4773e-05], device='cuda:0') +2022-12-02 05:28:12,360 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.99 vs. limit=5.0 +2022-12-02 05:28:13,655 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37461.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 05:28:33,965 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.48 vs. limit=5.0 +2022-12-02 05:28:34,268 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 05:28:40,096 INFO [train.py:876] Epoch 27, batch 250, loss[loss=0.1147, simple_loss=0.1683, pruned_loss=0.03051, over 4905.00 frames. ], tot_loss[loss=0.1162, simple_loss=0.1791, pruned_loss=0.02662, over 679289.68 frames. ], batch size: 30, lr: 6.64e-03, +2022-12-02 05:28:47,850 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9518, 4.6629, 4.5157, 4.8814, 4.1268, 3.8621, 4.7275, 4.4502], + device='cuda:0'), covar=tensor([0.0274, 0.0084, 0.0145, 0.0137, 0.0201, 0.0248, 0.0096, 0.0134], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0053, 0.0058, 0.0045, 0.0057, 0.0057, 0.0053, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.0683e-05, 4.1568e-05, 4.9442e-05, 3.6984e-05, 4.8587e-05, 5.0553e-05, + 4.1605e-05, 4.3097e-05], device='cuda:0') +2022-12-02 05:28:52,609 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.010e+02 1.443e+02 1.711e+02 2.184e+02 7.627e+02, threshold=3.421e+02, percent-clipped=3.0 +2022-12-02 05:29:00,268 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37509.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 05:29:02,062 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8950, 4.5747, 4.5525, 4.8009, 4.1292, 3.8920, 4.6652, 4.3930], + device='cuda:0'), covar=tensor([0.0204, 0.0070, 0.0116, 0.0132, 0.0171, 0.0210, 0.0084, 0.0118], + device='cuda:0'), in_proj_covar=tensor([0.0058, 0.0053, 0.0058, 0.0045, 0.0057, 0.0057, 0.0053, 0.0053], + device='cuda:0'), out_proj_covar=tensor([5.0688e-05, 4.1737e-05, 4.9567e-05, 3.7081e-05, 4.8668e-05, 5.0569e-05, + 4.1652e-05, 4.3143e-05], device='cuda:0') +2022-12-02 05:29:02,220 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6147, 1.6950, 0.8908, 1.6832, 1.6111, 1.7474, 1.5802, 1.7159], + device='cuda:0'), covar=tensor([0.0355, 0.0421, 0.0484, 0.0405, 0.0260, 0.0378, 0.0316, 0.0452], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0049, 0.0056, 0.0044, 0.0047, 0.0050, 0.0048, 0.0046], + device='cuda:0'), out_proj_covar=tensor([5.0104e-05, 4.6513e-05, 5.3632e-05, 4.1389e-05, 4.4257e-05, 4.6998e-05, + 4.5525e-05, 4.4907e-05], device='cuda:0') +2022-12-02 05:29:28,332 INFO [train.py:876] Epoch 27, batch 300, loss[loss=0.1437, simple_loss=0.2046, pruned_loss=0.04135, over 4812.00 frames. ], tot_loss[loss=0.1155, simple_loss=0.1782, pruned_loss=0.02643, over 739975.78 frames. ], batch size: 42, lr: 6.64e-03, +2022-12-02 05:29:34,203 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 05:29:51,103 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 05:29:56,521 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37567.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:30:17,036 INFO [train.py:876] Epoch 27, batch 350, loss[loss=0.1077, simple_loss=0.159, pruned_loss=0.02816, over 4889.00 frames. ], tot_loss[loss=0.1152, simple_loss=0.1776, pruned_loss=0.02646, over 787962.67 frames. ], batch size: 31, lr: 6.63e-03, +2022-12-02 05:30:30,162 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.139e+01 1.429e+02 1.744e+02 2.211e+02 4.096e+02, threshold=3.489e+02, percent-clipped=1.0 +2022-12-02 05:30:42,947 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37614.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:30:43,792 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37615.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:30:52,693 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37624.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:31:06,184 INFO [train.py:876] Epoch 27, batch 400, loss[loss=0.09639, simple_loss=0.1652, pruned_loss=0.01377, over 4795.00 frames. ], tot_loss[loss=0.1148, simple_loss=0.1775, pruned_loss=0.02608, over 826716.44 frames. ], batch size: 32, lr: 6.63e-03, +2022-12-02 05:31:13,941 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 05:31:20,032 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3263, 1.9075, 2.1065, 2.4219, 1.6761, 2.2550, 2.1894, 2.3470], + device='cuda:0'), covar=tensor([0.0366, 0.0539, 0.0416, 0.0316, 0.0829, 0.0514, 0.0544, 0.0286], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0073, 0.0064, 0.0063, 0.0082, 0.0074, 0.0071, 0.0061], + device='cuda:0'), out_proj_covar=tensor([5.0840e-05, 5.3991e-05, 4.6180e-05, 4.5622e-05, 5.9916e-05, 5.3448e-05, + 5.1974e-05, 4.4188e-05], device='cuda:0') +2022-12-02 05:31:27,466 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37660.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:31:29,238 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37662.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:31:38,843 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:31:54,530 INFO [train.py:876] Epoch 27, batch 450, loss[loss=0.1058, simple_loss=0.1676, pruned_loss=0.02197, over 4914.00 frames. ], tot_loss[loss=0.1153, simple_loss=0.1782, pruned_loss=0.02622, over 857711.14 frames. ], batch size: 31, lr: 6.62e-03, +2022-12-02 05:32:07,168 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.651e+01 1.495e+02 1.763e+02 2.096e+02 4.471e+02, threshold=3.525e+02, percent-clipped=2.0 +2022-12-02 05:32:13,909 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37708.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:32:25,483 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=37720.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:32:43,137 INFO [train.py:876] Epoch 27, batch 500, loss[loss=0.1268, simple_loss=0.2014, pruned_loss=0.02609, over 4851.00 frames. ], tot_loss[loss=0.115, simple_loss=0.1774, pruned_loss=0.02628, over 876877.33 frames. 
], batch size: 40, lr: 6.62e-03, +2022-12-02 05:33:32,547 INFO [train.py:876] Epoch 27, batch 550, loss[loss=0.09791, simple_loss=0.1676, pruned_loss=0.01411, over 4800.00 frames. ], tot_loss[loss=0.1148, simple_loss=0.1772, pruned_loss=0.02618, over 894581.49 frames. ], batch size: 33, lr: 6.61e-03, +2022-12-02 05:33:45,275 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.023e+01 1.406e+02 1.655e+02 2.011e+02 3.872e+02, threshold=3.310e+02, percent-clipped=1.0 +2022-12-02 05:34:13,672 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.80 vs. limit=2.0 +2022-12-02 05:34:21,123 INFO [train.py:876] Epoch 27, batch 600, loss[loss=0.1291, simple_loss=0.1969, pruned_loss=0.03062, over 4891.00 frames. ], tot_loss[loss=0.1146, simple_loss=0.1772, pruned_loss=0.026, over 908171.53 frames. ], batch size: 44, lr: 6.61e-03, +2022-12-02 05:35:08,186 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4983, 3.6053, 3.8055, 3.4986, 3.3705, 3.4070, 3.7460, 3.7234], + device='cuda:0'), covar=tensor([0.1429, 0.0412, 0.0476, 0.0483, 0.0496, 0.0668, 0.0381, 0.0705], + device='cuda:0'), in_proj_covar=tensor([0.0249, 0.0175, 0.0195, 0.0187, 0.0187, 0.0192, 0.0170, 0.0202], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:35:09,808 INFO [train.py:876] Epoch 27, batch 650, loss[loss=0.1073, simple_loss=0.1716, pruned_loss=0.02146, over 4886.00 frames. ], tot_loss[loss=0.1145, simple_loss=0.1771, pruned_loss=0.02595, over 918461.11 frames. ], batch size: 29, lr: 6.61e-03, +2022-12-02 05:35:11,243 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.92 vs. limit=2.0 +2022-12-02 05:35:22,621 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.738e+01 1.409e+02 1.690e+02 2.125e+02 4.723e+02, threshold=3.380e+02, percent-clipped=5.0 +2022-12-02 05:35:45,006 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=37924.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:35:57,831 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4999, 1.1953, 1.5487, 1.0304, 1.1935, 1.0424, 1.4540, 1.4081], + device='cuda:0'), covar=tensor([0.0185, 0.0237, 0.0159, 0.0240, 0.0198, 0.0205, 0.0215, 0.0153], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0035, 0.0034, 0.0037, 0.0039, 0.0036, 0.0039, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.9377e-05, 2.6793e-05, 2.7476e-05, 2.9064e-05, 3.1399e-05, 2.8198e-05, + 3.1242e-05, 2.6386e-05], device='cuda:0') +2022-12-02 05:35:58,502 INFO [train.py:876] Epoch 27, batch 700, loss[loss=0.1295, simple_loss=0.2054, pruned_loss=0.02676, over 4839.00 frames. ], tot_loss[loss=0.1153, simple_loss=0.1782, pruned_loss=0.02625, over 925733.67 frames. 
], batch size: 41, lr: 6.60e-03, +2022-12-02 05:36:12,225 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8496, 3.9889, 4.1918, 3.8602, 3.7340, 3.8488, 4.0547, 4.1992], + device='cuda:0'), covar=tensor([0.1243, 0.0361, 0.0442, 0.0438, 0.0461, 0.0453, 0.0324, 0.0558], + device='cuda:0'), in_proj_covar=tensor([0.0252, 0.0177, 0.0198, 0.0189, 0.0189, 0.0194, 0.0171, 0.0205], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:36:31,072 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=37972.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:36:38,035 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=37979.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:36:45,350 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.44 vs. limit=2.0 +2022-12-02 05:36:46,758 INFO [train.py:876] Epoch 27, batch 750, loss[loss=0.09939, simple_loss=0.1502, pruned_loss=0.02427, over 4707.00 frames. ], tot_loss[loss=0.1153, simple_loss=0.178, pruned_loss=0.02629, over 932075.91 frames. ], batch size: 23, lr: 6.60e-03, +2022-12-02 05:36:58,397 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-38000.pt +2022-12-02 05:37:01,603 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.022e+01 1.497e+02 1.748e+02 2.232e+02 7.773e+02, threshold=3.495e+02, percent-clipped=6.0 +2022-12-02 05:37:07,944 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.70 vs. limit=2.0 +2022-12-02 05:37:19,643 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=38020.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:37:24,596 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7446, 2.9578, 3.1982, 3.2038, 2.2210, 3.1595, 3.3294, 2.3420], + device='cuda:0'), covar=tensor([0.4404, 0.1103, 0.0980, 0.0770, 0.1179, 0.1237, 0.0779, 0.1904], + device='cuda:0'), in_proj_covar=tensor([0.0184, 0.0123, 0.0153, 0.0135, 0.0135, 0.0123, 0.0130, 0.0129], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 05:37:36,804 INFO [train.py:876] Epoch 27, batch 800, loss[loss=0.08396, simple_loss=0.1401, pruned_loss=0.01391, over 4784.00 frames. ], tot_loss[loss=0.1169, simple_loss=0.1796, pruned_loss=0.02707, over 935506.67 frames. ], batch size: 26, lr: 6.59e-03, +2022-12-02 05:37:38,903 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=38040.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:38:04,853 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=38068.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:38:23,963 INFO [train.py:876] Epoch 27, batch 850, loss[loss=0.1228, simple_loss=0.1829, pruned_loss=0.03137, over 4790.00 frames. ], tot_loss[loss=0.1179, simple_loss=0.1806, pruned_loss=0.02755, over 939461.90 frames. ], batch size: 32, lr: 6.59e-03, +2022-12-02 05:38:36,326 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.005e+02 1.486e+02 1.756e+02 2.146e+02 6.569e+02, threshold=3.513e+02, percent-clipped=3.0 +2022-12-02 05:39:12,764 INFO [train.py:876] Epoch 27, batch 900, loss[loss=0.1174, simple_loss=0.1903, pruned_loss=0.02226, over 4782.00 frames. ], tot_loss[loss=0.1162, simple_loss=0.179, pruned_loss=0.02667, over 943178.70 frames. 
], batch size: 58, lr: 6.58e-03, +2022-12-02 05:40:00,112 INFO [train.py:876] Epoch 27, batch 950, loss[loss=0.08471, simple_loss=0.1515, pruned_loss=0.008937, over 4761.00 frames. ], tot_loss[loss=0.1166, simple_loss=0.1798, pruned_loss=0.02672, over 946643.40 frames. ], batch size: 27, lr: 6.58e-03, +2022-12-02 05:40:13,141 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.022e+01 1.460e+02 1.927e+02 2.228e+02 7.788e+02, threshold=3.854e+02, percent-clipped=5.0 +2022-12-02 05:40:30,058 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0761, 1.8014, 2.2324, 1.8045, 2.0249, 2.1810, 1.6716, 2.0554], + device='cuda:0'), covar=tensor([0.0790, 0.1026, 0.0815, 0.0812, 0.1371, 0.0746, 0.0884, 0.0558], + device='cuda:0'), in_proj_covar=tensor([0.0069, 0.0073, 0.0072, 0.0077, 0.0068, 0.0064, 0.0064, 0.0070], + device='cuda:0'), out_proj_covar=tensor([6.3346e-05, 6.6056e-05, 6.6059e-05, 6.9481e-05, 6.3078e-05, 5.9228e-05, + 5.9657e-05, 6.4392e-05], device='cuda:0') +2022-12-02 05:40:49,060 INFO [train.py:876] Epoch 27, batch 1000, loss[loss=0.08847, simple_loss=0.1406, pruned_loss=0.01815, over 4700.00 frames. ], tot_loss[loss=0.1154, simple_loss=0.1785, pruned_loss=0.02614, over 948365.61 frames. ], batch size: 23, lr: 6.58e-03, +2022-12-02 05:41:31,206 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0628, 3.5386, 3.8271, 3.7644, 2.7702, 3.8509, 4.0920, 2.8650], + device='cuda:0'), covar=tensor([0.4429, 0.0706, 0.0929, 0.0486, 0.1271, 0.0768, 0.0465, 0.1059], + device='cuda:0'), in_proj_covar=tensor([0.0185, 0.0124, 0.0155, 0.0136, 0.0136, 0.0123, 0.0130, 0.0130], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 05:41:37,881 INFO [train.py:876] Epoch 27, batch 1050, loss[loss=0.1305, simple_loss=0.1932, pruned_loss=0.03389, over 4865.00 frames. ], tot_loss[loss=0.1148, simple_loss=0.1776, pruned_loss=0.02605, over 947674.67 frames. 
], batch size: 39, lr: 6.57e-03, +2022-12-02 05:41:50,305 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.769e+01 1.401e+02 1.665e+02 2.054e+02 3.840e+02, threshold=3.330e+02, percent-clipped=0.0 +2022-12-02 05:42:02,482 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7508, 1.4314, 1.8799, 1.2607, 1.4068, 1.4889, 1.6343, 1.4193], + device='cuda:0'), covar=tensor([0.0222, 0.0231, 0.0200, 0.0312, 0.0282, 0.0216, 0.0253, 0.0237], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0034, 0.0034, 0.0037, 0.0039, 0.0036, 0.0038, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.9439e-05, 2.6012e-05, 2.7562e-05, 2.8845e-05, 3.1268e-05, 2.7875e-05, + 3.0655e-05, 2.6315e-05], device='cuda:0') +2022-12-02 05:42:10,101 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4754, 2.3298, 2.7196, 2.2484, 2.4096, 2.7281, 2.6975, 2.6277], + device='cuda:0'), covar=tensor([0.1269, 0.0766, 0.1792, 0.0831, 0.1103, 0.1053, 0.0776, 0.0594], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0074, 0.0072, 0.0078, 0.0068, 0.0065, 0.0064, 0.0071], + device='cuda:0'), out_proj_covar=tensor([6.3087e-05, 6.6739e-05, 6.6437e-05, 6.9999e-05, 6.3073e-05, 5.9926e-05, + 6.0133e-05, 6.5050e-05], device='cuda:0') +2022-12-02 05:42:23,671 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=38335.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:42:24,854 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2310, 2.9594, 2.7303, 3.0186, 2.6022, 2.6827, 1.7236, 2.9060], + device='cuda:0'), covar=tensor([0.0547, 0.1074, 0.0996, 0.0649, 0.1027, 0.1557, 0.1353, 0.0648], + device='cuda:0'), in_proj_covar=tensor([0.0086, 0.0091, 0.0109, 0.0091, 0.0113, 0.0100, 0.0098, 0.0097], + device='cuda:0'), out_proj_covar=tensor([9.1216e-05, 9.5665e-05, 1.1028e-04, 9.4157e-05, 1.1503e-04, 1.0452e-04, + 1.0123e-04, 1.0034e-04], device='cuda:0') +2022-12-02 05:42:26,537 INFO [train.py:876] Epoch 27, batch 1100, loss[loss=0.1139, simple_loss=0.1854, pruned_loss=0.02118, over 4885.00 frames. ], tot_loss[loss=0.1144, simple_loss=0.1773, pruned_loss=0.0257, over 950101.10 frames. 
], batch size: 44, lr: 6.57e-03, +2022-12-02 05:42:36,651 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1365, 2.0438, 2.3316, 1.9619, 2.1383, 2.3141, 2.2415, 2.3064], + device='cuda:0'), covar=tensor([0.1039, 0.0655, 0.1352, 0.0799, 0.0776, 0.1366, 0.0872, 0.0756], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0074, 0.0072, 0.0078, 0.0068, 0.0065, 0.0065, 0.0071], + device='cuda:0'), out_proj_covar=tensor([6.3230e-05, 6.6954e-05, 6.6708e-05, 7.0122e-05, 6.3304e-05, 6.0107e-05, + 6.0367e-05, 6.5170e-05], device='cuda:0') +2022-12-02 05:42:37,604 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4746, 1.3525, 1.4316, 0.9131, 1.2144, 1.1066, 1.4619, 1.5485], + device='cuda:0'), covar=tensor([0.0208, 0.0263, 0.0224, 0.0322, 0.0225, 0.0282, 0.0237, 0.0175], + device='cuda:0'), in_proj_covar=tensor([0.0037, 0.0034, 0.0034, 0.0036, 0.0039, 0.0036, 0.0038, 0.0034], + device='cuda:0'), out_proj_covar=tensor([2.9459e-05, 2.5999e-05, 2.7530e-05, 2.8771e-05, 3.1301e-05, 2.7798e-05, + 3.0655e-05, 2.6393e-05], device='cuda:0') +2022-12-02 05:42:39,490 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9415, 1.8085, 2.1013, 1.6209, 2.0044, 2.1063, 2.0328, 2.2025], + device='cuda:0'), covar=tensor([0.0861, 0.1078, 0.0845, 0.0970, 0.0759, 0.0867, 0.0814, 0.0750], + device='cuda:0'), in_proj_covar=tensor([0.0068, 0.0074, 0.0072, 0.0078, 0.0068, 0.0065, 0.0065, 0.0071], + device='cuda:0'), out_proj_covar=tensor([6.3166e-05, 6.6923e-05, 6.6653e-05, 7.0112e-05, 6.3282e-05, 6.0004e-05, + 6.0334e-05, 6.5119e-05], device='cuda:0') +2022-12-02 05:42:48,013 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=38360.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:42:56,503 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.90 vs. limit=2.0 +2022-12-02 05:43:14,481 INFO [train.py:876] Epoch 27, batch 1150, loss[loss=0.103, simple_loss=0.1762, pruned_loss=0.01494, over 4877.00 frames. ], tot_loss[loss=0.1151, simple_loss=0.1778, pruned_loss=0.02617, over 950176.22 frames. ], batch size: 37, lr: 6.56e-03, +2022-12-02 05:43:25,138 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. 
limit=2.0 +2022-12-02 05:43:26,791 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.570e+01 1.556e+02 1.875e+02 2.375e+02 6.264e+02, threshold=3.750e+02, percent-clipped=8.0 +2022-12-02 05:43:34,052 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6060, 1.6086, 0.8272, 1.7165, 1.7023, 1.8411, 1.5242, 1.6243], + device='cuda:0'), covar=tensor([0.0377, 0.0593, 0.0447, 0.0341, 0.0325, 0.0416, 0.0392, 0.0780], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0051, 0.0057, 0.0044, 0.0047, 0.0050, 0.0049, 0.0046], + device='cuda:0'), out_proj_covar=tensor([5.0337e-05, 4.8228e-05, 5.5111e-05, 4.1673e-05, 4.4147e-05, 4.7571e-05, + 4.6886e-05, 4.5048e-05], device='cuda:0') +2022-12-02 05:43:43,976 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6156, 1.5972, 0.8268, 1.4471, 1.5869, 1.4822, 1.5919, 1.6091], + device='cuda:0'), covar=tensor([0.0353, 0.0392, 0.0421, 0.0323, 0.0281, 0.0429, 0.0364, 0.0402], + device='cuda:0'), in_proj_covar=tensor([0.0053, 0.0051, 0.0057, 0.0044, 0.0047, 0.0050, 0.0049, 0.0046], + device='cuda:0'), out_proj_covar=tensor([5.0315e-05, 4.8207e-05, 5.5106e-05, 4.1644e-05, 4.4182e-05, 4.7530e-05, + 4.6881e-05, 4.5109e-05], device='cuda:0') +2022-12-02 05:43:46,839 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=38421.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:43:50,320 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 05:44:01,729 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.32 vs. limit=2.0 +2022-12-02 05:44:02,922 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.99 vs. limit=2.0 +2022-12-02 05:44:03,317 INFO [train.py:876] Epoch 27, batch 1200, loss[loss=0.123, simple_loss=0.1911, pruned_loss=0.02749, over 4852.00 frames. ], tot_loss[loss=0.1151, simple_loss=0.1779, pruned_loss=0.02616, over 952228.04 frames. ], batch size: 35, lr: 6.56e-03, +2022-12-02 05:44:10,159 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8480, 3.4859, 3.5416, 3.7097, 3.1410, 2.6709, 4.2304, 1.8517], + device='cuda:0'), covar=tensor([0.0598, 0.0606, 0.0483, 0.0450, 0.1386, 0.2847, 0.0234, 0.3676], + device='cuda:0'), in_proj_covar=tensor([0.0121, 0.0104, 0.0097, 0.0137, 0.0146, 0.0169, 0.0087, 0.0183], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 05:44:36,124 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0474, 3.5561, 3.5323, 3.3764, 3.6508, 3.4091, 3.4144, 3.7823], + device='cuda:0'), covar=tensor([0.2076, 0.0752, 0.0860, 0.0651, 0.0666, 0.0650, 0.0773, 0.0653], + device='cuda:0'), in_proj_covar=tensor([0.0192, 0.0175, 0.0200, 0.0166, 0.0190, 0.0178, 0.0178, 0.0186], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:44:51,403 INFO [train.py:876] Epoch 27, batch 1250, loss[loss=0.08685, simple_loss=0.1384, pruned_loss=0.01767, over 4633.00 frames. ], tot_loss[loss=0.1152, simple_loss=0.178, pruned_loss=0.02617, over 951288.96 frames. ], batch size: 21, lr: 6.56e-03, +2022-12-02 05:45:04,146 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.841e+01 1.308e+02 1.636e+02 2.057e+02 6.339e+02, threshold=3.273e+02, percent-clipped=3.0 +2022-12-02 05:45:10,642 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.77 vs. 
limit=2.0 +2022-12-02 05:45:17,437 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.95 vs. limit=2.0 +2022-12-02 05:45:40,329 INFO [train.py:876] Epoch 27, batch 1300, loss[loss=0.1438, simple_loss=0.2173, pruned_loss=0.03517, over 4814.00 frames. ], tot_loss[loss=0.1149, simple_loss=0.1779, pruned_loss=0.02599, over 946947.14 frames. ], batch size: 54, lr: 6.55e-03, +2022-12-02 05:45:56,876 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0119, 3.5724, 3.5409, 3.3193, 3.6522, 3.5580, 3.5162, 3.7891], + device='cuda:0'), covar=tensor([0.2194, 0.0757, 0.1002, 0.0755, 0.0730, 0.0673, 0.0776, 0.0634], + device='cuda:0'), in_proj_covar=tensor([0.0195, 0.0177, 0.0203, 0.0168, 0.0192, 0.0179, 0.0179, 0.0188], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:46:29,327 INFO [train.py:876] Epoch 27, batch 1350, loss[loss=0.08785, simple_loss=0.1481, pruned_loss=0.01377, over 4740.00 frames. ], tot_loss[loss=0.1144, simple_loss=0.1775, pruned_loss=0.02565, over 947270.08 frames. ], batch size: 27, lr: 6.55e-03, +2022-12-02 05:46:42,092 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 5.796e+01 1.388e+02 1.698e+02 1.938e+02 4.101e+02, threshold=3.396e+02, percent-clipped=2.0 +2022-12-02 05:47:09,153 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.44 vs. limit=5.0 +2022-12-02 05:47:15,632 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=38635.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:47:18,285 INFO [train.py:876] Epoch 27, batch 1400, loss[loss=0.1365, simple_loss=0.2106, pruned_loss=0.03119, over 4838.00 frames. ], tot_loss[loss=0.114, simple_loss=0.1776, pruned_loss=0.02522, over 945794.30 frames. ], batch size: 49, lr: 6.54e-03, +2022-12-02 05:47:48,279 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-27.pt +2022-12-02 05:47:57,809 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 05:47:58,374 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 05:47:58,669 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:47:58,700 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 05:47:59,796 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 05:48:00,113 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 05:48:02,080 INFO [train.py:876] Epoch 28, batch 0, loss[loss=0.1116, simple_loss=0.1712, pruned_loss=0.026, over 4928.00 frames. ], tot_loss[loss=0.1116, simple_loss=0.1712, pruned_loss=0.026, over 4928.00 frames. 
], batch size: 32, lr: 6.42e-03, +2022-12-02 05:48:02,081 INFO [train.py:901] Computing validation loss +2022-12-02 05:48:17,714 INFO [train.py:910] Epoch 28, validation: loss=0.2374, simple_loss=0.2789, pruned_loss=0.09791, over 253132.00 frames. +2022-12-02 05:48:17,714 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 05:48:30,416 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=38683.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:48:33,104 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7851, 4.5209, 4.1714, 4.6759, 3.9759, 3.8365, 4.5054, 4.3081], + device='cuda:0'), covar=tensor([0.0250, 0.0086, 0.0168, 0.0139, 0.0173, 0.0273, 0.0101, 0.0145], + device='cuda:0'), in_proj_covar=tensor([0.0060, 0.0056, 0.0060, 0.0047, 0.0057, 0.0059, 0.0055, 0.0055], + device='cuda:0'), out_proj_covar=tensor([5.2879e-05, 4.4251e-05, 5.1947e-05, 3.8750e-05, 4.8418e-05, 5.2079e-05, + 4.2563e-05, 4.4066e-05], device='cuda:0') +2022-12-02 05:48:47,484 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.074e+02 1.534e+02 1.869e+02 2.385e+02 9.953e+02, threshold=3.738e+02, percent-clipped=7.0 +2022-12-02 05:49:02,113 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=38716.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:49:05,999 INFO [train.py:876] Epoch 28, batch 50, loss[loss=0.09919, simple_loss=0.162, pruned_loss=0.01819, over 4923.00 frames. ], tot_loss[loss=0.1118, simple_loss=0.1739, pruned_loss=0.0249, over 216518.19 frames. ], batch size: 32, lr: 6.42e-03, +2022-12-02 05:49:13,888 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4208, 3.0572, 3.4152, 3.1034, 2.8301, 2.2713, 3.6252, 1.8590], + device='cuda:0'), covar=tensor([0.0496, 0.0450, 0.0548, 0.0743, 0.1328, 0.3046, 0.0292, 0.3035], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0105, 0.0099, 0.0136, 0.0145, 0.0167, 0.0087, 0.0184], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 05:49:28,396 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 05:49:54,714 INFO [train.py:876] Epoch 28, batch 100, loss[loss=0.124, simple_loss=0.181, pruned_loss=0.03352, over 4861.00 frames. ], tot_loss[loss=0.1123, simple_loss=0.1745, pruned_loss=0.02501, over 380057.81 frames. ], batch size: 36, lr: 6.41e-03, +2022-12-02 05:50:08,874 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:50:24,490 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 6.097e+01 1.391e+02 1.661e+02 2.001e+02 3.793e+02, threshold=3.323e+02, percent-clipped=1.0 +2022-12-02 05:50:33,407 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 05:50:43,285 INFO [train.py:876] Epoch 28, batch 150, loss[loss=0.1245, simple_loss=0.2022, pruned_loss=0.02344, over 4670.00 frames. ], tot_loss[loss=0.1139, simple_loss=0.1766, pruned_loss=0.02559, over 506590.01 frames. ], batch size: 63, lr: 6.41e-03, +2022-12-02 05:50:46,297 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=38823.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:51:26,846 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.38 vs. 
limit=5.0 +2022-12-02 05:51:32,639 INFO [train.py:876] Epoch 28, batch 200, loss[loss=0.1203, simple_loss=0.1791, pruned_loss=0.03078, over 4819.00 frames. ], tot_loss[loss=0.1132, simple_loss=0.176, pruned_loss=0.02518, over 606465.99 frames. ], batch size: 45, lr: 6.40e-03, +2022-12-02 05:51:46,121 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=38884.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:52:02,322 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.275e+01 1.405e+02 1.752e+02 2.259e+02 3.954e+02, threshold=3.504e+02, percent-clipped=4.0 +2022-12-02 05:52:18,911 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 05:52:20,991 INFO [train.py:876] Epoch 28, batch 250, loss[loss=0.1191, simple_loss=0.1876, pruned_loss=0.02524, over 4844.00 frames. ], tot_loss[loss=0.1133, simple_loss=0.1763, pruned_loss=0.02515, over 684559.69 frames. ], batch size: 47, lr: 6.40e-03, +2022-12-02 05:52:53,376 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.84 vs. limit=5.0 +2022-12-02 05:53:09,652 INFO [train.py:876] Epoch 28, batch 300, loss[loss=0.1386, simple_loss=0.2082, pruned_loss=0.03454, over 4793.00 frames. ], tot_loss[loss=0.1137, simple_loss=0.1766, pruned_loss=0.02542, over 743492.87 frames. ], batch size: 58, lr: 6.40e-03, +2022-12-02 05:53:17,742 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92 +2022-12-02 05:53:25,208 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 05:53:40,315 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.201e+01 1.384e+02 1.630e+02 1.923e+02 4.819e+02, threshold=3.260e+02, percent-clipped=3.0 +2022-12-02 05:53:52,393 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6105, 3.4021, 3.4621, 3.8057, 2.9399, 2.6039, 3.9565, 1.6923], + device='cuda:0'), covar=tensor([0.0648, 0.0611, 0.0561, 0.0391, 0.1574, 0.2840, 0.0259, 0.4006], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0107, 0.0101, 0.0140, 0.0148, 0.0171, 0.0089, 0.0189], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 05:53:55,386 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=39016.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:53:58,997 INFO [train.py:876] Epoch 28, batch 350, loss[loss=0.1092, simple_loss=0.1705, pruned_loss=0.02395, over 4797.00 frames. ], tot_loss[loss=0.1132, simple_loss=0.1758, pruned_loss=0.02531, over 791596.18 frames. ], batch size: 32, lr: 6.39e-03, +2022-12-02 05:54:41,539 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=39064.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:54:47,449 INFO [train.py:876] Epoch 28, batch 400, loss[loss=0.07937, simple_loss=0.1366, pruned_loss=0.01108, over 4815.00 frames. ], tot_loss[loss=0.1133, simple_loss=0.1762, pruned_loss=0.02524, over 828494.24 frames. ], batch size: 25, lr: 6.39e-03, +2022-12-02 05:54:55,247 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775 +2022-12-02 05:55:11,356 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.03 vs. 
limit=2.0 +2022-12-02 05:55:17,653 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.042e+01 1.374e+02 1.689e+02 1.934e+02 6.391e+02, threshold=3.378e+02, percent-clipped=7.0 +2022-12-02 05:55:18,001 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9061, 3.1762, 2.9442, 3.0377, 2.7971, 3.1535, 3.0439, 3.4998], + device='cuda:0'), covar=tensor([0.1692, 0.0604, 0.0881, 0.0673, 0.1878, 0.0653, 0.0703, 0.0405], + device='cuda:0'), in_proj_covar=tensor([0.0193, 0.0176, 0.0200, 0.0167, 0.0192, 0.0178, 0.0178, 0.0185], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 05:55:20,471 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 05:55:31,561 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5087, 3.0749, 3.1870, 3.2774, 2.4954, 2.2779, 3.5384, 1.6564], + device='cuda:0'), covar=tensor([0.0576, 0.0509, 0.0546, 0.0678, 0.1926, 0.3335, 0.0312, 0.3610], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0106, 0.0101, 0.0140, 0.0148, 0.0171, 0.0089, 0.0188], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 05:55:36,040 INFO [train.py:876] Epoch 28, batch 450, loss[loss=0.1272, simple_loss=0.1903, pruned_loss=0.03203, over 4849.00 frames. ], tot_loss[loss=0.1132, simple_loss=0.1766, pruned_loss=0.02487, over 855446.42 frames. ], batch size: 49, lr: 6.38e-03, +2022-12-02 05:56:25,036 INFO [train.py:876] Epoch 28, batch 500, loss[loss=0.1229, simple_loss=0.1825, pruned_loss=0.03166, over 4884.00 frames. ], tot_loss[loss=0.1135, simple_loss=0.1767, pruned_loss=0.02518, over 879297.01 frames. ], batch size: 37, lr: 6.38e-03, +2022-12-02 05:56:33,107 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4786, 2.0968, 2.3849, 2.4101, 1.9587, 2.4778, 2.1981, 2.5292], + device='cuda:0'), covar=tensor([0.0477, 0.0636, 0.0464, 0.0405, 0.0864, 0.0640, 0.0690, 0.0367], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0074, 0.0064, 0.0063, 0.0081, 0.0073, 0.0072, 0.0062], + device='cuda:0'), out_proj_covar=tensor([5.1440e-05, 5.4178e-05, 4.6507e-05, 4.5849e-05, 5.9224e-05, 5.3037e-05, + 5.3076e-05, 4.5129e-05], device='cuda:0') +2022-12-02 05:56:33,907 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=39179.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:56:39,780 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39185.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:56:54,825 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.022e+02 1.494e+02 1.767e+02 2.153e+02 3.814e+02, threshold=3.534e+02, percent-clipped=3.0 +2022-12-02 05:57:06,589 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3512, 4.3899, 4.8816, 4.3663, 4.6155, 4.5731, 4.2226, 4.1347], + device='cuda:0'), covar=tensor([0.1071, 0.0601, 0.0626, 0.0486, 0.0835, 0.0591, 0.1436, 0.0612], + device='cuda:0'), in_proj_covar=tensor([0.0190, 0.0138, 0.0198, 0.0162, 0.0143, 0.0178, 0.0206, 0.0144], + device='cuda:0'), out_proj_covar=tensor([1.3510e-04, 8.8205e-05, 1.5035e-04, 1.1060e-04, 1.0463e-04, 1.2707e-04, + 1.5447e-04, 9.7082e-05], device='cuda:0') +2022-12-02 05:57:13,434 INFO [train.py:876] Epoch 28, batch 550, loss[loss=0.1161, simple_loss=0.1756, pruned_loss=0.02826, over 4905.00 frames. ], tot_loss[loss=0.1143, simple_loss=0.177, pruned_loss=0.02575, over 891645.98 frames. 
], batch size: 30, lr: 6.38e-03, +2022-12-02 05:57:39,182 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39246.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:57:56,722 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.81 vs. limit=2.0 +2022-12-02 05:58:02,014 INFO [train.py:876] Epoch 28, batch 600, loss[loss=0.1425, simple_loss=0.2041, pruned_loss=0.04046, over 4834.00 frames. ], tot_loss[loss=0.1133, simple_loss=0.1757, pruned_loss=0.02545, over 900341.62 frames. ], batch size: 47, lr: 6.37e-03, +2022-12-02 05:58:07,127 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2632, 2.6409, 3.0093, 2.3352, 2.9261, 2.6087, 2.7329, 3.2215], + device='cuda:0'), covar=tensor([0.0209, 0.1091, 0.0573, 0.1660, 0.0331, 0.0487, 0.1668, 0.0531], + device='cuda:0'), in_proj_covar=tensor([0.0077, 0.0111, 0.0092, 0.0126, 0.0086, 0.0092, 0.0131, 0.0109], + device='cuda:0'), out_proj_covar=tensor([9.4976e-05, 1.3155e-04, 1.1295e-04, 1.4678e-04, 9.9740e-05, 1.1588e-04, + 1.5148e-04, 1.2358e-04], device='cuda:0') +2022-12-02 05:58:16,140 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39284.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:58:32,288 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.618e+01 1.416e+02 1.696e+02 2.102e+02 5.797e+02, threshold=3.393e+02, percent-clipped=6.0 +2022-12-02 05:58:50,858 INFO [train.py:876] Epoch 28, batch 650, loss[loss=0.1029, simple_loss=0.1603, pruned_loss=0.02268, over 4739.00 frames. ], tot_loss[loss=0.1131, simple_loss=0.1755, pruned_loss=0.02531, over 913398.55 frames. ], batch size: 27, lr: 6.37e-03, +2022-12-02 05:59:06,818 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.44 vs. limit=2.0 +2022-12-02 05:59:15,224 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39345.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 05:59:29,897 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2647, 1.9217, 2.1639, 2.0894, 2.0205, 2.3036, 2.2684, 2.2091], + device='cuda:0'), covar=tensor([0.0433, 0.0494, 0.0490, 0.0505, 0.0626, 0.0448, 0.0669, 0.0446], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0074, 0.0064, 0.0063, 0.0081, 0.0072, 0.0073, 0.0062], + device='cuda:0'), out_proj_covar=tensor([5.0814e-05, 5.4170e-05, 4.6280e-05, 4.5653e-05, 5.9277e-05, 5.2501e-05, + 5.3132e-05, 4.5065e-05], device='cuda:0') +2022-12-02 05:59:39,322 INFO [train.py:876] Epoch 28, batch 700, loss[loss=0.117, simple_loss=0.1805, pruned_loss=0.02681, over 4798.00 frames. ], tot_loss[loss=0.1126, simple_loss=0.175, pruned_loss=0.02507, over 921503.36 frames. ], batch size: 32, lr: 6.36e-03, +2022-12-02 06:00:10,120 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.432e+01 1.430e+02 1.704e+02 2.074e+02 4.365e+02, threshold=3.408e+02, percent-clipped=4.0 +2022-12-02 06:00:28,665 INFO [train.py:876] Epoch 28, batch 750, loss[loss=0.1153, simple_loss=0.1856, pruned_loss=0.02246, over 4863.00 frames. ], tot_loss[loss=0.1118, simple_loss=0.1735, pruned_loss=0.02503, over 929151.65 frames. ], batch size: 36, lr: 6.36e-03, +2022-12-02 06:00:31,334 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.11 vs. limit=2.0 +2022-12-02 06:01:10,651 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39463.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:01:17,228 INFO [train.py:876] Epoch 28, batch 800, loss[loss=0.1304, simple_loss=0.2098, pruned_loss=0.02553, over 4818.00 frames. 
], tot_loss[loss=0.1128, simple_loss=0.1748, pruned_loss=0.02546, over 934315.22 frames. ], batch size: 54, lr: 6.36e-03, +2022-12-02 06:01:26,594 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=39479.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:01:47,395 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.742e+01 1.520e+02 1.833e+02 2.439e+02 5.902e+02, threshold=3.666e+02, percent-clipped=7.0 +2022-12-02 06:02:02,368 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8270, 4.2203, 4.3729, 3.9369, 3.6916, 3.9659, 4.1630, 4.2332], + device='cuda:0'), covar=tensor([0.1234, 0.0320, 0.0442, 0.0399, 0.0438, 0.0414, 0.0365, 0.0500], + device='cuda:0'), in_proj_covar=tensor([0.0253, 0.0179, 0.0198, 0.0192, 0.0192, 0.0199, 0.0175, 0.0211], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:02:06,062 INFO [train.py:876] Epoch 28, batch 850, loss[loss=0.1022, simple_loss=0.1718, pruned_loss=0.01628, over 4828.00 frames. ], tot_loss[loss=0.1123, simple_loss=0.1746, pruned_loss=0.02493, over 938771.32 frames. ], batch size: 34, lr: 6.35e-03, +2022-12-02 06:02:10,357 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39524.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:02:13,204 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=39527.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:02:15,344 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0175, 2.3781, 2.8407, 2.0297, 2.6599, 2.3409, 2.3884, 2.7775], + device='cuda:0'), covar=tensor([0.0283, 0.1149, 0.0552, 0.1810, 0.0325, 0.0556, 0.1966, 0.0625], + device='cuda:0'), in_proj_covar=tensor([0.0079, 0.0114, 0.0093, 0.0128, 0.0087, 0.0094, 0.0134, 0.0111], + device='cuda:0'), out_proj_covar=tensor([9.7727e-05, 1.3422e-04, 1.1409e-04, 1.4978e-04, 1.0175e-04, 1.1858e-04, + 1.5481e-04, 1.2595e-04], device='cuda:0') +2022-12-02 06:02:16,393 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3184, 2.7754, 3.2975, 2.7811, 3.3721, 2.3884, 3.1857, 3.4637], + device='cuda:0'), covar=tensor([0.0533, 0.0710, 0.0464, 0.0628, 0.0421, 0.0968, 0.0376, 0.0588], + device='cuda:0'), in_proj_covar=tensor([0.0124, 0.0133, 0.0120, 0.0123, 0.0115, 0.0153, 0.0101, 0.0120], + device='cuda:0'), out_proj_covar=tensor([1.0592e-04, 1.1372e-04, 1.0233e-04, 1.0399e-04, 9.7872e-05, 1.3064e-04, + 8.8946e-05, 1.0420e-04], device='cuda:0') +2022-12-02 06:02:26,876 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=39541.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:02:54,903 INFO [train.py:876] Epoch 28, batch 900, loss[loss=0.12, simple_loss=0.1792, pruned_loss=0.03042, over 4853.00 frames. ], tot_loss[loss=0.1134, simple_loss=0.1763, pruned_loss=0.02523, over 941411.37 frames. ], batch size: 35, lr: 6.35e-03, +2022-12-02 06:03:24,782 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.181e+01 1.441e+02 1.760e+02 2.183e+02 5.419e+02, threshold=3.519e+02, percent-clipped=2.0 +2022-12-02 06:03:43,394 INFO [train.py:876] Epoch 28, batch 950, loss[loss=0.1198, simple_loss=0.1856, pruned_loss=0.02701, over 4832.00 frames. ], tot_loss[loss=0.1129, simple_loss=0.1763, pruned_loss=0.02477, over 944134.37 frames. 
], batch size: 34, lr: 6.34e-03, +2022-12-02 06:04:01,811 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39639.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 06:04:02,724 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=39640.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:04:31,271 INFO [train.py:876] Epoch 28, batch 1000, loss[loss=0.1226, simple_loss=0.1868, pruned_loss=0.02918, over 4896.00 frames. ], tot_loss[loss=0.1135, simple_loss=0.1766, pruned_loss=0.02524, over 944323.42 frames. ], batch size: 38, lr: 6.34e-03, +2022-12-02 06:04:33,514 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5580, 3.4218, 3.6750, 3.3950, 3.1072, 3.4345, 3.5295, 3.5261], + device='cuda:0'), covar=tensor([0.1229, 0.0613, 0.0581, 0.0656, 0.0745, 0.0811, 0.0516, 0.0780], + device='cuda:0'), in_proj_covar=tensor([0.0250, 0.0178, 0.0198, 0.0191, 0.0192, 0.0197, 0.0174, 0.0210], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:04:45,069 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7837, 2.6768, 2.5556, 2.8835, 2.4612, 2.5855, 1.8566, 3.0454], + device='cuda:0'), covar=tensor([0.0856, 0.1159, 0.1142, 0.0727, 0.1004, 0.1459, 0.1340, 0.0540], + device='cuda:0'), in_proj_covar=tensor([0.0090, 0.0095, 0.0113, 0.0094, 0.0118, 0.0102, 0.0100, 0.0099], + device='cuda:0'), out_proj_covar=tensor([9.5343e-05, 9.9546e-05, 1.1489e-04, 9.7457e-05, 1.1988e-04, 1.0695e-04, + 1.0387e-04, 1.0268e-04], device='cuda:0') +2022-12-02 06:05:00,784 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39700.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 06:05:01,519 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.042e+02 1.462e+02 1.797e+02 2.175e+02 5.147e+02, threshold=3.593e+02, percent-clipped=2.0 +2022-12-02 06:05:15,550 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5058, 3.1882, 3.2650, 3.4133, 2.5052, 2.3146, 3.6447, 1.7865], + device='cuda:0'), covar=tensor([0.0637, 0.0440, 0.0665, 0.0629, 0.1805, 0.3137, 0.0249, 0.3414], + device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0104, 0.0101, 0.0140, 0.0148, 0.0172, 0.0090, 0.0186], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 06:05:20,132 INFO [train.py:876] Epoch 28, batch 1050, loss[loss=0.1092, simple_loss=0.1685, pruned_loss=0.0249, over 4815.00 frames. ], tot_loss[loss=0.1133, simple_loss=0.1764, pruned_loss=0.02508, over 944334.92 frames. ], batch size: 32, lr: 6.34e-03, +2022-12-02 06:05:20,500 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8916, 3.1359, 3.4138, 3.8213, 3.4722, 3.6997, 3.4943, 3.5485], + device='cuda:0'), covar=tensor([0.0223, 0.0355, 0.0372, 0.0254, 0.0199, 0.0256, 0.0286, 0.0314], + device='cuda:0'), in_proj_covar=tensor([0.0137, 0.0128, 0.0145, 0.0133, 0.0108, 0.0138, 0.0138, 0.0152], + device='cuda:0'), out_proj_covar=tensor([9.0384e-05, 8.6636e-05, 9.6449e-05, 8.8272e-05, 7.0205e-05, 9.1494e-05, + 9.1491e-05, 1.0320e-04], device='cuda:0') +2022-12-02 06:06:05,813 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39767.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:06:08,551 INFO [train.py:876] Epoch 28, batch 1100, loss[loss=0.1418, simple_loss=0.2004, pruned_loss=0.04164, over 4894.00 frames. ], tot_loss[loss=0.1133, simple_loss=0.1768, pruned_loss=0.02493, over 948112.04 frames. 
], batch size: 31, lr: 6.33e-03, +2022-12-02 06:06:36,713 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9652, 3.6170, 4.0029, 3.6895, 4.0640, 3.1673, 3.7979, 3.9817], + device='cuda:0'), covar=tensor([0.0481, 0.0453, 0.0313, 0.0355, 0.0353, 0.0728, 0.0329, 0.0509], + device='cuda:0'), in_proj_covar=tensor([0.0127, 0.0135, 0.0121, 0.0124, 0.0117, 0.0155, 0.0101, 0.0121], + device='cuda:0'), out_proj_covar=tensor([1.0793e-04, 1.1551e-04, 1.0346e-04, 1.0540e-04, 9.9341e-05, 1.3231e-04, + 8.9371e-05, 1.0545e-04], device='cuda:0') +2022-12-02 06:06:38,527 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.935e+01 1.432e+02 1.737e+02 2.186e+02 4.004e+02, threshold=3.473e+02, percent-clipped=1.0 +2022-12-02 06:06:52,264 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=39815.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:06:56,100 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=39819.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:06:56,878 INFO [train.py:876] Epoch 28, batch 1150, loss[loss=0.1458, simple_loss=0.2031, pruned_loss=0.04426, over 4828.00 frames. ], tot_loss[loss=0.1135, simple_loss=0.1768, pruned_loss=0.02505, over 950598.14 frames. ], batch size: 45, lr: 6.33e-03, +2022-12-02 06:07:01,387 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.94 vs. limit=5.0 +2022-12-02 06:07:04,947 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39828.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:07:06,905 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5036, 3.4458, 3.7465, 3.3914, 3.0641, 3.3418, 3.6563, 3.6168], + device='cuda:0'), covar=tensor([0.0812, 0.0431, 0.0472, 0.0484, 0.0571, 0.0507, 0.0334, 0.0643], + device='cuda:0'), in_proj_covar=tensor([0.0249, 0.0177, 0.0197, 0.0189, 0.0191, 0.0196, 0.0173, 0.0207], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:07:17,267 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=39841.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:07:29,523 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 06:07:39,821 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.4692, 3.9943, 3.6389, 3.7376, 3.9104, 3.9526, 3.8178, 4.0611], + device='cuda:0'), covar=tensor([0.2007, 0.0594, 0.1036, 0.0587, 0.0618, 0.0566, 0.0583, 0.0625], + device='cuda:0'), in_proj_covar=tensor([0.0194, 0.0178, 0.0201, 0.0169, 0.0194, 0.0179, 0.0180, 0.0187], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:07:39,940 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.89 vs. limit=2.0 +2022-12-02 06:07:45,501 INFO [train.py:876] Epoch 28, batch 1200, loss[loss=0.1335, simple_loss=0.1994, pruned_loss=0.03376, over 4687.00 frames. ], tot_loss[loss=0.1144, simple_loss=0.1778, pruned_loss=0.02545, over 951913.05 frames. 
], batch size: 63, lr: 6.32e-03, +2022-12-02 06:07:51,543 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=39876.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:08:04,017 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=39889.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:08:15,340 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.495e+01 1.515e+02 1.868e+02 2.212e+02 6.326e+02, threshold=3.736e+02, percent-clipped=7.0 +2022-12-02 06:08:17,384 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3620, 3.1666, 3.2383, 3.2536, 2.5200, 2.0109, 3.4753, 1.7675], + device='cuda:0'), covar=tensor([0.0668, 0.0483, 0.0573, 0.0751, 0.1788, 0.3737, 0.0330, 0.3548], + device='cuda:0'), in_proj_covar=tensor([0.0124, 0.0105, 0.0101, 0.0141, 0.0148, 0.0172, 0.0090, 0.0187], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 06:08:33,894 INFO [train.py:876] Epoch 28, batch 1250, loss[loss=0.1125, simple_loss=0.1676, pruned_loss=0.02872, over 4920.00 frames. ], tot_loss[loss=0.1146, simple_loss=0.1777, pruned_loss=0.02571, over 949916.87 frames. ], batch size: 32, lr: 6.32e-03, +2022-12-02 06:08:53,018 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=39940.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:09:21,305 INFO [train.py:876] Epoch 28, batch 1300, loss[loss=0.1084, simple_loss=0.1638, pruned_loss=0.02656, over 4803.00 frames. ], tot_loss[loss=0.117, simple_loss=0.1804, pruned_loss=0.02677, over 949983.20 frames. ], batch size: 33, lr: 6.32e-03, +2022-12-02 06:09:39,021 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=39988.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:09:45,918 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=39995.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 06:09:46,015 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6056, 1.6878, 2.0099, 3.3744, 2.3532, 3.0014, 3.2875, 3.7581], + device='cuda:0'), covar=tensor([0.0344, 0.1993, 0.2386, 0.0395, 0.0686, 0.0475, 0.0494, 0.0250], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0104, 0.0124, 0.0072, 0.0081, 0.0071, 0.0084, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.6238e-05, 1.1470e-04, 1.3255e-04, 8.3606e-05, 8.3909e-05, 8.0695e-05, + 9.3728e-05, 8.4669e-05], device='cuda:0') +2022-12-02 06:09:50,906 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-40000.pt +2022-12-02 06:09:53,962 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.148e+01 1.399e+02 1.726e+02 2.129e+02 4.316e+02, threshold=3.453e+02, percent-clipped=4.0 +2022-12-02 06:10:12,354 INFO [train.py:876] Epoch 28, batch 1350, loss[loss=0.1295, simple_loss=0.1878, pruned_loss=0.03564, over 4913.00 frames. ], tot_loss[loss=0.1156, simple_loss=0.1787, pruned_loss=0.02625, over 950694.06 frames. 
], batch size: 32, lr: 6.31e-03, +2022-12-02 06:10:16,348 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.6273, 1.3218, 1.7307, 1.1114, 1.0982, 1.3202, 1.6729, 1.4543], + device='cuda:0'), covar=tensor([0.0214, 0.0245, 0.0219, 0.0249, 0.0362, 0.0176, 0.0159, 0.0149], + device='cuda:0'), in_proj_covar=tensor([0.0036, 0.0034, 0.0035, 0.0037, 0.0039, 0.0037, 0.0038, 0.0035], + device='cuda:0'), out_proj_covar=tensor([2.8717e-05, 2.6305e-05, 2.7820e-05, 2.8819e-05, 3.1589e-05, 2.8415e-05, + 3.0561e-05, 2.6651e-05], device='cuda:0') +2022-12-02 06:10:32,677 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.0879, 1.7204, 2.3555, 3.7900, 3.1384, 3.3697, 3.4737, 4.0229], + device='cuda:0'), covar=tensor([0.0259, 0.1999, 0.2177, 0.0469, 0.0446, 0.0593, 0.0667, 0.0312], + device='cuda:0'), in_proj_covar=tensor([0.0071, 0.0106, 0.0125, 0.0072, 0.0081, 0.0071, 0.0085, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.6437e-05, 1.1605e-04, 1.3332e-04, 8.4446e-05, 8.4689e-05, 8.1125e-05, + 9.4332e-05, 8.5373e-05], device='cuda:0') +2022-12-02 06:11:00,417 INFO [train.py:876] Epoch 28, batch 1400, loss[loss=0.122, simple_loss=0.183, pruned_loss=0.03053, over 4889.00 frames. ], tot_loss[loss=0.1142, simple_loss=0.1768, pruned_loss=0.0258, over 947689.17 frames. ], batch size: 44, lr: 6.31e-03, +2022-12-02 06:11:12,315 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4334, 4.5540, 4.9791, 4.4421, 4.8587, 4.6844, 4.3485, 4.4773], + device='cuda:0'), covar=tensor([0.0989, 0.0667, 0.0738, 0.0568, 0.0668, 0.0699, 0.1591, 0.0570], + device='cuda:0'), in_proj_covar=tensor([0.0190, 0.0136, 0.0198, 0.0163, 0.0143, 0.0179, 0.0203, 0.0144], + device='cuda:0'), out_proj_covar=tensor([1.3474e-04, 8.7180e-05, 1.5007e-04, 1.1119e-04, 1.0485e-04, 1.2763e-04, + 1.5203e-04, 9.6786e-05], device='cuda:0') +2022-12-02 06:11:30,173 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.700e+01 1.370e+02 1.684e+02 2.116e+02 4.122e+02, threshold=3.369e+02, percent-clipped=5.0 +2022-12-02 06:11:30,305 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-28.pt +2022-12-02 06:13:52,304 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 06:13:52,875 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 06:13:53,533 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 06:13:53,564 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 06:13:54,365 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 06:13:55,080 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 06:13:56,655 INFO [train.py:876] Epoch 29, batch 0, loss[loss=0.1358, simple_loss=0.1993, pruned_loss=0.03617, over 4801.00 frames. 
], tot_loss[loss=0.1358, simple_loss=0.1993, pruned_loss=0.03617, over 4801.00 frames. ], batch size: 54, lr: 6.19e-03, +2022-12-02 06:13:56,657 INFO [train.py:901] Computing validation loss +2022-12-02 06:14:11,868 INFO [train.py:910] Epoch 29, validation: loss=0.2373, simple_loss=0.2789, pruned_loss=0.09787, over 253132.00 frames. +2022-12-02 06:14:11,868 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 06:14:27,960 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40119.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:14:31,617 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40123.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:14:59,716 INFO [train.py:876] Epoch 29, batch 50, loss[loss=0.109, simple_loss=0.1799, pruned_loss=0.01906, over 4852.00 frames. ], tot_loss[loss=0.1111, simple_loss=0.174, pruned_loss=0.02411, over 216059.84 frames. ], batch size: 47, lr: 6.19e-03, +2022-12-02 06:15:14,530 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40167.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:15:18,576 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40171.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:15:20,370 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 06:15:41,652 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40195.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:15:44,593 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7933, 3.9662, 4.0798, 3.8632, 3.5732, 3.9335, 3.9835, 4.0870], + device='cuda:0'), covar=tensor([0.1040, 0.0419, 0.0483, 0.0450, 0.0566, 0.0631, 0.0374, 0.0506], + device='cuda:0'), in_proj_covar=tensor([0.0252, 0.0178, 0.0199, 0.0190, 0.0191, 0.0198, 0.0174, 0.0209], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:15:47,605 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.877e+01 1.395e+02 1.681e+02 2.074e+02 4.100e+02, threshold=3.361e+02, percent-clipped=3.0 +2022-12-02 06:15:48,516 INFO [train.py:876] Epoch 29, batch 100, loss[loss=0.1137, simple_loss=0.1792, pruned_loss=0.02411, over 4874.00 frames. ], tot_loss[loss=0.1103, simple_loss=0.1732, pruned_loss=0.02367, over 379562.92 frames. ], batch size: 38, lr: 6.19e-03, +2022-12-02 06:16:04,636 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40218.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:16:05,391 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125 +2022-12-02 06:16:11,870 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.80 vs. limit=5.0 +2022-12-02 06:16:28,883 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2 +2022-12-02 06:16:37,407 INFO [train.py:876] Epoch 29, batch 150, loss[loss=0.1099, simple_loss=0.1702, pruned_loss=0.02477, over 4857.00 frames. ], tot_loss[loss=0.1109, simple_loss=0.1734, pruned_loss=0.02418, over 507237.91 frames. 
], batch size: 35, lr: 6.18e-03, +2022-12-02 06:16:41,723 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40256.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:16:57,354 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40272.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:17:04,110 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40279.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:17:19,594 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40295.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 06:17:25,392 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.974e+01 1.491e+02 1.756e+02 2.196e+02 4.879e+02, threshold=3.512e+02, percent-clipped=4.0 +2022-12-02 06:17:26,466 INFO [train.py:876] Epoch 29, batch 200, loss[loss=0.09874, simple_loss=0.1564, pruned_loss=0.02054, over 4903.00 frames. ], tot_loss[loss=0.1111, simple_loss=0.1728, pruned_loss=0.02467, over 607725.25 frames. ], batch size: 30, lr: 6.18e-03, +2022-12-02 06:17:53,741 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40330.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:17:56,523 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40333.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:18:06,052 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40343.0, num_to_drop=1, layers_to_drop={1} +2022-12-02 06:18:14,599 INFO [train.py:876] Epoch 29, batch 250, loss[loss=0.1029, simple_loss=0.1771, pruned_loss=0.01433, over 4005.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.1732, pruned_loss=0.02392, over 683332.35 frames. ], batch size: 72, lr: 6.18e-03, +2022-12-02 06:18:17,586 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375 +2022-12-02 06:18:19,797 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1477, 1.9850, 1.8808, 2.2236, 1.7697, 2.2121, 2.0985, 2.2283], + device='cuda:0'), covar=tensor([0.0431, 0.0622, 0.0545, 0.0412, 0.0771, 0.0496, 0.0538, 0.0377], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0073, 0.0065, 0.0063, 0.0081, 0.0073, 0.0072, 0.0062], + device='cuda:0'), out_proj_covar=tensor([5.1187e-05, 5.3875e-05, 4.6826e-05, 4.5901e-05, 5.9341e-05, 5.2779e-05, + 5.2539e-05, 4.5080e-05], device='cuda:0') +2022-12-02 06:18:20,268 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.83 vs. limit=2.0 +2022-12-02 06:18:52,814 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40391.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:19:02,341 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.694e+01 1.396e+02 1.665e+02 2.008e+02 6.543e+02, threshold=3.331e+02, percent-clipped=2.0 +2022-12-02 06:19:03,352 INFO [train.py:876] Epoch 29, batch 300, loss[loss=0.08793, simple_loss=0.1513, pruned_loss=0.01228, over 4901.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.1737, pruned_loss=0.02365, over 744535.68 frames. ], batch size: 29, lr: 6.17e-03, +2022-12-02 06:19:16,071 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. 
Duration: 0.92 +2022-12-02 06:19:24,078 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40423.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:19:36,085 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9696, 3.5429, 3.9265, 3.7360, 3.9298, 3.2851, 3.7773, 3.8788], + device='cuda:0'), covar=tensor([0.0452, 0.0582, 0.0438, 0.0417, 0.0380, 0.0715, 0.0378, 0.0767], + device='cuda:0'), in_proj_covar=tensor([0.0130, 0.0138, 0.0125, 0.0127, 0.0118, 0.0157, 0.0103, 0.0123], + device='cuda:0'), out_proj_covar=tensor([1.1096e-04, 1.1823e-04, 1.0604e-04, 1.0797e-04, 1.0038e-04, 1.3411e-04, + 9.0961e-05, 1.0720e-04], device='cuda:0') +2022-12-02 06:19:46,027 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.86 vs. limit=2.0 +2022-12-02 06:19:50,422 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5946, 3.0073, 3.2257, 2.9585, 1.8190, 3.0057, 3.0004, 2.5309], + device='cuda:0'), covar=tensor([0.3717, 0.0814, 0.0489, 0.0696, 0.1337, 0.1214, 0.0815, 0.1157], + device='cuda:0'), in_proj_covar=tensor([0.0178, 0.0121, 0.0152, 0.0133, 0.0137, 0.0122, 0.0129, 0.0127], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 06:19:52,216 INFO [train.py:876] Epoch 29, batch 350, loss[loss=0.1039, simple_loss=0.1613, pruned_loss=0.02318, over 4907.00 frames. ], tot_loss[loss=0.1113, simple_loss=0.1747, pruned_loss=0.02401, over 791062.24 frames. ], batch size: 30, lr: 6.17e-03, +2022-12-02 06:20:10,210 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40471.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:20:10,327 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40471.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:20:30,782 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40492.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:20:39,105 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.777e+01 1.415e+02 1.730e+02 2.088e+02 5.317e+02, threshold=3.460e+02, percent-clipped=1.0 +2022-12-02 06:20:40,171 INFO [train.py:876] Epoch 29, batch 400, loss[loss=0.09528, simple_loss=0.1451, pruned_loss=0.02272, over 4802.00 frames. ], tot_loss[loss=0.1117, simple_loss=0.1746, pruned_loss=0.02436, over 828938.16 frames. ], batch size: 25, lr: 6.16e-03, +2022-12-02 06:20:40,255 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.5449, 4.4867, 5.0819, 4.5098, 4.8738, 4.7367, 4.5146, 4.5812], + device='cuda:0'), covar=tensor([0.0945, 0.0752, 0.0660, 0.0674, 0.0837, 0.0650, 0.1214, 0.0482], + device='cuda:0'), in_proj_covar=tensor([0.0194, 0.0139, 0.0199, 0.0165, 0.0146, 0.0179, 0.0203, 0.0145], + device='cuda:0'), out_proj_covar=tensor([1.3769e-04, 8.8531e-05, 1.5060e-04, 1.1225e-04, 1.0635e-04, 1.2746e-04, + 1.5215e-04, 9.8128e-05], device='cuda:0') +2022-12-02 06:20:53,135 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. 
Duration: 0.97775 +2022-12-02 06:20:57,070 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40519.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:21:16,419 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.4043, 4.2799, 4.9065, 4.3840, 4.7332, 4.6065, 4.3781, 4.3934], + device='cuda:0'), covar=tensor([0.1038, 0.0672, 0.0693, 0.0635, 0.0865, 0.0714, 0.1328, 0.0540], + device='cuda:0'), in_proj_covar=tensor([0.0193, 0.0138, 0.0198, 0.0163, 0.0146, 0.0178, 0.0202, 0.0145], + device='cuda:0'), out_proj_covar=tensor([1.3682e-04, 8.8030e-05, 1.5009e-04, 1.1123e-04, 1.0646e-04, 1.2713e-04, + 1.5113e-04, 9.7618e-05], device='cuda:0') +2022-12-02 06:21:18,439 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6958, 3.8284, 4.0576, 3.6201, 3.2886, 3.5769, 3.9583, 3.9923], + device='cuda:0'), covar=tensor([0.1002, 0.0431, 0.0414, 0.0484, 0.0578, 0.0593, 0.0429, 0.0487], + device='cuda:0'), in_proj_covar=tensor([0.0254, 0.0179, 0.0199, 0.0191, 0.0193, 0.0200, 0.0176, 0.0208], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:21:20,032 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125 +2022-12-02 06:21:27,783 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40551.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:21:28,766 INFO [train.py:876] Epoch 29, batch 450, loss[loss=0.09904, simple_loss=0.1365, pruned_loss=0.03078, over 4725.00 frames. ], tot_loss[loss=0.1118, simple_loss=0.1747, pruned_loss=0.02447, over 856783.91 frames. ], batch size: 23, lr: 6.16e-03, +2022-12-02 06:21:29,878 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40553.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:21:50,533 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40574.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:22:16,933 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.025e+02 1.478e+02 1.956e+02 2.356e+02 4.838e+02, threshold=3.911e+02, percent-clipped=5.0 +2022-12-02 06:22:17,214 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6957, 2.4986, 2.5060, 2.8557, 2.2788, 2.3624, 1.5485, 2.7390], + device='cuda:0'), covar=tensor([0.0706, 0.1175, 0.0906, 0.0641, 0.1184, 0.1622, 0.1312, 0.0712], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0093, 0.0109, 0.0091, 0.0114, 0.0100, 0.0098, 0.0097], + device='cuda:0'), out_proj_covar=tensor([9.3311e-05, 9.7288e-05, 1.1055e-04, 9.4714e-05, 1.1649e-04, 1.0467e-04, + 1.0138e-04, 1.0071e-04], device='cuda:0') +2022-12-02 06:22:17,902 INFO [train.py:876] Epoch 29, batch 500, loss[loss=0.1147, simple_loss=0.1895, pruned_loss=0.01994, over 4878.00 frames. ], tot_loss[loss=0.1119, simple_loss=0.175, pruned_loss=0.02442, over 877233.11 frames. 
], batch size: 44, lr: 6.16e-03, +2022-12-02 06:22:43,380 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40628.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:22:57,837 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3522, 3.0897, 3.2681, 3.2356, 2.3854, 2.1114, 3.3557, 1.7689], + device='cuda:0'), covar=tensor([0.0654, 0.0429, 0.0540, 0.0749, 0.1995, 0.3861, 0.0367, 0.3577], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0103, 0.0100, 0.0141, 0.0148, 0.0174, 0.0090, 0.0185], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 06:23:05,707 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40651.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:23:06,557 INFO [train.py:876] Epoch 29, batch 550, loss[loss=0.1252, simple_loss=0.1935, pruned_loss=0.02845, over 4840.00 frames. ], tot_loss[loss=0.1116, simple_loss=0.1747, pruned_loss=0.02428, over 893352.47 frames. ], batch size: 41, lr: 6.15e-03, +2022-12-02 06:23:39,570 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40686.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:23:53,953 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.168e+01 1.242e+02 1.637e+02 1.991e+02 4.410e+02, threshold=3.274e+02, percent-clipped=2.0 +2022-12-02 06:23:55,048 INFO [train.py:876] Epoch 29, batch 600, loss[loss=0.07627, simple_loss=0.1222, pruned_loss=0.01518, over 4658.00 frames. ], tot_loss[loss=0.1101, simple_loss=0.1731, pruned_loss=0.02354, over 906448.63 frames. ], batch size: 21, lr: 6.15e-03, +2022-12-02 06:24:04,345 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.80 vs. limit=2.0 +2022-12-02 06:24:04,992 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40712.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:24:33,101 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0657, 3.6120, 3.5841, 3.4208, 3.5509, 3.6146, 3.3969, 3.7962], + device='cuda:0'), covar=tensor([0.2197, 0.0809, 0.0871, 0.0687, 0.0846, 0.0627, 0.0757, 0.0612], + device='cuda:0'), in_proj_covar=tensor([0.0198, 0.0182, 0.0209, 0.0173, 0.0201, 0.0184, 0.0185, 0.0195], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:24:37,019 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0987, 2.7313, 2.9239, 3.0417, 2.6637, 2.4530, 1.9959, 2.9895], + device='cuda:0'), covar=tensor([0.0541, 0.1422, 0.0790, 0.0767, 0.0967, 0.1989, 0.1086, 0.0645], + device='cuda:0'), in_proj_covar=tensor([0.0088, 0.0092, 0.0109, 0.0091, 0.0114, 0.0100, 0.0097, 0.0097], + device='cuda:0'), out_proj_covar=tensor([9.3338e-05, 9.6758e-05, 1.1101e-04, 9.4851e-05, 1.1594e-04, 1.0486e-04, + 1.0081e-04, 1.0072e-04], device='cuda:0') +2022-12-02 06:24:43,470 INFO [train.py:876] Epoch 29, batch 650, loss[loss=0.1262, simple_loss=0.1935, pruned_loss=0.02943, over 4811.00 frames. ], tot_loss[loss=0.1098, simple_loss=0.1727, pruned_loss=0.02343, over 919186.48 frames. ], batch size: 51, lr: 6.15e-03, +2022-12-02 06:25:31,719 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.842e+01 1.394e+02 1.719e+02 2.132e+02 4.405e+02, threshold=3.437e+02, percent-clipped=4.0 +2022-12-02 06:25:32,734 INFO [train.py:876] Epoch 29, batch 700, loss[loss=0.119, simple_loss=0.1837, pruned_loss=0.02715, over 4798.00 frames. 
], tot_loss[loss=0.1108, simple_loss=0.1738, pruned_loss=0.0239, over 924761.76 frames. ], batch size: 33, lr: 6.14e-03, +2022-12-02 06:26:16,526 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=40848.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:26:18,546 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8149, 1.9908, 2.4435, 3.6213, 3.3127, 3.1753, 3.3248, 3.9397], + device='cuda:0'), covar=tensor([0.0326, 0.1715, 0.2071, 0.0482, 0.0375, 0.0520, 0.0578, 0.0309], + device='cuda:0'), in_proj_covar=tensor([0.0072, 0.0106, 0.0126, 0.0071, 0.0081, 0.0073, 0.0085, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.7229e-05, 1.1629e-04, 1.3418e-04, 8.3609e-05, 8.4501e-05, 8.2525e-05, + 9.5009e-05, 8.4830e-05], device='cuda:0') +2022-12-02 06:26:19,370 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40851.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:26:20,303 INFO [train.py:876] Epoch 29, batch 750, loss[loss=0.0929, simple_loss=0.163, pruned_loss=0.01138, over 4849.00 frames. ], tot_loss[loss=0.1122, simple_loss=0.1751, pruned_loss=0.02466, over 928846.25 frames. ], batch size: 49, lr: 6.14e-03, +2022-12-02 06:26:25,888 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.78 vs. limit=5.0 +2022-12-02 06:26:26,299 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7215, 2.0273, 2.4292, 3.4878, 3.2449, 3.0698, 3.2364, 3.9183], + device='cuda:0'), covar=tensor([0.0351, 0.1633, 0.2026, 0.0462, 0.0361, 0.0572, 0.0615, 0.0263], + device='cuda:0'), in_proj_covar=tensor([0.0072, 0.0106, 0.0126, 0.0071, 0.0081, 0.0073, 0.0086, 0.0085], + device='cuda:0'), out_proj_covar=tensor([7.7300e-05, 1.1633e-04, 1.3432e-04, 8.3630e-05, 8.4528e-05, 8.2479e-05, + 9.5181e-05, 8.4888e-05], device='cuda:0') +2022-12-02 06:26:31,136 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.7767, 2.4802, 2.3651, 2.5841, 2.0968, 2.0920, 1.5865, 2.6751], + device='cuda:0'), covar=tensor([0.0675, 0.1497, 0.1165, 0.0982, 0.1464, 0.2282, 0.1330, 0.0745], + device='cuda:0'), in_proj_covar=tensor([0.0089, 0.0093, 0.0111, 0.0093, 0.0115, 0.0102, 0.0098, 0.0098], + device='cuda:0'), out_proj_covar=tensor([9.4382e-05, 9.7956e-05, 1.1310e-04, 9.6757e-05, 1.1713e-04, 1.0707e-04, + 1.0241e-04, 1.0214e-04], device='cuda:0') +2022-12-02 06:26:32,980 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40865.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:26:41,892 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40874.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:26:43,016 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9164, 3.5302, 3.5541, 3.1967, 3.5610, 3.5454, 3.3440, 3.6935], + device='cuda:0'), covar=tensor([0.2282, 0.0753, 0.0838, 0.0719, 0.0647, 0.0605, 0.0677, 0.0602], + device='cuda:0'), in_proj_covar=tensor([0.0197, 0.0183, 0.0207, 0.0173, 0.0200, 0.0183, 0.0184, 0.0193], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:27:05,950 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40899.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:27:07,076 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40900.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:27:07,780 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.240e+01 1.480e+02 1.768e+02 2.245e+02 5.998e+02, threshold=3.537e+02, 
percent-clipped=3.0 +2022-12-02 06:27:08,634 INFO [train.py:876] Epoch 29, batch 800, loss[loss=0.1217, simple_loss=0.1897, pruned_loss=0.02687, over 4893.00 frames. ], tot_loss[loss=0.112, simple_loss=0.175, pruned_loss=0.02447, over 932853.93 frames. ], batch size: 44, lr: 6.13e-03, +2022-12-02 06:27:24,331 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3836, 3.5337, 3.7592, 3.1909, 2.9352, 3.3712, 3.6218, 3.6835], + device='cuda:0'), covar=tensor([0.1281, 0.0478, 0.0449, 0.0653, 0.0732, 0.0734, 0.0429, 0.0614], + device='cuda:0'), in_proj_covar=tensor([0.0252, 0.0179, 0.0197, 0.0189, 0.0191, 0.0199, 0.0174, 0.0208], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], + device='cuda:0') +2022-12-02 06:27:27,963 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40922.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:27:32,021 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40926.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:27:33,901 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40928.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:27:40,357 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.73 vs. limit=2.0 +2022-12-02 06:27:56,795 INFO [train.py:876] Epoch 29, batch 850, loss[loss=0.1409, simple_loss=0.1973, pruned_loss=0.04225, over 4797.00 frames. ], tot_loss[loss=0.1128, simple_loss=0.1761, pruned_loss=0.02477, over 937299.71 frames. ], batch size: 51, lr: 6.13e-03, +2022-12-02 06:27:58,020 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4721, 1.3048, 1.5431, 1.2480, 1.3368, 1.3546, 1.5130, 1.2282], + device='cuda:0'), covar=tensor([0.0181, 0.0297, 0.0220, 0.0246, 0.0275, 0.0169, 0.0177, 0.0259], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0036, 0.0036, 0.0039, 0.0041, 0.0038, 0.0041, 0.0037], + device='cuda:0'), out_proj_covar=tensor([3.0820e-05, 2.7272e-05, 2.9330e-05, 3.0873e-05, 3.3227e-05, 2.9537e-05, + 3.2196e-05, 2.8436e-05], device='cuda:0') +2022-12-02 06:28:05,285 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 06:28:05,892 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=40961.0, num_to_drop=1, layers_to_drop={3} +2022-12-02 06:28:19,937 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=40976.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:28:29,775 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=40986.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:28:32,915 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.19 vs. limit=5.0 +2022-12-02 06:28:39,952 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=40997.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:28:44,185 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.276e+01 1.460e+02 1.715e+02 2.218e+02 4.983e+02, threshold=3.429e+02, percent-clipped=3.0 +2022-12-02 06:28:45,312 INFO [train.py:876] Epoch 29, batch 900, loss[loss=0.07989, simple_loss=0.126, pruned_loss=0.01691, over 4261.00 frames. ], tot_loss[loss=0.1122, simple_loss=0.1749, pruned_loss=0.02472, over 940902.47 frames. 
], batch size: 16, lr: 6.13e-03, +2022-12-02 06:28:46,528 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.7673, 1.7430, 2.1797, 3.5358, 2.8404, 3.1036, 3.2629, 3.9849], + device='cuda:0'), covar=tensor([0.0312, 0.2017, 0.2243, 0.0405, 0.0572, 0.0480, 0.0496, 0.0251], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0104, 0.0123, 0.0069, 0.0080, 0.0071, 0.0083, 0.0083], + device='cuda:0'), out_proj_covar=tensor([7.5497e-05, 1.1358e-04, 1.3154e-04, 8.1188e-05, 8.2980e-05, 8.0830e-05, + 9.3001e-05, 8.3142e-05], device='cuda:0') +2022-12-02 06:28:50,412 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41007.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:29:16,369 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41034.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:29:26,994 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=41045.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:29:33,753 INFO [train.py:876] Epoch 29, batch 950, loss[loss=0.09926, simple_loss=0.1563, pruned_loss=0.02113, over 4909.00 frames. ], tot_loss[loss=0.1122, simple_loss=0.175, pruned_loss=0.02467, over 945546.38 frames. ], batch size: 30, lr: 6.12e-03, +2022-12-02 06:29:39,971 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=41058.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:30:21,658 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.288e+01 1.404e+02 1.704e+02 2.167e+02 5.077e+02, threshold=3.408e+02, percent-clipped=2.0 +2022-12-02 06:30:21,892 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3646, 3.0287, 3.2617, 3.2115, 3.2358, 3.0454, 2.6050, 3.9308], + device='cuda:0'), covar=tensor([0.0208, 0.1015, 0.0569, 0.1002, 0.0325, 0.0564, 0.2222, 0.0322], + device='cuda:0'), in_proj_covar=tensor([0.0082, 0.0117, 0.0094, 0.0129, 0.0089, 0.0097, 0.0137, 0.0112], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0.0002, 0.0001], + device='cuda:0') +2022-12-02 06:30:22,723 INFO [train.py:876] Epoch 29, batch 1000, loss[loss=0.0819, simple_loss=0.1423, pruned_loss=0.01076, over 4720.00 frames. ], tot_loss[loss=0.1115, simple_loss=0.174, pruned_loss=0.02446, over 946871.96 frames. ], batch size: 23, lr: 6.12e-03, +2022-12-02 06:30:22,895 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=41102.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:30:26,540 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=41106.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:31:02,795 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6543, 3.4174, 3.5490, 3.4364, 2.8720, 2.4355, 3.7474, 1.8628], + device='cuda:0'), covar=tensor([0.0559, 0.0470, 0.0356, 0.0645, 0.1481, 0.3112, 0.0242, 0.3452], + device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0105, 0.0099, 0.0141, 0.0147, 0.0172, 0.0090, 0.0184], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002], + device='cuda:0') +2022-12-02 06:31:06,571 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41148.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:31:10,256 INFO [train.py:876] Epoch 29, batch 1050, loss[loss=0.1012, simple_loss=0.1534, pruned_loss=0.02455, over 4761.00 frames. ], tot_loss[loss=0.1112, simple_loss=0.1737, pruned_loss=0.02437, over 949753.58 frames. 
], batch size: 26, lr: 6.12e-03, +2022-12-02 06:31:21,094 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=41163.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:31:52,873 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41196.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:31:57,707 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.878e+01 1.412e+02 1.715e+02 2.175e+02 8.240e+02, threshold=3.430e+02, percent-clipped=4.0 +2022-12-02 06:31:58,645 INFO [train.py:876] Epoch 29, batch 1100, loss[loss=0.1281, simple_loss=0.1861, pruned_loss=0.03506, over 4867.00 frames. ], tot_loss[loss=0.1116, simple_loss=0.1743, pruned_loss=0.02443, over 950622.82 frames. ], batch size: 39, lr: 6.11e-03, +2022-12-02 06:32:17,283 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41221.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:32:47,488 INFO [train.py:876] Epoch 29, batch 1150, loss[loss=0.1286, simple_loss=0.1882, pruned_loss=0.03453, over 4853.00 frames. ], tot_loss[loss=0.1117, simple_loss=0.1749, pruned_loss=0.02429, over 952094.27 frames. ], batch size: 47, lr: 6.11e-03, +2022-12-02 06:32:51,401 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41256.0, num_to_drop=1, layers_to_drop={0} +2022-12-02 06:33:01,071 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.7054, 1.3800, 1.6479, 1.1030, 1.3165, 1.3717, 1.6323, 1.2629], + device='cuda:0'), covar=tensor([0.0223, 0.0223, 0.0201, 0.0283, 0.0247, 0.0222, 0.0210, 0.0194], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0035, 0.0037, 0.0039, 0.0042, 0.0039, 0.0040, 0.0037], + device='cuda:0'), out_proj_covar=tensor([3.0907e-05, 2.7202e-05, 2.9409e-05, 3.1040e-05, 3.3420e-05, 3.0065e-05, + 3.1957e-05, 2.8187e-05], device='cuda:0') +2022-12-02 06:33:04,849 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.9337, 2.4572, 2.8104, 2.3205, 2.6111, 1.8769, 2.4534, 3.0207], + device='cuda:0'), covar=tensor([0.0226, 0.1222, 0.0638, 0.1471, 0.0392, 0.0776, 0.1774, 0.0574], + device='cuda:0'), in_proj_covar=tensor([0.0082, 0.0118, 0.0094, 0.0129, 0.0089, 0.0097, 0.0136, 0.0112], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0.0002, 0.0001], + device='cuda:0') +2022-12-02 06:33:28,123 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.70 vs. limit=2.0 +2022-12-02 06:33:34,464 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 1.006e+02 1.437e+02 1.740e+02 2.107e+02 7.279e+02, threshold=3.480e+02, percent-clipped=3.0 +2022-12-02 06:33:35,510 INFO [train.py:876] Epoch 29, batch 1200, loss[loss=0.08713, simple_loss=0.147, pruned_loss=0.01361, over 4743.00 frames. ], tot_loss[loss=0.1123, simple_loss=0.176, pruned_loss=0.02433, over 953586.71 frames. ], batch size: 27, lr: 6.10e-03, +2022-12-02 06:33:40,495 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41307.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:34:25,061 INFO [train.py:876] Epoch 29, batch 1250, loss[loss=0.1007, simple_loss=0.1681, pruned_loss=0.01669, over 4849.00 frames. ], tot_loss[loss=0.1113, simple_loss=0.1744, pruned_loss=0.02406, over 950922.14 frames. 
], batch size: 35, lr: 6.10e-03, +2022-12-02 06:34:26,133 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41353.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:34:28,112 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41355.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:35:02,487 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8171, 2.6675, 2.6329, 2.8227, 2.4127, 2.6239, 1.7813, 2.8918], + device='cuda:0'), covar=tensor([0.0784, 0.1135, 0.0940, 0.0957, 0.1080, 0.1426, 0.1165, 0.0689], + device='cuda:0'), in_proj_covar=tensor([0.0089, 0.0093, 0.0109, 0.0091, 0.0114, 0.0100, 0.0097, 0.0097], + device='cuda:0'), out_proj_covar=tensor([9.3941e-05, 9.7895e-05, 1.1144e-04, 9.4780e-05, 1.1623e-04, 1.0478e-04, + 1.0063e-04, 1.0100e-04], device='cuda:0') +2022-12-02 06:35:09,191 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9050, 3.6060, 3.6801, 2.9512, 3.9857, 3.6830, 3.0172, 4.5196], + device='cuda:0'), covar=tensor([0.0222, 0.0839, 0.0619, 0.1107, 0.0221, 0.0607, 0.1972, 0.0215], + device='cuda:0'), in_proj_covar=tensor([0.0082, 0.0118, 0.0095, 0.0129, 0.0089, 0.0097, 0.0136, 0.0111], + device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0001, 0.0002, 0.0001], + device='cuda:0') +2022-12-02 06:35:13,282 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.057e+01 1.398e+02 1.630e+02 2.066e+02 3.929e+02, threshold=3.260e+02, percent-clipped=3.0 +2022-12-02 06:35:13,368 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41401.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:35:14,337 INFO [train.py:876] Epoch 29, batch 1300, loss[loss=0.1056, simple_loss=0.1647, pruned_loss=0.02327, over 4904.00 frames. ], tot_loss[loss=0.1107, simple_loss=0.1735, pruned_loss=0.02395, over 951636.53 frames. ], batch size: 31, lr: 6.10e-03, +2022-12-02 06:35:26,214 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.24 vs. limit=2.0 +2022-12-02 06:36:02,554 INFO [train.py:876] Epoch 29, batch 1350, loss[loss=0.1225, simple_loss=0.1854, pruned_loss=0.02977, over 4854.00 frames. ], tot_loss[loss=0.1118, simple_loss=0.1745, pruned_loss=0.0245, over 953538.35 frames. ], batch size: 40, lr: 6.09e-03, +2022-12-02 06:36:08,371 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=41458.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:36:49,877 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.730e+01 1.355e+02 1.668e+02 2.172e+02 3.280e+02, threshold=3.337e+02, percent-clipped=1.0 +2022-12-02 06:36:50,909 INFO [train.py:876] Epoch 29, batch 1400, loss[loss=0.1548, simple_loss=0.2193, pruned_loss=0.04515, over 4059.00 frames. ], tot_loss[loss=0.1122, simple_loss=0.1753, pruned_loss=0.02459, over 949728.16 frames. 
], batch size: 72, lr: 6.09e-03, +2022-12-02 06:36:54,791 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9276, 3.7839, 3.5562, 3.2274, 3.8646, 3.7854, 2.8082, 4.4046], + device='cuda:0'), covar=tensor([0.0214, 0.0582, 0.0651, 0.0807, 0.0256, 0.0491, 0.2397, 0.0246], + device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0115, 0.0093, 0.0126, 0.0087, 0.0095, 0.0134, 0.0110], + device='cuda:0'), out_proj_covar=tensor([9.9273e-05, 1.3618e-04, 1.1483e-04, 1.4735e-04, 1.0108e-04, 1.1974e-04, + 1.5463e-04, 1.2433e-04], device='cuda:0') +2022-12-02 06:37:09,391 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41521.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:37:21,685 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-29.pt +2022-12-02 06:39:14,403 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp0.9 from training. Duration: 65.77775 +2022-12-02 06:39:15,315 WARNING [train.py:1043] Exclude cut with ID a_0_njm-A05_1949-4118_sp0.9 from training. Number of frames (before subsampling): 102. Number of frames (after subsampling): 24. Text: ཕྱི རྒྱལ དང ས མཚམས འབྲེལ བའི མི རིགས རང སྐྱོང ས གནས ཀྱིས རྒྱལ སྲིད སྤྱི ཁྱབ ཁང ནས ཆོག མཆན ཐོབ རྗེས. Tokens: ['▁ཕྱི', '▁རྒྱ', 'ལ', '▁དང', '▁ས', '▁མཚ', 'མས', '▁འབྲེལ', '▁བའི', '▁མི', '▁རིགས', '▁རང', '▁སྐྱོང', '▁ས', '▁གནས', '▁ཀྱིས', '▁རྒྱ', 'ལ', '▁སྲིད', '▁སྤྱི', '▁ཁྱབ', '▁ཁང', '▁ནས', '▁ཆོག', '▁མ', 'ཆ', 'ན', '▁ཐོབ', '▁རྗེས']. Number of tokens: 29 +2022-12-02 06:39:15,613 WARNING [train.py:1028] Exclude cut with ID a_0_snzm2-A69_31023-13051_sp1.1 from training. Duration: 0.9818125 +2022-12-02 06:39:15,646 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp1.1 from training. Duration: 0.8 +2022-12-02 06:39:16,791 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027 from training. Duration: 0.88 +2022-12-02 06:39:17,110 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645_sp1.1 from training. Duration: 53.8181875 +2022-12-02 06:39:18,789 INFO [train.py:876] Epoch 30, batch 0, loss[loss=0.1112, simple_loss=0.1762, pruned_loss=0.02315, over 4826.00 frames. ], tot_loss[loss=0.1112, simple_loss=0.1762, pruned_loss=0.02315, over 4826.00 frames. ], batch size: 34, lr: 5.98e-03, +2022-12-02 06:39:18,790 INFO [train.py:901] Computing validation loss +2022-12-02 06:39:21,590 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2345, 2.9945, 3.1403, 2.8531, 3.0634, 2.7449, 2.8635, 3.4295], + device='cuda:0'), covar=tensor([0.0226, 0.0839, 0.0594, 0.1004, 0.0325, 0.0450, 0.1618, 0.0438], + device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0115, 0.0093, 0.0126, 0.0087, 0.0095, 0.0134, 0.0110], + device='cuda:0'), out_proj_covar=tensor([9.9206e-05, 1.3581e-04, 1.1449e-04, 1.4714e-04, 1.0102e-04, 1.1965e-04, + 1.5448e-04, 1.2444e-04], device='cuda:0') +2022-12-02 06:39:33,993 INFO [train.py:910] Epoch 30, validation: loss=0.2451, simple_loss=0.289, pruned_loss=0.1006, over 253132.00 frames. 
+2022-12-02 06:39:33,993 INFO [train.py:911] Maximum memory allocated so far is 7567MB +2022-12-02 06:39:43,807 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.5382, 1.3375, 1.4470, 1.1512, 1.2340, 1.3248, 1.6170, 1.4523], + device='cuda:0'), covar=tensor([0.0347, 0.0275, 0.0284, 0.0283, 0.0269, 0.0197, 0.0171, 0.0143], + device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0036, 0.0036, 0.0039, 0.0041, 0.0039, 0.0040, 0.0036], + device='cuda:0'), out_proj_covar=tensor([3.0699e-05, 2.7478e-05, 2.9438e-05, 3.0675e-05, 3.2949e-05, 3.0014e-05, + 3.1500e-05, 2.8075e-05], device='cuda:0') +2022-12-02 06:40:06,312 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41556.0, num_to_drop=1, layers_to_drop={2} +2022-12-02 06:40:18,530 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41569.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:40:32,740 INFO [train.py:876] Epoch 30, batch 50, loss[loss=0.08941, simple_loss=0.1558, pruned_loss=0.01153, over 4729.00 frames. ], tot_loss[loss=0.1089, simple_loss=0.1713, pruned_loss=0.02328, over 216362.70 frames. ], batch size: 27, lr: 5.98e-03, +2022-12-02 06:40:36,343 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.00 vs. limit=2.0 +2022-12-02 06:40:37,272 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.98 vs. limit=2.0 +2022-12-02 06:40:41,314 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.93 vs. limit=2.0 +2022-12-02 06:40:49,697 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.944e+01 1.410e+02 1.697e+02 2.057e+02 7.344e+02, threshold=3.393e+02, percent-clipped=5.0 +2022-12-02 06:40:52,566 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41604.0, num_to_drop=0, layers_to_drop=set() +2022-12-02 06:40:59,166 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118_sp1.1 from training. Duration: 0.836375 +2022-12-02 06:41:07,690 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5156, 1.3523, 2.2325, 3.2961, 2.1290, 2.8786, 3.2671, 3.6234], + device='cuda:0'), covar=tensor([0.0322, 0.2500, 0.2309, 0.0403, 0.0891, 0.0645, 0.0556, 0.0297], + device='cuda:0'), in_proj_covar=tensor([0.0070, 0.0104, 0.0124, 0.0069, 0.0080, 0.0072, 0.0083, 0.0084], + device='cuda:0'), out_proj_covar=tensor([7.5855e-05, 1.1410e-04, 1.3198e-04, 8.1261e-05, 8.3485e-05, 8.1580e-05, + 9.2991e-05, 8.3972e-05], device='cuda:0') +2022-12-02 06:41:14,556 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.1667, 3.7513, 4.0965, 4.0731, 2.9159, 3.7998, 4.3252, 3.0956], + device='cuda:0'), covar=tensor([0.4623, 0.0989, 0.0770, 0.0318, 0.1132, 0.0978, 0.0483, 0.1557], + device='cuda:0'), in_proj_covar=tensor([0.0179, 0.0122, 0.0155, 0.0131, 0.0136, 0.0125, 0.0129, 0.0130], + device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002], + device='cuda:0') +2022-12-02 06:41:20,614 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.96 vs. limit=2.0 +2022-12-02 06:41:22,175 INFO [train.py:876] Epoch 30, batch 100, loss[loss=0.0726, simple_loss=0.1279, pruned_loss=0.008628, over 4713.00 frames. ], tot_loss[loss=0.1063, simple_loss=0.1682, pruned_loss=0.02222, over 381539.69 frames. 
], batch size: 23, lr: 5.98e-03,
+2022-12-02 06:41:40,784 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41653.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:41:41,849 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=41654.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:41:43,724 WARNING [train.py:1028] Exclude cut with ID a_0_gzj2-A56_25194-17702_sp1.1 from training. Duration: 0.9818125
+2022-12-02 06:41:58,500 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.0841, 2.5494, 2.9695, 2.6150, 2.9639, 2.4434, 2.6564, 3.2754],
+ device='cuda:0'), covar=tensor([0.0262, 0.1275, 0.0647, 0.1380, 0.0341, 0.0497, 0.1659, 0.0511],
+ device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0116, 0.0094, 0.0127, 0.0087, 0.0095, 0.0135, 0.0110],
+ device='cuda:0'), out_proj_covar=tensor([9.9755e-05, 1.3719e-04, 1.1569e-04, 1.4829e-04, 1.0137e-04, 1.1990e-04,
+ 1.5589e-04, 1.2521e-04], device='cuda:0')
+2022-12-02 06:42:04,197 WARNING [train.py:1028] Exclude cut with ID a_1_gzdj-A33_14489-6645 from training. Duration: 59.2
+2022-12-02 06:42:11,025 INFO [train.py:876] Epoch 30, batch 150, loss[loss=0.1191, simple_loss=0.1864, pruned_loss=0.02585, over 4847.00 frames. ], tot_loss[loss=0.107, simple_loss=0.1693, pruned_loss=0.02232, over 504324.39 frames. ], batch size: 47, lr: 5.97e-03,
+2022-12-02 06:42:26,914 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4757, 3.5534, 3.6904, 3.4095, 3.1831, 3.4697, 3.6459, 3.6468],
+ device='cuda:0'), covar=tensor([0.1170, 0.0431, 0.0443, 0.0503, 0.0542, 0.0576, 0.0433, 0.0540],
+ device='cuda:0'), in_proj_covar=tensor([0.0257, 0.0183, 0.0202, 0.0193, 0.0195, 0.0202, 0.0178, 0.0214],
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:42:27,533 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.481e+01 1.474e+02 1.641e+02 2.012e+02 3.515e+02, threshold=3.281e+02, percent-clipped=1.0
+2022-12-02 06:42:27,617 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41701.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:42:27,792 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41701.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:42:41,715 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=41715.0, num_to_drop=1, layers_to_drop={2}
+2022-12-02 06:42:59,775 INFO [train.py:876] Epoch 30, batch 200, loss[loss=0.1484, simple_loss=0.1874, pruned_loss=0.05463, over 4913.00 frames. ], tot_loss[loss=0.1081, simple_loss=0.1705, pruned_loss=0.02286, over 604531.56 frames. ], batch size: 31, lr: 5.97e-03,
+2022-12-02 06:43:10,768 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.6817, 3.2671, 3.5877, 3.4219, 3.7629, 2.8753, 3.5681, 3.8347],
+ device='cuda:0'), covar=tensor([0.0468, 0.0484, 0.0428, 0.0405, 0.0335, 0.0788, 0.0289, 0.0377],
+ device='cuda:0'), in_proj_covar=tensor([0.0125, 0.0135, 0.0123, 0.0122, 0.0116, 0.0153, 0.0101, 0.0123],
+ device='cuda:0'), out_proj_covar=tensor([1.0678e-04, 1.1586e-04, 1.0459e-04, 1.0406e-04, 9.8282e-05, 1.3107e-04,
+ 8.9441e-05, 1.0668e-04], device='cuda:0')
+2022-12-02 06:43:14,384 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41749.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:43:22,953 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=41758.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:43:25,958 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8471, 4.6050, 4.4489, 4.8793, 4.2922, 4.0802, 4.7225, 4.4597],
+ device='cuda:0'), covar=tensor([0.0285, 0.0125, 0.0183, 0.0187, 0.0166, 0.0221, 0.0122, 0.0159],
+ device='cuda:0'), in_proj_covar=tensor([0.0064, 0.0058, 0.0063, 0.0049, 0.0060, 0.0062, 0.0057, 0.0057],
+ device='cuda:0'), out_proj_covar=tensor([5.5908e-05, 4.5602e-05, 5.3368e-05, 3.9868e-05, 4.9850e-05, 5.4076e-05,
+ 4.4457e-05, 4.6104e-05], device='cuda:0')
+2022-12-02 06:43:48,085 INFO [train.py:876] Epoch 30, batch 250, loss[loss=0.08802, simple_loss=0.1323, pruned_loss=0.02185, over 3670.00 frames. ], tot_loss[loss=0.1097, simple_loss=0.1723, pruned_loss=0.02353, over 681718.54 frames. ], batch size: 13, lr: 5.97e-03,
+2022-12-02 06:43:50,118 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1803-4175_sp1.1 from training. Duration: 0.9454375
+2022-12-02 06:44:04,069 INFO [zipformer.py:1414] attn_weights_entropy = tensor([4.3094, 4.2730, 4.7792, 4.2431, 4.6175, 4.5607, 4.2158, 4.2648],
+ device='cuda:0'), covar=tensor([0.0819, 0.0530, 0.0736, 0.0568, 0.0804, 0.0578, 0.1316, 0.0622],
+ device='cuda:0'), in_proj_covar=tensor([0.0193, 0.0135, 0.0200, 0.0164, 0.0146, 0.0178, 0.0205, 0.0144],
+ device='cuda:0'), out_proj_covar=tensor([1.3670e-04, 8.6297e-05, 1.5088e-04, 1.1183e-04, 1.0704e-04, 1.2649e-04,
+ 1.5255e-04, 9.6604e-05], device='cuda:0')
+2022-12-02 06:44:04,915 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.760e+01 1.373e+02 1.675e+02 1.948e+02 4.621e+02, threshold=3.349e+02, percent-clipped=4.0
+2022-12-02 06:44:06,409 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.32 vs. limit=2.0
+2022-12-02 06:44:09,951 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=41806.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:44:37,331 INFO [train.py:876] Epoch 30, batch 300, loss[loss=0.09797, simple_loss=0.1613, pruned_loss=0.01733, over 4783.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.1736, pruned_loss=0.02373, over 741892.46 frames. ], batch size: 26, lr: 5.96e-03,
+2022-12-02 06:44:47,861 WARNING [train.py:1028] Exclude cut with ID a_0_njm-A05_1949-4118 from training. Duration: 0.92
+2022-12-02 06:44:50,144 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4969, 2.7122, 3.1184, 3.2991, 2.4949, 3.3076, 3.0688, 3.0329],
+ device='cuda:0'), covar=tensor([0.0288, 0.0470, 0.0333, 0.0344, 0.0356, 0.0292, 0.0350, 0.0515],
+ device='cuda:0'), in_proj_covar=tensor([0.0139, 0.0131, 0.0146, 0.0134, 0.0108, 0.0138, 0.0141, 0.0154],
+ device='cuda:0'), out_proj_covar=tensor([9.1529e-05, 8.8116e-05, 9.7046e-05, 8.8558e-05, 6.9794e-05, 9.1410e-05,
+ 9.2931e-05, 1.0455e-04], device='cuda:0')
+2022-12-02 06:45:26,195 INFO [train.py:876] Epoch 30, batch 350, loss[loss=0.1124, simple_loss=0.1774, pruned_loss=0.02371, over 4818.00 frames. ], tot_loss[loss=0.1098, simple_loss=0.172, pruned_loss=0.02382, over 788604.68 frames. ], batch size: 33, lr: 5.96e-03,
+2022-12-02 06:45:42,404 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.075e+01 1.472e+02 1.841e+02 2.212e+02 5.343e+02, threshold=3.682e+02, percent-clipped=5.0
+2022-12-02 06:45:47,655 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=41906.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:46:14,545 INFO [train.py:876] Epoch 30, batch 400, loss[loss=0.1261, simple_loss=0.1858, pruned_loss=0.03319, over 4811.00 frames. ], tot_loss[loss=0.1104, simple_loss=0.1726, pruned_loss=0.02405, over 825720.29 frames. ], batch size: 45, lr: 5.96e-03,
+2022-12-02 06:46:14,727 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3760, 3.1085, 3.1519, 2.9078, 2.0301, 1.9398, 3.2114, 1.5320],
+ device='cuda:0'), covar=tensor([0.0647, 0.0439, 0.0553, 0.1004, 0.2383, 0.4037, 0.0405, 0.3810],
+ device='cuda:0'), in_proj_covar=tensor([0.0123, 0.0106, 0.0102, 0.0141, 0.0146, 0.0174, 0.0090, 0.0185],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002],
+ device='cuda:0')
+2022-12-02 06:46:28,501 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=41948.0, num_to_drop=1, layers_to_drop={0}
+2022-12-02 06:46:30,239 WARNING [train.py:1028] Exclude cut with ID a_1_ymcr-A54_23990-16027_sp0.9 from training. Duration: 0.97775
+2022-12-02 06:46:46,912 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=41967.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:46:51,852 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.9732, 3.4792, 3.4163, 3.3242, 3.4217, 3.3410, 3.3724, 3.6387],
+ device='cuda:0'), covar=tensor([0.2187, 0.0728, 0.1052, 0.0684, 0.0798, 0.0701, 0.0690, 0.0643],
+ device='cuda:0'), in_proj_covar=tensor([0.0197, 0.0183, 0.0207, 0.0171, 0.0201, 0.0184, 0.0186, 0.0193],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:46:56,595 WARNING [train.py:1028] Exclude cut with ID a_1_crzx2-A14_6289-5656_sp1.1 from training. Duration: 0.9818125
+2022-12-02 06:47:03,334 INFO [train.py:876] Epoch 30, batch 450, loss[loss=0.1259, simple_loss=0.1903, pruned_loss=0.03071, over 4839.00 frames. ], tot_loss[loss=0.11, simple_loss=0.1722, pruned_loss=0.02385, over 854253.83 frames. ], batch size: 49, lr: 5.95e-03,
+2022-12-02 06:47:19,123 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/checkpoint-42000.pt
+2022-12-02 06:47:22,198 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.355e+01 1.400e+02 1.684e+02 2.025e+02 5.282e+02, threshold=3.368e+02, percent-clipped=3.0
+2022-12-02 06:47:30,472 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42009.0, num_to_drop=1, layers_to_drop={1}
+2022-12-02 06:47:31,226 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=42010.0, num_to_drop=1, layers_to_drop={2}
+2022-12-02 06:47:54,941 INFO [train.py:876] Epoch 30, batch 500, loss[loss=0.07821, simple_loss=0.1272, pruned_loss=0.01463, over 4707.00 frames. ], tot_loss[loss=0.1101, simple_loss=0.1728, pruned_loss=0.02368, over 876197.75 frames. ], batch size: 21, lr: 5.95e-03,
+2022-12-02 06:48:06,761 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.3423, 3.1097, 3.2337, 3.1243, 2.2222, 2.0934, 3.3434, 1.6331],
+ device='cuda:0'), covar=tensor([0.0638, 0.0392, 0.0466, 0.0770, 0.2022, 0.3368, 0.0329, 0.3517],
+ device='cuda:0'), in_proj_covar=tensor([0.0122, 0.0106, 0.0101, 0.0140, 0.0145, 0.0172, 0.0090, 0.0184],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002],
+ device='cuda:0')
+2022-12-02 06:48:50,155 INFO [train.py:876] Epoch 30, batch 550, loss[loss=0.1192, simple_loss=0.1831, pruned_loss=0.02771, over 4806.00 frames. ], tot_loss[loss=0.1103, simple_loss=0.1731, pruned_loss=0.02372, over 894671.44 frames. ], batch size: 32, lr: 5.94e-03,
+2022-12-02 06:49:07,686 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.957e+01 1.363e+02 1.601e+02 1.902e+02 5.039e+02, threshold=3.202e+02, percent-clipped=2.0
+2022-12-02 06:49:35,415 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=3.63 vs. limit=5.0
+2022-12-02 06:49:39,677 INFO [train.py:876] Epoch 30, batch 600, loss[loss=0.1204, simple_loss=0.1926, pruned_loss=0.02411, over 4781.00 frames. ], tot_loss[loss=0.1103, simple_loss=0.1733, pruned_loss=0.02363, over 906609.24 frames. ], batch size: 58, lr: 5.94e-03,
+2022-12-02 06:50:25,026 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.8277, 4.1036, 4.1361, 3.8639, 3.6219, 3.9465, 4.1090, 4.0353],
+ device='cuda:0'), covar=tensor([0.1062, 0.0374, 0.0441, 0.0515, 0.0589, 0.0567, 0.0357, 0.0619],
+ device='cuda:0'), in_proj_covar=tensor([0.0258, 0.0185, 0.0204, 0.0195, 0.0197, 0.0204, 0.0180, 0.0215],
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:50:28,149 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=96, metric=1.23 vs. limit=2.0
+2022-12-02 06:50:28,704 INFO [train.py:876] Epoch 30, batch 650, loss[loss=0.1079, simple_loss=0.1731, pruned_loss=0.02138, over 4822.00 frames. ], tot_loss[loss=0.11, simple_loss=0.1726, pruned_loss=0.02372, over 916939.85 frames. ], batch size: 45, lr: 5.94e-03,
+2022-12-02 06:50:45,919 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.593e+01 1.515e+02 1.804e+02 2.232e+02 8.690e+02, threshold=3.607e+02, percent-clipped=5.0
+2022-12-02 06:51:17,942 INFO [train.py:876] Epoch 30, batch 700, loss[loss=0.08587, simple_loss=0.1433, pruned_loss=0.01421, over 4769.00 frames. ], tot_loss[loss=0.1106, simple_loss=0.1736, pruned_loss=0.02375, over 924277.84 frames. ], batch size: 26, lr: 5.93e-03,
+2022-12-02 06:51:25,207 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.3723, 3.9679, 3.7049, 3.6496, 3.8541, 3.8113, 3.7112, 4.1130],
+ device='cuda:0'), covar=tensor([0.2006, 0.0651, 0.1141, 0.0643, 0.0971, 0.0677, 0.0874, 0.0569],
+ device='cuda:0'), in_proj_covar=tensor([0.0196, 0.0182, 0.0205, 0.0172, 0.0200, 0.0183, 0.0184, 0.0192],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:51:43,854 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=1.94 vs. limit=2.0
+2022-12-02 06:51:45,214 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=42262.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:51:45,429 INFO [zipformer.py:1414] attn_weights_entropy = tensor([1.4455, 1.3362, 1.4741, 1.1783, 1.0430, 1.1851, 1.5616, 1.4605],
+ device='cuda:0'), covar=tensor([0.0214, 0.0309, 0.0219, 0.0206, 0.0279, 0.0274, 0.0176, 0.0190],
+ device='cuda:0'), in_proj_covar=tensor([0.0039, 0.0037, 0.0037, 0.0039, 0.0042, 0.0039, 0.0041, 0.0036],
+ device='cuda:0'), out_proj_covar=tensor([3.1378e-05, 2.8491e-05, 3.0223e-05, 3.1245e-05, 3.3223e-05, 3.0660e-05,
+ 3.2404e-05, 2.8029e-05], device='cuda:0')
+2022-12-02 06:52:06,659 INFO [train.py:876] Epoch 30, batch 750, loss[loss=0.1102, simple_loss=0.1682, pruned_loss=0.02608, over 4911.00 frames. ], tot_loss[loss=0.1104, simple_loss=0.1733, pruned_loss=0.02371, over 928802.44 frames. ], batch size: 31, lr: 5.93e-03,
+2022-12-02 06:52:23,106 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.535e+01 1.431e+02 1.694e+02 2.021e+02 4.086e+02, threshold=3.388e+02, percent-clipped=2.0
+2022-12-02 06:52:26,161 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=42304.0, num_to_drop=1, layers_to_drop={2}
+2022-12-02 06:52:32,180 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=42310.0, num_to_drop=1, layers_to_drop={1}
+2022-12-02 06:52:45,602 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=5.41 vs. limit=5.0
+2022-12-02 06:52:46,187 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5088, 2.9544, 3.5153, 3.1646, 3.5147, 2.5821, 3.4113, 3.6176],
+ device='cuda:0'), covar=tensor([0.0518, 0.0716, 0.0454, 0.0489, 0.0501, 0.1007, 0.0368, 0.0541],
+ device='cuda:0'), in_proj_covar=tensor([0.0127, 0.0138, 0.0125, 0.0125, 0.0118, 0.0156, 0.0103, 0.0125],
+ device='cuda:0'), out_proj_covar=tensor([1.0886e-04, 1.1862e-04, 1.0614e-04, 1.0640e-04, 1.0028e-04, 1.3377e-04,
+ 9.1265e-05, 1.0826e-04], device='cuda:0')
+2022-12-02 06:52:49,088 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.5647, 2.7192, 3.1171, 3.5328, 2.8300, 3.3394, 3.1794, 3.2101],
+ device='cuda:0'), covar=tensor([0.0438, 0.0495, 0.0399, 0.0274, 0.0303, 0.0390, 0.0325, 0.0416],
+ device='cuda:0'), in_proj_covar=tensor([0.0137, 0.0133, 0.0147, 0.0134, 0.0109, 0.0140, 0.0141, 0.0154],
+ device='cuda:0'), out_proj_covar=tensor([9.0736e-05, 8.9125e-05, 9.7662e-05, 8.8940e-05, 7.0292e-05, 9.2309e-05,
+ 9.3590e-05, 1.0433e-04], device='cuda:0')
+2022-12-02 06:52:55,157 INFO [train.py:876] Epoch 30, batch 800, loss[loss=0.1041, simple_loss=0.1633, pruned_loss=0.02246, over 4918.00 frames. ], tot_loss[loss=0.1107, simple_loss=0.174, pruned_loss=0.02367, over 934600.15 frames. ], batch size: 32, lr: 5.93e-03,
+2022-12-02 06:53:18,844 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=42358.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:53:39,822 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=42379.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:53:44,715 INFO [train.py:876] Epoch 30, batch 850, loss[loss=0.106, simple_loss=0.1663, pruned_loss=0.02282, over 4908.00 frames. ], tot_loss[loss=0.1089, simple_loss=0.1718, pruned_loss=0.02301, over 937040.84 frames. ], batch size: 31, lr: 5.92e-03,
+2022-12-02 06:54:01,434 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.120e+01 1.401e+02 1.659e+02 2.006e+02 6.887e+02, threshold=3.317e+02, percent-clipped=3.0
+2022-12-02 06:54:33,306 INFO [train.py:876] Epoch 30, batch 900, loss[loss=0.1265, simple_loss=0.1895, pruned_loss=0.03173, over 4847.00 frames. ], tot_loss[loss=0.1098, simple_loss=0.1731, pruned_loss=0.02327, over 942376.58 frames. ], batch size: 40, lr: 5.92e-03,
+2022-12-02 06:54:39,475 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42440.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:55:10,764 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2268, 2.2129, 2.4910, 2.1513, 2.3488, 2.7035, 2.4420, 2.0818],
+ device='cuda:0'), covar=tensor([0.1309, 0.0877, 0.1560, 0.0920, 0.0913, 0.0726, 0.0959, 0.0939],
+ device='cuda:0'), in_proj_covar=tensor([0.0073, 0.0079, 0.0076, 0.0082, 0.0071, 0.0069, 0.0070, 0.0076],
+ device='cuda:0'), out_proj_covar=tensor([6.7376e-05, 7.1301e-05, 6.9893e-05, 7.3755e-05, 6.6130e-05, 6.4029e-05,
+ 6.5399e-05, 6.9953e-05], device='cuda:0')
+2022-12-02 06:55:21,980 INFO [train.py:876] Epoch 30, batch 950, loss[loss=0.1074, simple_loss=0.1701, pruned_loss=0.02233, over 4814.00 frames. ], tot_loss[loss=0.1102, simple_loss=0.1736, pruned_loss=0.0234, over 944357.37 frames. ], batch size: 32, lr: 5.92e-03,
+2022-12-02 06:55:35,781 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.6556, 2.3666, 2.3625, 2.6951, 2.0867, 2.5291, 1.3429, 2.6766],
+ device='cuda:0'), covar=tensor([0.0961, 0.1088, 0.1093, 0.0794, 0.1221, 0.1370, 0.1359, 0.0665],
+ device='cuda:0'), in_proj_covar=tensor([0.0090, 0.0096, 0.0111, 0.0093, 0.0116, 0.0101, 0.0098, 0.0099],
+ device='cuda:0'), out_proj_covar=tensor([9.4907e-05, 1.0070e-04, 1.1326e-04, 9.6474e-05, 1.1865e-04, 1.0578e-04,
+ 1.0202e-04, 1.0267e-04], device='cuda:0')
+2022-12-02 06:55:38,288 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.917e+01 1.382e+02 1.679e+02 2.077e+02 4.408e+02, threshold=3.357e+02, percent-clipped=4.0
+2022-12-02 06:55:43,257 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=42506.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:55:57,012 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.2958, 3.8845, 3.6459, 3.7400, 3.6078, 3.6565, 3.4892, 4.0018],
+ device='cuda:0'), covar=tensor([0.2296, 0.0739, 0.0948, 0.0672, 0.1064, 0.0818, 0.0843, 0.0626],
+ device='cuda:0'), in_proj_covar=tensor([0.0197, 0.0185, 0.0208, 0.0174, 0.0203, 0.0184, 0.0187, 0.0195],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:56:10,339 INFO [train.py:876] Epoch 30, batch 1000, loss[loss=0.08761, simple_loss=0.1369, pruned_loss=0.01914, over 3213.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.1742, pruned_loss=0.02345, over 945067.14 frames. ], batch size: 12, lr: 5.91e-03,
+2022-12-02 06:56:38,090 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.8518, 2.2425, 2.8899, 2.2617, 2.6485, 2.4954, 2.6199, 3.0539],
+ device='cuda:0'), covar=tensor([0.0206, 0.1430, 0.0577, 0.1497, 0.0433, 0.0507, 0.1598, 0.0570],
+ device='cuda:0'), in_proj_covar=tensor([0.0081, 0.0117, 0.0095, 0.0128, 0.0089, 0.0097, 0.0137, 0.0112],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0002, 0.0001],
+ device='cuda:0')
+2022-12-02 06:56:38,099 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=42562.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:56:43,110 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42567.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:56:59,658 INFO [train.py:876] Epoch 30, batch 1050, loss[loss=0.1166, simple_loss=0.1869, pruned_loss=0.02318, over 4691.00 frames. ], tot_loss[loss=0.1108, simple_loss=0.1745, pruned_loss=0.02353, over 947538.85 frames. ], batch size: 63, lr: 5.91e-03,
+2022-12-02 06:57:15,915 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 9.829e+01 1.418e+02 1.772e+02 2.119e+02 1.306e+03, threshold=3.544e+02, percent-clipped=5.0
+2022-12-02 06:57:18,946 INFO [zipformer.py:626] warmup_begin=2000.0, warmup_end=2666.7, batch_count=42604.0, num_to_drop=1, layers_to_drop={0}
+2022-12-02 06:57:24,616 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=42610.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:57:39,588 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.2681, 2.9502, 3.1696, 3.0276, 2.2541, 2.0823, 3.2541, 1.6943],
+ device='cuda:0'), covar=tensor([0.0606, 0.0503, 0.0506, 0.0709, 0.1955, 0.3176, 0.0406, 0.3232],
+ device='cuda:0'), in_proj_covar=tensor([0.0121, 0.0108, 0.0102, 0.0139, 0.0146, 0.0170, 0.0090, 0.0184],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002],
+ device='cuda:0')
+2022-12-02 06:57:48,112 INFO [train.py:876] Epoch 30, batch 1100, loss[loss=0.09159, simple_loss=0.1553, pruned_loss=0.01394, over 4883.00 frames. ], tot_loss[loss=0.1108, simple_loss=0.1748, pruned_loss=0.02345, over 951979.08 frames. ], batch size: 30, lr: 5.91e-03,
+2022-12-02 06:58:02,785 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=42649.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:58:05,623 INFO [zipformer.py:626] warmup_begin=666.7, warmup_end=1333.3, batch_count=42652.0, num_to_drop=1, layers_to_drop={0}
+2022-12-02 06:58:06,519 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.9432, 4.0161, 4.4851, 3.9251, 4.3144, 4.2852, 3.9588, 4.0057],
+ device='cuda:0'), covar=tensor([0.1010, 0.0605, 0.0693, 0.0657, 0.0713, 0.0666, 0.1306, 0.0544],
+ device='cuda:0'), in_proj_covar=tensor([0.0194, 0.0138, 0.0197, 0.0165, 0.0148, 0.0181, 0.0208, 0.0144],
+ device='cuda:0'), out_proj_covar=tensor([1.3689e-04, 8.8428e-05, 1.4943e-04, 1.1163e-04, 1.0802e-04, 1.2870e-04,
+ 1.5458e-04, 9.6535e-05], device='cuda:0')
+2022-12-02 06:58:36,211 INFO [train.py:876] Epoch 30, batch 1150, loss[loss=0.1469, simple_loss=0.2013, pruned_loss=0.04626, over 4863.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.1744, pruned_loss=0.02333, over 953705.84 frames. ], batch size: 36, lr: 5.90e-03,
+2022-12-02 06:58:51,474 INFO [zipformer.py:1414] attn_weights_entropy = tensor([2.0602, 3.6254, 3.4665, 3.1542, 3.4929, 3.4667, 3.3839, 3.8027],
+ device='cuda:0'), covar=tensor([0.2413, 0.0711, 0.0903, 0.0915, 0.0907, 0.0908, 0.0898, 0.0557],
+ device='cuda:0'), in_proj_covar=tensor([0.0197, 0.0184, 0.0207, 0.0175, 0.0204, 0.0184, 0.0187, 0.0195],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
+ device='cuda:0')
+2022-12-02 06:58:52,965 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.108e+01 1.372e+02 1.602e+02 1.938e+02 3.020e+02, threshold=3.204e+02, percent-clipped=0.0
+2022-12-02 06:59:02,005 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42710.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:59:08,841 INFO [zipformer.py:1414] attn_weights_entropy = tensor([3.4578, 3.1883, 3.2913, 3.2806, 2.4052, 2.2797, 3.5588, 1.8294],
+ device='cuda:0'), covar=tensor([0.0505, 0.0533, 0.0534, 0.0633, 0.1889, 0.2999, 0.0279, 0.3008],
+ device='cuda:0'), in_proj_covar=tensor([0.0119, 0.0107, 0.0100, 0.0137, 0.0145, 0.0167, 0.0089, 0.0181],
+ device='cuda:0'), out_proj_covar=tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0001, 0.0002],
+ device='cuda:0')
+2022-12-02 06:59:25,342 INFO [train.py:876] Epoch 30, batch 1200, loss[loss=0.0904, simple_loss=0.1447, pruned_loss=0.01808, over 4710.00 frames. ], tot_loss[loss=0.1105, simple_loss=0.174, pruned_loss=0.02351, over 954386.78 frames. ], batch size: 27, lr: 5.90e-03,
+2022-12-02 06:59:26,412 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=42735.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 06:59:37,019 INFO [scaling.py:679] Whitening: num_groups=1, num_channels=384, metric=4.95 vs. limit=5.0
+2022-12-02 07:00:14,729 INFO [train.py:876] Epoch 30, batch 1250, loss[loss=0.08958, simple_loss=0.1466, pruned_loss=0.01626, over 4727.00 frames. ], tot_loss[loss=0.1099, simple_loss=0.1733, pruned_loss=0.02321, over 950898.82 frames. ], batch size: 27, lr: 5.90e-03,
+2022-12-02 07:00:31,672 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 7.913e+01 1.380e+02 1.727e+02 2.059e+02 4.160e+02, threshold=3.454e+02, percent-clipped=2.0
+2022-12-02 07:00:51,942 INFO [scaling.py:679] Whitening: num_groups=8, num_channels=192, metric=2.04 vs. limit=2.0
+2022-12-02 07:01:03,974 INFO [train.py:876] Epoch 30, batch 1300, loss[loss=0.1098, simple_loss=0.1752, pruned_loss=0.02217, over 4894.00 frames. ], tot_loss[loss=0.1097, simple_loss=0.173, pruned_loss=0.02316, over 949667.01 frames. ], batch size: 29, lr: 5.89e-03,
+2022-12-02 07:01:28,065 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=42859.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 07:01:30,933 INFO [zipformer.py:626] warmup_begin=1333.3, warmup_end=2000.0, batch_count=42862.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 07:01:47,321 INFO [zipformer.py:626] warmup_begin=2666.7, warmup_end=3333.3, batch_count=42879.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 07:01:51,860 INFO [train.py:876] Epoch 30, batch 1350, loss[loss=0.08213, simple_loss=0.1325, pruned_loss=0.01587, over 4615.00 frames. ], tot_loss[loss=0.1094, simple_loss=0.1724, pruned_loss=0.02317, over 947590.46 frames. ], batch size: 21, lr: 5.89e-03,
+2022-12-02 07:02:08,329 INFO [optim.py:341] Clipping_scale=2.0, grad-norm quartiles 8.812e+01 1.363e+02 1.710e+02 2.059e+02 6.109e+02, threshold=3.419e+02, percent-clipped=3.0
+2022-12-02 07:02:27,061 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42920.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 07:02:40,522 INFO [train.py:876] Epoch 30, batch 1400, loss[loss=0.07015, simple_loss=0.1248, pruned_loss=0.00775, over 4705.00 frames. ], tot_loss[loss=0.1083, simple_loss=0.1712, pruned_loss=0.02268, over 945594.39 frames. ], batch size: 23, lr: 5.89e-03,
+2022-12-02 07:02:46,692 INFO [zipformer.py:626] warmup_begin=3333.3, warmup_end=4000.0, batch_count=42940.0, num_to_drop=0, layers_to_drop=set()
+2022-12-02 07:03:10,987 INFO [checkpoint.py:75] Saving checkpoint to pruned_transducer_stateless7/exp/epoch-30.pt
+2022-12-02 07:05:01,024 INFO [train.py:1125] Done!