diff --git "a/nohup.out" "b/nohup.out" --- "a/nohup.out" +++ "b/nohup.out" @@ -43494,3 +43494,11472 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + Training...: 83% 3642/4393 [4:55:39<32:14:49, 154.58s/it]/home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/training/common_utils.py:25: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map( + + Training...: 83% 3643/4393 [4:55:42<22:44:49, 109.19s/it] + Training...: 83% 3644/4393 [4:55:45<16:04:19, 77.25s/it]  + Training...: 83% 3645/4393 [4:55:47<11:23:10, 54.80s/it] + Training...: 83% 3646/4393 [4:55:49<8:05:44, 39.02s/it]  + Training...: 83% 3647/4393 [4:55:51<5:46:33, 27.87s/it] + Training...: 83% 3648/4393 [4:55:53<4:08:11, 19.99s/it] + Training...: 83% 3649/4393 [4:55:54<2:58:17, 14.38s/it] + Training...: 83% 3650/4393 [4:55:55<2:08:31, 10.38s/it] + Training...: 83% 3651/4393 [4:56:02<1:54:27, 9.26s/it] + Training...: 83% 3652/4393 [4:56:08<1:44:28, 8.46s/it] + Training...: 83% 3653/4393 [4:56:15<1:36:12, 7.80s/it] + Training...: 83% 3654/4393 [4:56:21<1:30:18, 7.33s/it] + Training...: 83% 3655/4393 [4:56:27<1:25:52, 6.98s/it] + Training...: 83% 3656/4393 [4:56:33<1:22:50, 6.74s/it] + Training...: 83% 3657/4393 [4:56:40<1:20:47, 6.59s/it] + Training...: 83% 3658/4393 [4:56:45<1:18:25, 6.40s/it] + Training...: 83% 3659/4393 [4:56:52<1:17:10, 6.31s/it] + Training...: 83% 3660/4393 [4:56:58<1:16:35, 6.27s/it] + Training...: 83% 3661/4393 [4:57:04<1:15:54, 6.22s/it] + Training...: 83% 3662/4393 [4:57:10<1:14:26, 6.11s/it] + Training...: 83% 3663/4393 [4:57:16<1:13:51, 6.07s/it] + Training...: 83% 3664/4393 [4:57:21<1:12:48, 5.99s/it] + Training...: 83% 3665/4393 [4:57:27<1:12:09, 5.95s/it] + Training...: 83% 3666/4393 [4:57:33<1:11:07, 5.87s/it]/home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:61: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x[0], tree) +run_flax_speech_recognition_seq2seq.py:336: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:00:43<35:02:00, 21020.10s/it] + Training...: 83% 3666/4393 [4:57:39<1:11:07, 5.87s/it] + Training...: 83% 3667/4393 [4:57:39<1:12:55, 6.03s/it] + Training...: 83% 3668/4393 [4:57:45<1:11:14, 5.90s/it] + Training...: 84% 3669/4393 [4:57:51<1:09:57, 5.80s/it] + Training...: 84% 3670/4393 [4:57:56<1:08:55, 5.72s/it] + Training...: 84% 3671/4393 [4:58:02<1:08:06, 5.66s/it] + Training...: 84% 3672/4393 [4:58:07<1:07:12, 5.59s/it] + Training...: 84% 3673/4393 [4:58:12<1:06:30, 5.54s/it] + Training...: 84% 3674/4393 [4:58:18<1:06:58, 5.59s/it] + Training...: 84% 3675/4393 [4:58:24<1:06:25, 5.55s/it] + Training...: 84% 3676/4393 [4:58:29<1:05:31, 5.48s/it] + Training...: 84% 3677/4393 [4:58:34<1:04:53, 5.44s/it] + Training...: 84% 3678/4393 [4:58:39<1:03:53, 5.36s/it] + Training...: 84% 3679/4393 [4:58:45<1:03:37, 5.35s/it] + Training...: 84% 3680/4393 [4:58:50<1:03:06, 5.31s/it] + Training...: 84% 3681/4393 [4:58:55<1:02:27, 5.26s/it] + Training...: 84% 3682/4393 [4:59:00<1:01:41, 5.21s/it] + Training...: 84% 3683/4393 [4:59:05<1:01:05, 5.16s/it] + Training...: 84% 3684/4393 [4:59:10<1:00:07, 5.09s/it] + Training...: 84% 3685/4393 [4:59:15<59:22, 5.03s/it]  + Training...: 84% 3686/4393 [4:59:20<58:34, 4.97s/it] + Training...: 84% 3687/4393 [4:59:25<57:38, 4.90s/it] + Training...: 84% 3688/4393 [4:59:29<55:51, 4.75s/it] + Training...: 84% 3689/4393 [4:59:33<54:06, 4.61s/it] + Training...: 84% 3690/4393 [4:59:37<51:34, 4.40s/it] + Training...: 84% 3691/4393 [4:59:41<49:04, 4.19s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:02:49<35:02:00, 21020.10s/it] + Training...: 84% 3691/4393 [4:59:45<49:04, 4.19s/it] + Training...: 84% 3692/4393 [4:59:45<47:57, 4.10s/it] + Training...: 84% 3693/4393 [4:59:48<44:19, 3.80s/it] + Training...: 84% 3694/4393 [4:59:51<40:46, 3.50s/it] + Training...: 84% 3695/4393 [4:59:53<37:10, 3.20s/it] + Training...: 84% 3696/4393 [4:59:56<33:43, 2.90s/it] + Training...: 84% 3697/4393 [4:59:57<30:17, 2.61s/it] + Training...: 84% 3698/4393 [4:59:59<26:56, 2.33s/it] + Training...: 84% 3699/4393 [5:00:00<23:34, 2.04s/it] + Training...: 84% 3700/4393 [5:00:02<20:21, 1.76s/it] + Training...: 84% 3701/4393 [5:00:08<36:41, 3.18s/it] + Training...: 84% 3702/4393 [5:00:15<48:12, 4.19s/it] + Training...: 84% 3703/4393 [5:00:21<54:51, 4.77s/it] + Training...: 84% 3704/4393 [5:00:27<59:03, 5.14s/it] + Training...: 84% 3705/4393 [5:00:33<1:02:07, 5.42s/it] + Training...: 84% 3706/4393 [5:00:39<1:03:52, 5.58s/it] + Training...: 84% 3707/4393 [5:00:45<1:05:04, 5.69s/it] + Training...: 84% 3708/4393 [5:00:51<1:06:01, 5.78s/it] + Training...: 84% 3709/4393 [5:00:57<1:07:15, 5.90s/it] + Training...: 84% 3710/4393 [5:01:03<1:07:32, 5.93s/it] + Training...: 84% 3711/4393 [5:01:09<1:07:14, 5.92s/it] + Training...: 84% 3712/4393 [5:01:15<1:06:33, 5.86s/it] + Training...: 85% 3713/4393 [5:01:20<1:06:11, 5.84s/it] + Training...: 85% 3714/4393 [5:01:26<1:05:32, 5.79s/it] + Training...: 85% 3715/4393 [5:01:32<1:05:13, 5.77s/it] + Training...: 85% 3716/4393 [5:01:37<1:04:45, 5.74s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:04:48<35:02:00, 21020.10s/it] + Training...: 85% 3716/4393 [5:01:44<1:04:45, 5.74s/it] + Training...: 85% 3717/4393 [5:01:44<1:06:53, 5.94s/it] + Training...: 85% 3718/4393 [5:01:49<1:05:42, 5.84s/it] + Training...: 85% 3719/4393 [5:01:55<1:04:43, 5.76s/it] + Training...: 85% 3720/4393 [5:02:01<1:04:00, 5.71s/it] + Training...: 85% 3721/4393 [5:02:06<1:03:43, 5.69s/it] + Training...: 85% 3722/4393 [5:02:12<1:02:41, 5.61s/it] + Training...: 85% 3723/4393 [5:02:17<1:01:49, 5.54s/it] + Training...: 85% 3724/4393 [5:02:22<1:01:00, 5.47s/it] + Training...: 85% 3725/4393 [5:02:28<1:00:26, 5.43s/it] + Training...: 85% 3726/4393 [5:02:33<59:43, 5.37s/it]  + Training...: 85% 3727/4393 [5:02:38<59:07, 5.33s/it] + Training...: 85% 3728/4393 [5:02:43<58:24, 5.27s/it] + Training...: 85% 3729/4393 [5:02:48<57:48, 5.22s/it] + Training...: 85% 3730/4393 [5:02:53<57:05, 5.17s/it] + Training...: 85% 3731/4393 [5:02:58<56:14, 5.10s/it] + Training...: 85% 3732/4393 [5:03:03<55:17, 5.02s/it] + Training...: 85% 3733/4393 [5:03:08<54:32, 4.96s/it] + Training...: 85% 3734/4393 [5:03:13<54:08, 4.93s/it] + Training...: 85% 3735/4393 [5:03:17<53:10, 4.85s/it] + Training...: 85% 3736/4393 [5:03:22<51:36, 4.71s/it] + Training...: 85% 3737/4393 [5:03:26<50:03, 4.58s/it] + Training...: 85% 3738/4393 [5:03:30<48:24, 4.43s/it] + Training...: 85% 3739/4393 [5:03:34<46:35, 4.27s/it] + Training...: 85% 3740/4393 [5:03:38<44:40, 4.10s/it] + Training...: 85% 3741/4393 [5:03:41<42:08, 3.88s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:06:49<35:02:00, 21020.10s/it] + Training...: 85% 3741/4393 [5:03:45<42:08, 3.88s/it] + Training...: 85% 3742/4393 [5:03:45<41:23, 3.82s/it] + Training...: 85% 3743/4393 [5:03:48<38:16, 3.53s/it] + Training...: 85% 3744/4393 [5:03:50<35:01, 3.24s/it] + Training...: 85% 3745/4393 [5:03:53<31:52, 2.95s/it] + Training...: 85% 3746/4393 [5:03:55<28:55, 2.68s/it] + Training...: 85% 3747/4393 [5:03:56<26:10, 2.43s/it] + Training...: 85% 3748/4393 [5:03:58<23:19, 2.17s/it] + Training...: 85% 3749/4393 [5:03:59<20:22, 1.90s/it] + Training...: 85% 3750/4393 [5:04:00<17:28, 1.63s/it] + Training...: 85% 3751/4393 [5:04:07<32:22, 3.03s/it] + Training...: 85% 3752/4393 [5:04:13<42:57, 4.02s/it] + Training...: 85% 3753/4393 [5:04:19<50:08, 4.70s/it] + Training...: 85% 3754/4393 [5:04:26<55:14, 5.19s/it] + Training...: 85% 3755/4393 [5:04:32<58:08, 5.47s/it] + Training...: 85% 3756/4393 [5:04:38<59:50, 5.64s/it] + Training...: 86% 3757/4393 [5:04:44<1:00:48, 5.74s/it] + Training...: 86% 3758/4393 [5:04:50<1:01:12, 5.78s/it] + Training...: 86% 3759/4393 [5:04:55<1:01:34, 5.83s/it] + Training...: 86% 3760/4393 [5:05:01<1:01:33, 5.84s/it] + Training...: 86% 3761/4393 [5:05:07<1:01:33, 5.84s/it] + Training...: 86% 3762/4393 [5:05:13<1:01:14, 5.82s/it] + Training...: 86% 3763/4393 [5:05:19<1:01:09, 5.82s/it] + Training...: 86% 3764/4393 [5:05:24<1:00:39, 5.79s/it] + Training...: 86% 3765/4393 [5:05:30<1:00:30, 5.78s/it] + Training...: 86% 3766/4393 [5:05:36<1:00:05, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:08:47<35:02:00, 21020.10s/it] + Training...: 86% 3766/4393 [5:05:43<1:00:05, 5.75s/it] + Training...: 86% 3767/4393 [5:05:43<1:02:53, 6.03s/it] + Training...: 86% 3768/4393 [5:05:48<1:01:19, 5.89s/it] + Training...: 86% 3769/4393 [5:05:54<1:00:12, 5.79s/it] + Training...: 86% 3770/4393 [5:05:59<59:40, 5.75s/it]  + Training...: 86% 3771/4393 [5:06:05<58:39, 5.66s/it] + Training...: 86% 3772/4393 [5:06:10<57:47, 5.58s/it] + Training...: 86% 3773/4393 [5:06:16<57:16, 5.54s/it] + Training...: 86% 3774/4393 [5:06:21<56:34, 5.48s/it] + Training...: 86% 3775/4393 [5:06:26<56:14, 5.46s/it] + Training...: 86% 3776/4393 [5:06:32<55:47, 5.42s/it] + Training...: 86% 3777/4393 [5:06:37<55:01, 5.36s/it] + Training...: 86% 3778/4393 [5:06:42<54:16, 5.30s/it] + Training...: 86% 3779/4393 [5:06:47<53:41, 5.25s/it] + Training...: 86% 3780/4393 [5:06:52<52:51, 5.17s/it] + Training...: 86% 3781/4393 [5:06:57<52:26, 5.14s/it] + Training...: 86% 3782/4393 [5:07:02<51:34, 5.06s/it] + Training...: 86% 3783/4393 [5:07:07<50:42, 4.99s/it] + Training...: 86% 3784/4393 [5:07:12<49:44, 4.90s/it] + Training...: 86% 3785/4393 [5:07:16<48:42, 4.81s/it] + Training...: 86% 3786/4393 [5:07:21<47:20, 4.68s/it] + Training...: 86% 3787/4393 [5:07:25<46:01, 4.56s/it] + Training...: 86% 3788/4393 [5:07:29<44:18, 4.39s/it] + Training...: 86% 3789/4393 [5:07:33<42:37, 4.23s/it] + Training...: 86% 3790/4393 [5:07:37<40:49, 4.06s/it] + Training...: 86% 3791/4393 [5:07:40<38:45, 3.86s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:10:48<35:02:00, 21020.10s/it] + Training...: 86% 3791/4393 [5:07:44<38:45, 3.86s/it] + Training...: 86% 3792/4393 [5:07:44<38:04, 3.80s/it] + Training...: 86% 3793/4393 [5:07:47<35:23, 3.54s/it] + Training...: 86% 3794/4393 [5:07:49<32:37, 3.27s/it] + Training...: 86% 3795/4393 [5:07:52<29:52, 3.00s/it] + Training...: 86% 3796/4393 [5:07:54<26:58, 2.71s/it] + Training...: 86% 3797/4393 [5:07:55<24:04, 2.42s/it] + Training...: 86% 3798/4393 [5:07:57<21:33, 2.17s/it] + Training...: 86% 3799/4393 [5:07:58<18:55, 1.91s/it] + Training...: 87% 3800/4393 [5:07:59<16:19, 1.65s/it] + Training...: 87% 3801/4393 [5:08:06<30:00, 3.04s/it] + Training...: 87% 3802/4393 [5:08:12<39:54, 4.05s/it] + Training...: 87% 3803/4393 [5:08:18<46:29, 4.73s/it] + Training...: 87% 3804/4393 [5:08:24<50:16, 5.12s/it] + Training...: 87% 3805/4393 [5:08:30<52:54, 5.40s/it] + Training...: 87% 3806/4393 [5:08:36<54:32, 5.57s/it] + Training...: 87% 3807/4393 [5:08:42<55:42, 5.70s/it] + Training...: 87% 3808/4393 [5:08:48<56:10, 5.76s/it] + Training...: 87% 3809/4393 [5:08:54<56:26, 5.80s/it] + Training...: 87% 3810/4393 [5:09:00<56:27, 5.81s/it] + Training...: 87% 3811/4393 [5:09:06<56:41, 5.84s/it] + Training...: 87% 3812/4393 [5:09:12<56:30, 5.84s/it] + Training...: 87% 3813/4393 [5:09:17<56:24, 5.83s/it] + Training...: 87% 3814/4393 [5:09:23<56:04, 5.81s/it] + Training...: 87% 3815/4393 [5:09:29<55:50, 5.80s/it] + Training...: 87% 3816/4393 [5:09:35<55:17, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:12:45<35:02:00, 21020.10s/it] + Training...: 87% 3816/4393 [5:09:41<55:17, 5.75s/it] + Training...: 87% 3817/4393 [5:09:41<57:15, 5.96s/it] + Training...: 87% 3818/4393 [5:09:47<56:11, 5.86s/it] + Training...: 87% 3819/4393 [5:09:52<55:23, 5.79s/it] + Training...: 87% 3820/4393 [5:09:58<54:30, 5.71s/it] + Training...: 87% 3821/4393 [5:10:04<54:30, 5.72s/it] + Training...: 87% 3822/4393 [5:10:09<53:58, 5.67s/it] + Training...: 87% 3823/4393 [5:10:15<53:22, 5.62s/it] + Training...: 87% 3824/4393 [5:10:20<52:56, 5.58s/it] + Training...: 87% 3825/4393 [5:10:26<52:46, 5.57s/it] + Training...: 87% 3826/4393 [5:10:31<52:17, 5.53s/it] + Training...: 87% 3827/4393 [5:10:37<51:48, 5.49s/it] + Training...: 87% 3828/4393 [5:10:42<51:14, 5.44s/it] + Training...: 87% 3829/4393 [5:10:47<50:51, 5.41s/it] + Training...: 87% 3830/4393 [5:10:52<50:03, 5.34s/it] + Training...: 87% 3831/4393 [5:10:58<49:27, 5.28s/it] + Training...: 87% 3832/4393 [5:11:03<48:46, 5.22s/it] + Training...: 87% 3833/4393 [5:11:08<48:09, 5.16s/it] + Training...: 87% 3834/4393 [5:11:12<47:10, 5.06s/it] + Training...: 87% 3835/4393 [5:11:17<46:26, 4.99s/it] + Training...: 87% 3836/4393 [5:11:22<45:25, 4.89s/it] + Training...: 87% 3837/4393 [5:11:26<44:07, 4.76s/it] + Training...: 87% 3838/4393 [5:11:31<42:35, 4.60s/it] + Training...: 87% 3839/4393 [5:11:35<40:59, 4.44s/it] + Training...: 87% 3840/4393 [5:11:38<38:56, 4.22s/it] + Training...: 87% 3841/4393 [5:11:42<36:54, 4.01s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:14:50<35:02:00, 21020.10s/it] + Training...: 87% 3841/4393 [5:11:46<36:54, 4.01s/it] + Training...: 87% 3842/4393 [5:11:46<36:11, 3.94s/it] + Training...: 87% 3843/4393 [5:11:49<33:02, 3.60s/it] + Training...: 88% 3844/4393 [5:11:51<29:58, 3.28s/it] + Training...: 88% 3845/4393 [5:11:53<27:11, 2.98s/it] + Training...: 88% 3846/4393 [5:11:55<24:33, 2.69s/it] + Training...: 88% 3847/4393 [5:11:57<22:00, 2.42s/it] + Training...: 88% 3848/4393 [5:11:59<19:34, 2.15s/it] + Training...: 88% 3849/4393 [5:12:00<17:16, 1.91s/it] + Training...: 88% 3850/4393 [5:12:01<14:46, 1.63s/it] + Training...: 88% 3851/4393 [5:12:07<27:42, 3.07s/it] + Training...: 88% 3852/4393 [5:12:14<36:34, 4.06s/it] + Training...: 88% 3853/4393 [5:12:20<42:09, 4.68s/it] + Training...: 88% 3854/4393 [5:12:26<45:45, 5.09s/it] + Training...: 88% 3855/4393 [5:12:32<48:06, 5.37s/it] + Training...: 88% 3856/4393 [5:12:38<49:29, 5.53s/it] + Training...: 88% 3857/4393 [5:12:44<50:32, 5.66s/it] + Training...: 88% 3858/4393 [5:12:50<51:14, 5.75s/it] + Training...: 88% 3859/4393 [5:12:56<51:31, 5.79s/it] + Training...: 88% 3860/4393 [5:13:02<51:35, 5.81s/it] + Training...: 88% 3861/4393 [5:13:07<51:40, 5.83s/it] + Training...: 88% 3862/4393 [5:13:13<51:34, 5.83s/it] + Training...: 88% 3863/4393 [5:13:19<51:23, 5.82s/it] + Training...: 88% 3864/4393 [5:13:25<50:58, 5.78s/it] + Training...: 88% 3865/4393 [5:13:30<50:46, 5.77s/it] + Training...: 88% 3866/4393 [5:13:36<50:30, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:16:47<35:02:00, 21020.10s/it] + Training...: 88% 3866/4393 [5:13:43<50:30, 5.75s/it] + Training...: 88% 3867/4393 [5:13:43<52:03, 5.94s/it] + Training...: 88% 3868/4393 [5:13:48<50:54, 5.82s/it] + Training...: 88% 3869/4393 [5:13:54<49:59, 5.72s/it] + Training...: 88% 3870/4393 [5:13:59<49:18, 5.66s/it] + Training...: 88% 3871/4393 [5:14:05<48:43, 5.60s/it] + Training...: 88% 3872/4393 [5:14:10<48:10, 5.55s/it] + Training...: 88% 3873/4393 [5:14:15<47:35, 5.49s/it] + Training...: 88% 3874/4393 [5:14:21<47:06, 5.45s/it] + Training...: 88% 3875/4393 [5:14:26<46:55, 5.43s/it] + Training...: 88% 3876/4393 [5:14:32<46:44, 5.42s/it] + Training...: 88% 3877/4393 [5:14:37<46:11, 5.37s/it] + Training...: 88% 3878/4393 [5:14:42<45:24, 5.29s/it] + Training...: 88% 3879/4393 [5:14:47<44:43, 5.22s/it] + Training...: 88% 3880/4393 [5:14:52<43:55, 5.14s/it] + Training...: 88% 3881/4393 [5:14:57<43:13, 5.07s/it] + Training...: 88% 3882/4393 [5:15:02<42:26, 4.98s/it] + Training...: 88% 3883/4393 [5:15:06<41:45, 4.91s/it] + Training...: 88% 3884/4393 [5:15:11<40:57, 4.83s/it] + Training...: 88% 3885/4393 [5:15:15<40:09, 4.74s/it] + Training...: 88% 3886/4393 [5:15:20<39:03, 4.62s/it] + Training...: 88% 3887/4393 [5:15:24<38:00, 4.51s/it] + Training...: 89% 3888/4393 [5:15:28<36:46, 4.37s/it] + Training...: 89% 3889/4393 [5:15:32<35:22, 4.21s/it] + Training...: 89% 3890/4393 [5:15:35<33:36, 4.01s/it] + Training...: 89% 3891/4393 [5:15:39<31:47, 3.80s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:18:46<35:02:00, 21020.10s/it] + Training...: 89% 3891/4393 [5:15:42<31:47, 3.80s/it] + Training...: 89% 3892/4393 [5:15:42<31:05, 3.72s/it] + Training...: 89% 3893/4393 [5:15:45<28:52, 3.47s/it] + Training...: 89% 3894/4393 [5:15:48<26:42, 3.21s/it] + Training...: 89% 3895/4393 [5:15:50<24:28, 2.95s/it] + Training...: 89% 3896/4393 [5:15:52<22:18, 2.69s/it] + Training...: 89% 3897/4393 [5:15:54<20:04, 2.43s/it] + Training...: 89% 3898/4393 [5:15:56<17:49, 2.16s/it] + Training...: 89% 3899/4393 [5:15:57<15:36, 1.90s/it] + Training...: 89% 3900/4393 [5:15:58<13:21, 1.63s/it] + Training...: 89% 3901/4393 [5:16:04<24:54, 3.04s/it] + Training...: 89% 3902/4393 [5:16:11<33:06, 4.05s/it] + Training...: 89% 3903/4393 [5:16:17<38:40, 4.74s/it] + Training...: 89% 3904/4393 [5:16:23<41:57, 5.15s/it] + Training...: 89% 3905/4393 [5:16:29<44:00, 5.41s/it] + Training...: 89% 3906/4393 [5:16:35<45:21, 5.59s/it] + Training...: 89% 3907/4393 [5:16:41<46:15, 5.71s/it] + Training...: 89% 3908/4393 [5:16:47<46:44, 5.78s/it] + Training...: 89% 3909/4393 [5:16:53<47:10, 5.85s/it] + Training...: 89% 3910/4393 [5:16:59<47:03, 5.85s/it] + Training...: 89% 3911/4393 [5:17:05<47:06, 5.86s/it] + Training...: 89% 3912/4393 [5:17:11<46:47, 5.84s/it] + Training...: 89% 3913/4393 [5:17:16<46:46, 5.85s/it] + Training...: 89% 3914/4393 [5:17:22<46:31, 5.83s/it] + Training...: 89% 3915/4393 [5:17:28<46:18, 5.81s/it] + Training...: 89% 3916/4393 [5:17:34<45:54, 5.78s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:20:44<35:02:00, 21020.10s/it] + Training...: 89% 3916/4393 [5:17:40<45:54, 5.78s/it] + Training...: 89% 3917/4393 [5:17:40<47:37, 6.00s/it] + Training...: 89% 3918/4393 [5:17:46<46:45, 5.91s/it] + Training...: 89% 3919/4393 [5:17:51<45:49, 5.80s/it] + Training...: 89% 3920/4393 [5:17:57<45:02, 5.71s/it] + Training...: 89% 3921/4393 [5:18:02<44:14, 5.62s/it] + Training...: 89% 3922/4393 [5:18:08<43:28, 5.54s/it] + Training...: 89% 3923/4393 [5:18:13<43:02, 5.50s/it] + Training...: 89% 3924/4393 [5:18:18<42:31, 5.44s/it] + Training...: 89% 3925/4393 [5:18:24<42:05, 5.40s/it] + Training...: 89% 3926/4393 [5:18:29<41:38, 5.35s/it] + Training...: 89% 3927/4393 [5:18:34<41:10, 5.30s/it] + Training...: 89% 3928/4393 [5:18:39<40:38, 5.24s/it] + Training...: 89% 3929/4393 [5:18:44<40:09, 5.19s/it] + Training...: 89% 3930/4393 [5:18:49<39:28, 5.12s/it] + Training...: 89% 3931/4393 [5:18:54<38:55, 5.06s/it] + Training...: 90% 3932/4393 [5:18:59<38:18, 4.99s/it] + Training...: 90% 3933/4393 [5:19:04<38:10, 4.98s/it] + Training...: 90% 3934/4393 [5:19:09<37:18, 4.88s/it] + Training...: 90% 3935/4393 [5:19:13<36:27, 4.78s/it] + Training...: 90% 3936/4393 [5:19:17<35:22, 4.64s/it] + Training...: 90% 3937/4393 [5:19:22<34:27, 4.53s/it] + Training...: 90% 3938/4393 [5:19:26<33:11, 4.38s/it] + Training...: 90% 3939/4393 [5:19:30<31:50, 4.21s/it] + Training...: 90% 3940/4393 [5:19:33<30:24, 4.03s/it] + Training...: 90% 3941/4393 [5:19:37<28:46, 3.82s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:22:44<35:02:00, 21020.10s/it] + Training...: 90% 3941/4393 [5:19:40<28:46, 3.82s/it] + Training...: 90% 3942/4393 [5:19:40<28:10, 3.75s/it] + Training...: 90% 3943/4393 [5:19:43<26:13, 3.50s/it] + Training...: 90% 3944/4393 [5:19:46<24:04, 3.22s/it] + Training...: 90% 3945/4393 [5:19:48<22:04, 2.96s/it] + Training...: 90% 3946/4393 [5:19:50<20:02, 2.69s/it] + Training...: 90% 3947/4393 [5:19:52<17:57, 2.42s/it] + Training...: 90% 3948/4393 [5:19:53<15:54, 2.15s/it] + Training...: 90% 3949/4393 [5:19:55<13:49, 1.87s/it] + Training...: 90% 3950/4393 [5:19:55<11:51, 1.61s/it] + Training...: 90% 3951/4393 [5:20:02<22:23, 3.04s/it] + Training...: 90% 3952/4393 [5:20:09<30:41, 4.18s/it] + Training...: 90% 3953/4393 [5:20:15<34:53, 4.76s/it] + Training...: 90% 3954/4393 [5:20:21<38:06, 5.21s/it] + Training...: 90% 3955/4393 [5:20:27<40:17, 5.52s/it] + Training...: 90% 3956/4393 [5:20:33<41:08, 5.65s/it] + Training...: 90% 3957/4393 [5:20:39<41:44, 5.74s/it] + Training...: 90% 3958/4393 [5:20:45<42:25, 5.85s/it] + Training...: 90% 3959/4393 [5:20:51<42:27, 5.87s/it] + Training...: 90% 3960/4393 [5:20:57<42:15, 5.86s/it] + Training...: 90% 3961/4393 [5:21:03<41:58, 5.83s/it] + Training...: 90% 3962/4393 [5:21:09<41:50, 5.82s/it] + Training...: 90% 3963/4393 [5:21:15<41:53, 5.84s/it] + Training...: 90% 3964/4393 [5:21:20<41:25, 5.79s/it] + Training...: 90% 3965/4393 [5:21:26<41:11, 5.78s/it] + Training...: 90% 3966/4393 [5:21:32<40:45, 5.73s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:24:42<35:02:00, 21020.10s/it] + Training...: 90% 3966/4393 [5:21:38<40:45, 5.73s/it] + Training...: 90% 3967/4393 [5:21:38<41:58, 5.91s/it] + Training...: 90% 3968/4393 [5:21:43<41:03, 5.80s/it] + Training...: 90% 3969/4393 [5:21:49<40:23, 5.72s/it] + Training...: 90% 3970/4393 [5:21:55<39:58, 5.67s/it] + Training...: 90% 3971/4393 [5:22:00<39:54, 5.67s/it] + Training...: 90% 3972/4393 [5:22:06<39:12, 5.59s/it] + Training...: 90% 3973/4393 [5:22:11<38:43, 5.53s/it] + Training...: 90% 3974/4393 [5:22:16<38:07, 5.46s/it] + Training...: 90% 3975/4393 [5:22:22<37:38, 5.40s/it] + Training...: 91% 3976/4393 [5:22:27<37:14, 5.36s/it] + Training...: 91% 3977/4393 [5:22:32<36:53, 5.32s/it] + Training...: 91% 3978/4393 [5:22:37<36:26, 5.27s/it] + Training...: 91% 3979/4393 [5:22:42<36:02, 5.22s/it] + Training...: 91% 3980/4393 [5:22:47<35:18, 5.13s/it] + Training...: 91% 3981/4393 [5:22:52<34:43, 5.06s/it] + Training...: 91% 3982/4393 [5:22:57<34:07, 4.98s/it] + Training...: 91% 3983/4393 [5:23:02<33:38, 4.92s/it] + Training...: 91% 3984/4393 [5:23:06<32:52, 4.82s/it] + Training...: 91% 3985/4393 [5:23:11<32:08, 4.73s/it] + Training...: 91% 3986/4393 [5:23:15<31:19, 4.62s/it] + Training...: 91% 3987/4393 [5:23:19<30:25, 4.50s/it] + Training...: 91% 3988/4393 [5:23:23<29:19, 4.34s/it] + Training...: 91% 3989/4393 [5:23:27<28:09, 4.18s/it] + Training...: 91% 3990/4393 [5:23:31<26:43, 3.98s/it] + Training...: 91% 3991/4393 [5:23:34<25:09, 3.76s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:26:41<35:02:00, 21020.10s/it] + Training...: 91% 3991/4393 [5:23:37<25:09, 3.76s/it] + Training...: 91% 3992/4393 [5:23:37<24:31, 3.67s/it] + Training...: 91% 3993/4393 [5:23:40<22:33, 3.38s/it] + Training...: 91% 3994/4393 [5:23:43<20:41, 3.11s/it] + Training...: 91% 3995/4393 [5:23:45<18:50, 2.84s/it] + Training...: 91% 3996/4393 [5:23:47<17:02, 2.57s/it] + Training...: 91% 3997/4393 [5:23:48<15:19, 2.32s/it] + Training...: 91% 3998/4393 [5:23:50<13:43, 2.09s/it] + Training...: 91% 3999/4393 [5:23:51<12:09, 1.85s/it] + Training...: 91% 4000/4393 [5:23:52<10:30, 1.60s/it] + Training...: 91% 4001/4393 [5:23:59<20:01, 3.06s/it] + Training...: 91% 4002/4393 [5:24:05<26:30, 4.07s/it] + Training...: 91% 4003/4393 [5:24:11<30:23, 4.68s/it] + Training...: 91% 4004/4393 [5:24:17<33:00, 5.09s/it] + Training...: 91% 4005/4393 [5:24:23<34:45, 5.38s/it] + Training...: 91% 4006/4393 [5:24:29<35:55, 5.57s/it] + Training...: 91% 4007/4393 [5:24:35<36:35, 5.69s/it] + Training...: 91% 4008/4393 [5:24:41<36:55, 5.75s/it] + Training...: 91% 4009/4393 [5:24:47<37:06, 5.80s/it] + Training...: 91% 4010/4393 [5:24:53<37:12, 5.83s/it] + Training...: 91% 4011/4393 [5:24:59<37:16, 5.85s/it] + Training...: 91% 4012/4393 [5:25:05<36:56, 5.82s/it] + Training...: 91% 4013/4393 [5:25:11<36:48, 5.81s/it] + Training...: 91% 4014/4393 [5:25:16<36:33, 5.79s/it] + Training...: 91% 4015/4393 [5:25:22<36:21, 5.77s/it] + Training...: 91% 4016/4393 [5:25:28<35:56, 5.72s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:28:38<35:02:00, 21020.10s/it] + Training...: 91% 4016/4393 [5:25:34<35:56, 5.72s/it] + Training...: 91% 4017/4393 [5:25:34<37:05, 5.92s/it] + Training...: 91% 4018/4393 [5:25:40<36:15, 5.80s/it] + Training...: 91% 4019/4393 [5:25:45<35:38, 5.72s/it] + Training...: 92% 4020/4393 [5:25:51<35:18, 5.68s/it] + Training...: 92% 4021/4393 [5:25:56<35:11, 5.68s/it] + Training...: 92% 4022/4393 [5:26:02<34:33, 5.59s/it] + Training...: 92% 4023/4393 [5:26:07<34:00, 5.52s/it] + Training...: 92% 4024/4393 [5:26:12<33:28, 5.44s/it] + Training...: 92% 4025/4393 [5:26:18<33:05, 5.40s/it] + Training...: 92% 4026/4393 [5:26:23<32:41, 5.35s/it] + Training...: 92% 4027/4393 [5:26:28<32:17, 5.29s/it] + Training...: 92% 4028/4393 [5:26:33<31:58, 5.26s/it] + Training...: 92% 4029/4393 [5:26:38<31:40, 5.22s/it] + Training...: 92% 4030/4393 [5:26:43<31:05, 5.14s/it] + Training...: 92% 4031/4393 [5:26:48<30:46, 5.10s/it] + Training...: 92% 4032/4393 [5:26:53<30:07, 5.01s/it] + Training...: 92% 4033/4393 [5:26:58<29:39, 4.94s/it] + Training...: 92% 4034/4393 [5:27:03<29:03, 4.86s/it] + Training...: 92% 4035/4393 [5:27:07<28:26, 4.77s/it] + Training...: 92% 4036/4393 [5:27:11<27:40, 4.65s/it] + Training...: 92% 4037/4393 [5:27:16<26:55, 4.54s/it] + Training...: 92% 4038/4393 [5:27:20<26:00, 4.40s/it] + Training...: 92% 4039/4393 [5:27:24<24:57, 4.23s/it] + Training...: 92% 4040/4393 [5:27:27<23:46, 4.04s/it] + Training...: 92% 4041/4393 [5:27:31<22:33, 3.85s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:30:38<35:02:00, 21020.10s/it] + Training...: 92% 4041/4393 [5:27:34<22:33, 3.85s/it] + Training...: 92% 4042/4393 [5:27:34<22:07, 3.78s/it] + Training...: 92% 4043/4393 [5:27:37<20:27, 3.51s/it] + Training...: 92% 4044/4393 [5:27:40<18:40, 3.21s/it] + Training...: 92% 4045/4393 [5:27:42<17:00, 2.93s/it] + Training...: 92% 4046/4393 [5:27:44<15:26, 2.67s/it] + Training...: 92% 4047/4393 [5:27:46<13:50, 2.40s/it] + Training...: 92% 4048/4393 [5:27:47<12:20, 2.15s/it] + Training...: 92% 4049/4393 [5:27:49<10:54, 1.90s/it] + Training...: 92% 4050/4393 [5:27:50<09:22, 1.64s/it] + Training...: 92% 4051/4393 [5:27:56<17:31, 3.07s/it] + Training...: 92% 4052/4393 [5:28:03<23:16, 4.09s/it] + Training...: 92% 4053/4393 [5:28:09<26:46, 4.72s/it] + Training...: 92% 4054/4393 [5:28:15<28:50, 5.11s/it] + Training...: 92% 4055/4393 [5:28:21<30:15, 5.37s/it] + Training...: 92% 4056/4393 [5:28:27<31:12, 5.56s/it] + Training...: 92% 4057/4393 [5:28:33<32:16, 5.76s/it] + Training...: 92% 4058/4393 [5:28:39<32:49, 5.88s/it] + Training...: 92% 4059/4393 [5:28:45<33:16, 5.98s/it] + Training...: 92% 4060/4393 [5:28:51<33:18, 6.00s/it] + Training...: 92% 4061/4393 [5:28:58<33:32, 6.06s/it] + Training...: 92% 4062/4393 [5:29:04<33:25, 6.06s/it] + Training...: 92% 4063/4393 [5:29:10<33:22, 6.07s/it] + Training...: 93% 4064/4393 [5:29:16<33:12, 6.05s/it] + Training...: 93% 4065/4393 [5:29:22<33:09, 6.07s/it] + Training...: 93% 4066/4393 [5:29:28<32:45, 6.01s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:32:38<35:02:00, 21020.10s/it] + Training...: 93% 4066/4393 [5:29:34<32:45, 6.01s/it] + Training...: 93% 4067/4393 [5:29:34<33:36, 6.18s/it] + Training...: 93% 4068/4393 [5:29:40<32:57, 6.08s/it] + Training...: 93% 4069/4393 [5:29:46<32:28, 6.01s/it] + Training...: 93% 4070/4393 [5:29:52<31:56, 5.93s/it] + Training...: 93% 4071/4393 [5:29:58<31:34, 5.88s/it] + Training...: 93% 4072/4393 [5:30:03<31:07, 5.82s/it] + Training...: 93% 4073/4393 [5:30:09<30:45, 5.77s/it] + Training...: 93% 4074/4393 [5:30:14<30:19, 5.71s/it] + Training...: 93% 4075/4393 [5:30:20<30:00, 5.66s/it] + Training...: 93% 4076/4393 [5:30:26<29:42, 5.62s/it] + Training...: 93% 4077/4393 [5:30:31<29:25, 5.59s/it] + Training...: 93% 4078/4393 [5:30:36<29:08, 5.55s/it] + Training...: 93% 4079/4393 [5:30:42<28:55, 5.53s/it] + Training...: 93% 4080/4393 [5:30:47<28:28, 5.46s/it] + Training...: 93% 4081/4393 [5:30:53<28:05, 5.40s/it] + Training...: 93% 4082/4393 [5:30:58<27:27, 5.30s/it] + Training...: 93% 4083/4393 [5:31:03<26:54, 5.21s/it] + Training...: 93% 4084/4393 [5:31:07<26:19, 5.11s/it] + Training...: 93% 4085/4393 [5:31:12<25:53, 5.04s/it] + Training...: 93% 4086/4393 [5:31:17<25:19, 4.95s/it] + Training...: 93% 4087/4393 [5:31:22<24:38, 4.83s/it] + Training...: 93% 4088/4393 [5:31:26<23:46, 4.68s/it] + Training...: 93% 4089/4393 [5:31:30<22:42, 4.48s/it] + Training...: 93% 4090/4393 [5:31:34<21:23, 4.23s/it] + Training...: 93% 4091/4393 [5:31:37<20:05, 3.99s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:34:45<35:02:00, 21020.10s/it] + Training...: 93% 4091/4393 [5:31:41<20:05, 3.99s/it] + Training...: 93% 4092/4393 [5:31:41<19:32, 3.89s/it] + Training...: 93% 4093/4393 [5:31:44<17:58, 3.59s/it] + Training...: 93% 4094/4393 [5:31:46<16:27, 3.30s/it] + Training...: 93% 4095/4393 [5:31:49<15:02, 3.03s/it] + Training...: 93% 4096/4393 [5:31:51<13:42, 2.77s/it] + Training...: 93% 4097/4393 [5:31:53<12:17, 2.49s/it] + Training...: 93% 4098/4393 [5:31:54<10:54, 2.22s/it] + Training...: 93% 4099/4393 [5:31:56<09:31, 1.94s/it] + Training...: 93% 4100/4393 [5:31:57<08:08, 1.67s/it] + Training...: 93% 4101/4393 [5:32:03<14:56, 3.07s/it] + Training...: 93% 4102/4393 [5:32:09<19:40, 4.06s/it] + Training...: 93% 4103/4393 [5:32:15<22:36, 4.68s/it] + Training...: 93% 4104/4393 [5:32:21<24:33, 5.10s/it] + Training...: 93% 4105/4393 [5:32:27<25:48, 5.38s/it] + Training...: 93% 4106/4393 [5:32:33<26:35, 5.56s/it] + Training...: 93% 4107/4393 [5:32:39<27:08, 5.69s/it] + Training...: 94% 4108/4393 [5:32:45<27:22, 5.76s/it] + Training...: 94% 4109/4393 [5:32:51<27:31, 5.82s/it] + Training...: 94% 4110/4393 [5:32:57<27:29, 5.83s/it] + Training...: 94% 4111/4393 [5:33:03<27:52, 5.93s/it] + Training...: 94% 4112/4393 [5:33:09<27:43, 5.92s/it] + Training...: 94% 4113/4393 [5:33:15<27:30, 5.90s/it] + Training...: 94% 4114/4393 [5:33:21<27:12, 5.85s/it] + Training...: 94% 4115/4393 [5:33:27<26:57, 5.82s/it] + Training...: 94% 4116/4393 [5:33:32<26:33, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:36:43<35:02:00, 21020.10s/it] + Training...: 94% 4116/4393 [5:33:39<26:33, 5.75s/it] + Training...: 94% 4117/4393 [5:33:39<27:20, 5.94s/it] + Training...: 94% 4118/4393 [5:33:44<26:45, 5.84s/it] + Training...: 94% 4119/4393 [5:33:50<26:19, 5.76s/it] + Training...: 94% 4120/4393 [5:33:55<25:57, 5.71s/it] + Training...: 94% 4121/4393 [5:34:01<25:37, 5.65s/it] + Training...: 94% 4122/4393 [5:34:06<25:12, 5.58s/it] + Training...: 94% 4123/4393 [5:34:12<24:59, 5.55s/it] + Training...: 94% 4124/4393 [5:34:17<24:35, 5.48s/it] + Training...: 94% 4125/4393 [5:34:22<24:16, 5.43s/it] + Training...: 94% 4126/4393 [5:34:28<23:56, 5.38s/it] + Training...: 94% 4127/4393 [5:34:33<23:59, 5.41s/it] + Training...: 94% 4128/4393 [5:34:38<23:40, 5.36s/it] + Training...: 94% 4129/4393 [5:34:44<23:22, 5.31s/it] + Training...: 94% 4130/4393 [5:34:49<22:53, 5.22s/it] + Training...: 94% 4131/4393 [5:34:54<22:29, 5.15s/it] + Training...: 94% 4132/4393 [5:34:58<22:05, 5.08s/it] + Training...: 94% 4133/4393 [5:35:03<21:41, 5.00s/it] + Training...: 94% 4134/4393 [5:35:08<21:09, 4.90s/it] + Training...: 94% 4135/4393 [5:35:13<20:43, 4.82s/it] + Training...: 94% 4136/4393 [5:35:17<20:09, 4.71s/it] + Training...: 94% 4137/4393 [5:35:21<19:36, 4.60s/it] + Training...: 94% 4138/4393 [5:35:25<18:55, 4.45s/it] + Training...: 94% 4139/4393 [5:35:30<18:17, 4.32s/it] + Training...: 94% 4140/4393 [5:35:33<17:30, 4.15s/it] + Training...: 94% 4141/4393 [5:35:37<16:42, 3.98s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:38:45<35:02:00, 21020.10s/it] + Training...: 94% 4141/4393 [5:35:41<16:42, 3.98s/it] + Training...: 94% 4142/4393 [5:35:41<16:25, 3.92s/it] + Training...: 94% 4143/4393 [5:35:44<15:10, 3.64s/it] + Training...: 94% 4144/4393 [5:35:46<14:00, 3.37s/it] + Training...: 94% 4145/4393 [5:35:49<12:48, 3.10s/it] + Training...: 94% 4146/4393 [5:35:51<11:38, 2.83s/it] + Training...: 94% 4147/4393 [5:35:53<10:27, 2.55s/it] + Training...: 94% 4148/4393 [5:35:55<09:20, 2.29s/it] + Training...: 94% 4149/4393 [5:35:56<08:09, 2.01s/it] + Training...: 94% 4150/4393 [5:35:57<06:55, 1.71s/it] + Training...: 94% 4151/4393 [5:36:03<12:37, 3.13s/it] + Training...: 95% 4152/4393 [5:36:10<16:31, 4.11s/it] + Training...: 95% 4153/4393 [5:36:16<19:08, 4.78s/it] + Training...: 95% 4154/4393 [5:36:22<20:34, 5.16s/it] + Training...: 95% 4155/4393 [5:36:28<21:34, 5.44s/it] + Training...: 95% 4156/4393 [5:36:34<22:08, 5.61s/it] + Training...: 95% 4157/4393 [5:36:40<22:30, 5.72s/it] + Training...: 95% 4158/4393 [5:36:46<22:36, 5.77s/it] + Training...: 95% 4159/4393 [5:36:52<22:39, 5.81s/it] + Training...: 95% 4160/4393 [5:36:58<22:35, 5.82s/it] + Training...: 95% 4161/4393 [5:37:04<22:33, 5.84s/it] + Training...: 95% 4162/4393 [5:37:10<22:26, 5.83s/it] + Training...: 95% 4163/4393 [5:37:16<22:25, 5.85s/it] + Training...: 95% 4164/4393 [5:37:21<22:21, 5.86s/it] + Training...: 95% 4165/4393 [5:37:27<22:06, 5.82s/it] + Training...: 95% 4166/4393 [5:37:33<21:51, 5.78s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:40:43<35:02:00, 21020.10s/it] + Training...: 95% 4166/4393 [5:37:39<21:51, 5.78s/it] + Training...: 95% 4167/4393 [5:37:39<22:40, 6.02s/it] + Training...: 95% 4168/4393 [5:37:45<22:24, 5.98s/it] + Training...: 95% 4169/4393 [5:37:51<21:54, 5.87s/it] + Training...: 95% 4170/4393 [5:37:56<21:27, 5.77s/it] + Training...: 95% 4171/4393 [5:38:02<21:06, 5.71s/it] + Training...: 95% 4172/4393 [5:38:07<20:45, 5.64s/it] + Training...: 95% 4173/4393 [5:38:13<20:25, 5.57s/it] + Training...: 95% 4174/4393 [5:38:18<20:08, 5.52s/it] + Training...: 95% 4175/4393 [5:38:24<19:47, 5.45s/it] + Training...: 95% 4176/4393 [5:38:29<19:29, 5.39s/it] + Training...: 95% 4177/4393 [5:38:34<19:13, 5.34s/it] + Training...: 95% 4178/4393 [5:38:39<18:58, 5.29s/it] + Training...: 95% 4179/4393 [5:38:44<18:45, 5.26s/it] + Training...: 95% 4180/4393 [5:38:49<18:25, 5.19s/it] + Training...: 95% 4181/4393 [5:38:54<18:07, 5.13s/it] + Training...: 95% 4182/4393 [5:38:59<17:46, 5.05s/it] + Training...: 95% 4183/4393 [5:39:04<17:32, 5.01s/it] + Training...: 95% 4184/4393 [5:39:09<17:10, 4.93s/it] + Training...: 95% 4185/4393 [5:39:14<16:51, 4.86s/it] + Training...: 95% 4186/4393 [5:39:18<16:23, 4.75s/it] + Training...: 95% 4187/4393 [5:39:23<16:14, 4.73s/it] + Training...: 95% 4188/4393 [5:39:27<15:42, 4.60s/it] + Training...: 95% 4189/4393 [5:39:31<15:04, 4.44s/it] + Training...: 95% 4190/4393 [5:39:35<14:21, 4.25s/it] + Training...: 95% 4191/4393 [5:39:39<13:35, 4.04s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:42:46<35:02:00, 21020.10s/it] + Training...: 95% 4191/4393 [5:39:42<13:35, 4.04s/it] + Training...: 95% 4192/4393 [5:39:42<13:17, 3.97s/it] + Training...: 95% 4193/4393 [5:39:45<12:17, 3.69s/it] + Training...: 95% 4194/4393 [5:39:48<11:18, 3.41s/it] + Training...: 95% 4195/4393 [5:39:51<10:19, 3.13s/it] + Training...: 96% 4196/4393 [5:39:53<09:19, 2.84s/it] + Training...: 96% 4197/4393 [5:39:55<08:17, 2.54s/it] + Training...: 96% 4198/4393 [5:39:56<07:18, 2.25s/it] + Training...: 96% 4199/4393 [5:39:57<06:21, 1.97s/it] + Training...: 96% 4200/4393 [5:39:59<05:26, 1.69s/it] + Training...: 96% 4201/4393 [5:40:05<09:54, 3.10s/it] + Training...: 96% 4202/4393 [5:40:11<12:53, 4.05s/it] + Training...: 96% 4203/4393 [5:40:17<14:49, 4.68s/it] + Training...: 96% 4204/4393 [5:40:23<16:01, 5.09s/it] + Training...: 96% 4205/4393 [5:40:29<16:52, 5.38s/it] + Training...: 96% 4206/4393 [5:40:35<17:19, 5.56s/it] + Training...: 96% 4207/4393 [5:40:41<17:39, 5.70s/it] + Training...: 96% 4208/4393 [5:40:47<17:46, 5.76s/it] + Training...: 96% 4209/4393 [5:40:53<17:49, 5.81s/it] + Training...: 96% 4210/4393 [5:40:59<17:52, 5.86s/it] + Training...: 96% 4211/4393 [5:41:05<17:56, 5.91s/it] + Training...: 96% 4212/4393 [5:41:11<17:43, 5.88s/it] + Training...: 96% 4213/4393 [5:41:17<17:34, 5.86s/it] + Training...: 96% 4214/4393 [5:41:23<17:25, 5.84s/it] + Training...: 96% 4215/4393 [5:41:28<17:12, 5.80s/it] + Training...: 96% 4216/4393 [5:41:34<16:58, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:44:44<35:02:00, 21020.10s/it] + Training...: 96% 4216/4393 [5:41:40<16:58, 5.75s/it] + Training...: 96% 4217/4393 [5:41:40<17:26, 5.95s/it] + Training...: 96% 4218/4393 [5:41:46<17:03, 5.85s/it] + Training...: 96% 4219/4393 [5:41:52<16:41, 5.76s/it] + Training...: 96% 4220/4393 [5:41:57<16:23, 5.68s/it] + Training...: 96% 4221/4393 [5:42:03<16:10, 5.64s/it] + Training...: 96% 4222/4393 [5:42:08<15:52, 5.57s/it] + Training...: 96% 4223/4393 [5:42:13<15:37, 5.52s/it] + Training...: 96% 4224/4393 [5:42:19<15:20, 5.45s/it] + Training...: 96% 4225/4393 [5:42:24<15:07, 5.40s/it] + Training...: 96% 4226/4393 [5:42:29<14:56, 5.37s/it] + Training...: 96% 4227/4393 [5:42:35<14:43, 5.32s/it] + Training...: 96% 4228/4393 [5:42:40<14:30, 5.28s/it] + Training...: 96% 4229/4393 [5:42:45<14:14, 5.21s/it] + Training...: 96% 4230/4393 [5:42:50<13:57, 5.14s/it] + Training...: 96% 4231/4393 [5:42:55<13:43, 5.09s/it] + Training...: 96% 4232/4393 [5:43:00<13:28, 5.02s/it] + Training...: 96% 4233/4393 [5:43:04<13:12, 4.95s/it] + Training...: 96% 4234/4393 [5:43:09<12:53, 4.86s/it] + Training...: 96% 4235/4393 [5:43:14<12:33, 4.77s/it] + Training...: 96% 4236/4393 [5:43:18<12:15, 4.69s/it] + Training...: 96% 4237/4393 [5:43:22<11:51, 4.56s/it] + Training...: 96% 4238/4393 [5:43:26<11:26, 4.43s/it] + Training...: 96% 4239/4393 [5:43:30<10:59, 4.28s/it] + Training...: 97% 4240/4393 [5:43:34<10:24, 4.08s/it] + Training...: 97% 4241/4393 [5:43:37<09:51, 3.89s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:46:45<35:02:00, 21020.10s/it] + Training...: 97% 4241/4393 [5:43:41<09:51, 3.89s/it] + Training...: 97% 4242/4393 [5:43:41<09:39, 3.84s/it] + Training...: 97% 4243/4393 [5:43:44<08:55, 3.57s/it] + Training...: 97% 4244/4393 [5:43:47<08:13, 3.32s/it] + Training...: 97% 4245/4393 [5:43:49<07:29, 3.04s/it] + Training...: 97% 4246/4393 [5:43:51<06:43, 2.75s/it] + Training...: 97% 4247/4393 [5:43:53<05:58, 2.45s/it] + Training...: 97% 4248/4393 [5:43:55<05:18, 2.19s/it] + Training...: 97% 4249/4393 [5:43:56<04:39, 1.94s/it] + Training...: 97% 4250/4393 [5:43:57<03:57, 1.66s/it] + Training...: 97% 4251/4393 [5:44:03<07:17, 3.08s/it] + Training...: 97% 4252/4393 [5:44:10<09:32, 4.06s/it] + Training...: 97% 4253/4393 [5:44:16<10:53, 4.67s/it] + Training...: 97% 4254/4393 [5:44:22<11:45, 5.07s/it] + Training...: 97% 4255/4393 [5:44:28<12:21, 5.37s/it] + Training...: 97% 4256/4393 [5:44:34<12:40, 5.55s/it] + Training...: 97% 4257/4393 [5:44:40<12:51, 5.67s/it] + Training...: 97% 4258/4393 [5:44:46<12:55, 5.74s/it] + Training...: 97% 4259/4393 [5:44:52<12:56, 5.79s/it] + Training...: 97% 4260/4393 [5:44:57<12:52, 5.81s/it] + Training...: 97% 4261/4393 [5:45:03<12:49, 5.83s/it] + Training...: 97% 4262/4393 [5:45:09<12:42, 5.82s/it] + Training...: 97% 4263/4393 [5:45:15<12:43, 5.87s/it] + Training...: 97% 4264/4393 [5:45:21<12:40, 5.89s/it] + Training...: 97% 4265/4393 [5:45:27<12:27, 5.84s/it] + Training...: 97% 4266/4393 [5:45:32<12:14, 5.79s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:48:43<35:02:00, 21020.10s/it] + Training...: 97% 4266/4393 [5:45:39<12:14, 5.79s/it] + Training...: 97% 4267/4393 [5:45:39<12:33, 5.98s/it] + Training...: 97% 4268/4393 [5:45:44<12:11, 5.85s/it] + Training...: 97% 4269/4393 [5:45:50<11:54, 5.76s/it] + Training...: 97% 4270/4393 [5:45:56<11:38, 5.68s/it] + Training...: 97% 4271/4393 [5:46:01<11:25, 5.62s/it] + Training...: 97% 4272/4393 [5:46:06<11:11, 5.55s/it] + Training...: 97% 4273/4393 [5:46:12<11:03, 5.53s/it] + Training...: 97% 4274/4393 [5:46:17<10:51, 5.48s/it] + Training...: 97% 4275/4393 [5:46:23<10:41, 5.43s/it] + Training...: 97% 4276/4393 [5:46:28<10:27, 5.36s/it] + Training...: 97% 4277/4393 [5:46:33<10:18, 5.33s/it] + Training...: 97% 4278/4393 [5:46:38<10:06, 5.28s/it] + Training...: 97% 4279/4393 [5:46:43<09:54, 5.22s/it] + Training...: 97% 4280/4393 [5:46:48<09:42, 5.15s/it] + Training...: 97% 4281/4393 [5:46:53<09:29, 5.08s/it] + Training...: 97% 4282/4393 [5:46:58<09:22, 5.06s/it] + Training...: 97% 4283/4393 [5:47:03<09:11, 5.01s/it] + Training...: 98% 4284/4393 [5:47:08<08:55, 4.91s/it] + Training...: 98% 4285/4393 [5:47:12<08:38, 4.80s/it] + Training...: 98% 4286/4393 [5:47:17<08:19, 4.67s/it] + Training...: 98% 4287/4393 [5:47:21<08:03, 4.56s/it] + Training...: 98% 4288/4393 [5:47:25<07:44, 4.43s/it] + Training...: 98% 4289/4393 [5:47:29<07:24, 4.27s/it] + Training...: 98% 4290/4393 [5:47:33<06:59, 4.08s/it] + Training...: 98% 4291/4393 [5:47:36<06:34, 3.87s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:50:44<35:02:00, 21020.10s/it] + Training...: 98% 4291/4393 [5:47:40<06:34, 3.87s/it] + Training...: 98% 4292/4393 [5:47:40<06:21, 3.78s/it] + Training...: 98% 4293/4393 [5:47:42<05:47, 3.48s/it] + Training...: 98% 4294/4393 [5:47:45<05:17, 3.21s/it] + Training...: 98% 4295/4393 [5:47:47<04:47, 2.94s/it] + Training...: 98% 4296/4393 [5:47:49<04:19, 2.67s/it] + Training...: 98% 4297/4393 [5:47:51<03:50, 2.40s/it] + Training...: 98% 4298/4393 [5:47:52<03:22, 2.13s/it] + Training...: 98% 4299/4393 [5:47:54<02:56, 1.87s/it] + Training...: 98% 4300/4393 [5:47:55<02:31, 1.62s/it] + Training...: 98% 4301/4393 [5:48:01<04:39, 3.04s/it] + Training...: 98% 4302/4393 [5:48:07<06:05, 4.02s/it] + Training...: 98% 4303/4393 [5:48:14<07:00, 4.68s/it] + Training...: 98% 4304/4393 [5:48:20<07:34, 5.11s/it] + Training...: 98% 4305/4393 [5:48:26<07:58, 5.43s/it] + Training...: 98% 4306/4393 [5:48:32<08:07, 5.61s/it] + Training...: 98% 4307/4393 [5:48:38<08:15, 5.76s/it] + Training...: 98% 4308/4393 [5:48:44<08:16, 5.84s/it] + Training...: 98% 4309/4393 [5:48:50<08:14, 5.89s/it] + Training...: 98% 4310/4393 [5:48:56<08:08, 5.88s/it] + Training...: 98% 4311/4393 [5:49:02<08:01, 5.87s/it] + Training...: 98% 4312/4393 [5:49:08<07:53, 5.84s/it] + Training...: 98% 4313/4393 [5:49:13<07:47, 5.84s/it] + Training...: 98% 4314/4393 [5:49:19<07:38, 5.81s/it] + Training...: 98% 4315/4393 [5:49:25<07:31, 5.79s/it] + Training...: 98% 4316/4393 [5:49:31<07:22, 5.75s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:52:41<35:02:00, 21020.10s/it] + Training...: 98% 4316/4393 [5:49:37<07:22, 5.75s/it] + Training...: 98% 4317/4393 [5:49:37<07:30, 5.93s/it] + Training...: 98% 4318/4393 [5:49:43<07:16, 5.82s/it] + Training...: 98% 4319/4393 [5:49:48<07:04, 5.73s/it] + Training...: 98% 4320/4393 [5:49:54<06:54, 5.68s/it] + Training...: 98% 4321/4393 [5:49:59<06:43, 5.60s/it] + Training...: 98% 4322/4393 [5:50:04<06:32, 5.53s/it] + Training...: 98% 4323/4393 [5:50:10<06:24, 5.50s/it] + Training...: 98% 4324/4393 [5:50:15<06:15, 5.44s/it] + Training...: 98% 4325/4393 [5:50:20<06:08, 5.42s/it] + Training...: 98% 4326/4393 [5:50:26<06:00, 5.38s/it] + Training...: 98% 4327/4393 [5:50:31<05:55, 5.38s/it] + Training...: 99% 4328/4393 [5:50:36<05:48, 5.36s/it] + Training...: 99% 4329/4393 [5:50:42<05:39, 5.30s/it] + Training...: 99% 4330/4393 [5:50:47<05:28, 5.22s/it] + Training...: 99% 4331/4393 [5:50:52<05:20, 5.17s/it] + Training...: 99% 4332/4393 [5:50:57<05:09, 5.08s/it] + Training...: 99% 4333/4393 [5:51:01<05:00, 5.00s/it] + Training...: 99% 4334/4393 [5:51:06<04:49, 4.90s/it] + Training...: 99% 4335/4393 [5:51:11<04:39, 4.82s/it] + Training...: 99% 4336/4393 [5:51:15<04:27, 4.70s/it] + Training...: 99% 4337/4393 [5:51:19<04:16, 4.58s/it] + Training...: 99% 4338/4393 [5:51:24<04:04, 4.44s/it] + Training...: 99% 4339/4393 [5:51:27<03:51, 4.29s/it] + Training...: 99% 4340/4393 [5:51:31<03:37, 4.10s/it] + Training...: 99% 4341/4393 [5:51:35<03:23, 3.92s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:54:42<35:02:00, 21020.10s/it] + Training...: 99% 4341/4393 [5:51:38<03:23, 3.92s/it] + Training...: 99% 4342/4393 [5:51:38<03:16, 3.85s/it] + Training...: 99% 4343/4393 [5:51:41<02:59, 3.59s/it] + Training...: 99% 4344/4393 [5:51:44<02:42, 3.32s/it] + Training...: 99% 4345/4393 [5:51:46<02:25, 3.03s/it] + Training...: 99% 4346/4393 [5:51:48<02:09, 2.75s/it] + Training...: 99% 4347/4393 [5:51:50<01:54, 2.48s/it] + Training...: 99% 4348/4393 [5:51:52<01:39, 2.21s/it] + Training...: 99% 4349/4393 [5:51:53<01:25, 1.95s/it] + Training...: 99% 4350/4393 [5:51:54<01:12, 1.68s/it] + Training...: 99% 4351/4393 [5:52:01<02:09, 3.08s/it] + Training...: 99% 4352/4393 [5:52:07<02:46, 4.06s/it] + Training...: 99% 4353/4393 [5:52:13<03:07, 4.70s/it] + Training...: 99% 4354/4393 [5:52:19<03:19, 5.11s/it] + Training...: 99% 4355/4393 [5:52:25<03:25, 5.40s/it] + Training...: 99% 4356/4393 [5:52:31<03:25, 5.56s/it] + Training...: 99% 4357/4393 [5:52:37<03:26, 5.72s/it] + Training...: 99% 4358/4393 [5:52:43<03:23, 5.81s/it] + Training...: 99% 4359/4393 [5:52:49<03:17, 5.82s/it] + Training...: 99% 4360/4393 [5:52:55<03:11, 5.80s/it] + Training...: 99% 4361/4393 [5:53:01<03:05, 5.79s/it] + Training...: 99% 4362/4393 [5:53:06<02:58, 5.75s/it] + Training...: 99% 4363/4393 [5:53:12<02:51, 5.73s/it] + Training...: 99% 4364/4393 [5:53:18<02:45, 5.69s/it] + Training...: 99% 4365/4393 [5:53:23<02:38, 5.68s/it] + Training...: 99% 4366/4393 [5:53:29<02:33, 5.68s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:56:39<35:02:00, 21020.10s/it] + Training...: 99% 4366/4393 [5:53:35<02:33, 5.68s/it] + Training...: 99% 4367/4393 [5:53:35<02:32, 5.88s/it] + Training...: 99% 4368/4393 [5:53:41<02:24, 5.78s/it] + Training...: 99% 4369/4393 [5:53:46<02:16, 5.69s/it] + Training...: 99% 4370/4393 [5:53:52<02:08, 5.60s/it] + Training...: 99% 4371/4393 [5:53:57<02:01, 5.54s/it] + Training...: 100% 4372/4393 [5:54:02<01:54, 5.46s/it] + Training...: 100% 4373/4393 [5:54:08<01:47, 5.39s/it] + Training...: 100% 4374/4393 [5:54:13<01:41, 5.34s/it] + Training...: 100% 4375/4393 [5:54:18<01:34, 5.28s/it] + Training...: 100% 4376/4393 [5:54:23<01:28, 5.18s/it] + Training...: 100% 4377/4393 [5:54:28<01:22, 5.13s/it] + Training...: 100% 4378/4393 [5:54:33<01:15, 5.05s/it] + Training...: 100% 4379/4393 [5:54:38<01:09, 4.96s/it] + Training...: 100% 4380/4393 [5:54:42<01:03, 4.91s/it] + Training...: 100% 4381/4393 [5:54:47<00:58, 4.84s/it] + Training...: 100% 4382/4393 [5:54:51<00:51, 4.70s/it] + Training...: 100% 4383/4393 [5:54:56<00:45, 4.53s/it] + Training...: 100% 4384/4393 [5:54:59<00:39, 4.34s/it] + Training...: 100% 4385/4393 [5:55:03<00:32, 4.12s/it] + Training...: 100% 4386/4393 [5:55:06<00:26, 3.85s/it] + Training...: 100% 4387/4393 [5:55:09<00:21, 3.59s/it] + Training...: 100% 4388/4393 [5:55:12<00:16, 3.32s/it] + Training...: 100% 4389/4393 [5:55:14<00:12, 3.04s/it] + Training...: 100% 4390/4393 [5:55:16<00:08, 2.75s/it] + Training...: 100% 4391/4393 [5:55:18<00:04, 2.46s/it] +  Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:58:24<35:02:00, 21020.10s/it] + Training...: 100% 4391/4393 [5:55:20<00:04, 2.46s/it] + Training...: 100% 4392/4393 [5:55:20<00:02, 2.27s/it] + Training...: 100% 4393/4393 [5:55:21<00:00, 1.96s/it] Training...: 100% 4393/4393 [5:55:21<00:00, 4.85s/it] + Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 58% 7/12 [40:58:25<29:19:56, 21119.31s/it] +Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |) +Step... (30025 | Loss: 0.01562521792948246, Learning Rate: 4.0355556848226115e-05, Gradient Norm: 0.265262246131897) +Step... (30050 | Loss: 0.016519010066986084, Learning Rate: 4.0305047150468454e-05, Gradient Norm: 0.39625251293182373) +Step... (30075 | Loss: 0.016090065240859985, Learning Rate: 4.0254544728668407e-05, Gradient Norm: 0.24512924253940582) +Step... (30100 | Loss: 0.020717907696962357, Learning Rate: 4.020404230686836e-05, Gradient Norm: 0.36649051308631897) +Step... (30125 | Loss: 0.023311305791139603, Learning Rate: 4.01535326091107e-05, Gradient Norm: 0.48252689838409424) +Step... (30150 | Loss: 0.020284006372094154, Learning Rate: 4.010303018731065e-05, Gradient Norm: 0.4051229953765869) +Step... (30175 | Loss: 0.03670956566929817, Learning Rate: 4.0052527765510604e-05, Gradient Norm: 0.3055880665779114) +Step... (30200 | Loss: 0.016665274277329445, Learning Rate: 4.000201806775294e-05, Gradient Norm: 0.3359684348106384) +Step... (30225 | Loss: 0.021576257422566414, Learning Rate: 3.9951515645952895e-05, Gradient Norm: 0.2677457928657532) +Step... (30250 | Loss: 0.018543612211942673, Learning Rate: 3.990100958617404e-05, Gradient Norm: 0.3655332922935486) +Step... (30275 | Loss: 0.02315855212509632, Learning Rate: 3.985050352639519e-05, Gradient Norm: 0.2440711408853531) +Step... (30300 | Loss: 0.012810557149350643, Learning Rate: 3.979999746661633e-05, Gradient Norm: 0.29266688227653503) +Step... (30325 | Loss: 0.016653001308441162, Learning Rate: 3.9749495044816285e-05, Gradient Norm: 0.22393402457237244) +Step... (30350 | Loss: 0.01203943882137537, Learning Rate: 3.969898898503743e-05, Gradient Norm: 0.2736056447029114) +Step... (30375 | Loss: 0.018077773973345757, Learning Rate: 3.964848292525858e-05, Gradient Norm: 0.2008410394191742) +Step... (30400 | Loss: 0.03966773301362991, Learning Rate: 3.959798050345853e-05, Gradient Norm: 0.3925747871398926) +Step... (30425 | Loss: 0.031882643699645996, Learning Rate: 3.954747080570087e-05, Gradient Norm: 0.39469194412231445) +Step... (30450 | Loss: 0.014567309990525246, Learning Rate: 3.949696838390082e-05, Gradient Norm: 0.2972516715526581) +Step... (30475 | Loss: 0.026796311140060425, Learning Rate: 3.9446465962100774e-05, Gradient Norm: 0.3355867564678192) +Step... (30500 | Loss: 0.0361606702208519, Learning Rate: 3.939595626434311e-05, Gradient Norm: 0.407830148935318) +Step... (30525 | Loss: 0.03200415521860123, Learning Rate: 3.9345453842543066e-05, Gradient Norm: 0.5616704821586609) +Step... (30550 | Loss: 0.014776033349335194, Learning Rate: 3.929495142074302e-05, Gradient Norm: 0.3266984522342682) +Step... (30575 | Loss: 0.012889200821518898, Learning Rate: 3.924444172298536e-05, Gradient Norm: 0.1923571079969406) +Step... (30600 | Loss: 0.013984130695462227, Learning Rate: 3.919393930118531e-05, Gradient Norm: 0.3364371657371521) +Step... (30625 | Loss: 0.029455211013555527, Learning Rate: 3.914343687938526e-05, Gradient Norm: 0.34443628787994385) +Step... (30650 | Loss: 0.03519538417458534, Learning Rate: 3.90929271816276e-05, Gradient Norm: 0.5291494131088257) +Step... (30675 | Loss: 0.02681455761194229, Learning Rate: 3.9042424759827554e-05, Gradient Norm: 0.30602386593818665) +Step... (30700 | Loss: 0.02662944421172142, Learning Rate: 3.899192233802751e-05, Gradient Norm: 0.36101633310317993) +Step... (30725 | Loss: 0.01868010126054287, Learning Rate: 3.8941412640269846e-05, Gradient Norm: 0.336725115776062) +Step... (30750 | Loss: 0.05492250248789787, Learning Rate: 3.88909102184698e-05, Gradient Norm: 0.7654430270195007) + Training...: 0% 0/4393 [00:00