diff --git "a/nohup.out" "b/nohup.out" --- "a/nohup.out" +++ "b/nohup.out" @@ -32041,3 +32041,11456 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + Training...: 55% 2428/4393 [3:21:24<91:44:40, 168.08s/it]/home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/training/common_utils.py:25: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map( + + Training...: 55% 2429/4393 [3:21:30<65:09:51, 119.45s/it] + Training...: 55% 2430/4393 [3:21:35<46:26:55, 85.18s/it]  + Training...: 55% 2431/4393 [3:21:40<33:19:38, 61.15s/it] + Training...: 55% 2432/4393 [3:21:45<24:08:23, 44.32s/it] + Training...: 55% 2433/4393 [3:21:50<17:41:25, 32.49s/it] + Training...: 55% 2434/4393 [3:21:55<13:10:02, 24.20s/it] + Training...: 55% 2435/4393 [3:22:00<9:59:27, 18.37s/it]  + Training...: 55% 2436/4393 [3:22:04<7:45:08, 14.26s/it] + Training...: 55% 2437/4393 [3:22:09<6:08:36, 11.31s/it] + Training...: 55% 2438/4393 [3:22:13<5:00:06, 9.21s/it] + Training...: 56% 2439/4393 [3:22:17<4:08:49, 7.64s/it] + Training...: 56% 2440/4393 [3:22:21<3:30:45, 6.47s/it] + Training...: 56% 2441/4393 [3:22:24<3:01:22, 5.58s/it] + Training...: 56% 2442/4393 [3:22:27<2:37:10, 4.83s/it] + Training...: 56% 2443/4393 [3:22:30<2:17:56, 4.24s/it] + Training...: 56% 2444/4393 [3:22:33<2:01:32, 3.74s/it] + Training...: 56% 2445/4393 [3:22:35<1:47:46, 3.32s/it] + Training...: 56% 2446/4393 [3:22:37<1:34:49, 2.92s/it] + Training...: 56% 2447/4393 [3:22:39<1:23:30, 2.57s/it] + Training...: 56% 2448/4393 [3:22:40<1:12:52, 2.25s/it] + Training...: 56% 2449/4393 [3:22:42<1:03:17, 1.95s/it] + Training...: 56% 2450/4393 [3:22:43<53:52, 1.66s/it]  + Training...: 56% 2451/4393 [3:22:49<1:40:34, 3.11s/it] + Training...: 56% 2452/4393 [3:22:56<2:14:47, 4.17s/it]/home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:61: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x[0], tree) +run_flax_speech_recognition_seq2seq.py:336: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:43:52<46:43:02, 21022.76s/it] + Training...: 56% 2452/4393 [3:23:03<2:14:47, 4.17s/it] + Training...: 56% 2453/4393 [3:23:03<2:42:07, 5.01s/it] + Training...: 56% 2454/4393 [3:23:09<2:52:41, 5.34s/it] + Training...: 56% 2455/4393 [3:23:15<3:00:09, 5.58s/it] + Training...: 56% 2456/4393 [3:23:21<3:03:24, 5.68s/it] + Training...: 56% 2457/4393 [3:23:27<3:06:25, 5.78s/it] + Training...: 56% 2458/4393 [3:23:33<3:07:22, 5.81s/it] + Training...: 56% 2459/4393 [3:23:39<3:07:16, 5.81s/it] + Training...: 56% 2460/4393 [3:23:44<3:07:02, 5.81s/it] + Training...: 56% 2461/4393 [3:23:50<3:06:43, 5.80s/it] + Training...: 56% 2462/4393 [3:23:56<3:05:58, 5.78s/it] + Training...: 56% 2463/4393 [3:24:02<3:05:08, 5.76s/it] + Training...: 56% 2464/4393 [3:24:07<3:03:25, 5.71s/it] + Training...: 56% 2465/4393 [3:24:13<3:02:22, 5.68s/it] + Training...: 56% 2466/4393 [3:24:18<3:01:03, 5.64s/it] + Training...: 56% 2467/4393 [3:24:24<3:01:43, 5.66s/it] + Training...: 56% 2468/4393 [3:24:30<3:02:05, 5.68s/it] + Training...: 56% 2469/4393 [3:24:35<3:00:19, 5.62s/it] + Training...: 56% 2470/4393 [3:24:41<2:59:14, 5.59s/it] + Training...: 56% 2471/4393 [3:24:46<2:58:06, 5.56s/it] + Training...: 56% 2472/4393 [3:24:52<2:55:23, 5.48s/it] + Training...: 56% 2473/4393 [3:24:57<2:53:34, 5.42s/it] + Training...: 56% 2474/4393 [3:25:02<2:52:02, 5.38s/it] + Training...: 56% 2475/4393 [3:25:07<2:50:18, 5.33s/it] + Training...: 56% 2476/4393 [3:25:12<2:48:08, 5.26s/it] + Training...: 56% 2477/4393 [3:25:18<2:47:16, 5.24s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:46:13<46:43:02, 21022.76s/it] + Training...: 56% 2477/4393 [3:25:23<2:47:16, 5.24s/it] + Training...: 56% 2478/4393 [3:25:23<2:53:19, 5.43s/it] + Training...: 56% 2479/4393 [3:25:28<2:48:46, 5.29s/it] + Training...: 56% 2480/4393 [3:25:33<2:44:46, 5.17s/it] + Training...: 56% 2481/4393 [3:25:38<2:41:54, 5.08s/it] + Training...: 56% 2482/4393 [3:25:43<2:39:06, 5.00s/it] + Training...: 57% 2483/4393 [3:25:48<2:37:46, 4.96s/it] + Training...: 57% 2484/4393 [3:25:52<2:34:29, 4.86s/it] + Training...: 57% 2485/4393 [3:25:57<2:31:54, 4.78s/it] + Training...: 57% 2486/4393 [3:26:01<2:27:54, 4.65s/it] + Training...: 57% 2487/4393 [3:26:06<2:23:27, 4.52s/it] + Training...: 57% 2488/4393 [3:26:10<2:18:11, 4.35s/it] + Training...: 57% 2489/4393 [3:26:13<2:13:01, 4.19s/it] + Training...: 57% 2490/4393 [3:26:17<2:06:28, 3.99s/it] + Training...: 57% 2491/4393 [3:26:20<1:59:40, 3.78s/it] + Training...: 57% 2492/4393 [3:26:23<1:52:06, 3.54s/it] + Training...: 57% 2493/4393 [3:26:26<1:44:47, 3.31s/it] + Training...: 57% 2494/4393 [3:26:28<1:36:45, 3.06s/it] + Training...: 57% 2495/4393 [3:26:31<1:28:59, 2.81s/it] + Training...: 57% 2496/4393 [3:26:33<1:21:26, 2.58s/it] + Training...: 57% 2497/4393 [3:26:34<1:13:20, 2.32s/it] + Training...: 57% 2498/4393 [3:26:36<1:05:15, 2.07s/it] + Training...: 57% 2499/4393 [3:26:37<57:22, 1.82s/it]  + Training...: 57% 2500/4393 [3:26:38<49:42, 1.58s/it] + Training...: 57% 2501/4393 [3:26:44<1:34:03, 2.98s/it] + Training...: 57% 2502/4393 [3:26:51<2:07:02, 4.03s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:47:47<46:43:02, 21022.76s/it] + Training...: 57% 2502/4393 [3:26:58<2:07:02, 4.03s/it] + Training...: 57% 2503/4393 [3:26:58<2:33:28, 4.87s/it] + Training...: 57% 2504/4393 [3:27:04<2:43:49, 5.20s/it] + Training...: 57% 2505/4393 [3:27:10<2:50:53, 5.43s/it] + Training...: 57% 2506/4393 [3:27:16<2:55:36, 5.58s/it] + Training...: 57% 2507/4393 [3:27:22<2:58:53, 5.69s/it] + Training...: 57% 2508/4393 [3:27:27<3:00:22, 5.74s/it] + Training...: 57% 2509/4393 [3:27:33<3:01:02, 5.77s/it] + Training...: 57% 2510/4393 [3:27:39<3:01:19, 5.78s/it] + Training...: 57% 2511/4393 [3:27:45<3:01:19, 5.78s/it] + Training...: 57% 2512/4393 [3:27:51<3:01:10, 5.78s/it] + Training...: 57% 2513/4393 [3:27:56<3:00:49, 5.77s/it] + Training...: 57% 2514/4393 [3:28:02<3:00:05, 5.75s/it] + Training...: 57% 2515/4393 [3:28:08<3:01:38, 5.80s/it] + Training...: 57% 2516/4393 [3:28:14<2:59:58, 5.75s/it] + Training...: 57% 2517/4393 [3:28:19<2:58:30, 5.71s/it] + Training...: 57% 2518/4393 [3:28:25<2:57:34, 5.68s/it] + Training...: 57% 2519/4393 [3:28:30<2:56:51, 5.66s/it] + Training...: 57% 2520/4393 [3:28:36<2:54:31, 5.59s/it] + Training...: 57% 2521/4393 [3:28:41<2:53:28, 5.56s/it] + Training...: 57% 2522/4393 [3:28:47<2:52:36, 5.54s/it] + Training...: 57% 2523/4393 [3:28:52<2:52:05, 5.52s/it] + Training...: 57% 2524/4393 [3:28:58<2:49:07, 5.43s/it] + Training...: 57% 2525/4393 [3:29:03<2:46:54, 5.36s/it] + Training...: 58% 2526/4393 [3:29:08<2:45:01, 5.30s/it] + Training...: 58% 2527/4393 [3:29:13<2:43:45, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:50:08<46:43:02, 21022.76s/it] + Training...: 58% 2527/4393 [3:29:19<2:43:45, 5.27s/it] + Training...: 58% 2528/4393 [3:29:19<2:48:35, 5.42s/it] + Training...: 58% 2529/4393 [3:29:24<2:45:19, 5.32s/it] + Training...: 58% 2530/4393 [3:29:29<2:41:37, 5.21s/it] + Training...: 58% 2531/4393 [3:29:34<2:38:42, 5.11s/it] + Training...: 58% 2532/4393 [3:29:39<2:35:34, 5.02s/it] + Training...: 58% 2533/4393 [3:29:43<2:33:09, 4.94s/it] + Training...: 58% 2534/4393 [3:29:48<2:30:13, 4.85s/it] + Training...: 58% 2535/4393 [3:29:53<2:27:54, 4.78s/it] + Training...: 58% 2536/4393 [3:29:57<2:24:24, 4.67s/it] + Training...: 58% 2537/4393 [3:30:01<2:20:43, 4.55s/it] + Training...: 58% 2538/4393 [3:30:05<2:16:04, 4.40s/it] + Training...: 58% 2539/4393 [3:30:09<2:11:39, 4.26s/it] + Training...: 58% 2540/4393 [3:30:13<2:06:34, 4.10s/it] + Training...: 58% 2541/4393 [3:30:16<2:00:42, 3.91s/it] + Training...: 58% 2542/4393 [3:30:20<1:54:29, 3.71s/it] + Training...: 58% 2543/4393 [3:30:23<1:47:30, 3.49s/it] + Training...: 58% 2544/4393 [3:30:25<1:40:13, 3.25s/it] + Training...: 58% 2545/4393 [3:30:28<1:32:25, 3.00s/it] + Training...: 58% 2546/4393 [3:30:30<1:24:21, 2.74s/it] + Training...: 58% 2547/4393 [3:30:32<1:15:39, 2.46s/it] + Training...: 58% 2548/4393 [3:30:33<1:07:30, 2.20s/it] + Training...: 58% 2549/4393 [3:30:35<59:22, 1.93s/it]  + Training...: 58% 2550/4393 [3:30:36<50:51, 1.66s/it] + Training...: 58% 2551/4393 [3:30:42<1:36:21, 3.14s/it] + Training...: 58% 2552/4393 [3:30:49<2:06:07, 4.11s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:51:45<46:43:02, 21022.76s/it] + Training...: 58% 2552/4393 [3:30:56<2:06:07, 4.11s/it] + Training...: 58% 2553/4393 [3:30:56<2:31:45, 4.95s/it] + Training...: 58% 2554/4393 [3:31:01<2:41:03, 5.25s/it] + Training...: 58% 2555/4393 [3:31:07<2:47:21, 5.46s/it] + Training...: 58% 2556/4393 [3:31:13<2:52:45, 5.64s/it] + Training...: 58% 2557/4393 [3:31:19<2:55:49, 5.75s/it] + Training...: 58% 2558/4393 [3:31:25<2:57:35, 5.81s/it] + Training...: 58% 2559/4393 [3:31:31<2:58:43, 5.85s/it] + Training...: 58% 2560/4393 [3:31:37<2:58:17, 5.84s/it] + Training...: 58% 2561/4393 [3:31:43<2:57:43, 5.82s/it] + Training...: 58% 2562/4393 [3:31:49<2:56:52, 5.80s/it] + Training...: 58% 2563/4393 [3:31:54<2:56:33, 5.79s/it] + Training...: 58% 2564/4393 [3:32:00<2:55:27, 5.76s/it] + Training...: 58% 2565/4393 [3:32:06<2:54:36, 5.73s/it] + Training...: 58% 2566/4393 [3:32:11<2:52:57, 5.68s/it] + Training...: 58% 2567/4393 [3:32:17<2:52:03, 5.65s/it] + Training...: 58% 2568/4393 [3:32:22<2:50:11, 5.60s/it] + Training...: 58% 2569/4393 [3:32:28<2:48:59, 5.56s/it] + Training...: 59% 2570/4393 [3:32:34<2:49:29, 5.58s/it] + Training...: 59% 2571/4393 [3:32:39<2:49:14, 5.57s/it] + Training...: 59% 2572/4393 [3:32:45<2:47:48, 5.53s/it] + Training...: 59% 2573/4393 [3:32:50<2:46:11, 5.48s/it] + Training...: 59% 2574/4393 [3:32:55<2:44:48, 5.44s/it] + Training...: 59% 2575/4393 [3:33:00<2:42:48, 5.37s/it] + Training...: 59% 2576/4393 [3:33:06<2:41:00, 5.32s/it] + Training...: 59% 2577/4393 [3:33:11<2:40:13, 5.29s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:54:06<46:43:02, 21022.76s/it] + Training...: 59% 2577/4393 [3:33:17<2:40:13, 5.29s/it] + Training...: 59% 2578/4393 [3:33:17<2:45:45, 5.48s/it] + Training...: 59% 2579/4393 [3:33:22<2:41:56, 5.36s/it] + Training...: 59% 2580/4393 [3:33:27<2:38:11, 5.24s/it] + Training...: 59% 2581/4393 [3:33:32<2:35:19, 5.14s/it] + Training...: 59% 2582/4393 [3:33:37<2:33:14, 5.08s/it] + Training...: 59% 2583/4393 [3:33:41<2:31:01, 5.01s/it] + Training...: 59% 2584/4393 [3:33:46<2:27:46, 4.90s/it] + Training...: 59% 2585/4393 [3:33:51<2:24:40, 4.80s/it] + Training...: 59% 2586/4393 [3:33:55<2:21:37, 4.70s/it] + Training...: 59% 2587/4393 [3:34:00<2:17:58, 4.58s/it] + Training...: 59% 2588/4393 [3:34:04<2:13:39, 4.44s/it] + Training...: 59% 2589/4393 [3:34:08<2:09:53, 4.32s/it] + Training...: 59% 2590/4393 [3:34:11<2:04:58, 4.16s/it] + Training...: 59% 2591/4393 [3:34:15<1:58:43, 3.95s/it] + Training...: 59% 2592/4393 [3:34:18<1:51:34, 3.72s/it] + Training...: 59% 2593/4393 [3:34:21<1:43:40, 3.46s/it] + Training...: 59% 2594/4393 [3:34:24<1:36:06, 3.21s/it] + Training...: 59% 2595/4393 [3:34:26<1:28:42, 2.96s/it] + Training...: 59% 2596/4393 [3:34:28<1:21:21, 2.72s/it] + Training...: 59% 2597/4393 [3:34:30<1:13:58, 2.47s/it] + Training...: 59% 2598/4393 [3:34:32<1:06:14, 2.21s/it] + Training...: 59% 2599/4393 [3:34:33<58:38, 1.96s/it]  + Training...: 59% 2600/4393 [3:34:34<50:30, 1.69s/it] + Training...: 59% 2601/4393 [3:34:40<1:30:27, 3.03s/it] + Training...: 59% 2602/4393 [3:34:46<1:59:09, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:55:42<46:43:02, 21022.76s/it] + Training...: 59% 2602/4393 [3:34:53<1:59:09, 3.99s/it] + Training...: 59% 2603/4393 [3:34:53<2:25:09, 4.87s/it] + Training...: 59% 2604/4393 [3:34:59<2:35:30, 5.22s/it] + Training...: 59% 2605/4393 [3:35:05<2:42:58, 5.47s/it] + Training...: 59% 2606/4393 [3:35:11<2:46:36, 5.59s/it] + Training...: 59% 2607/4393 [3:35:17<2:49:21, 5.69s/it] + Training...: 59% 2608/4393 [3:35:23<2:51:39, 5.77s/it] + Training...: 59% 2609/4393 [3:35:29<2:54:19, 5.86s/it] + Training...: 59% 2610/4393 [3:35:35<2:54:41, 5.88s/it] + Training...: 59% 2611/4393 [3:35:41<2:53:57, 5.86s/it] + Training...: 59% 2612/4393 [3:35:47<2:52:57, 5.83s/it] + Training...: 59% 2613/4393 [3:35:52<2:52:00, 5.80s/it] + Training...: 60% 2614/4393 [3:35:58<2:51:24, 5.78s/it] + Training...: 60% 2615/4393 [3:36:04<2:50:36, 5.76s/it] + Training...: 60% 2616/4393 [3:36:10<2:49:18, 5.72s/it] + Training...: 60% 2617/4393 [3:36:15<2:48:07, 5.68s/it] + Training...: 60% 2618/4393 [3:36:21<2:46:30, 5.63s/it] + Training...: 60% 2619/4393 [3:36:26<2:45:53, 5.61s/it] + Training...: 60% 2620/4393 [3:36:32<2:44:00, 5.55s/it] + Training...: 60% 2621/4393 [3:36:37<2:42:53, 5.52s/it] + Training...: 60% 2622/4393 [3:36:42<2:41:39, 5.48s/it] + Training...: 60% 2623/4393 [3:36:48<2:40:33, 5.44s/it] + Training...: 60% 2624/4393 [3:36:53<2:40:34, 5.45s/it] + Training...: 60% 2625/4393 [3:36:59<2:40:21, 5.44s/it] + Training...: 60% 2626/4393 [3:37:04<2:37:57, 5.36s/it] + Training...: 60% 2627/4393 [3:37:09<2:35:53, 5.30s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:58:04<46:43:02, 21022.76s/it] + Training...: 60% 2627/4393 [3:37:15<2:35:53, 5.30s/it] + Training...: 60% 2628/4393 [3:37:15<2:41:25, 5.49s/it] + Training...: 60% 2629/4393 [3:37:20<2:37:50, 5.37s/it] + Training...: 60% 2630/4393 [3:37:25<2:34:19, 5.25s/it] + Training...: 60% 2631/4393 [3:37:30<2:31:30, 5.16s/it] + Training...: 60% 2632/4393 [3:37:35<2:28:30, 5.06s/it] + Training...: 60% 2633/4393 [3:37:40<2:26:15, 4.99s/it] + Training...: 60% 2634/4393 [3:37:44<2:23:39, 4.90s/it] + Training...: 60% 2635/4393 [3:37:49<2:20:50, 4.81s/it] + Training...: 60% 2636/4393 [3:37:53<2:17:40, 4.70s/it] + Training...: 60% 2637/4393 [3:37:58<2:14:18, 4.59s/it] + Training...: 60% 2638/4393 [3:38:02<2:10:38, 4.47s/it] + Training...: 60% 2639/4393 [3:38:06<2:06:44, 4.34s/it] + Training...: 60% 2640/4393 [3:38:10<2:01:36, 4.16s/it] + Training...: 60% 2641/4393 [3:38:13<1:55:43, 3.96s/it] + Training...: 60% 2642/4393 [3:38:16<1:48:57, 3.73s/it] + Training...: 60% 2643/4393 [3:38:19<1:42:03, 3.50s/it] + Training...: 60% 2644/4393 [3:38:22<1:34:26, 3.24s/it] + Training...: 60% 2645/4393 [3:38:24<1:26:27, 2.97s/it] + Training...: 60% 2646/4393 [3:38:26<1:18:30, 2.70s/it] + Training...: 60% 2647/4393 [3:38:28<1:10:51, 2.44s/it] + Training...: 60% 2648/4393 [3:38:30<1:03:16, 2.18s/it] + Training...: 60% 2649/4393 [3:38:31<55:36, 1.91s/it]  + Training...: 60% 2650/4393 [3:38:32<47:56, 1.65s/it] + Training...: 60% 2651/4393 [3:38:38<1:28:17, 3.04s/it] + Training...: 60% 2652/4393 [3:38:45<1:56:08, 4.00s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [26:59:41<46:43:02, 21022.76s/it] + Training...: 60% 2652/4393 [3:38:51<1:56:08, 4.00s/it] + Training...: 60% 2653/4393 [3:38:51<2:21:19, 4.87s/it] + Training...: 60% 2654/4393 [3:38:57<2:31:17, 5.22s/it] + Training...: 60% 2655/4393 [3:39:04<2:38:02, 5.46s/it] + Training...: 60% 2656/4393 [3:39:09<2:42:11, 5.60s/it] + Training...: 60% 2657/4393 [3:39:15<2:44:45, 5.69s/it] + Training...: 61% 2658/4393 [3:39:21<2:46:06, 5.74s/it] + Training...: 61% 2659/4393 [3:39:27<2:47:04, 5.78s/it] + Training...: 61% 2660/4393 [3:39:33<2:47:28, 5.80s/it] + Training...: 61% 2661/4393 [3:39:39<2:47:14, 5.79s/it] + Training...: 61% 2662/4393 [3:39:44<2:46:39, 5.78s/it] + Training...: 61% 2663/4393 [3:39:50<2:46:06, 5.76s/it] + Training...: 61% 2664/4393 [3:39:56<2:47:14, 5.80s/it] + Training...: 61% 2665/4393 [3:40:02<2:47:30, 5.82s/it] + Training...: 61% 2666/4393 [3:40:07<2:45:17, 5.74s/it] + Training...: 61% 2667/4393 [3:40:13<2:44:27, 5.72s/it] + Training...: 61% 2668/4393 [3:40:19<2:42:32, 5.65s/it] + Training...: 61% 2669/4393 [3:40:24<2:41:01, 5.60s/it] + Training...: 61% 2670/4393 [3:40:30<2:39:32, 5.56s/it] + Training...: 61% 2671/4393 [3:40:35<2:38:09, 5.51s/it] + Training...: 61% 2672/4393 [3:40:40<2:37:00, 5.47s/it] + Training...: 61% 2673/4393 [3:40:46<2:36:27, 5.46s/it] + Training...: 61% 2674/4393 [3:40:51<2:34:58, 5.41s/it] + Training...: 61% 2675/4393 [3:40:56<2:33:27, 5.36s/it] + Training...: 61% 2676/4393 [3:41:02<2:31:50, 5.31s/it] + Training...: 61% 2677/4393 [3:41:07<2:30:16, 5.25s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:02:02<46:43:02, 21022.76s/it] + Training...: 61% 2677/4393 [3:41:12<2:30:16, 5.25s/it] + Training...: 61% 2678/4393 [3:41:12<2:35:14, 5.43s/it] + Training...: 61% 2679/4393 [3:41:18<2:32:15, 5.33s/it] + Training...: 61% 2680/4393 [3:41:23<2:29:02, 5.22s/it] + Training...: 61% 2681/4393 [3:41:28<2:26:38, 5.14s/it] + Training...: 61% 2682/4393 [3:41:32<2:24:00, 5.05s/it] + Training...: 61% 2683/4393 [3:41:37<2:21:18, 4.96s/it] + Training...: 61% 2684/4393 [3:41:42<2:19:33, 4.90s/it] + Training...: 61% 2685/4393 [3:41:47<2:17:29, 4.83s/it] + Training...: 61% 2686/4393 [3:41:51<2:13:48, 4.70s/it] + Training...: 61% 2687/4393 [3:41:55<2:09:54, 4.57s/it] + Training...: 61% 2688/4393 [3:41:59<2:05:20, 4.41s/it] + Training...: 61% 2689/4393 [3:42:03<2:00:43, 4.25s/it] + Training...: 61% 2690/4393 [3:42:07<1:55:24, 4.07s/it] + Training...: 61% 2691/4393 [3:42:10<1:49:32, 3.86s/it] + Training...: 61% 2692/4393 [3:42:13<1:43:10, 3.64s/it] + Training...: 61% 2693/4393 [3:42:16<1:36:02, 3.39s/it] + Training...: 61% 2694/4393 [3:42:19<1:29:08, 3.15s/it] + Training...: 61% 2695/4393 [3:42:21<1:22:09, 2.90s/it] + Training...: 61% 2696/4393 [3:42:23<1:15:20, 2.66s/it] + Training...: 61% 2697/4393 [3:42:25<1:08:16, 2.42s/it] + Training...: 61% 2698/4393 [3:42:26<1:01:12, 2.17s/it] + Training...: 61% 2699/4393 [3:42:28<54:04, 1.92s/it]  + Training...: 61% 2700/4393 [3:42:29<46:34, 1.65s/it] + Training...: 61% 2701/4393 [3:42:35<1:25:42, 3.04s/it] + Training...: 62% 2702/4393 [3:42:41<1:52:38, 4.00s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:03:37<46:43:02, 21022.76s/it] + Training...: 62% 2702/4393 [3:42:48<1:52:38, 4.00s/it] + Training...: 62% 2703/4393 [3:42:48<2:16:57, 4.86s/it] + Training...: 62% 2704/4393 [3:42:54<2:26:18, 5.20s/it] + Training...: 62% 2705/4393 [3:43:00<2:32:52, 5.43s/it] + Training...: 62% 2706/4393 [3:43:06<2:36:53, 5.58s/it] + Training...: 62% 2707/4393 [3:43:12<2:39:36, 5.68s/it] + Training...: 62% 2708/4393 [3:43:18<2:41:09, 5.74s/it] + Training...: 62% 2709/4393 [3:43:24<2:43:10, 5.81s/it] + Training...: 62% 2710/4393 [3:43:30<2:45:31, 5.90s/it] + Training...: 62% 2711/4393 [3:43:36<2:44:43, 5.88s/it] + Training...: 62% 2712/4393 [3:43:42<2:43:04, 5.82s/it] + Training...: 62% 2713/4393 [3:43:47<2:41:58, 5.78s/it] + Training...: 62% 2714/4393 [3:43:53<2:40:57, 5.75s/it] + Training...: 62% 2715/4393 [3:43:59<2:39:52, 5.72s/it] + Training...: 62% 2716/4393 [3:44:04<2:39:06, 5.69s/it] + Training...: 62% 2717/4393 [3:44:10<2:38:05, 5.66s/it] + Training...: 62% 2718/4393 [3:44:15<2:37:07, 5.63s/it] + Training...: 62% 2719/4393 [3:44:21<2:36:33, 5.61s/it] + Training...: 62% 2720/4393 [3:44:26<2:35:05, 5.56s/it] + Training...: 62% 2721/4393 [3:44:32<2:34:00, 5.53s/it] + Training...: 62% 2722/4393 [3:44:37<2:32:14, 5.47s/it] + Training...: 62% 2723/4393 [3:44:42<2:31:25, 5.44s/it] + Training...: 62% 2724/4393 [3:44:48<2:30:26, 5.41s/it] + Training...: 62% 2725/4393 [3:44:53<2:29:43, 5.39s/it] + Training...: 62% 2726/4393 [3:44:58<2:28:16, 5.34s/it] + Training...: 62% 2727/4393 [3:45:04<2:27:27, 5.31s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:05:59<46:43:02, 21022.76s/it] + Training...: 62% 2727/4393 [3:45:10<2:27:27, 5.31s/it] + Training...: 62% 2728/4393 [3:45:10<2:33:37, 5.54s/it] + Training...: 62% 2729/4393 [3:45:15<2:29:30, 5.39s/it] + Training...: 62% 2730/4393 [3:45:20<2:25:17, 5.24s/it] + Training...: 62% 2731/4393 [3:45:25<2:22:37, 5.15s/it] + Training...: 62% 2732/4393 [3:45:29<2:19:34, 5.04s/it] + Training...: 62% 2733/4393 [3:45:34<2:17:07, 4.96s/it] + Training...: 62% 2734/4393 [3:45:39<2:14:31, 4.87s/it] + Training...: 62% 2735/4393 [3:45:43<2:11:30, 4.76s/it] + Training...: 62% 2736/4393 [3:45:48<2:08:04, 4.64s/it] + Training...: 62% 2737/4393 [3:45:52<2:04:51, 4.52s/it] + Training...: 62% 2738/4393 [3:45:56<2:01:02, 4.39s/it] + Training...: 62% 2739/4393 [3:46:00<1:57:02, 4.25s/it] + Training...: 62% 2740/4393 [3:46:04<1:52:13, 4.07s/it] + Training...: 62% 2741/4393 [3:46:07<1:47:05, 3.89s/it] + Training...: 62% 2742/4393 [3:46:10<1:41:21, 3.68s/it] + Training...: 62% 2743/4393 [3:46:13<1:35:10, 3.46s/it] + Training...: 62% 2744/4393 [3:46:16<1:28:32, 3.22s/it] + Training...: 62% 2745/4393 [3:46:18<1:21:26, 2.97s/it] + Training...: 63% 2746/4393 [3:46:20<1:14:26, 2.71s/it] + Training...: 63% 2747/4393 [3:46:22<1:06:51, 2.44s/it] + Training...: 63% 2748/4393 [3:46:24<59:44, 2.18s/it]  + Training...: 63% 2749/4393 [3:46:25<52:21, 1.91s/it] + Training...: 63% 2750/4393 [3:46:26<44:59, 1.64s/it] + Training...: 63% 2751/4393 [3:46:32<1:22:32, 3.02s/it] + Training...: 63% 2752/4393 [3:46:38<1:48:43, 3.98s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:07:34<46:43:02, 21022.76s/it] + Training...: 63% 2752/4393 [3:46:45<1:48:43, 3.98s/it] + Training...: 63% 2753/4393 [3:46:45<2:12:30, 4.85s/it] + Training...: 63% 2754/4393 [3:46:51<2:21:49, 5.19s/it] + Training...: 63% 2755/4393 [3:46:58<2:30:24, 5.51s/it] + Training...: 63% 2756/4393 [3:47:04<2:34:25, 5.66s/it] + Training...: 63% 2757/4393 [3:47:10<2:36:48, 5.75s/it] + Training...: 63% 2758/4393 [3:47:15<2:38:24, 5.81s/it] + Training...: 63% 2759/4393 [3:47:21<2:39:15, 5.85s/it] + Training...: 63% 2760/4393 [3:47:27<2:39:04, 5.84s/it] + Training...: 63% 2761/4393 [3:47:33<2:38:31, 5.83s/it] + Training...: 63% 2762/4393 [3:47:39<2:37:49, 5.81s/it] + Training...: 63% 2763/4393 [3:47:45<2:37:34, 5.80s/it] + Training...: 63% 2764/4393 [3:47:50<2:37:04, 5.79s/it] + Training...: 63% 2765/4393 [3:47:56<2:35:56, 5.75s/it] + Training...: 63% 2766/4393 [3:48:02<2:34:20, 5.69s/it] + Training...: 63% 2767/4393 [3:48:07<2:33:45, 5.67s/it] + Training...: 63% 2768/4393 [3:48:13<2:32:08, 5.62s/it] + Training...: 63% 2769/4393 [3:48:18<2:31:58, 5.61s/it] + Training...: 63% 2770/4393 [3:48:24<2:32:21, 5.63s/it] + Training...: 63% 2771/4393 [3:48:29<2:30:25, 5.56s/it] + Training...: 63% 2772/4393 [3:48:35<2:29:07, 5.52s/it] + Training...: 63% 2773/4393 [3:48:40<2:27:37, 5.47s/it] + Training...: 63% 2774/4393 [3:48:45<2:25:43, 5.40s/it] + Training...: 63% 2775/4393 [3:48:51<2:24:11, 5.35s/it] + Training...: 63% 2776/4393 [3:48:56<2:23:15, 5.32s/it] + Training...: 63% 2777/4393 [3:49:01<2:21:47, 5.26s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:09:56<46:43:02, 21022.76s/it] + Training...: 63% 2777/4393 [3:49:07<2:21:47, 5.26s/it] + Training...: 63% 2778/4393 [3:49:07<2:25:58, 5.42s/it] + Training...: 63% 2779/4393 [3:49:12<2:22:48, 5.31s/it] + Training...: 63% 2780/4393 [3:49:17<2:19:41, 5.20s/it] + Training...: 63% 2781/4393 [3:49:22<2:16:37, 5.09s/it] + Training...: 63% 2782/4393 [3:49:26<2:14:17, 5.00s/it] + Training...: 63% 2783/4393 [3:49:31<2:11:51, 4.91s/it] + Training...: 63% 2784/4393 [3:49:36<2:09:39, 4.84s/it] + Training...: 63% 2785/4393 [3:49:40<2:06:31, 4.72s/it] + Training...: 63% 2786/4393 [3:49:44<2:03:00, 4.59s/it] + Training...: 63% 2787/4393 [3:49:49<2:00:58, 4.52s/it] + Training...: 63% 2788/4393 [3:49:53<1:57:46, 4.40s/it] + Training...: 63% 2789/4393 [3:49:57<1:52:27, 4.21s/it] + Training...: 64% 2790/4393 [3:50:00<1:46:48, 4.00s/it] + Training...: 64% 2791/4393 [3:50:03<1:40:45, 3.77s/it] + Training...: 64% 2792/4393 [3:50:07<1:34:57, 3.56s/it] + Training...: 64% 2793/4393 [3:50:09<1:28:53, 3.33s/it] + Training...: 64% 2794/4393 [3:50:12<1:22:17, 3.09s/it] + Training...: 64% 2795/4393 [3:50:14<1:15:35, 2.84s/it] + Training...: 64% 2796/4393 [3:50:16<1:08:59, 2.59s/it] + Training...: 64% 2797/4393 [3:50:18<1:02:17, 2.34s/it] + Training...: 64% 2798/4393 [3:50:19<55:55, 2.10s/it]  + Training...: 64% 2799/4393 [3:50:21<49:06, 1.85s/it] + Training...: 64% 2800/4393 [3:50:22<42:32, 1.60s/it] + Training...: 64% 2801/4393 [3:50:28<1:18:45, 2.97s/it] + Training...: 64% 2802/4393 [3:50:34<1:45:05, 3.96s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:11:30<46:43:02, 21022.76s/it] + Training...: 64% 2802/4393 [3:50:41<1:45:05, 3.96s/it] + Training...: 64% 2803/4393 [3:50:41<2:08:01, 4.83s/it] + Training...: 64% 2804/4393 [3:50:47<2:16:54, 5.17s/it] + Training...: 64% 2805/4393 [3:50:53<2:23:40, 5.43s/it] + Training...: 64% 2806/4393 [3:50:59<2:27:31, 5.58s/it] + Training...: 64% 2807/4393 [3:51:05<2:30:23, 5.69s/it] + Training...: 64% 2808/4393 [3:51:11<2:31:45, 5.74s/it] + Training...: 64% 2809/4393 [3:51:17<2:34:44, 5.86s/it] + Training...: 64% 2810/4393 [3:51:23<2:34:26, 5.85s/it] + Training...: 64% 2811/4393 [3:51:28<2:33:47, 5.83s/it] + Training...: 64% 2812/4393 [3:51:34<2:32:27, 5.79s/it] + Training...: 64% 2813/4393 [3:51:40<2:31:55, 5.77s/it] + Training...: 64% 2814/4393 [3:51:46<2:30:43, 5.73s/it] + Training...: 64% 2815/4393 [3:51:51<2:30:03, 5.71s/it] + Training...: 64% 2816/4393 [3:51:57<2:28:49, 5.66s/it] + Training...: 64% 2817/4393 [3:52:02<2:27:46, 5.63s/it] + Training...: 64% 2818/4393 [3:52:08<2:27:03, 5.60s/it] + Training...: 64% 2819/4393 [3:52:13<2:26:06, 5.57s/it] + Training...: 64% 2820/4393 [3:52:19<2:25:13, 5.54s/it] + Training...: 64% 2821/4393 [3:52:24<2:23:49, 5.49s/it] + Training...: 64% 2822/4393 [3:52:30<2:22:48, 5.45s/it] + Training...: 64% 2823/4393 [3:52:35<2:21:49, 5.42s/it] + Training...: 64% 2824/4393 [3:52:40<2:20:37, 5.38s/it] + Training...: 64% 2825/4393 [3:52:45<2:19:42, 5.35s/it] + Training...: 64% 2826/4393 [3:52:51<2:19:00, 5.32s/it] + Training...: 64% 2827/4393 [3:52:56<2:19:55, 5.36s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:13:51<46:43:02, 21022.76s/it] + Training...: 64% 2827/4393 [3:53:02<2:19:55, 5.36s/it] + Training...: 64% 2828/4393 [3:53:02<2:24:10, 5.53s/it] + Training...: 64% 2829/4393 [3:53:07<2:20:23, 5.39s/it] + Training...: 64% 2830/4393 [3:53:12<2:16:53, 5.26s/it] + Training...: 64% 2831/4393 [3:53:17<2:14:18, 5.16s/it] + Training...: 64% 2832/4393 [3:53:22<2:11:28, 5.05s/it] + Training...: 64% 2833/4393 [3:53:27<2:09:21, 4.98s/it] + Training...: 65% 2834/4393 [3:53:31<2:06:30, 4.87s/it] + Training...: 65% 2835/4393 [3:53:36<2:03:36, 4.76s/it] + Training...: 65% 2836/4393 [3:53:40<2:00:38, 4.65s/it] + Training...: 65% 2837/4393 [3:53:44<1:57:34, 4.53s/it] + Training...: 65% 2838/4393 [3:53:49<1:54:28, 4.42s/it] + Training...: 65% 2839/4393 [3:53:52<1:50:26, 4.26s/it] + Training...: 65% 2840/4393 [3:53:56<1:45:15, 4.07s/it] + Training...: 65% 2841/4393 [3:53:59<1:39:44, 3.86s/it] + Training...: 65% 2842/4393 [3:54:02<1:33:22, 3.61s/it] + Training...: 65% 2843/4393 [3:54:05<1:26:29, 3.35s/it] + Training...: 65% 2844/4393 [3:54:08<1:19:47, 3.09s/it] + Training...: 65% 2845/4393 [3:54:10<1:12:55, 2.83s/it] + Training...: 65% 2846/4393 [3:54:12<1:06:20, 2.57s/it] + Training...: 65% 2847/4393 [3:54:14<59:57, 2.33s/it]  + Training...: 65% 2848/4393 [3:54:15<53:23, 2.07s/it] + Training...: 65% 2849/4393 [3:54:16<46:51, 1.82s/it] + Training...: 65% 2850/4393 [3:54:17<40:18, 1.57s/it] + Training...: 65% 2851/4393 [3:54:24<1:16:55, 2.99s/it] + Training...: 65% 2852/4393 [3:54:30<1:42:26, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:15:26<46:43:02, 21022.76s/it] + Training...: 65% 2852/4393 [3:54:37<1:42:26, 3.99s/it] + Training...: 65% 2853/4393 [3:54:37<2:04:14, 4.84s/it] + Training...: 65% 2854/4393 [3:54:43<2:12:59, 5.18s/it] + Training...: 65% 2855/4393 [3:54:49<2:19:23, 5.44s/it] + Training...: 65% 2856/4393 [3:54:55<2:22:48, 5.58s/it] + Training...: 65% 2857/4393 [3:55:01<2:25:02, 5.67s/it] + Training...: 65% 2858/4393 [3:55:06<2:26:09, 5.71s/it] + Training...: 65% 2859/4393 [3:55:12<2:26:35, 5.73s/it] + Training...: 65% 2860/4393 [3:55:18<2:26:59, 5.75s/it] + Training...: 65% 2861/4393 [3:55:24<2:27:07, 5.76s/it] + Training...: 65% 2862/4393 [3:55:29<2:26:28, 5.74s/it] + Training...: 65% 2863/4393 [3:55:35<2:26:54, 5.76s/it] + Training...: 65% 2864/4393 [3:55:41<2:26:28, 5.75s/it] + Training...: 65% 2865/4393 [3:55:47<2:25:54, 5.73s/it] + Training...: 65% 2866/4393 [3:55:52<2:26:27, 5.75s/it] + Training...: 65% 2867/4393 [3:55:58<2:24:55, 5.70s/it] + Training...: 65% 2868/4393 [3:56:03<2:22:46, 5.62s/it] + Training...: 65% 2869/4393 [3:56:09<2:21:16, 5.56s/it] + Training...: 65% 2870/4393 [3:56:14<2:19:37, 5.50s/it] + Training...: 65% 2871/4393 [3:56:20<2:18:16, 5.45s/it] + Training...: 65% 2872/4393 [3:56:25<2:16:42, 5.39s/it] + Training...: 65% 2873/4393 [3:56:30<2:15:36, 5.35s/it] + Training...: 65% 2874/4393 [3:56:35<2:14:29, 5.31s/it] + Training...: 65% 2875/4393 [3:56:41<2:13:57, 5.30s/it] + Training...: 65% 2876/4393 [3:56:46<2:12:22, 5.24s/it] + Training...: 65% 2877/4393 [3:56:51<2:11:12, 5.19s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:17:46<46:43:02, 21022.76s/it] + Training...: 65% 2877/4393 [3:56:56<2:11:12, 5.19s/it] + Training...: 66% 2878/4393 [3:56:56<2:15:15, 5.36s/it] + Training...: 66% 2879/4393 [3:57:01<2:12:25, 5.25s/it] + Training...: 66% 2880/4393 [3:57:06<2:09:46, 5.15s/it] + Training...: 66% 2881/4393 [3:57:11<2:07:10, 5.05s/it] + Training...: 66% 2882/4393 [3:57:16<2:05:49, 5.00s/it] + Training...: 66% 2883/4393 [3:57:21<2:04:30, 4.95s/it] + Training...: 66% 2884/4393 [3:57:25<2:01:30, 4.83s/it] + Training...: 66% 2885/4393 [3:57:30<1:58:11, 4.70s/it] + Training...: 66% 2886/4393 [3:57:34<1:54:31, 4.56s/it] + Training...: 66% 2887/4393 [3:57:38<1:50:48, 4.41s/it] + Training...: 66% 2888/4393 [3:57:42<1:46:52, 4.26s/it] + Training...: 66% 2889/4393 [3:57:46<1:42:28, 4.09s/it] + Training...: 66% 2890/4393 [3:57:49<1:37:45, 3.90s/it] + Training...: 66% 2891/4393 [3:57:52<1:32:44, 3.70s/it] + Training...: 66% 2892/4393 [3:57:55<1:27:13, 3.49s/it] + Training...: 66% 2893/4393 [3:57:58<1:21:52, 3.28s/it] + Training...: 66% 2894/4393 [3:58:01<1:15:46, 3.03s/it] + Training...: 66% 2895/4393 [3:58:03<1:09:45, 2.79s/it] + Training...: 66% 2896/4393 [3:58:05<1:03:43, 2.55s/it] + Training...: 66% 2897/4393 [3:58:07<57:40, 2.31s/it]  + Training...: 66% 2898/4393 [3:58:08<51:26, 2.06s/it] + Training...: 66% 2899/4393 [3:58:09<45:19, 1.82s/it] + Training...: 66% 2900/4393 [3:58:10<39:11, 1.57s/it] + Training...: 66% 2901/4393 [3:58:17<1:14:39, 3.00s/it] + Training...: 66% 2902/4393 [3:58:23<1:38:42, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:19:19<46:43:02, 21022.76s/it] + Training...: 66% 2902/4393 [3:58:30<1:38:42, 3.97s/it] + Training...: 66% 2903/4393 [3:58:30<2:00:34, 4.86s/it] + Training...: 66% 2904/4393 [3:58:36<2:08:48, 5.19s/it] + Training...: 66% 2905/4393 [3:58:42<2:14:43, 5.43s/it] + Training...: 66% 2906/4393 [3:58:48<2:18:25, 5.59s/it] + Training...: 66% 2907/4393 [3:58:54<2:21:12, 5.70s/it] + Training...: 66% 2908/4393 [3:59:00<2:24:09, 5.82s/it] + Training...: 66% 2909/4393 [3:59:06<2:24:37, 5.85s/it] + Training...: 66% 2910/4393 [3:59:12<2:23:59, 5.83s/it] + Training...: 66% 2911/4393 [3:59:17<2:23:51, 5.82s/it] + Training...: 66% 2912/4393 [3:59:23<2:23:05, 5.80s/it] + Training...: 66% 2913/4393 [3:59:29<2:22:31, 5.78s/it] + Training...: 66% 2914/4393 [3:59:35<2:22:02, 5.76s/it] + Training...: 66% 2915/4393 [3:59:40<2:21:14, 5.73s/it] + Training...: 66% 2916/4393 [3:59:46<2:20:09, 5.69s/it] + Training...: 66% 2917/4393 [3:59:51<2:19:08, 5.66s/it] + Training...: 66% 2918/4393 [3:59:57<2:18:31, 5.63s/it] + Training...: 66% 2919/4393 [4:00:03<2:17:28, 5.60s/it] + Training...: 66% 2920/4393 [4:00:08<2:16:13, 5.55s/it] + Training...: 66% 2921/4393 [4:00:13<2:15:00, 5.50s/it] + Training...: 67% 2922/4393 [4:00:19<2:14:23, 5.48s/it] + Training...: 67% 2923/4393 [4:00:24<2:13:18, 5.44s/it] + Training...: 67% 2924/4393 [4:00:29<2:12:07, 5.40s/it] + Training...: 67% 2925/4393 [4:00:35<2:12:52, 5.43s/it] + Training...: 67% 2926/4393 [4:00:40<2:11:23, 5.37s/it] + Training...: 67% 2927/4393 [4:00:45<2:10:06, 5.32s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:21:40<46:43:02, 21022.76s/it] + Training...: 67% 2927/4393 [4:00:51<2:10:06, 5.32s/it] + Training...: 67% 2928/4393 [4:00:51<2:14:15, 5.50s/it] + Training...: 67% 2929/4393 [4:00:56<2:10:50, 5.36s/it] + Training...: 67% 2930/4393 [4:01:01<2:07:35, 5.23s/it] + Training...: 67% 2931/4393 [4:01:06<2:04:38, 5.11s/it] + Training...: 67% 2932/4393 [4:01:11<2:02:28, 5.03s/it] + Training...: 67% 2933/4393 [4:01:16<2:00:48, 4.96s/it] + Training...: 67% 2934/4393 [4:01:20<1:58:00, 4.85s/it] + Training...: 67% 2935/4393 [4:01:25<1:55:17, 4.74s/it] + Training...: 67% 2936/4393 [4:01:29<1:52:08, 4.62s/it] + Training...: 67% 2937/4393 [4:01:33<1:48:43, 4.48s/it] + Training...: 67% 2938/4393 [4:01:37<1:45:01, 4.33s/it] + Training...: 67% 2939/4393 [4:01:41<1:40:37, 4.15s/it] + Training...: 67% 2940/4393 [4:01:45<1:36:00, 3.96s/it] + Training...: 67% 2941/4393 [4:01:48<1:31:13, 3.77s/it] + Training...: 67% 2942/4393 [4:01:51<1:26:05, 3.56s/it] + Training...: 67% 2943/4393 [4:01:54<1:20:29, 3.33s/it] + Training...: 67% 2944/4393 [4:01:56<1:14:31, 3.09s/it] + Training...: 67% 2945/4393 [4:01:59<1:08:23, 2.83s/it] + Training...: 67% 2946/4393 [4:02:01<1:02:48, 2.60s/it] + Training...: 67% 2947/4393 [4:02:02<57:04, 2.37s/it]  + Training...: 67% 2948/4393 [4:02:04<51:02, 2.12s/it] + Training...: 67% 2949/4393 [4:02:05<45:01, 1.87s/it] + Training...: 67% 2950/4393 [4:02:06<38:45, 1.61s/it] + Training...: 67% 2951/4393 [4:02:13<1:12:57, 3.04s/it] + Training...: 67% 2952/4393 [4:02:19<1:36:35, 4.02s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:23:15<46:43:02, 21022.76s/it] + Training...: 67% 2952/4393 [4:02:26<1:36:35, 4.02s/it] + Training...: 67% 2953/4393 [4:02:26<1:56:49, 4.87s/it] + Training...: 67% 2954/4393 [4:02:32<2:04:26, 5.19s/it] + Training...: 67% 2955/4393 [4:02:38<2:09:47, 5.42s/it] + Training...: 67% 2956/4393 [4:02:44<2:13:02, 5.55s/it] + Training...: 67% 2957/4393 [4:02:49<2:15:17, 5.65s/it] + Training...: 67% 2958/4393 [4:02:55<2:16:09, 5.69s/it] + Training...: 67% 2959/4393 [4:03:01<2:17:09, 5.74s/it] + Training...: 67% 2960/4393 [4:03:07<2:17:07, 5.74s/it] + Training...: 67% 2961/4393 [4:03:13<2:17:04, 5.74s/it] + Training...: 67% 2962/4393 [4:03:18<2:17:01, 5.74s/it] + Training...: 67% 2963/4393 [4:03:24<2:16:40, 5.73s/it] + Training...: 67% 2964/4393 [4:03:30<2:17:58, 5.79s/it] + Training...: 67% 2965/4393 [4:03:36<2:16:27, 5.73s/it] + Training...: 68% 2966/4393 [4:03:41<2:15:13, 5.69s/it] + Training...: 68% 2967/4393 [4:03:47<2:14:10, 5.65s/it] + Training...: 68% 2968/4393 [4:03:52<2:13:02, 5.60s/it] + Training...: 68% 2969/4393 [4:03:58<2:11:38, 5.55s/it] + Training...: 68% 2970/4393 [4:04:03<2:10:27, 5.50s/it] + Training...: 68% 2971/4393 [4:04:08<2:09:56, 5.48s/it] + Training...: 68% 2972/4393 [4:04:14<2:08:51, 5.44s/it] + Training...: 68% 2973/4393 [4:04:19<2:07:51, 5.40s/it] + Training...: 68% 2974/4393 [4:04:24<2:06:53, 5.37s/it] + Training...: 68% 2975/4393 [4:04:30<2:05:59, 5.33s/it] + Training...: 68% 2976/4393 [4:04:35<2:04:47, 5.28s/it] + Training...: 68% 2977/4393 [4:04:40<2:04:04, 5.26s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:25:35<46:43:02, 21022.76s/it] + Training...: 68% 2977/4393 [4:04:46<2:04:04, 5.26s/it] + Training...: 68% 2978/4393 [4:04:46<2:07:36, 5.41s/it] + Training...: 68% 2979/4393 [4:04:51<2:04:40, 5.29s/it] + Training...: 68% 2980/4393 [4:04:56<2:03:08, 5.23s/it] + Training...: 68% 2981/4393 [4:05:01<2:00:59, 5.14s/it] + Training...: 68% 2982/4393 [4:05:06<1:58:59, 5.06s/it] + Training...: 68% 2983/4393 [4:05:10<1:56:47, 4.97s/it] + Training...: 68% 2984/4393 [4:05:15<1:54:15, 4.87s/it] + Training...: 68% 2985/4393 [4:05:20<1:53:13, 4.82s/it] + Training...: 68% 2986/4393 [4:05:24<1:50:37, 4.72s/it] + Training...: 68% 2987/4393 [4:05:28<1:47:37, 4.59s/it] + Training...: 68% 2988/4393 [4:05:33<1:43:42, 4.43s/it] + Training...: 68% 2989/4393 [4:05:36<1:39:35, 4.26s/it] + Training...: 68% 2990/4393 [4:05:40<1:34:37, 4.05s/it] + Training...: 68% 2991/4393 [4:05:43<1:29:30, 3.83s/it] + Training...: 68% 2992/4393 [4:05:46<1:23:54, 3.59s/it] + Training...: 68% 2993/4393 [4:05:49<1:17:54, 3.34s/it] + Training...: 68% 2994/4393 [4:05:52<1:11:53, 3.08s/it] + Training...: 68% 2995/4393 [4:05:54<1:06:05, 2.84s/it] + Training...: 68% 2996/4393 [4:05:56<1:00:04, 2.58s/it] + Training...: 68% 2997/4393 [4:05:58<53:57, 2.32s/it]  + Training...: 68% 2998/4393 [4:05:59<47:52, 2.06s/it] + Training...: 68% 2999/4393 [4:06:00<41:58, 1.81s/it] + Training...: 68% 3000/4393 [4:06:01<36:21, 1.57s/it] + Training...: 68% 3001/4393 [4:06:07<1:08:51, 2.97s/it] + Training...: 68% 3002/4393 [4:06:14<1:31:51, 3.96s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:27:10<46:43:02, 21022.76s/it] + Training...: 68% 3002/4393 [4:06:21<1:31:51, 3.96s/it] + Training...: 68% 3003/4393 [4:06:21<1:52:38, 4.86s/it] + Training...: 68% 3004/4393 [4:06:27<2:00:12, 5.19s/it] + Training...: 68% 3005/4393 [4:06:33<2:05:57, 5.45s/it] + Training...: 68% 3006/4393 [4:06:39<2:08:55, 5.58s/it] + Training...: 68% 3007/4393 [4:06:44<2:11:20, 5.69s/it] + Training...: 68% 3008/4393 [4:06:50<2:12:17, 5.73s/it] + Training...: 68% 3009/4393 [4:06:56<2:13:30, 5.79s/it] + Training...: 69% 3010/4393 [4:07:02<2:13:45, 5.80s/it] + Training...: 69% 3011/4393 [4:07:08<2:14:14, 5.83s/it] + Training...: 69% 3012/4393 [4:07:14<2:14:22, 5.84s/it] + Training...: 69% 3013/4393 [4:07:20<2:13:30, 5.80s/it] + Training...: 69% 3014/4393 [4:07:25<2:12:23, 5.76s/it] + Training...: 69% 3015/4393 [4:07:31<2:11:42, 5.73s/it] + Training...: 69% 3016/4393 [4:07:37<2:11:01, 5.71s/it] + Training...: 69% 3017/4393 [4:07:42<2:10:12, 5.68s/it] + Training...: 69% 3018/4393 [4:07:48<2:09:12, 5.64s/it] + Training...: 69% 3019/4393 [4:07:53<2:08:09, 5.60s/it] + Training...: 69% 3020/4393 [4:07:59<2:06:59, 5.55s/it] + Training...: 69% 3021/4393 [4:08:04<2:06:00, 5.51s/it] + Training...: 69% 3022/4393 [4:08:09<2:05:15, 5.48s/it] + Training...: 69% 3023/4393 [4:08:15<2:03:57, 5.43s/it] + Training...: 69% 3024/4393 [4:08:20<2:03:07, 5.40s/it] + Training...: 69% 3025/4393 [4:08:25<2:02:09, 5.36s/it] + Training...: 69% 3026/4393 [4:08:31<2:00:56, 5.31s/it] + Training...: 69% 3027/4393 [4:08:36<2:00:05, 5.28s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:29:31<46:43:02, 21022.76s/it] + Training...: 69% 3027/4393 [4:08:42<2:00:05, 5.28s/it] + Training...: 69% 3028/4393 [4:08:42<2:06:08, 5.54s/it] + Training...: 69% 3029/4393 [4:08:47<2:02:40, 5.40s/it] + Training...: 69% 3030/4393 [4:08:52<1:59:40, 5.27s/it] + Training...: 69% 3031/4393 [4:08:57<1:57:34, 5.18s/it] + Training...: 69% 3032/4393 [4:09:02<1:55:23, 5.09s/it] + Training...: 69% 3033/4393 [4:09:07<1:53:03, 4.99s/it] + Training...: 69% 3034/4393 [4:09:11<1:50:56, 4.90s/it] + Training...: 69% 3035/4393 [4:09:16<1:48:51, 4.81s/it] + Training...: 69% 3036/4393 [4:09:20<1:45:45, 4.68s/it] + Training...: 69% 3037/4393 [4:09:24<1:42:49, 4.55s/it] + Training...: 69% 3038/4393 [4:09:29<1:39:22, 4.40s/it] + Training...: 69% 3039/4393 [4:09:32<1:35:13, 4.22s/it] + Training...: 69% 3040/4393 [4:09:36<1:30:21, 4.01s/it] + Training...: 69% 3041/4393 [4:09:39<1:25:48, 3.81s/it] + Training...: 69% 3042/4393 [4:09:42<1:21:09, 3.60s/it] + Training...: 69% 3043/4393 [4:09:45<1:16:09, 3.39s/it] + Training...: 69% 3044/4393 [4:09:48<1:10:40, 3.14s/it] + Training...: 69% 3045/4393 [4:09:50<1:05:10, 2.90s/it] + Training...: 69% 3046/4393 [4:09:52<59:44, 2.66s/it]  + Training...: 69% 3047/4393 [4:09:54<53:58, 2.41s/it] + Training...: 69% 3048/4393 [4:09:56<48:27, 2.16s/it] + Training...: 69% 3049/4393 [4:09:57<42:26, 1.89s/it] + Training...: 69% 3050/4393 [4:09:58<36:29, 1.63s/it] + Training...: 69% 3051/4393 [4:10:04<1:07:24, 3.01s/it] + Training...: 69% 3052/4393 [4:10:10<1:28:39, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:31:06<46:43:02, 21022.76s/it] + Training...: 69% 3052/4393 [4:10:17<1:28:39, 3.97s/it] + Training...: 69% 3053/4393 [4:10:17<1:49:00, 4.88s/it] + Training...: 70% 3054/4393 [4:10:24<1:57:55, 5.28s/it] + Training...: 70% 3055/4393 [4:10:30<2:02:39, 5.50s/it] + Training...: 70% 3056/4393 [4:10:35<2:04:53, 5.60s/it] + Training...: 70% 3057/4393 [4:10:41<2:06:48, 5.70s/it] + Training...: 70% 3058/4393 [4:10:47<2:07:43, 5.74s/it] + Training...: 70% 3059/4393 [4:10:53<2:08:15, 5.77s/it] + Training...: 70% 3060/4393 [4:10:59<2:08:54, 5.80s/it] + Training...: 70% 3061/4393 [4:11:05<2:08:49, 5.80s/it] + Training...: 70% 3062/4393 [4:11:10<2:08:23, 5.79s/it] + Training...: 70% 3063/4393 [4:11:16<2:08:03, 5.78s/it] + Training...: 70% 3064/4393 [4:11:22<2:07:04, 5.74s/it] + Training...: 70% 3065/4393 [4:11:27<2:06:39, 5.72s/it] + Training...: 70% 3066/4393 [4:11:33<2:06:24, 5.72s/it] + Training...: 70% 3067/4393 [4:11:39<2:05:56, 5.70s/it] + Training...: 70% 3068/4393 [4:11:45<2:06:26, 5.73s/it] + Training...: 70% 3069/4393 [4:11:50<2:05:20, 5.68s/it] + Training...: 70% 3070/4393 [4:11:56<2:03:13, 5.59s/it] + Training...: 70% 3071/4393 [4:12:01<2:02:10, 5.55s/it] + Training...: 70% 3072/4393 [4:12:06<2:01:09, 5.50s/it] + Training...: 70% 3073/4393 [4:12:12<1:59:33, 5.43s/it] + Training...: 70% 3074/4393 [4:12:17<1:58:22, 5.38s/it] + Training...: 70% 3075/4393 [4:12:22<1:57:06, 5.33s/it] + Training...: 70% 3076/4393 [4:12:27<1:55:44, 5.27s/it] + Training...: 70% 3077/4393 [4:12:32<1:54:58, 5.24s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:33:27<46:43:02, 21022.76s/it] + Training...: 70% 3077/4393 [4:12:38<1:54:58, 5.24s/it] + Training...: 70% 3078/4393 [4:12:38<1:58:24, 5.40s/it] + Training...: 70% 3079/4393 [4:12:43<1:56:00, 5.30s/it] + Training...: 70% 3080/4393 [4:12:48<1:53:21, 5.18s/it] + Training...: 70% 3081/4393 [4:12:53<1:51:22, 5.09s/it] + Training...: 70% 3082/4393 [4:12:58<1:49:23, 5.01s/it] + Training...: 70% 3083/4393 [4:13:03<1:47:34, 4.93s/it] + Training...: 70% 3084/4393 [4:13:07<1:45:36, 4.84s/it] + Training...: 70% 3085/4393 [4:13:12<1:43:39, 4.76s/it] + Training...: 70% 3086/4393 [4:13:16<1:41:07, 4.64s/it] + Training...: 70% 3087/4393 [4:13:20<1:38:26, 4.52s/it] + Training...: 70% 3088/4393 [4:13:24<1:34:57, 4.37s/it] + Training...: 70% 3089/4393 [4:13:28<1:31:31, 4.21s/it] + Training...: 70% 3090/4393 [4:13:32<1:27:52, 4.05s/it] + Training...: 70% 3091/4393 [4:13:35<1:23:50, 3.86s/it] + Training...: 70% 3092/4393 [4:13:39<1:18:42, 3.63s/it] + Training...: 70% 3093/4393 [4:13:41<1:13:14, 3.38s/it] + Training...: 70% 3094/4393 [4:13:44<1:07:28, 3.12s/it] + Training...: 70% 3095/4393 [4:13:46<1:01:45, 2.85s/it] + Training...: 70% 3096/4393 [4:13:48<56:21, 2.61s/it]  + Training...: 70% 3097/4393 [4:13:50<50:38, 2.34s/it] + Training...: 71% 3098/4393 [4:13:51<44:59, 2.08s/it] + Training...: 71% 3099/4393 [4:13:53<39:26, 1.83s/it] + Training...: 71% 3100/4393 [4:13:54<34:03, 1.58s/it] + Training...: 71% 3101/4393 [4:14:00<1:04:01, 2.97s/it] + Training...: 71% 3102/4393 [4:14:06<1:25:20, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:35:02<46:43:02, 21022.76s/it] + Training...: 71% 3102/4393 [4:14:13<1:25:20, 3.97s/it] + Training...: 71% 3103/4393 [4:14:13<1:43:36, 4.82s/it] + Training...: 71% 3104/4393 [4:14:19<1:51:38, 5.20s/it] + Training...: 71% 3105/4393 [4:14:25<1:56:48, 5.44s/it] + Training...: 71% 3106/4393 [4:14:31<1:59:54, 5.59s/it] + Training...: 71% 3107/4393 [4:14:37<2:01:49, 5.68s/it] + Training...: 71% 3108/4393 [4:14:43<2:02:36, 5.72s/it] + Training...: 71% 3109/4393 [4:14:48<2:03:20, 5.76s/it] + Training...: 71% 3110/4393 [4:14:54<2:03:12, 5.76s/it] + Training...: 71% 3111/4393 [4:15:00<2:03:18, 5.77s/it] + Training...: 71% 3112/4393 [4:15:06<2:03:22, 5.78s/it] + Training...: 71% 3113/4393 [4:15:12<2:04:47, 5.85s/it] + Training...: 71% 3114/4393 [4:15:17<2:03:36, 5.80s/it] + Training...: 71% 3115/4393 [4:15:23<2:02:25, 5.75s/it] + Training...: 71% 3116/4393 [4:15:29<2:01:35, 5.71s/it] + Training...: 71% 3117/4393 [4:15:34<2:00:31, 5.67s/it] + Training...: 71% 3118/4393 [4:15:40<1:59:29, 5.62s/it] + Training...: 71% 3119/4393 [4:15:45<1:59:04, 5.61s/it] + Training...: 71% 3120/4393 [4:15:51<1:58:11, 5.57s/it] + Training...: 71% 3121/4393 [4:15:56<1:57:06, 5.52s/it] + Training...: 71% 3122/4393 [4:16:02<1:55:45, 5.46s/it] + Training...: 71% 3123/4393 [4:16:07<1:55:13, 5.44s/it] + Training...: 71% 3124/4393 [4:16:12<1:54:06, 5.39s/it] + Training...: 71% 3125/4393 [4:16:18<1:53:15, 5.36s/it] + Training...: 71% 3126/4393 [4:16:23<1:51:59, 5.30s/it] + Training...: 71% 3127/4393 [4:16:28<1:51:13, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:37:23<46:43:02, 21022.76s/it] + Training...: 71% 3127/4393 [4:16:34<1:51:13, 5.27s/it] + Training...: 71% 3128/4393 [4:16:34<1:55:13, 5.47s/it] + Training...: 71% 3129/4393 [4:16:39<1:52:37, 5.35s/it] + Training...: 71% 3130/4393 [4:16:44<1:50:27, 5.25s/it] + Training...: 71% 3131/4393 [4:16:49<1:49:32, 5.21s/it] + Training...: 71% 3132/4393 [4:16:54<1:47:13, 5.10s/it] + Training...: 71% 3133/4393 [4:16:59<1:45:27, 5.02s/it] + Training...: 71% 3134/4393 [4:17:03<1:43:22, 4.93s/it] + Training...: 71% 3135/4393 [4:17:08<1:41:21, 4.83s/it] + Training...: 71% 3136/4393 [4:17:12<1:38:28, 4.70s/it] + Training...: 71% 3137/4393 [4:17:17<1:35:48, 4.58s/it] + Training...: 71% 3138/4393 [4:17:21<1:32:07, 4.40s/it] + Training...: 71% 3139/4393 [4:17:25<1:28:40, 4.24s/it] + Training...: 71% 3140/4393 [4:17:28<1:25:04, 4.07s/it] + Training...: 72% 3141/4393 [4:17:32<1:20:50, 3.87s/it] + Training...: 72% 3142/4393 [4:17:35<1:16:43, 3.68s/it] + Training...: 72% 3143/4393 [4:17:38<1:12:26, 3.48s/it] + Training...: 72% 3144/4393 [4:17:41<1:07:38, 3.25s/it] + Training...: 72% 3145/4393 [4:17:43<1:02:34, 3.01s/it] + Training...: 72% 3146/4393 [4:17:45<57:07, 2.75s/it]  + Training...: 72% 3147/4393 [4:17:47<51:35, 2.48s/it] + Training...: 72% 3148/4393 [4:17:49<46:02, 2.22s/it] + Training...: 72% 3149/4393 [4:17:50<40:11, 1.94s/it] + Training...: 72% 3150/4393 [4:17:51<34:20, 1.66s/it] + Training...: 72% 3151/4393 [4:17:57<1:02:33, 3.02s/it] + Training...: 72% 3152/4393 [4:18:03<1:22:12, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:38:59<46:43:02, 21022.76s/it] + Training...: 72% 3152/4393 [4:18:10<1:22:12, 3.97s/it] + Training...: 72% 3153/4393 [4:18:10<1:40:37, 4.87s/it] + Training...: 72% 3154/4393 [4:18:16<1:47:38, 5.21s/it] + Training...: 72% 3155/4393 [4:18:23<1:54:17, 5.54s/it] + Training...: 72% 3156/4393 [4:18:29<1:56:38, 5.66s/it] + Training...: 72% 3157/4393 [4:18:35<1:57:58, 5.73s/it] + Training...: 72% 3158/4393 [4:18:40<1:58:19, 5.75s/it] + Training...: 72% 3159/4393 [4:18:46<1:58:50, 5.78s/it] + Training...: 72% 3160/4393 [4:18:52<1:58:22, 5.76s/it] + Training...: 72% 3161/4393 [4:18:58<1:58:05, 5.75s/it] + Training...: 72% 3162/4393 [4:19:03<1:57:31, 5.73s/it] + Training...: 72% 3163/4393 [4:19:09<1:56:59, 5.71s/it] + Training...: 72% 3164/4393 [4:19:15<1:56:18, 5.68s/it] + Training...: 72% 3165/4393 [4:19:20<1:56:01, 5.67s/it] + Training...: 72% 3166/4393 [4:19:26<1:55:10, 5.63s/it] + Training...: 72% 3167/4393 [4:19:31<1:54:57, 5.63s/it] + Training...: 72% 3168/4393 [4:19:37<1:54:06, 5.59s/it] + Training...: 72% 3169/4393 [4:19:42<1:53:13, 5.55s/it] + Training...: 72% 3170/4393 [4:19:48<1:53:31, 5.57s/it] + Training...: 72% 3171/4393 [4:19:53<1:52:58, 5.55s/it] + Training...: 72% 3172/4393 [4:19:59<1:51:28, 5.48s/it] + Training...: 72% 3173/4393 [4:20:04<1:50:36, 5.44s/it] + Training...: 72% 3174/4393 [4:20:09<1:49:25, 5.39s/it] + Training...: 72% 3175/4393 [4:20:15<1:48:27, 5.34s/it] + Training...: 72% 3176/4393 [4:20:20<1:47:23, 5.29s/it] + Training...: 72% 3177/4393 [4:20:25<1:46:01, 5.23s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:41:20<46:43:02, 21022.76s/it] + Training...: 72% 3177/4393 [4:20:31<1:46:01, 5.23s/it] + Training...: 72% 3178/4393 [4:20:31<1:49:02, 5.39s/it] + Training...: 72% 3179/4393 [4:20:36<1:46:19, 5.26s/it] + Training...: 72% 3180/4393 [4:20:41<1:44:26, 5.17s/it] + Training...: 72% 3181/4393 [4:20:45<1:42:45, 5.09s/it] + Training...: 72% 3182/4393 [4:20:50<1:40:43, 4.99s/it] + Training...: 72% 3183/4393 [4:20:55<1:39:11, 4.92s/it] + Training...: 72% 3184/4393 [4:21:00<1:37:24, 4.83s/it] + Training...: 73% 3185/4393 [4:21:04<1:35:42, 4.75s/it] + Training...: 73% 3186/4393 [4:21:08<1:33:11, 4.63s/it] + Training...: 73% 3187/4393 [4:21:13<1:30:40, 4.51s/it] + Training...: 73% 3188/4393 [4:21:17<1:27:34, 4.36s/it] + Training...: 73% 3189/4393 [4:21:21<1:24:08, 4.19s/it] + Training...: 73% 3190/4393 [4:21:24<1:20:37, 4.02s/it] + Training...: 73% 3191/4393 [4:21:28<1:19:15, 3.96s/it] + Training...: 73% 3192/4393 [4:21:31<1:14:25, 3.72s/it] + Training...: 73% 3193/4393 [4:21:34<1:09:12, 3.46s/it] + Training...: 73% 3194/4393 [4:21:37<1:04:07, 3.21s/it] + Training...: 73% 3195/4393 [4:21:39<58:38, 2.94s/it]  + Training...: 73% 3196/4393 [4:21:41<53:23, 2.68s/it] + Training...: 73% 3197/4393 [4:21:43<48:41, 2.44s/it] + Training...: 73% 3198/4393 [4:21:44<43:28, 2.18s/it] + Training...: 73% 3199/4393 [4:21:46<38:09, 1.92s/it] + Training...: 73% 3200/4393 [4:21:47<32:38, 1.64s/it] + Training...: 73% 3201/4393 [4:21:53<59:36, 3.00s/it] + Training...: 73% 3202/4393 [4:21:59<1:19:03, 3.98s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:42:55<46:43:02, 21022.76s/it] + Training...: 73% 3202/4393 [4:22:06<1:19:03, 3.98s/it] + Training...: 73% 3203/4393 [4:22:06<1:35:52, 4.83s/it] + Training...: 73% 3204/4393 [4:22:12<1:43:16, 5.21s/it] + Training...: 73% 3205/4393 [4:22:18<1:47:39, 5.44s/it] + Training...: 73% 3206/4393 [4:22:24<1:50:11, 5.57s/it] + Training...: 73% 3207/4393 [4:22:30<1:51:55, 5.66s/it] + Training...: 73% 3208/4393 [4:22:36<1:52:57, 5.72s/it] + Training...: 73% 3209/4393 [4:22:42<1:53:54, 5.77s/it] + Training...: 73% 3210/4393 [4:22:48<1:54:57, 5.83s/it] + Training...: 73% 3211/4393 [4:22:53<1:54:23, 5.81s/it] + Training...: 73% 3212/4393 [4:22:59<1:53:49, 5.78s/it] + Training...: 73% 3213/4393 [4:23:05<1:53:44, 5.78s/it] + Training...: 73% 3214/4393 [4:23:11<1:54:28, 5.83s/it] + Training...: 73% 3215/4393 [4:23:16<1:53:46, 5.80s/it] + Training...: 73% 3216/4393 [4:23:22<1:52:24, 5.73s/it] + Training...: 73% 3217/4393 [4:23:28<1:51:10, 5.67s/it] + Training...: 73% 3218/4393 [4:23:33<1:50:08, 5.62s/it] + Training...: 73% 3219/4393 [4:23:39<1:49:18, 5.59s/it] + Training...: 73% 3220/4393 [4:23:44<1:48:32, 5.55s/it] + Training...: 73% 3221/4393 [4:23:49<1:47:39, 5.51s/it] + Training...: 73% 3222/4393 [4:23:55<1:46:42, 5.47s/it] + Training...: 73% 3223/4393 [4:24:00<1:45:46, 5.42s/it] + Training...: 73% 3224/4393 [4:24:05<1:44:51, 5.38s/it] + Training...: 73% 3225/4393 [4:24:11<1:44:11, 5.35s/it] + Training...: 73% 3226/4393 [4:24:16<1:43:13, 5.31s/it] + Training...: 73% 3227/4393 [4:24:21<1:42:25, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:45:16<46:43:02, 21022.76s/it] + Training...: 73% 3227/4393 [4:24:27<1:42:25, 5.27s/it] + Training...: 73% 3228/4393 [4:24:27<1:45:31, 5.43s/it] + Training...: 74% 3229/4393 [4:24:32<1:42:45, 5.30s/it] + Training...: 74% 3230/4393 [4:24:37<1:40:13, 5.17s/it] + Training...: 74% 3231/4393 [4:24:42<1:38:18, 5.08s/it] + Training...: 74% 3232/4393 [4:24:46<1:36:17, 4.98s/it] + Training...: 74% 3233/4393 [4:24:51<1:34:42, 4.90s/it] + Training...: 74% 3234/4393 [4:24:56<1:33:41, 4.85s/it] + Training...: 74% 3235/4393 [4:25:00<1:31:43, 4.75s/it] + Training...: 74% 3236/4393 [4:25:05<1:29:11, 4.63s/it] + Training...: 74% 3237/4393 [4:25:09<1:26:21, 4.48s/it] + Training...: 74% 3238/4393 [4:25:13<1:22:49, 4.30s/it] + Training...: 74% 3239/4393 [4:25:16<1:19:17, 4.12s/it] + Training...: 74% 3240/4393 [4:25:20<1:15:37, 3.94s/it] + Training...: 74% 3241/4393 [4:25:23<1:11:30, 3.72s/it] + Training...: 74% 3242/4393 [4:25:26<1:07:03, 3.50s/it] + Training...: 74% 3243/4393 [4:25:29<1:02:48, 3.28s/it] + Training...: 74% 3244/4393 [4:25:31<58:19, 3.05s/it]  + Training...: 74% 3245/4393 [4:25:34<53:51, 2.82s/it] + Training...: 74% 3246/4393 [4:25:36<49:09, 2.57s/it] + Training...: 74% 3247/4393 [4:25:37<44:37, 2.34s/it] + Training...: 74% 3248/4393 [4:25:39<40:07, 2.10s/it] + Training...: 74% 3249/4393 [4:25:40<35:12, 1.85s/it] + Training...: 74% 3250/4393 [4:25:41<30:15, 1.59s/it] + Training...: 74% 3251/4393 [4:25:47<56:14, 2.96s/it] + Training...: 74% 3252/4393 [4:25:54<1:14:41, 3.93s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:46:50<46:43:02, 21022.76s/it] + Training...: 74% 3252/4393 [4:26:00<1:14:41, 3.93s/it] + Training...: 74% 3253/4393 [4:26:00<1:31:28, 4.81s/it] + Training...: 74% 3254/4393 [4:26:06<1:38:18, 5.18s/it] + Training...: 74% 3255/4393 [4:26:12<1:42:36, 5.41s/it] + Training...: 74% 3256/4393 [4:26:18<1:45:21, 5.56s/it] + Training...: 74% 3257/4393 [4:26:24<1:47:30, 5.68s/it] + Training...: 74% 3258/4393 [4:26:30<1:48:24, 5.73s/it] + Training...: 74% 3259/4393 [4:26:36<1:49:06, 5.77s/it] + Training...: 74% 3260/4393 [4:26:42<1:49:19, 5.79s/it] + Training...: 74% 3261/4393 [4:26:48<1:51:43, 5.92s/it] + Training...: 74% 3262/4393 [4:26:54<1:51:43, 5.93s/it] + Training...: 74% 3263/4393 [4:27:00<1:51:06, 5.90s/it] + Training...: 74% 3264/4393 [4:27:06<1:50:22, 5.87s/it] + Training...: 74% 3265/4393 [4:27:11<1:49:54, 5.85s/it] + Training...: 74% 3266/4393 [4:27:17<1:49:22, 5.82s/it] + Training...: 74% 3267/4393 [4:27:23<1:48:45, 5.80s/it] + Training...: 74% 3268/4393 [4:27:29<1:47:28, 5.73s/it] + Training...: 74% 3269/4393 [4:27:34<1:46:44, 5.70s/it] + Training...: 74% 3270/4393 [4:27:40<1:45:49, 5.65s/it] + Training...: 74% 3271/4393 [4:27:45<1:45:43, 5.65s/it] + Training...: 74% 3272/4393 [4:27:51<1:44:44, 5.61s/it] + Training...: 75% 3273/4393 [4:27:56<1:43:58, 5.57s/it] + Training...: 75% 3274/4393 [4:28:02<1:43:00, 5.52s/it] + Training...: 75% 3275/4393 [4:28:07<1:42:03, 5.48s/it] + Training...: 75% 3276/4393 [4:28:12<1:41:13, 5.44s/it] + Training...: 75% 3277/4393 [4:28:18<1:40:21, 5.40s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:49:13<46:43:02, 21022.76s/it] + Training...: 75% 3277/4393 [4:28:24<1:40:21, 5.40s/it] + Training...: 75% 3278/4393 [4:28:24<1:44:13, 5.61s/it] + Training...: 75% 3279/4393 [4:28:29<1:41:22, 5.46s/it] + Training...: 75% 3280/4393 [4:28:34<1:38:11, 5.29s/it] + Training...: 75% 3281/4393 [4:28:39<1:35:32, 5.16s/it] + Training...: 75% 3282/4393 [4:28:43<1:33:10, 5.03s/it] + Training...: 75% 3283/4393 [4:28:48<1:31:19, 4.94s/it] + Training...: 75% 3284/4393 [4:28:53<1:29:17, 4.83s/it] + Training...: 75% 3285/4393 [4:28:57<1:27:11, 4.72s/it] + Training...: 75% 3286/4393 [4:29:02<1:25:12, 4.62s/it] + Training...: 75% 3287/4393 [4:29:06<1:23:12, 4.51s/it] + Training...: 75% 3288/4393 [4:29:10<1:20:32, 4.37s/it] + Training...: 75% 3289/4393 [4:29:14<1:17:19, 4.20s/it] + Training...: 75% 3290/4393 [4:29:17<1:13:32, 4.00s/it] + Training...: 75% 3291/4393 [4:29:21<1:09:38, 3.79s/it] + Training...: 75% 3292/4393 [4:29:24<1:05:26, 3.57s/it] + Training...: 75% 3293/4393 [4:29:26<1:01:13, 3.34s/it] + Training...: 75% 3294/4393 [4:29:29<56:29, 3.08s/it]  + Training...: 75% 3295/4393 [4:29:31<51:54, 2.84s/it] + Training...: 75% 3296/4393 [4:29:33<47:19, 2.59s/it] + Training...: 75% 3297/4393 [4:29:35<42:55, 2.35s/it] + Training...: 75% 3298/4393 [4:29:37<38:31, 2.11s/it] + Training...: 75% 3299/4393 [4:29:38<33:55, 1.86s/it] + Training...: 75% 3300/4393 [4:29:39<29:10, 1.60s/it] + Training...: 75% 3301/4393 [4:29:45<54:17, 2.98s/it] + Training...: 75% 3302/4393 [4:29:51<1:11:38, 3.94s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:50:47<46:43:02, 21022.76s/it] + Training...: 75% 3302/4393 [4:29:58<1:11:38, 3.94s/it] + Training...: 75% 3303/4393 [4:29:58<1:28:27, 4.87s/it] + Training...: 75% 3304/4393 [4:30:04<1:35:03, 5.24s/it] + Training...: 75% 3305/4393 [4:30:10<1:38:55, 5.46s/it] + Training...: 75% 3306/4393 [4:30:16<1:41:25, 5.60s/it] + Training...: 75% 3307/4393 [4:30:22<1:43:15, 5.70s/it] + Training...: 75% 3308/4393 [4:30:28<1:44:28, 5.78s/it] + Training...: 75% 3309/4393 [4:30:34<1:44:42, 5.80s/it] + Training...: 75% 3310/4393 [4:30:40<1:44:32, 5.79s/it] + Training...: 75% 3311/4393 [4:30:46<1:44:26, 5.79s/it] + Training...: 75% 3312/4393 [4:30:51<1:44:20, 5.79s/it] + Training...: 75% 3313/4393 [4:30:57<1:43:59, 5.78s/it] + Training...: 75% 3314/4393 [4:31:03<1:43:11, 5.74s/it] + Training...: 75% 3315/4393 [4:31:08<1:42:31, 5.71s/it] + Training...: 75% 3316/4393 [4:31:14<1:41:46, 5.67s/it] + Training...: 76% 3317/4393 [4:31:20<1:41:32, 5.66s/it] + Training...: 76% 3318/4393 [4:31:25<1:41:37, 5.67s/it] + Training...: 76% 3319/4393 [4:31:31<1:41:09, 5.65s/it] + Training...: 76% 3320/4393 [4:31:36<1:40:02, 5.59s/it] + Training...: 76% 3321/4393 [4:31:42<1:38:55, 5.54s/it] + Training...: 76% 3322/4393 [4:31:47<1:37:49, 5.48s/it] + Training...: 76% 3323/4393 [4:31:52<1:37:19, 5.46s/it] + Training...: 76% 3324/4393 [4:31:58<1:36:13, 5.40s/it] + Training...: 76% 3325/4393 [4:32:03<1:35:11, 5.35s/it] + Training...: 76% 3326/4393 [4:32:08<1:34:17, 5.30s/it] + Training...: 76% 3327/4393 [4:32:13<1:33:36, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:53:08<46:43:02, 21022.76s/it] + Training...: 76% 3327/4393 [4:32:19<1:33:36, 5.27s/it] + Training...: 76% 3328/4393 [4:32:19<1:36:47, 5.45s/it] + Training...: 76% 3329/4393 [4:32:24<1:34:28, 5.33s/it] + Training...: 76% 3330/4393 [4:32:29<1:32:30, 5.22s/it] + Training...: 76% 3331/4393 [4:32:34<1:30:39, 5.12s/it] + Training...: 76% 3332/4393 [4:32:39<1:28:42, 5.02s/it] + Training...: 76% 3333/4393 [4:32:44<1:27:14, 4.94s/it] + Training...: 76% 3334/4393 [4:32:48<1:25:38, 4.85s/it] + Training...: 76% 3335/4393 [4:32:53<1:24:10, 4.77s/it] + Training...: 76% 3336/4393 [4:32:57<1:22:20, 4.67s/it] + Training...: 76% 3337/4393 [4:33:02<1:20:30, 4.57s/it] + Training...: 76% 3338/4393 [4:33:06<1:18:44, 4.48s/it] + Training...: 76% 3339/4393 [4:33:10<1:16:15, 4.34s/it] + Training...: 76% 3340/4393 [4:33:14<1:12:30, 4.13s/it] + Training...: 76% 3341/4393 [4:33:17<1:08:27, 3.90s/it] + Training...: 76% 3342/4393 [4:33:20<1:04:06, 3.66s/it] + Training...: 76% 3343/4393 [4:33:23<59:39, 3.41s/it]  + Training...: 76% 3344/4393 [4:33:25<54:54, 3.14s/it] + Training...: 76% 3345/4393 [4:33:28<49:56, 2.86s/it] + Training...: 76% 3346/4393 [4:33:30<45:28, 2.61s/it] + Training...: 76% 3347/4393 [4:33:31<40:53, 2.35s/it] + Training...: 76% 3348/4393 [4:33:33<36:35, 2.10s/it] + Training...: 76% 3349/4393 [4:33:34<32:09, 1.85s/it] + Training...: 76% 3350/4393 [4:33:35<27:36, 1.59s/it] + Training...: 76% 3351/4393 [4:33:41<51:10, 2.95s/it] + Training...: 76% 3352/4393 [4:33:47<1:07:52, 3.91s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:54:43<46:43:02, 21022.76s/it] + Training...: 76% 3352/4393 [4:33:54<1:07:52, 3.91s/it] + Training...: 76% 3353/4393 [4:33:54<1:23:10, 4.80s/it] + Training...: 76% 3354/4393 [4:34:00<1:29:10, 5.15s/it] + Training...: 76% 3355/4393 [4:34:06<1:33:43, 5.42s/it] + Training...: 76% 3356/4393 [4:34:12<1:36:21, 5.58s/it] + Training...: 76% 3357/4393 [4:34:18<1:38:18, 5.69s/it] + Training...: 76% 3358/4393 [4:34:24<1:39:03, 5.74s/it] + Training...: 76% 3359/4393 [4:34:30<1:39:31, 5.78s/it] + Training...: 76% 3360/4393 [4:34:36<1:39:46, 5.80s/it] + Training...: 77% 3361/4393 [4:34:42<1:39:39, 5.79s/it] + Training...: 77% 3362/4393 [4:34:47<1:39:54, 5.81s/it] + Training...: 77% 3363/4393 [4:34:53<1:40:33, 5.86s/it] + Training...: 77% 3364/4393 [4:34:59<1:39:37, 5.81s/it] + Training...: 77% 3365/4393 [4:35:05<1:39:09, 5.79s/it] + Training...: 77% 3366/4393 [4:35:10<1:37:53, 5.72s/it] + Training...: 77% 3367/4393 [4:35:16<1:37:06, 5.68s/it] + Training...: 77% 3368/4393 [4:35:21<1:36:03, 5.62s/it] + Training...: 77% 3369/4393 [4:35:27<1:35:29, 5.60s/it] + Training...: 77% 3370/4393 [4:35:32<1:34:37, 5.55s/it] + Training...: 77% 3371/4393 [4:35:38<1:33:49, 5.51s/it] + Training...: 77% 3372/4393 [4:35:43<1:32:49, 5.46s/it] + Training...: 77% 3373/4393 [4:35:49<1:32:16, 5.43s/it] + Training...: 77% 3374/4393 [4:35:54<1:31:37, 5.39s/it] + Training...: 77% 3375/4393 [4:35:59<1:31:00, 5.36s/it] + Training...: 77% 3376/4393 [4:36:04<1:30:07, 5.32s/it] + Training...: 77% 3377/4393 [4:36:09<1:29:18, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:57:04<46:43:02, 21022.76s/it] + Training...: 77% 3377/4393 [4:36:15<1:29:18, 5.27s/it] + Training...: 77% 3378/4393 [4:36:15<1:31:49, 5.43s/it] + Training...: 77% 3379/4393 [4:36:20<1:29:53, 5.32s/it] + Training...: 77% 3380/4393 [4:36:25<1:27:56, 5.21s/it] + Training...: 77% 3381/4393 [4:36:30<1:26:29, 5.13s/it] + Training...: 77% 3382/4393 [4:36:35<1:24:41, 5.03s/it] + Training...: 77% 3383/4393 [4:36:40<1:23:59, 4.99s/it] + Training...: 77% 3384/4393 [4:36:45<1:22:36, 4.91s/it] + Training...: 77% 3385/4393 [4:36:49<1:20:47, 4.81s/it] + Training...: 77% 3386/4393 [4:36:54<1:18:49, 4.70s/it] + Training...: 77% 3387/4393 [4:36:58<1:16:58, 4.59s/it] + Training...: 77% 3388/4393 [4:37:02<1:14:31, 4.45s/it] + Training...: 77% 3389/4393 [4:37:06<1:11:47, 4.29s/it] + Training...: 77% 3390/4393 [4:37:10<1:08:36, 4.10s/it] + Training...: 77% 3391/4393 [4:37:13<1:05:34, 3.93s/it] + Training...: 77% 3392/4393 [4:37:16<1:01:56, 3.71s/it] + Training...: 77% 3393/4393 [4:37:19<58:05, 3.49s/it]  + Training...: 77% 3394/4393 [4:37:22<53:52, 3.24s/it] + Training...: 77% 3395/4393 [4:37:24<49:30, 2.98s/it] + Training...: 77% 3396/4393 [4:37:27<45:13, 2.72s/it] + Training...: 77% 3397/4393 [4:37:28<40:32, 2.44s/it] + Training...: 77% 3398/4393 [4:37:30<36:03, 2.17s/it] + Training...: 77% 3399/4393 [4:37:31<31:27, 1.90s/it] + Training...: 77% 3400/4393 [4:37:32<26:58, 1.63s/it] + Training...: 77% 3401/4393 [4:37:38<49:21, 2.98s/it] + Training...: 77% 3402/4393 [4:37:44<1:04:54, 3.93s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [27:58:40<46:43:02, 21022.76s/it] + Training...: 77% 3402/4393 [4:37:51<1:04:54, 3.93s/it] + Training...: 77% 3403/4393 [4:37:51<1:19:18, 4.81s/it] + Training...: 77% 3404/4393 [4:37:57<1:24:42, 5.14s/it] + Training...: 78% 3405/4393 [4:38:03<1:28:39, 5.38s/it] + Training...: 78% 3406/4393 [4:38:09<1:31:25, 5.56s/it] + Training...: 78% 3407/4393 [4:38:15<1:33:13, 5.67s/it] + Training...: 78% 3408/4393 [4:38:21<1:35:16, 5.80s/it] + Training...: 78% 3409/4393 [4:38:27<1:36:12, 5.87s/it] + Training...: 78% 3410/4393 [4:38:33<1:35:55, 5.85s/it] + Training...: 78% 3411/4393 [4:38:39<1:35:22, 5.83s/it] + Training...: 78% 3412/4393 [4:38:44<1:34:43, 5.79s/it] + Training...: 78% 3413/4393 [4:38:50<1:33:57, 5.75s/it] + Training...: 78% 3414/4393 [4:38:56<1:33:22, 5.72s/it] + Training...: 78% 3415/4393 [4:39:01<1:32:49, 5.69s/it] + Training...: 78% 3416/4393 [4:39:07<1:32:29, 5.68s/it] + Training...: 78% 3417/4393 [4:39:13<1:32:02, 5.66s/it] + Training...: 78% 3418/4393 [4:39:18<1:31:14, 5.61s/it] + Training...: 78% 3419/4393 [4:39:24<1:30:25, 5.57s/it] + Training...: 78% 3420/4393 [4:39:29<1:29:32, 5.52s/it] + Training...: 78% 3421/4393 [4:39:34<1:28:58, 5.49s/it] + Training...: 78% 3422/4393 [4:39:40<1:28:08, 5.45s/it] + Training...: 78% 3423/4393 [4:39:45<1:28:40, 5.49s/it] + Training...: 78% 3424/4393 [4:39:51<1:28:06, 5.46s/it] + Training...: 78% 3425/4393 [4:39:56<1:27:00, 5.39s/it] + Training...: 78% 3426/4393 [4:40:01<1:25:51, 5.33s/it] + Training...: 78% 3427/4393 [4:40:06<1:24:37, 5.26s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:01:01<46:43:02, 21022.76s/it] + Training...: 78% 3427/4393 [4:40:12<1:24:37, 5.26s/it] + Training...: 78% 3428/4393 [4:40:12<1:27:06, 5.42s/it] + Training...: 78% 3429/4393 [4:40:17<1:24:42, 5.27s/it] + Training...: 78% 3430/4393 [4:40:22<1:22:44, 5.15s/it] + Training...: 78% 3431/4393 [4:40:27<1:20:55, 5.05s/it] + Training...: 78% 3432/4393 [4:40:32<1:19:40, 4.97s/it] + Training...: 78% 3433/4393 [4:40:36<1:18:22, 4.90s/it] + Training...: 78% 3434/4393 [4:40:41<1:16:32, 4.79s/it] + Training...: 78% 3435/4393 [4:40:45<1:14:54, 4.69s/it] + Training...: 78% 3436/4393 [4:40:50<1:12:58, 4.58s/it] + Training...: 78% 3437/4393 [4:40:54<1:11:01, 4.46s/it] + Training...: 78% 3438/4393 [4:40:58<1:08:23, 4.30s/it] + Training...: 78% 3439/4393 [4:41:01<1:06:12, 4.16s/it] + Training...: 78% 3440/4393 [4:41:05<1:03:19, 3.99s/it] + Training...: 78% 3441/4393 [4:41:08<1:00:18, 3.80s/it] + Training...: 78% 3442/4393 [4:41:11<56:39, 3.57s/it]  + Training...: 78% 3443/4393 [4:41:14<52:55, 3.34s/it] + Training...: 78% 3444/4393 [4:41:17<48:44, 3.08s/it] + Training...: 78% 3445/4393 [4:41:19<44:41, 2.83s/it] + Training...: 78% 3446/4393 [4:41:21<40:40, 2.58s/it] + Training...: 78% 3447/4393 [4:41:23<36:31, 2.32s/it] + Training...: 78% 3448/4393 [4:41:24<32:23, 2.06s/it] + Training...: 79% 3449/4393 [4:41:25<28:21, 1.80s/it] + Training...: 79% 3450/4393 [4:41:26<24:29, 1.56s/it] + Training...: 79% 3451/4393 [4:41:33<47:06, 3.00s/it] + Training...: 79% 3452/4393 [4:41:39<1:02:31, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:02:35<46:43:02, 21022.76s/it] + Training...: 79% 3452/4393 [4:41:46<1:02:31, 3.99s/it] + Training...: 79% 3453/4393 [4:41:46<1:16:05, 4.86s/it] + Training...: 79% 3454/4393 [4:41:52<1:21:09, 5.19s/it] + Training...: 79% 3455/4393 [4:41:58<1:24:34, 5.41s/it] + Training...: 79% 3456/4393 [4:42:04<1:26:58, 5.57s/it] + Training...: 79% 3457/4393 [4:42:10<1:28:40, 5.68s/it] + Training...: 79% 3458/4393 [4:42:16<1:29:30, 5.74s/it] + Training...: 79% 3459/4393 [4:42:21<1:30:06, 5.79s/it] + Training...: 79% 3460/4393 [4:42:27<1:30:02, 5.79s/it] + Training...: 79% 3461/4393 [4:42:33<1:30:15, 5.81s/it] + Training...: 79% 3462/4393 [4:42:39<1:29:45, 5.78s/it] + Training...: 79% 3463/4393 [4:42:45<1:29:36, 5.78s/it] + Training...: 79% 3464/4393 [4:42:50<1:29:00, 5.75s/it] + Training...: 79% 3465/4393 [4:42:56<1:28:27, 5.72s/it] + Training...: 79% 3466/4393 [4:43:02<1:28:06, 5.70s/it] + Training...: 79% 3467/4393 [4:43:07<1:27:26, 5.67s/it] + Training...: 79% 3468/4393 [4:43:13<1:26:34, 5.62s/it] + Training...: 79% 3469/4393 [4:43:18<1:25:56, 5.58s/it] + Training...: 79% 3470/4393 [4:43:24<1:25:05, 5.53s/it] + Training...: 79% 3471/4393 [4:43:29<1:24:30, 5.50s/it] + Training...: 79% 3472/4393 [4:43:34<1:23:48, 5.46s/it] + Training...: 79% 3473/4393 [4:43:40<1:23:05, 5.42s/it] + Training...: 79% 3474/4393 [4:43:45<1:22:10, 5.36s/it] + Training...: 79% 3475/4393 [4:43:50<1:21:16, 5.31s/it] + Training...: 79% 3476/4393 [4:43:55<1:20:22, 5.26s/it] + Training...: 79% 3477/4393 [4:44:00<1:19:43, 5.22s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:04:55<46:43:02, 21022.76s/it] + Training...: 79% 3477/4393 [4:44:06<1:19:43, 5.22s/it] + Training...: 79% 3478/4393 [4:44:06<1:22:27, 5.41s/it] + Training...: 79% 3479/4393 [4:44:11<1:20:39, 5.30s/it] + Training...: 79% 3480/4393 [4:44:16<1:19:15, 5.21s/it] + Training...: 79% 3481/4393 [4:44:21<1:18:13, 5.15s/it] + Training...: 79% 3482/4393 [4:44:26<1:16:40, 5.05s/it] + Training...: 79% 3483/4393 [4:44:31<1:15:01, 4.95s/it] + Training...: 79% 3484/4393 [4:44:35<1:13:18, 4.84s/it] + Training...: 79% 3485/4393 [4:44:40<1:11:46, 4.74s/it] + Training...: 79% 3486/4393 [4:44:44<1:09:56, 4.63s/it] + Training...: 79% 3487/4393 [4:44:48<1:07:55, 4.50s/it] + Training...: 79% 3488/4393 [4:44:52<1:05:37, 4.35s/it] + Training...: 79% 3489/4393 [4:44:56<1:03:13, 4.20s/it] + Training...: 79% 3490/4393 [4:45:00<1:00:25, 4.02s/it] + Training...: 79% 3491/4393 [4:45:03<57:14, 3.81s/it]  + Training...: 79% 3492/4393 [4:45:06<53:50, 3.59s/it] + Training...: 80% 3493/4393 [4:45:09<50:17, 3.35s/it] + Training...: 80% 3494/4393 [4:45:12<46:28, 3.10s/it] + Training...: 80% 3495/4393 [4:45:14<42:37, 2.85s/it] + Training...: 80% 3496/4393 [4:45:16<38:49, 2.60s/it] + Training...: 80% 3497/4393 [4:45:18<35:07, 2.35s/it] + Training...: 80% 3498/4393 [4:45:19<31:29, 2.11s/it] + Training...: 80% 3499/4393 [4:45:20<27:44, 1.86s/it] + Training...: 80% 3500/4393 [4:45:22<24:01, 1.61s/it] + Training...: 80% 3501/4393 [4:45:28<44:19, 2.98s/it] + Training...: 80% 3502/4393 [4:45:34<58:21, 3.93s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:06:30<46:43:02, 21022.76s/it] + Training...: 80% 3502/4393 [4:45:41<58:21, 3.93s/it] + Training...: 80% 3503/4393 [4:45:41<1:11:19, 4.81s/it] + Training...: 80% 3504/4393 [4:45:47<1:16:25, 5.16s/it] + Training...: 80% 3505/4393 [4:45:53<1:20:16, 5.42s/it] + Training...: 80% 3506/4393 [4:45:59<1:22:29, 5.58s/it] + Training...: 80% 3507/4393 [4:46:05<1:23:50, 5.68s/it] + Training...: 80% 3508/4393 [4:46:10<1:24:40, 5.74s/it] + Training...: 80% 3509/4393 [4:46:16<1:24:59, 5.77s/it] + Training...: 80% 3510/4393 [4:46:22<1:25:52, 5.84s/it] + Training...: 80% 3511/4393 [4:46:28<1:26:29, 5.88s/it] + Training...: 80% 3512/4393 [4:46:34<1:25:50, 5.85s/it] + Training...: 80% 3513/4393 [4:46:40<1:25:29, 5.83s/it] + Training...: 80% 3514/4393 [4:46:45<1:24:27, 5.77s/it] + Training...: 80% 3515/4393 [4:46:51<1:23:55, 5.74s/it] + Training...: 80% 3516/4393 [4:46:57<1:23:03, 5.68s/it] + Training...: 80% 3517/4393 [4:47:02<1:22:40, 5.66s/it] + Training...: 80% 3518/4393 [4:47:08<1:22:03, 5.63s/it] + Training...: 80% 3519/4393 [4:47:13<1:21:26, 5.59s/it] + Training...: 80% 3520/4393 [4:47:19<1:20:46, 5.55s/it] + Training...: 80% 3521/4393 [4:47:24<1:20:10, 5.52s/it] + Training...: 80% 3522/4393 [4:47:30<1:19:25, 5.47s/it] + Training...: 80% 3523/4393 [4:47:35<1:18:52, 5.44s/it] + Training...: 80% 3524/4393 [4:47:40<1:18:18, 5.41s/it] + Training...: 80% 3525/4393 [4:47:46<1:17:30, 5.36s/it] + Training...: 80% 3526/4393 [4:47:51<1:16:44, 5.31s/it] + Training...: 80% 3527/4393 [4:47:56<1:16:12, 5.28s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:08:51<46:43:02, 21022.76s/it] + Training...: 80% 3527/4393 [4:48:02<1:16:12, 5.28s/it] + Training...: 80% 3528/4393 [4:48:02<1:18:48, 5.47s/it] + Training...: 80% 3529/4393 [4:48:07<1:17:09, 5.36s/it] + Training...: 80% 3530/4393 [4:48:12<1:15:13, 5.23s/it] + Training...: 80% 3531/4393 [4:48:17<1:13:38, 5.13s/it] + Training...: 80% 3532/4393 [4:48:22<1:12:03, 5.02s/it] + Training...: 80% 3533/4393 [4:48:26<1:10:40, 4.93s/it] + Training...: 80% 3534/4393 [4:48:31<1:09:48, 4.88s/it] + Training...: 80% 3535/4393 [4:48:36<1:08:36, 4.80s/it] + Training...: 80% 3536/4393 [4:48:40<1:06:54, 4.68s/it] + Training...: 81% 3537/4393 [4:48:44<1:04:51, 4.55s/it] + Training...: 81% 3538/4393 [4:48:48<1:02:45, 4.40s/it] + Training...: 81% 3539/4393 [4:48:52<1:00:22, 4.24s/it] + Training...: 81% 3540/4393 [4:48:56<57:44, 4.06s/it]  + Training...: 81% 3541/4393 [4:48:59<55:02, 3.88s/it] + Training...: 81% 3542/4393 [4:49:02<52:05, 3.67s/it] + Training...: 81% 3543/4393 [4:49:05<48:59, 3.46s/it] + Training...: 81% 3544/4393 [4:49:08<45:40, 3.23s/it] + Training...: 81% 3545/4393 [4:49:11<42:05, 2.98s/it] + Training...: 81% 3546/4393 [4:49:13<38:27, 2.72s/it] + Training...: 81% 3547/4393 [4:49:15<34:48, 2.47s/it] + Training...: 81% 3548/4393 [4:49:16<31:16, 2.22s/it] + Training...: 81% 3549/4393 [4:49:18<27:32, 1.96s/it] + Training...: 81% 3550/4393 [4:49:19<23:34, 1.68s/it] + Training...: 81% 3551/4393 [4:49:25<42:37, 3.04s/it] + Training...: 81% 3552/4393 [4:49:31<55:59, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:10:27<46:43:02, 21022.76s/it] + Training...: 81% 3552/4393 [4:49:38<55:59, 3.99s/it] + Training...: 81% 3553/4393 [4:49:38<1:07:55, 4.85s/it] + Training...: 81% 3554/4393 [4:49:44<1:12:25, 5.18s/it] + Training...: 81% 3555/4393 [4:49:50<1:15:43, 5.42s/it] + Training...: 81% 3556/4393 [4:49:56<1:17:41, 5.57s/it] + Training...: 81% 3557/4393 [4:50:02<1:20:07, 5.75s/it] + Training...: 81% 3558/4393 [4:50:08<1:20:30, 5.78s/it] + Training...: 81% 3559/4393 [4:50:14<1:20:34, 5.80s/it] + Training...: 81% 3560/4393 [4:50:19<1:20:11, 5.78s/it] + Training...: 81% 3561/4393 [4:50:25<1:19:47, 5.75s/it] + Training...: 81% 3562/4393 [4:50:31<1:19:23, 5.73s/it] + Training...: 81% 3563/4393 [4:50:36<1:19:06, 5.72s/it] + Training...: 81% 3564/4393 [4:50:42<1:18:34, 5.69s/it] + Training...: 81% 3565/4393 [4:50:48<1:18:10, 5.66s/it] + Training...: 81% 3566/4393 [4:50:53<1:17:42, 5.64s/it] + Training...: 81% 3567/4393 [4:50:59<1:17:09, 5.60s/it] + Training...: 81% 3568/4393 [4:51:04<1:16:41, 5.58s/it] + Training...: 81% 3569/4393 [4:51:10<1:16:15, 5.55s/it] + Training...: 81% 3570/4393 [4:51:15<1:15:39, 5.52s/it] + Training...: 81% 3571/4393 [4:51:21<1:15:13, 5.49s/it] + Training...: 81% 3572/4393 [4:51:26<1:14:29, 5.44s/it] + Training...: 81% 3573/4393 [4:51:31<1:13:49, 5.40s/it] + Training...: 81% 3574/4393 [4:51:36<1:13:04, 5.35s/it] + Training...: 81% 3575/4393 [4:51:42<1:12:32, 5.32s/it] + Training...: 81% 3576/4393 [4:51:47<1:11:51, 5.28s/it] + Training...: 81% 3577/4393 [4:51:52<1:11:20, 5.25s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:12:47<46:43:02, 21022.76s/it] + Training...: 81% 3577/4393 [4:51:58<1:11:20, 5.25s/it] + Training...: 81% 3578/4393 [4:51:58<1:13:29, 5.41s/it] + Training...: 81% 3579/4393 [4:52:03<1:11:59, 5.31s/it] + Training...: 81% 3580/4393 [4:52:08<1:10:16, 5.19s/it] + Training...: 82% 3581/4393 [4:52:13<1:09:00, 5.10s/it] + Training...: 82% 3582/4393 [4:52:17<1:07:40, 5.01s/it] + Training...: 82% 3583/4393 [4:52:22<1:06:26, 4.92s/it] + Training...: 82% 3584/4393 [4:52:27<1:05:05, 4.83s/it] + Training...: 82% 3585/4393 [4:52:31<1:03:46, 4.74s/it] + Training...: 82% 3586/4393 [4:52:36<1:02:05, 4.62s/it] + Training...: 82% 3587/4393 [4:52:40<1:00:27, 4.50s/it] + Training...: 82% 3588/4393 [4:52:44<58:46, 4.38s/it]  + Training...: 82% 3589/4393 [4:52:48<56:28, 4.22s/it] + Training...: 82% 3590/4393 [4:52:51<54:06, 4.04s/it] + Training...: 82% 3591/4393 [4:52:55<51:26, 3.85s/it] + Training...: 82% 3592/4393 [4:52:58<48:19, 3.62s/it] + Training...: 82% 3593/4393 [4:53:01<45:06, 3.38s/it] + Training...: 82% 3594/4393 [4:53:03<41:51, 3.14s/it] + Training...: 82% 3595/4393 [4:53:06<38:35, 2.90s/it] + Training...: 82% 3596/4393 [4:53:08<35:08, 2.65s/it] + Training...: 82% 3597/4393 [4:53:10<31:47, 2.40s/it] + Training...: 82% 3598/4393 [4:53:11<28:31, 2.15s/it] + Training...: 82% 3599/4393 [4:53:12<25:10, 1.90s/it] + Training...: 82% 3600/4393 [4:53:13<21:40, 1.64s/it] + Training...: 82% 3601/4393 [4:53:20<39:31, 2.99s/it] + Training...: 82% 3602/4393 [4:53:26<52:11, 3.96s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:14:22<46:43:02, 21022.76s/it] + Training...: 82% 3602/4393 [4:53:33<52:11, 3.96s/it] + Training...: 82% 3603/4393 [4:53:33<1:03:28, 4.82s/it] + Training...: 82% 3604/4393 [4:53:39<1:07:40, 5.15s/it] + Training...: 82% 3605/4393 [4:53:45<1:11:07, 5.42s/it] + Training...: 82% 3606/4393 [4:53:51<1:13:03, 5.57s/it] + Training...: 82% 3607/4393 [4:53:56<1:14:18, 5.67s/it] + Training...: 82% 3608/4393 [4:54:02<1:14:47, 5.72s/it] + Training...: 82% 3609/4393 [4:54:08<1:15:21, 5.77s/it] + Training...: 82% 3610/4393 [4:54:14<1:15:26, 5.78s/it] + Training...: 82% 3611/4393 [4:54:20<1:15:20, 5.78s/it] + Training...: 82% 3612/4393 [4:54:26<1:16:01, 5.84s/it] + Training...: 82% 3613/4393 [4:54:32<1:15:40, 5.82s/it] + Training...: 82% 3614/4393 [4:54:37<1:14:58, 5.78s/it] + Training...: 82% 3615/4393 [4:54:43<1:14:32, 5.75s/it] + Training...: 82% 3616/4393 [4:54:48<1:13:54, 5.71s/it] + Training...: 82% 3617/4393 [4:54:54<1:13:11, 5.66s/it] + Training...: 82% 3618/4393 [4:55:00<1:12:43, 5.63s/it] + Training...: 82% 3619/4393 [4:55:05<1:12:09, 5.59s/it] + Training...: 82% 3620/4393 [4:55:11<1:11:29, 5.55s/it] + Training...: 82% 3621/4393 [4:55:16<1:11:04, 5.52s/it] + Training...: 82% 3622/4393 [4:55:21<1:10:19, 5.47s/it] + Training...: 82% 3623/4393 [4:55:27<1:09:47, 5.44s/it] + Training...: 82% 3624/4393 [4:55:32<1:09:11, 5.40s/it] + Training...: 83% 3625/4393 [4:55:37<1:08:43, 5.37s/it] + Training...: 83% 3626/4393 [4:55:42<1:07:52, 5.31s/it] + Training...: 83% 3627/4393 [4:55:48<1:07:56, 5.32s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:16:43<46:43:02, 21022.76s/it] + Training...: 83% 3627/4393 [4:55:54<1:07:56, 5.32s/it] + Training...: 83% 3628/4393 [4:55:54<1:10:24, 5.52s/it] + Training...: 83% 3629/4393 [4:55:59<1:08:33, 5.38s/it] + Training...: 83% 3630/4393 [4:56:04<1:06:50, 5.26s/it] + Training...: 83% 3631/4393 [4:56:09<1:05:27, 5.15s/it] + Training...: 83% 3632/4393 [4:56:14<1:04:10, 5.06s/it] + Training...: 83% 3633/4393 [4:56:19<1:03:29, 5.01s/it] + Training...: 83% 3634/4393 [4:56:23<1:02:15, 4.92s/it] + Training...: 83% 3635/4393 [4:56:28<1:00:59, 4.83s/it] + Training...: 83% 3636/4393 [4:56:32<59:34, 4.72s/it]  + Training...: 83% 3637/4393 [4:56:37<58:14, 4.62s/it] + Training...: 83% 3638/4393 [4:56:41<56:22, 4.48s/it] + Training...: 83% 3639/4393 [4:56:45<54:26, 4.33s/it] + Training...: 83% 3640/4393 [4:56:49<52:06, 4.15s/it] + Training...: 83% 3641/4393 [4:56:52<49:25, 3.94s/it] + Training...: 83% 3642/4393 [4:56:55<46:24, 3.71s/it] + Training...: 83% 3643/4393 [4:56:58<43:20, 3.47s/it] + Training...: 83% 3644/4393 [4:57:01<40:04, 3.21s/it] + Training...: 83% 3645/4393 [4:57:03<36:40, 2.94s/it] + Training...: 83% 3646/4393 [4:57:05<33:27, 2.69s/it] + Training...: 83% 3647/4393 [4:57:07<30:05, 2.42s/it] + Training...: 83% 3648/4393 [4:57:08<26:44, 2.15s/it] + Training...: 83% 3649/4393 [4:57:10<23:25, 1.89s/it] + Training...: 83% 3650/4393 [4:57:11<20:09, 1.63s/it] + Training...: 83% 3651/4393 [4:57:17<37:23, 3.02s/it] + Training...: 83% 3652/4393 [4:57:23<49:22, 4.00s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:18:20<46:43:02, 21022.76s/it] + Training...: 83% 3652/4393 [4:57:30<49:22, 4.00s/it] + Training...: 83% 3653/4393 [4:57:30<1:01:03, 4.95s/it] + Training...: 83% 3654/4393 [4:57:36<1:04:52, 5.27s/it] + Training...: 83% 3655/4393 [4:57:42<1:07:16, 5.47s/it] + Training...: 83% 3656/4393 [4:57:48<1:09:05, 5.63s/it] + Training...: 83% 3657/4393 [4:57:54<1:10:12, 5.72s/it] + Training...: 83% 3658/4393 [4:58:00<1:10:39, 5.77s/it] + Training...: 83% 3659/4393 [4:58:06<1:10:42, 5.78s/it] + Training...: 83% 3660/4393 [4:58:12<1:10:34, 5.78s/it] + Training...: 83% 3661/4393 [4:58:18<1:10:34, 5.78s/it] + Training...: 83% 3662/4393 [4:58:23<1:10:21, 5.77s/it] + Training...: 83% 3663/4393 [4:58:29<1:09:59, 5.75s/it] + Training...: 83% 3664/4393 [4:58:35<1:09:31, 5.72s/it] + Training...: 83% 3665/4393 [4:58:40<1:09:23, 5.72s/it] + Training...: 83% 3666/4393 [4:58:46<1:08:54, 5.69s/it] + Training...: 83% 3667/4393 [4:58:52<1:09:22, 5.73s/it] + Training...: 83% 3668/4393 [4:58:57<1:08:30, 5.67s/it] + Training...: 84% 3669/4393 [4:59:03<1:07:47, 5.62s/it] + Training...: 84% 3670/4393 [4:59:08<1:07:00, 5.56s/it] + Training...: 84% 3671/4393 [4:59:14<1:06:38, 5.54s/it] + Training...: 84% 3672/4393 [4:59:19<1:05:47, 5.48s/it] + Training...: 84% 3673/4393 [4:59:24<1:05:19, 5.44s/it] + Training...: 84% 3674/4393 [4:59:30<1:04:30, 5.38s/it] + Training...: 84% 3675/4393 [4:59:35<1:03:49, 5.33s/it] + Training...: 84% 3676/4393 [4:59:40<1:03:09, 5.29s/it] + Training...: 84% 3677/4393 [4:59:45<1:02:38, 5.25s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:20:40<46:43:02, 21022.76s/it] + Training...: 84% 3677/4393 [4:59:51<1:02:38, 5.25s/it] + Training...: 84% 3678/4393 [4:59:51<1:04:52, 5.44s/it] + Training...: 84% 3679/4393 [4:59:56<1:03:32, 5.34s/it] + Training...: 84% 3680/4393 [5:00:01<1:02:08, 5.23s/it] + Training...: 84% 3681/4393 [5:00:06<1:00:53, 5.13s/it] + Training...: 84% 3682/4393 [5:00:11<1:00:00, 5.06s/it] + Training...: 84% 3683/4393 [5:00:16<58:51, 4.97s/it]  + Training...: 84% 3684/4393 [5:00:21<57:43, 4.89s/it] + Training...: 84% 3685/4393 [5:00:25<56:25, 4.78s/it] + Training...: 84% 3686/4393 [5:00:29<54:52, 4.66s/it] + Training...: 84% 3687/4393 [5:00:34<53:22, 4.54s/it] + Training...: 84% 3688/4393 [5:00:38<51:16, 4.36s/it] + Training...: 84% 3689/4393 [5:00:41<49:03, 4.18s/it] + Training...: 84% 3690/4393 [5:00:45<46:44, 3.99s/it] + Training...: 84% 3691/4393 [5:00:48<44:36, 3.81s/it] + Training...: 84% 3692/4393 [5:00:51<42:06, 3.60s/it] + Training...: 84% 3693/4393 [5:00:54<39:08, 3.35s/it] + Training...: 84% 3694/4393 [5:00:57<36:05, 3.10s/it] + Training...: 84% 3695/4393 [5:00:59<33:02, 2.84s/it] + Training...: 84% 3696/4393 [5:01:01<30:01, 2.58s/it] + Training...: 84% 3697/4393 [5:01:03<27:07, 2.34s/it] + Training...: 84% 3698/4393 [5:01:04<24:16, 2.10s/it] + Training...: 84% 3699/4393 [5:01:05<21:18, 1.84s/it] + Training...: 84% 3700/4393 [5:01:06<18:24, 1.59s/it] + Training...: 84% 3701/4393 [5:01:13<34:11, 2.96s/it] + Training...: 84% 3702/4393 [5:01:19<45:49, 3.98s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:22:15<46:43:02, 21022.76s/it] + Training...: 84% 3702/4393 [5:01:26<45:49, 3.98s/it] + Training...: 84% 3703/4393 [5:01:26<55:58, 4.87s/it] + Training...: 84% 3704/4393 [5:01:32<59:37, 5.19s/it] + Training...: 84% 3705/4393 [5:01:38<1:02:11, 5.42s/it] + Training...: 84% 3706/4393 [5:01:44<1:03:53, 5.58s/it] + Training...: 84% 3707/4393 [5:01:50<1:05:11, 5.70s/it] + Training...: 84% 3708/4393 [5:01:56<1:05:44, 5.76s/it] + Training...: 84% 3709/4393 [5:02:02<1:06:02, 5.79s/it] + Training...: 84% 3710/4393 [5:02:07<1:05:57, 5.79s/it] + Training...: 84% 3711/4393 [5:02:13<1:05:51, 5.79s/it] + Training...: 84% 3712/4393 [5:02:19<1:05:35, 5.78s/it] + Training...: 85% 3713/4393 [5:02:25<1:05:26, 5.77s/it] + Training...: 85% 3714/4393 [5:02:31<1:05:50, 5.82s/it] + Training...: 85% 3715/4393 [5:02:36<1:05:27, 5.79s/it] + Training...: 85% 3716/4393 [5:02:42<1:04:45, 5.74s/it] + Training...: 85% 3717/4393 [5:02:48<1:04:12, 5.70s/it] + Training...: 85% 3718/4393 [5:02:53<1:03:25, 5.64s/it] + Training...: 85% 3719/4393 [5:02:58<1:02:46, 5.59s/it] + Training...: 85% 3720/4393 [5:03:04<1:02:09, 5.54s/it] + Training...: 85% 3721/4393 [5:03:09<1:01:35, 5.50s/it] + Training...: 85% 3722/4393 [5:03:15<1:00:55, 5.45s/it] + Training...: 85% 3723/4393 [5:03:20<1:00:44, 5.44s/it] + Training...: 85% 3724/4393 [5:03:25<59:50, 5.37s/it]  + Training...: 85% 3725/4393 [5:03:30<59:11, 5.32s/it] + Training...: 85% 3726/4393 [5:03:36<58:36, 5.27s/it] + Training...: 85% 3727/4393 [5:03:41<58:03, 5.23s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:24:36<46:43:02, 21022.76s/it] + Training...: 85% 3727/4393 [5:03:47<58:03, 5.23s/it] + Training...: 85% 3728/4393 [5:03:47<59:50, 5.40s/it] + Training...: 85% 3729/4393 [5:03:52<58:15, 5.26s/it] + Training...: 85% 3730/4393 [5:03:56<56:59, 5.16s/it] + Training...: 85% 3731/4393 [5:04:01<56:06, 5.09s/it] + Training...: 85% 3732/4393 [5:04:06<54:52, 4.98s/it] + Training...: 85% 3733/4393 [5:04:11<53:48, 4.89s/it] + Training...: 85% 3734/4393 [5:04:15<52:47, 4.81s/it] + Training...: 85% 3735/4393 [5:04:20<51:40, 4.71s/it] + Training...: 85% 3736/4393 [5:04:24<50:15, 4.59s/it] + Training...: 85% 3737/4393 [5:04:28<48:40, 4.45s/it] + Training...: 85% 3738/4393 [5:04:32<46:53, 4.30s/it] + Training...: 85% 3739/4393 [5:04:36<44:54, 4.12s/it] + Training...: 85% 3740/4393 [5:04:39<42:36, 3.91s/it] + Training...: 85% 3741/4393 [5:04:43<40:17, 3.71s/it] + Training...: 85% 3742/4393 [5:04:46<37:54, 3.49s/it] + Training...: 85% 3743/4393 [5:04:48<35:33, 3.28s/it] + Training...: 85% 3744/4393 [5:04:51<33:07, 3.06s/it] + Training...: 85% 3745/4393 [5:04:53<30:25, 2.82s/it] + Training...: 85% 3746/4393 [5:04:55<27:44, 2.57s/it] + Training...: 85% 3747/4393 [5:04:57<25:00, 2.32s/it] + Training...: 85% 3748/4393 [5:04:58<22:15, 2.07s/it] + Training...: 85% 3749/4393 [5:05:00<19:28, 1.81s/it] + Training...: 85% 3750/4393 [5:05:01<16:45, 1.56s/it] + Training...: 85% 3751/4393 [5:05:07<31:55, 2.98s/it] + Training...: 85% 3752/4393 [5:05:13<42:26, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:26:09<46:43:02, 21022.76s/it] + Training...: 85% 3752/4393 [5:05:20<42:26, 3.97s/it] + Training...: 85% 3753/4393 [5:05:20<51:25, 4.82s/it] + Training...: 85% 3754/4393 [5:05:26<54:48, 5.15s/it] + Training...: 85% 3755/4393 [5:05:32<57:16, 5.39s/it] + Training...: 85% 3756/4393 [5:05:38<58:36, 5.52s/it] + Training...: 86% 3757/4393 [5:05:44<59:36, 5.62s/it] + Training...: 86% 3758/4393 [5:05:49<1:00:10, 5.69s/it] + Training...: 86% 3759/4393 [5:05:55<1:00:32, 5.73s/it] + Training...: 86% 3760/4393 [5:06:01<1:00:36, 5.74s/it] + Training...: 86% 3761/4393 [5:06:07<1:00:33, 5.75s/it] + Training...: 86% 3762/4393 [5:06:12<1:00:15, 5.73s/it] + Training...: 86% 3763/4393 [5:06:18<1:00:03, 5.72s/it] + Training...: 86% 3764/4393 [5:06:24<1:00:04, 5.73s/it] + Training...: 86% 3765/4393 [5:06:29<59:38, 5.70s/it]  + Training...: 86% 3766/4393 [5:06:35<59:02, 5.65s/it] + Training...: 86% 3767/4393 [5:06:41<58:47, 5.63s/it] + Training...: 86% 3768/4393 [5:06:46<58:05, 5.58s/it] + Training...: 86% 3769/4393 [5:06:52<57:34, 5.54s/it] + Training...: 86% 3770/4393 [5:06:57<57:22, 5.53s/it] + Training...: 86% 3771/4393 [5:07:03<57:14, 5.52s/it] + Training...: 86% 3772/4393 [5:07:08<56:27, 5.45s/it] + Training...: 86% 3773/4393 [5:07:13<55:53, 5.41s/it] + Training...: 86% 3774/4393 [5:07:18<55:13, 5.35s/it] + Training...: 86% 3775/4393 [5:07:24<54:39, 5.31s/it] + Training...: 86% 3776/4393 [5:07:29<54:06, 5.26s/it] + Training...: 86% 3777/4393 [5:07:34<53:39, 5.23s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:28:29<46:43:02, 21022.76s/it] + Training...: 86% 3777/4393 [5:07:40<53:39, 5.23s/it] + Training...: 86% 3778/4393 [5:07:40<55:08, 5.38s/it] + Training...: 86% 3779/4393 [5:07:45<53:51, 5.26s/it] + Training...: 86% 3780/4393 [5:07:49<52:35, 5.15s/it] + Training...: 86% 3781/4393 [5:07:54<51:46, 5.08s/it] + Training...: 86% 3782/4393 [5:07:59<50:37, 4.97s/it] + Training...: 86% 3783/4393 [5:08:04<49:45, 4.89s/it] + Training...: 86% 3784/4393 [5:08:08<48:45, 4.80s/it] + Training...: 86% 3785/4393 [5:08:13<47:42, 4.71s/it] + Training...: 86% 3786/4393 [5:08:17<46:17, 4.58s/it] + Training...: 86% 3787/4393 [5:08:21<45:01, 4.46s/it] + Training...: 86% 3788/4393 [5:08:25<43:33, 4.32s/it] + Training...: 86% 3789/4393 [5:08:29<41:49, 4.15s/it] + Training...: 86% 3790/4393 [5:08:33<39:43, 3.95s/it] + Training...: 86% 3791/4393 [5:08:36<37:25, 3.73s/it] + Training...: 86% 3792/4393 [5:08:39<35:06, 3.50s/it] + Training...: 86% 3793/4393 [5:08:42<32:49, 3.28s/it] + Training...: 86% 3794/4393 [5:08:44<30:18, 3.04s/it] + Training...: 86% 3795/4393 [5:08:46<27:48, 2.79s/it] + Training...: 86% 3796/4393 [5:08:48<25:14, 2.54s/it] + Training...: 86% 3797/4393 [5:08:50<22:45, 2.29s/it] + Training...: 86% 3798/4393 [5:08:51<20:22, 2.05s/it] + Training...: 86% 3799/4393 [5:08:53<17:56, 1.81s/it] + Training...: 87% 3800/4393 [5:08:54<15:25, 1.56s/it] + Training...: 87% 3801/4393 [5:09:00<29:13, 2.96s/it] + Training...: 87% 3802/4393 [5:09:06<38:48, 3.94s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:30:02<46:43:02, 21022.76s/it] + Training...: 87% 3802/4393 [5:09:13<38:48, 3.94s/it] + Training...: 87% 3803/4393 [5:09:13<47:56, 4.88s/it] + Training...: 87% 3804/4393 [5:09:19<51:19, 5.23s/it] + Training...: 87% 3805/4393 [5:09:25<53:40, 5.48s/it] + Training...: 87% 3806/4393 [5:09:31<54:48, 5.60s/it] + Training...: 87% 3807/4393 [5:09:37<55:32, 5.69s/it] + Training...: 87% 3808/4393 [5:09:43<55:55, 5.74s/it] + Training...: 87% 3809/4393 [5:09:49<56:10, 5.77s/it] + Training...: 87% 3810/4393 [5:09:55<56:15, 5.79s/it] + Training...: 87% 3811/4393 [5:10:00<56:19, 5.81s/it] + Training...: 87% 3812/4393 [5:10:06<55:53, 5.77s/it] + Training...: 87% 3813/4393 [5:10:12<55:34, 5.75s/it] + Training...: 87% 3814/4393 [5:10:17<55:13, 5.72s/it] + Training...: 87% 3815/4393 [5:10:23<55:09, 5.73s/it] + Training...: 87% 3816/4393 [5:10:29<54:58, 5.72s/it] + Training...: 87% 3817/4393 [5:10:35<55:40, 5.80s/it] + Training...: 87% 3818/4393 [5:10:41<55:19, 5.77s/it] + Training...: 87% 3819/4393 [5:10:46<54:38, 5.71s/it] + Training...: 87% 3820/4393 [5:10:52<53:48, 5.63s/it] + Training...: 87% 3821/4393 [5:10:57<53:17, 5.59s/it] + Training...: 87% 3822/4393 [5:11:03<52:47, 5.55s/it] + Training...: 87% 3823/4393 [5:11:08<52:11, 5.49s/it] + Training...: 87% 3824/4393 [5:11:13<51:29, 5.43s/it] + Training...: 87% 3825/4393 [5:11:18<50:59, 5.39s/it] + Training...: 87% 3826/4393 [5:11:24<50:29, 5.34s/it] + Training...: 87% 3827/4393 [5:11:29<50:02, 5.30s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:32:24<46:43:02, 21022.76s/it] + Training...: 87% 3827/4393 [5:11:35<50:02, 5.30s/it] + Training...: 87% 3828/4393 [5:11:35<51:32, 5.47s/it] + Training...: 87% 3829/4393 [5:11:40<50:20, 5.36s/it] + Training...: 87% 3830/4393 [5:11:45<49:08, 5.24s/it] + Training...: 87% 3831/4393 [5:11:50<48:04, 5.13s/it] + Training...: 87% 3832/4393 [5:11:55<47:26, 5.07s/it] + Training...: 87% 3833/4393 [5:11:59<46:42, 5.00s/it] + Training...: 87% 3834/4393 [5:12:04<45:43, 4.91s/it] + Training...: 87% 3835/4393 [5:12:09<44:38, 4.80s/it] + Training...: 87% 3836/4393 [5:12:13<43:17, 4.66s/it] + Training...: 87% 3837/4393 [5:12:17<41:54, 4.52s/it] + Training...: 87% 3838/4393 [5:12:21<40:20, 4.36s/it] + Training...: 87% 3839/4393 [5:12:25<38:47, 4.20s/it] + Training...: 87% 3840/4393 [5:12:29<36:55, 4.01s/it] + Training...: 87% 3841/4393 [5:12:32<34:56, 3.80s/it] + Training...: 87% 3842/4393 [5:12:35<32:50, 3.58s/it] + Training...: 87% 3843/4393 [5:12:38<30:34, 3.34s/it] + Training...: 88% 3844/4393 [5:12:40<28:09, 3.08s/it] + Training...: 88% 3845/4393 [5:12:42<25:44, 2.82s/it] + Training...: 88% 3846/4393 [5:12:44<23:23, 2.57s/it] + Training...: 88% 3847/4393 [5:12:46<21:02, 2.31s/it] + Training...: 88% 3848/4393 [5:12:48<18:42, 2.06s/it] + Training...: 88% 3849/4393 [5:12:49<16:26, 1.81s/it] + Training...: 88% 3850/4393 [5:12:50<14:05, 1.56s/it] + Training...: 88% 3851/4393 [5:12:56<26:39, 2.95s/it] + Training...: 88% 3852/4393 [5:13:02<35:16, 3.91s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:33:58<46:43:02, 21022.76s/it] + Training...: 88% 3852/4393 [5:13:09<35:16, 3.91s/it] + Training...: 88% 3853/4393 [5:13:09<44:00, 4.89s/it] + Training...: 88% 3854/4393 [5:13:15<47:01, 5.24s/it] + Training...: 88% 3855/4393 [5:13:21<49:06, 5.48s/it] + Training...: 88% 3856/4393 [5:13:27<50:34, 5.65s/it] + Training...: 88% 3857/4393 [5:13:33<51:10, 5.73s/it] + Training...: 88% 3858/4393 [5:13:39<51:49, 5.81s/it] + Training...: 88% 3859/4393 [5:13:45<52:05, 5.85s/it] + Training...: 88% 3860/4393 [5:13:51<51:51, 5.84s/it] + Training...: 88% 3861/4393 [5:13:57<51:45, 5.84s/it] + Training...: 88% 3862/4393 [5:14:03<51:19, 5.80s/it] + Training...: 88% 3863/4393 [5:14:08<51:01, 5.78s/it] + Training...: 88% 3864/4393 [5:14:14<50:30, 5.73s/it] + Training...: 88% 3865/4393 [5:14:20<50:08, 5.70s/it] + Training...: 88% 3866/4393 [5:14:25<50:15, 5.72s/it] + Training...: 88% 3867/4393 [5:14:31<50:07, 5.72s/it] + Training...: 88% 3868/4393 [5:14:37<49:25, 5.65s/it] + Training...: 88% 3869/4393 [5:14:42<48:53, 5.60s/it] + Training...: 88% 3870/4393 [5:14:48<48:33, 5.57s/it] + Training...: 88% 3871/4393 [5:14:53<48:01, 5.52s/it] + Training...: 88% 3872/4393 [5:14:58<47:27, 5.46s/it] + Training...: 88% 3873/4393 [5:15:04<47:02, 5.43s/it] + Training...: 88% 3874/4393 [5:15:09<46:28, 5.37s/it] + Training...: 88% 3875/4393 [5:15:14<46:02, 5.33s/it] + Training...: 88% 3876/4393 [5:15:19<45:31, 5.28s/it] + Training...: 88% 3877/4393 [5:15:24<45:00, 5.23s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:36:19<46:43:02, 21022.76s/it] + Training...: 88% 3877/4393 [5:15:30<45:00, 5.23s/it] + Training...: 88% 3878/4393 [5:15:30<46:26, 5.41s/it] + Training...: 88% 3879/4393 [5:15:35<45:23, 5.30s/it] + Training...: 88% 3880/4393 [5:15:40<44:19, 5.18s/it] + Training...: 88% 3881/4393 [5:15:45<43:32, 5.10s/it] + Training...: 88% 3882/4393 [5:15:50<42:43, 5.02s/it] + Training...: 88% 3883/4393 [5:15:55<42:15, 4.97s/it] + Training...: 88% 3884/4393 [5:16:00<41:41, 4.92s/it] + Training...: 88% 3885/4393 [5:16:04<40:49, 4.82s/it] + Training...: 88% 3886/4393 [5:16:09<39:49, 4.71s/it] + Training...: 88% 3887/4393 [5:16:13<38:35, 4.58s/it] + Training...: 89% 3888/4393 [5:16:17<37:09, 4.42s/it] + Training...: 89% 3889/4393 [5:16:21<35:45, 4.26s/it] + Training...: 89% 3890/4393 [5:16:24<34:02, 4.06s/it] + Training...: 89% 3891/4393 [5:16:28<32:09, 3.84s/it] + Training...: 89% 3892/4393 [5:16:31<30:17, 3.63s/it] + Training...: 89% 3893/4393 [5:16:34<28:10, 3.38s/it] + Training...: 89% 3894/4393 [5:16:36<26:07, 3.14s/it] + Training...: 89% 3895/4393 [5:16:39<24:03, 2.90s/it] + Training...: 89% 3896/4393 [5:16:41<21:59, 2.65s/it] + Training...: 89% 3897/4393 [5:16:43<19:45, 2.39s/it] + Training...: 89% 3898/4393 [5:16:44<17:36, 2.13s/it] + Training...: 89% 3899/4393 [5:16:45<15:32, 1.89s/it] + Training...: 89% 3900/4393 [5:16:46<13:22, 1.63s/it] + Training...: 89% 3901/4393 [5:16:53<24:26, 2.98s/it] + Training...: 89% 3902/4393 [5:16:59<32:14, 3.94s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:37:55<46:43:02, 21022.76s/it] + Training...: 89% 3902/4393 [5:17:05<32:14, 3.94s/it] + Training...: 89% 3903/4393 [5:17:05<39:08, 4.79s/it] + Training...: 89% 3904/4393 [5:17:11<41:50, 5.13s/it] + Training...: 89% 3905/4393 [5:17:17<43:45, 5.38s/it] + Training...: 89% 3906/4393 [5:17:23<44:49, 5.52s/it] + Training...: 89% 3907/4393 [5:17:29<45:57, 5.67s/it] + Training...: 89% 3908/4393 [5:17:35<46:26, 5.74s/it] + Training...: 89% 3909/4393 [5:17:41<47:08, 5.84s/it] + Training...: 89% 3910/4393 [5:17:47<47:24, 5.89s/it] + Training...: 89% 3911/4393 [5:17:53<47:34, 5.92s/it] + Training...: 89% 3912/4393 [5:17:59<47:15, 5.89s/it] + Training...: 89% 3913/4393 [5:18:05<47:14, 5.90s/it] + Training...: 89% 3914/4393 [5:18:11<46:54, 5.88s/it] + Training...: 89% 3915/4393 [5:18:16<46:13, 5.80s/it] + Training...: 89% 3916/4393 [5:18:22<45:36, 5.74s/it] + Training...: 89% 3917/4393 [5:18:28<45:05, 5.68s/it] + Training...: 89% 3918/4393 [5:18:33<44:23, 5.61s/it] + Training...: 89% 3919/4393 [5:18:38<44:01, 5.57s/it] + Training...: 89% 3920/4393 [5:18:44<43:36, 5.53s/it] + Training...: 89% 3921/4393 [5:18:49<43:17, 5.50s/it] + Training...: 89% 3922/4393 [5:18:55<42:47, 5.45s/it] + Training...: 89% 3923/4393 [5:19:00<42:36, 5.44s/it] + Training...: 89% 3924/4393 [5:19:05<42:15, 5.41s/it] + Training...: 89% 3925/4393 [5:19:11<41:44, 5.35s/it] + Training...: 89% 3926/4393 [5:19:16<41:16, 5.30s/it] + Training...: 89% 3927/4393 [5:19:21<41:00, 5.28s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:40:16<46:43:02, 21022.76s/it] + Training...: 89% 3927/4393 [5:19:27<41:00, 5.28s/it] + Training...: 89% 3928/4393 [5:19:27<42:27, 5.48s/it] + Training...: 89% 3929/4393 [5:19:32<41:23, 5.35s/it] + Training...: 89% 3930/4393 [5:19:37<40:41, 5.27s/it] + Training...: 89% 3931/4393 [5:19:42<40:04, 5.20s/it] + Training...: 90% 3932/4393 [5:19:47<39:04, 5.09s/it] + Training...: 90% 3933/4393 [5:19:52<38:16, 4.99s/it] + Training...: 90% 3934/4393 [5:19:56<37:16, 4.87s/it] + Training...: 90% 3935/4393 [5:20:01<36:25, 4.77s/it] + Training...: 90% 3936/4393 [5:20:05<35:34, 4.67s/it] + Training...: 90% 3937/4393 [5:20:10<34:56, 4.60s/it] + Training...: 90% 3938/4393 [5:20:14<33:37, 4.43s/it] + Training...: 90% 3939/4393 [5:20:18<32:09, 4.25s/it] + Training...: 90% 3940/4393 [5:20:21<30:33, 4.05s/it] + Training...: 90% 3941/4393 [5:20:25<29:00, 3.85s/it] + Training...: 90% 3942/4393 [5:20:28<27:13, 3.62s/it] + Training...: 90% 3943/4393 [5:20:31<25:20, 3.38s/it] + Training...: 90% 3944/4393 [5:20:33<23:24, 3.13s/it] + Training...: 90% 3945/4393 [5:20:35<21:23, 2.87s/it] + Training...: 90% 3946/4393 [5:20:37<19:24, 2.60s/it] + Training...: 90% 3947/4393 [5:20:39<17:28, 2.35s/it] + Training...: 90% 3948/4393 [5:20:41<15:39, 2.11s/it] + Training...: 90% 3949/4393 [5:20:42<13:40, 1.85s/it] + Training...: 90% 3950/4393 [5:20:43<11:43, 1.59s/it] + Training...: 90% 3951/4393 [5:20:49<21:50, 2.96s/it] + Training...: 90% 3952/4393 [5:20:55<28:47, 3.92s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:41:51<46:43:02, 21022.76s/it] + Training...: 90% 3952/4393 [5:21:02<28:47, 3.92s/it] + Training...: 90% 3953/4393 [5:21:02<35:14, 4.81s/it] + Training...: 90% 3954/4393 [5:21:08<37:42, 5.15s/it] + Training...: 90% 3955/4393 [5:21:14<39:27, 5.41s/it] + Training...: 90% 3956/4393 [5:21:20<40:31, 5.56s/it] + Training...: 90% 3957/4393 [5:21:26<41:31, 5.71s/it] + Training...: 90% 3958/4393 [5:21:32<42:23, 5.85s/it] + Training...: 90% 3959/4393 [5:21:38<43:02, 5.95s/it] + Training...: 90% 3960/4393 [5:21:44<43:10, 5.98s/it] + Training...: 90% 3961/4393 [5:21:50<42:56, 5.96s/it] + Training...: 90% 3962/4393 [5:21:56<42:16, 5.88s/it] + Training...: 90% 3963/4393 [5:22:02<41:47, 5.83s/it] + Training...: 90% 3964/4393 [5:22:07<41:14, 5.77s/it] + Training...: 90% 3965/4393 [5:22:13<41:07, 5.76s/it] + Training...: 90% 3966/4393 [5:22:19<40:35, 5.70s/it] + Training...: 90% 3967/4393 [5:22:24<40:15, 5.67s/it] + Training...: 90% 3968/4393 [5:22:30<39:48, 5.62s/it] + Training...: 90% 3969/4393 [5:22:35<39:21, 5.57s/it] + Training...: 90% 3970/4393 [5:22:41<38:57, 5.53s/it] + Training...: 90% 3971/4393 [5:22:46<38:40, 5.50s/it] + Training...: 90% 3972/4393 [5:22:52<38:25, 5.48s/it] + Training...: 90% 3973/4393 [5:22:57<38:04, 5.44s/it] + Training...: 90% 3974/4393 [5:23:02<37:48, 5.41s/it] + Training...: 90% 3975/4393 [5:23:08<37:54, 5.44s/it] + Training...: 91% 3976/4393 [5:23:13<37:27, 5.39s/it] + Training...: 91% 3977/4393 [5:23:18<36:50, 5.31s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:44:13<46:43:02, 21022.76s/it] + Training...: 91% 3977/4393 [5:23:24<36:50, 5.31s/it] + Training...: 91% 3978/4393 [5:23:24<37:49, 5.47s/it] + Training...: 91% 3979/4393 [5:23:29<36:48, 5.33s/it] + Training...: 91% 3980/4393 [5:23:34<35:50, 5.21s/it] + Training...: 91% 3981/4393 [5:23:39<35:09, 5.12s/it] + Training...: 91% 3982/4393 [5:23:44<34:26, 5.03s/it] + Training...: 91% 3983/4393 [5:23:48<33:45, 4.94s/it] + Training...: 91% 3984/4393 [5:23:53<32:58, 4.84s/it] + Training...: 91% 3985/4393 [5:23:57<32:14, 4.74s/it] + Training...: 91% 3986/4393 [5:24:02<31:22, 4.62s/it] + Training...: 91% 3987/4393 [5:24:06<30:34, 4.52s/it] + Training...: 91% 3988/4393 [5:24:10<29:36, 4.39s/it] + Training...: 91% 3989/4393 [5:24:14<28:37, 4.25s/it] + Training...: 91% 3990/4393 [5:24:18<27:29, 4.09s/it] + Training...: 91% 3991/4393 [5:24:21<26:17, 3.92s/it] + Training...: 91% 3992/4393 [5:24:25<24:53, 3.73s/it] + Training...: 91% 3993/4393 [5:24:28<23:29, 3.52s/it] + Training...: 91% 3994/4393 [5:24:30<21:57, 3.30s/it] + Training...: 91% 3995/4393 [5:24:33<20:23, 3.07s/it] + Training...: 91% 3996/4393 [5:24:35<18:35, 2.81s/it] + Training...: 91% 3997/4393 [5:24:37<16:45, 2.54s/it] + Training...: 91% 3998/4393 [5:24:39<14:52, 2.26s/it] + Training...: 91% 3999/4393 [5:24:40<13:04, 1.99s/it] + Training...: 91% 4000/4393 [5:24:41<11:12, 1.71s/it] + Training...: 91% 4001/4393 [5:24:47<20:03, 3.07s/it] + Training...: 91% 4002/4393 [5:24:54<26:05, 4.00s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:45:50<46:43:02, 21022.76s/it] + Training...: 91% 4002/4393 [5:25:00<26:05, 4.00s/it] + Training...: 91% 4003/4393 [5:25:00<31:33, 4.86s/it] + Training...: 91% 4004/4393 [5:25:06<33:40, 5.19s/it] + Training...: 91% 4005/4393 [5:25:13<35:31, 5.49s/it] + Training...: 91% 4006/4393 [5:25:19<36:32, 5.67s/it] + Training...: 91% 4007/4393 [5:25:25<37:14, 5.79s/it] + Training...: 91% 4008/4393 [5:25:31<37:25, 5.83s/it] + Training...: 91% 4009/4393 [5:25:37<37:33, 5.87s/it] + Training...: 91% 4010/4393 [5:25:42<37:23, 5.86s/it] + Training...: 91% 4011/4393 [5:25:48<37:15, 5.85s/it] + Training...: 91% 4012/4393 [5:25:54<36:53, 5.81s/it] + Training...: 91% 4013/4393 [5:26:00<36:53, 5.83s/it] + Training...: 91% 4014/4393 [5:26:06<36:41, 5.81s/it] + Training...: 91% 4015/4393 [5:26:11<36:33, 5.80s/it] + Training...: 91% 4016/4393 [5:26:17<36:31, 5.81s/it] + Training...: 91% 4017/4393 [5:26:23<36:18, 5.79s/it] + Training...: 91% 4018/4393 [5:26:29<36:07, 5.78s/it] + Training...: 91% 4019/4393 [5:26:34<35:33, 5.70s/it] + Training...: 92% 4020/4393 [5:26:40<35:01, 5.63s/it] + Training...: 92% 4021/4393 [5:26:45<34:28, 5.56s/it] + Training...: 92% 4022/4393 [5:26:51<34:05, 5.51s/it] + Training...: 92% 4023/4393 [5:26:56<33:42, 5.47s/it] + Training...: 92% 4024/4393 [5:27:01<33:19, 5.42s/it] + Training...: 92% 4025/4393 [5:27:06<32:57, 5.37s/it] + Training...: 92% 4026/4393 [5:27:12<32:36, 5.33s/it] + Training...: 92% 4027/4393 [5:27:17<32:17, 5.29s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:48:12<46:43:02, 21022.76s/it] + Training...: 92% 4027/4393 [5:27:23<32:17, 5.29s/it] + Training...: 92% 4028/4393 [5:27:23<33:21, 5.48s/it] + Training...: 92% 4029/4393 [5:27:28<32:40, 5.39s/it] + Training...: 92% 4030/4393 [5:27:33<32:06, 5.31s/it] + Training...: 92% 4031/4393 [5:27:38<31:26, 5.21s/it] + Training...: 92% 4032/4393 [5:27:43<30:44, 5.11s/it] + Training...: 92% 4033/4393 [5:27:48<30:11, 5.03s/it] + Training...: 92% 4034/4393 [5:27:53<29:29, 4.93s/it] + Training...: 92% 4035/4393 [5:27:57<28:41, 4.81s/it] + Training...: 92% 4036/4393 [5:28:01<27:53, 4.69s/it] + Training...: 92% 4037/4393 [5:28:06<27:06, 4.57s/it] + Training...: 92% 4038/4393 [5:28:10<26:16, 4.44s/it] + Training...: 92% 4039/4393 [5:28:14<25:17, 4.29s/it] + Training...: 92% 4040/4393 [5:28:17<24:09, 4.11s/it] + Training...: 92% 4041/4393 [5:28:21<22:56, 3.91s/it] + Training...: 92% 4042/4393 [5:28:24<21:37, 3.70s/it] + Training...: 92% 4043/4393 [5:28:27<20:10, 3.46s/it] + Training...: 92% 4044/4393 [5:28:30<18:39, 3.21s/it] + Training...: 92% 4045/4393 [5:28:32<17:09, 2.96s/it] + Training...: 92% 4046/4393 [5:28:34<15:33, 2.69s/it] + Training...: 92% 4047/4393 [5:28:36<13:56, 2.42s/it] + Training...: 92% 4048/4393 [5:28:37<12:23, 2.15s/it] + Training...: 92% 4049/4393 [5:28:39<10:53, 1.90s/it] + Training...: 92% 4050/4393 [5:28:40<09:19, 1.63s/it] + Training...: 92% 4051/4393 [5:28:46<17:14, 3.02s/it] + Training...: 92% 4052/4393 [5:28:52<22:39, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:49:48<46:43:02, 21022.76s/it] + Training...: 92% 4052/4393 [5:28:59<22:39, 3.99s/it] + Training...: 92% 4053/4393 [5:28:59<27:26, 4.84s/it] + Training...: 92% 4054/4393 [5:29:05<29:10, 5.16s/it] + Training...: 92% 4055/4393 [5:29:11<30:23, 5.40s/it] + Training...: 92% 4056/4393 [5:29:17<31:10, 5.55s/it] + Training...: 92% 4057/4393 [5:29:23<31:41, 5.66s/it] + Training...: 92% 4058/4393 [5:29:29<32:11, 5.76s/it] + Training...: 92% 4059/4393 [5:29:35<32:38, 5.86s/it] + Training...: 92% 4060/4393 [5:29:41<32:27, 5.85s/it] + Training...: 92% 4061/4393 [5:29:46<32:14, 5.83s/it] + Training...: 92% 4062/4393 [5:29:52<31:57, 5.79s/it] + Training...: 92% 4063/4393 [5:29:58<31:44, 5.77s/it] + Training...: 93% 4064/4393 [5:30:04<31:28, 5.74s/it] + Training...: 93% 4065/4393 [5:30:09<31:10, 5.70s/it] + Training...: 93% 4066/4393 [5:30:15<30:48, 5.65s/it] + Training...: 93% 4067/4393 [5:30:20<30:39, 5.64s/it] + Training...: 93% 4068/4393 [5:30:26<30:17, 5.59s/it] + Training...: 93% 4069/4393 [5:30:31<29:58, 5.55s/it] + Training...: 93% 4070/4393 [5:30:37<29:44, 5.52s/it] + Training...: 93% 4071/4393 [5:30:42<29:29, 5.50s/it] + Training...: 93% 4072/4393 [5:30:48<29:16, 5.47s/it] + Training...: 93% 4073/4393 [5:30:53<29:07, 5.46s/it] + Training...: 93% 4074/4393 [5:30:58<28:45, 5.41s/it] + Training...: 93% 4075/4393 [5:31:04<28:32, 5.38s/it] + Training...: 93% 4076/4393 [5:31:09<28:27, 5.39s/it] + Training...: 93% 4077/4393 [5:31:14<27:59, 5.31s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:52:09<46:43:02, 21022.76s/it] + Training...: 93% 4077/4393 [5:31:20<27:59, 5.31s/it] + Training...: 93% 4078/4393 [5:31:20<28:41, 5.47s/it] + Training...: 93% 4079/4393 [5:31:25<27:55, 5.33s/it] + Training...: 93% 4080/4393 [5:31:30<27:10, 5.21s/it] + Training...: 93% 4081/4393 [5:31:35<26:34, 5.11s/it] + Training...: 93% 4082/4393 [5:31:40<25:56, 5.01s/it] + Training...: 93% 4083/4393 [5:31:44<25:23, 4.91s/it] + Training...: 93% 4084/4393 [5:31:49<24:48, 4.82s/it] + Training...: 93% 4085/4393 [5:31:53<24:16, 4.73s/it] + Training...: 93% 4086/4393 [5:31:58<23:36, 4.61s/it] + Training...: 93% 4087/4393 [5:32:02<23:01, 4.51s/it] + Training...: 93% 4088/4393 [5:32:06<22:19, 4.39s/it] + Training...: 93% 4089/4393 [5:32:10<21:36, 4.27s/it] + Training...: 93% 4090/4393 [5:32:14<20:45, 4.11s/it] + Training...: 93% 4091/4393 [5:32:17<19:41, 3.91s/it] + Training...: 93% 4092/4393 [5:32:20<18:32, 3.70s/it] + Training...: 93% 4093/4393 [5:32:23<17:20, 3.47s/it] + Training...: 93% 4094/4393 [5:32:26<16:03, 3.22s/it] + Training...: 93% 4095/4393 [5:32:28<14:43, 2.96s/it] + Training...: 93% 4096/4393 [5:32:31<13:29, 2.73s/it] + Training...: 93% 4097/4393 [5:32:32<12:12, 2.48s/it] + Training...: 93% 4098/4393 [5:32:34<10:54, 2.22s/it] + Training...: 93% 4099/4393 [5:32:35<09:30, 1.94s/it] + Training...: 93% 4100/4393 [5:32:36<08:08, 1.67s/it] + Training...: 93% 4101/4393 [5:32:43<14:36, 3.00s/it] + Training...: 93% 4102/4393 [5:32:49<19:12, 3.96s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:53:45<46:43:02, 21022.76s/it] + Training...: 93% 4102/4393 [5:32:56<19:12, 3.96s/it] + Training...: 93% 4103/4393 [5:32:56<23:20, 4.83s/it] + Training...: 93% 4104/4393 [5:33:02<24:56, 5.18s/it] + Training...: 93% 4105/4393 [5:33:08<25:58, 5.41s/it] + Training...: 93% 4106/4393 [5:33:13<26:37, 5.57s/it] + Training...: 93% 4107/4393 [5:33:19<27:06, 5.69s/it] + Training...: 94% 4108/4393 [5:33:25<27:19, 5.75s/it] + Training...: 94% 4109/4393 [5:33:31<27:38, 5.84s/it] + Training...: 94% 4110/4393 [5:33:37<27:45, 5.88s/it] + Training...: 94% 4111/4393 [5:33:43<27:34, 5.87s/it] + Training...: 94% 4112/4393 [5:33:49<27:16, 5.82s/it] + Training...: 94% 4113/4393 [5:33:55<27:05, 5.81s/it] + Training...: 94% 4114/4393 [5:34:00<26:51, 5.78s/it] + Training...: 94% 4115/4393 [5:34:06<26:39, 5.75s/it] + Training...: 94% 4116/4393 [5:34:12<26:30, 5.74s/it] + Training...: 94% 4117/4393 [5:34:17<26:09, 5.69s/it] + Training...: 94% 4118/4393 [5:34:23<25:53, 5.65s/it] + Training...: 94% 4119/4393 [5:34:28<25:38, 5.61s/it] + Training...: 94% 4120/4393 [5:34:34<25:17, 5.56s/it] + Training...: 94% 4121/4393 [5:34:39<25:03, 5.53s/it] + Training...: 94% 4122/4393 [5:34:45<24:44, 5.48s/it] + Training...: 94% 4123/4393 [5:34:50<24:32, 5.45s/it] + Training...: 94% 4124/4393 [5:34:55<24:14, 5.41s/it] + Training...: 94% 4125/4393 [5:35:01<23:58, 5.37s/it] + Training...: 94% 4126/4393 [5:35:06<23:39, 5.32s/it] + Training...: 94% 4127/4393 [5:35:11<23:21, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:56:06<46:43:02, 21022.76s/it] + Training...: 94% 4127/4393 [5:35:17<23:21, 5.27s/it] + Training...: 94% 4128/4393 [5:35:17<24:02, 5.44s/it] + Training...: 94% 4129/4393 [5:35:22<23:28, 5.33s/it] + Training...: 94% 4130/4393 [5:35:27<22:56, 5.24s/it] + Training...: 94% 4131/4393 [5:35:32<22:26, 5.14s/it] + Training...: 94% 4132/4393 [5:35:37<21:53, 5.03s/it] + Training...: 94% 4133/4393 [5:35:41<21:25, 4.94s/it] + Training...: 94% 4134/4393 [5:35:46<20:59, 4.86s/it] + Training...: 94% 4135/4393 [5:35:51<20:40, 4.81s/it] + Training...: 94% 4136/4393 [5:35:55<20:15, 4.73s/it] + Training...: 94% 4137/4393 [5:36:00<19:43, 4.62s/it] + Training...: 94% 4138/4393 [5:36:04<18:58, 4.47s/it] + Training...: 94% 4139/4393 [5:36:08<18:14, 4.31s/it] + Training...: 94% 4140/4393 [5:36:11<17:24, 4.13s/it] + Training...: 94% 4141/4393 [5:36:15<16:31, 3.93s/it] + Training...: 94% 4142/4393 [5:36:18<15:32, 3.72s/it] + Training...: 94% 4143/4393 [5:36:21<14:30, 3.48s/it] + Training...: 94% 4144/4393 [5:36:24<13:27, 3.24s/it] + Training...: 94% 4145/4393 [5:36:26<12:20, 2.99s/it] + Training...: 94% 4146/4393 [5:36:28<11:14, 2.73s/it] + Training...: 94% 4147/4393 [5:36:30<10:06, 2.47s/it] + Training...: 94% 4148/4393 [5:36:32<09:00, 2.20s/it] + Training...: 94% 4149/4393 [5:36:33<07:52, 1.94s/it] + Training...: 94% 4150/4393 [5:36:34<06:42, 1.66s/it] + Training...: 94% 4151/4393 [5:36:40<12:08, 3.01s/it] + Training...: 95% 4152/4393 [5:36:46<15:55, 3.96s/it] + Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |) +Step... (20025 | Loss: 0.04840395227074623, Learning Rate: 6.055757694412023e-05, Gradient Norm: 0.3350682854652405) +Step... (20050 | Loss: 0.040021102875471115, Learning Rate: 6.0507070884341374e-05, Gradient Norm: 0.3626783788204193) +Step... (20075 | Loss: 0.06721880286931992, Learning Rate: 6.045656118658371e-05, Gradient Norm: 0.661290168762207) +Step... (20100 | Loss: 0.039792388677597046, Learning Rate: 6.040606240276247e-05, Gradient Norm: 0.41450798511505127) +Step... (20125 | Loss: 0.06491632759571075, Learning Rate: 6.035555270500481e-05, Gradient Norm: 0.5559267401695251) +Step... (20150 | Loss: 0.04650925472378731, Learning Rate: 6.030504664522596e-05, Gradient Norm: 0.35466456413269043) +Step... (20175 | Loss: 0.04336107149720192, Learning Rate: 6.025454786140472e-05, Gradient Norm: 0.3757917582988739) +Step... (20200 | Loss: 0.05388178676366806, Learning Rate: 6.0204038163647056e-05, Gradient Norm: 0.39275360107421875) +Step... (20225 | Loss: 0.033549536019563675, Learning Rate: 6.01535321038682e-05, Gradient Norm: 0.33024901151657104) +Step... (20250 | Loss: 0.04022737964987755, Learning Rate: 6.010303332004696e-05, Gradient Norm: 0.31290462613105774) +Step... (20275 | Loss: 0.04750262200832367, Learning Rate: 6.00525236222893e-05, Gradient Norm: 0.303996205329895) +Step... (20300 | Loss: 0.05061691999435425, Learning Rate: 6.000201392453164e-05, Gradient Norm: 0.5085467100143433) +Step... (20325 | Loss: 0.03429495915770531, Learning Rate: 5.995151150273159e-05, Gradient Norm: 0.4001585841178894) +Step... (20350 | Loss: 0.04250844568014145, Learning Rate: 5.9901009080931544e-05, Gradient Norm: 0.3081212639808655) +Step... (20375 | Loss: 0.03005947545170784, Learning Rate: 5.985049938317388e-05, Gradient Norm: 0.25889503955841064) +Step... (20400 | Loss: 0.03023005835711956, Learning Rate: 5.9799996961373836e-05, Gradient Norm: 0.323447048664093) +Step... (20425 | Loss: 0.03575645387172699, Learning Rate: 5.974949453957379e-05, Gradient Norm: 0.28628385066986084) +Step... (20450 | Loss: 0.03535941615700722, Learning Rate: 5.969898484181613e-05, Gradient Norm: 0.29935961961746216) +Step... (20475 | Loss: 0.035111647099256516, Learning Rate: 5.964848242001608e-05, Gradient Norm: 0.32121407985687256) +Step... (20500 | Loss: 0.07113418728113174, Learning Rate: 5.959797999821603e-05, Gradient Norm: 0.7147608399391174) +Step... (20525 | Loss: 0.04110198840498924, Learning Rate: 5.954747030045837e-05, Gradient Norm: 0.2974580228328705) +Step... (20550 | Loss: 0.06231129169464111, Learning Rate: 5.9496967878658324e-05, Gradient Norm: 0.40457063913345337) +Step... (20575 | Loss: 0.03932349756360054, Learning Rate: 5.944646545685828e-05, Gradient Norm: 0.28113406896591187) +Step... (20600 | Loss: 0.035716500133275986, Learning Rate: 5.9395955759100616e-05, Gradient Norm: 0.30872511863708496) +Step... (20625 | Loss: 0.0612182579934597, Learning Rate: 5.934545333730057e-05, Gradient Norm: 0.4794360101222992) +Step... (20650 | Loss: 0.06335697323083878, Learning Rate: 5.929495091550052e-05, Gradient Norm: 0.9689046144485474) +Step... (20675 | Loss: 0.056457117199897766, Learning Rate: 5.924444121774286e-05, Gradient Norm: 0.4037126302719116) +Step... (20700 | Loss: 0.04219113290309906, Learning Rate: 5.919393879594281e-05, Gradient Norm: 0.49446389079093933) +Step... (20725 | Loss: 0.03880039229989052, Learning Rate: 5.914343273616396e-05, Gradient Norm: 0.522761344909668) +Step... (20750 | Loss: 0.051740579307079315, Learning Rate: 5.9092926676385105e-05, Gradient Norm: 0.4382433295249939) +Step... (20775 | Loss: 0.04807671159505844, Learning Rate: 5.904242425458506e-05, Gradient Norm: 0.3645383417606354) +Step... (20800 | Loss: 0.03817301243543625, Learning Rate: 5.89919181948062e-05, Gradient Norm: 0.3344947099685669) +Step... (20825 | Loss: 0.04550916701555252, Learning Rate: 5.894141213502735e-05, Gradient Norm: 0.30412057042121887) +Step... (20850 | Loss: 0.02587965875864029, Learning Rate: 5.8890906075248495e-05, Gradient Norm: 0.2640466094017029) +Step... (20875 | Loss: 0.04702045023441315, Learning Rate: 5.884040365344845e-05, Gradient Norm: 0.405086487531662) +Step... (20900 | Loss: 0.03893226757645607, Learning Rate: 5.8789893955690786e-05, Gradient Norm: 0.2916216552257538) +Step... (20925 | Loss: 0.05458155274391174, Learning Rate: 5.873939153389074e-05, Gradient Norm: 0.5137640237808228) +Step... (20950 | Loss: 0.026842793449759483, Learning Rate: 5.868888911209069e-05, Gradient Norm: 0.29405421018600464) +Step... (20975 | Loss: 0.04555520787835121, Learning Rate: 5.863837941433303e-05, Gradient Norm: 0.39362606406211853) +Step... (21000 | Loss: 0.06246699020266533, Learning Rate: 5.8587876992532983e-05, Gradient Norm: 0.6405248641967773) +Step... (21025 | Loss: 0.07497163116931915, Learning Rate: 5.8537374570732936e-05, Gradient Norm: 0.649531364440918) +Step... (21050 | Loss: 0.05232103168964386, Learning Rate: 5.8486864872975275e-05, Gradient Norm: 0.40732765197753906) +Step... (21075 | Loss: 0.055781468749046326, Learning Rate: 5.843636245117523e-05, Gradient Norm: 0.3719920217990875) +Step... (21100 | Loss: 0.04000868275761604, Learning Rate: 5.838586002937518e-05, Gradient Norm: 0.30638593435287476) +Step... (21125 | Loss: 0.0311842430382967, Learning Rate: 5.833535033161752e-05, Gradient Norm: 0.31067410111427307) +Step... (21150 | Loss: 0.06602907925844193, Learning Rate: 5.828484790981747e-05, Gradient Norm: 0.4416637122631073) +Step... (21175 | Loss: 0.04677121713757515, Learning Rate: 5.8234345488017425e-05, Gradient Norm: 0.5177642107009888) +Step... (21200 | Loss: 0.045037493109703064, Learning Rate: 5.8183835790259764e-05, Gradient Norm: 0.34402015805244446) +Step... (21225 | Loss: 0.03223634138703346, Learning Rate: 5.8133333368459716e-05, Gradient Norm: 0.3138290047645569) +Step... (21250 | Loss: 0.0355977863073349, Learning Rate: 5.808283094665967e-05, Gradient Norm: 0.2971906363964081) +Step... (21275 | Loss: 0.07619132101535797, Learning Rate: 5.803232124890201e-05, Gradient Norm: 0.5961235165596008) +Step... (21300 | Loss: 0.031767457723617554, Learning Rate: 5.798181882710196e-05, Gradient Norm: 0.3297065496444702) +Step... (21325 | Loss: 0.04114881902933121, Learning Rate: 5.7931312767323107e-05, Gradient Norm: 0.29805976152420044) +Step... (21350 | Loss: 0.06210632622241974, Learning Rate: 5.788080670754425e-05, Gradient Norm: 0.4565739035606384) +Step... (21375 | Loss: 0.05203666165471077, Learning Rate: 5.78303006477654e-05, Gradient Norm: 0.5167878270149231) +Step... (21400 | Loss: 0.04761577770113945, Learning Rate: 5.777979822596535e-05, Gradient Norm: 0.7846924662590027) +Step... (21425 | Loss: 0.05161000043153763, Learning Rate: 5.77292921661865e-05, Gradient Norm: 0.4291480779647827) +Step... (21450 | Loss: 0.04251234605908394, Learning Rate: 5.767878610640764e-05, Gradient Norm: 0.402444064617157) +Step... (21475 | Loss: 0.05318749323487282, Learning Rate: 5.7628283684607595e-05, Gradient Norm: 0.4376218020915985) +Step... (21500 | Loss: 0.04826676845550537, Learning Rate: 5.7577773986849934e-05, Gradient Norm: 0.38663220405578613) +Step... (21525 | Loss: 0.06910662353038788, Learning Rate: 5.752727156504989e-05, Gradient Norm: 0.44211477041244507) +Step... (21550 | Loss: 0.06470490247011185, Learning Rate: 5.747676914324984e-05, Gradient Norm: 0.4820682406425476) +Step... (21575 | Loss: 0.036486972123384476, Learning Rate: 5.742625944549218e-05, Gradient Norm: 0.3340790271759033) +Step... (21600 | Loss: 0.043834902346134186, Learning Rate: 5.737575702369213e-05, Gradient Norm: 0.40074360370635986) +Step... (21625 | Loss: 0.029955435544252396, Learning Rate: 5.7325254601892084e-05, Gradient Norm: 0.3754229247570038) +Step... (21650 | Loss: 0.039664458483457565, Learning Rate: 5.727474490413442e-05, Gradient Norm: 0.4904446005821228) +Step... (21675 | Loss: 0.04634978622198105, Learning Rate: 5.7224242482334375e-05, Gradient Norm: 0.40155136585235596) +Step... (21700 | Loss: 0.03797125443816185, Learning Rate: 5.717374006053433e-05, Gradient Norm: 0.35968825221061707) + Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [28:57:42<46:43:02, 21022.76s/it] + Training...: 95% 4152/4393 [5:36:53<15:55, 3.96s/it] + Training...: 95% 4153/4393 [5:36:53<19:20, 4.83s/it] + Training...: 95% 4154/4393 [5:36:59<20:53, 5.25s/it] + Training...: 95% 4155/4393 [5:37:05<21:41, 5.47s/it] + Training...: 95% 4156/4393 [5:37:11<22:10, 5.61s/it] + Training...: 95% 4157/4393 [5:37:17<22:25, 5.70s/it] + Training...: 95% 4158/4393 [5:37:23<22:33, 5.76s/it] + Training...: 95% 4159/4393 [5:37:29<22:45, 5.84s/it] + Training...: 95% 4160/4393 [5:37:35<22:49, 5.88s/it] + Training...: 95% 4161/4393 [5:37:41<22:50, 5.91s/it] + Training...: 95% 4162/4393 [5:37:47<22:44, 5.91s/it] + Training...: 95% 4163/4393 [5:37:53<22:43, 5.93s/it] + Training...: 95% 4164/4393 [5:37:59<22:29, 5.89s/it] + Training...: 95% 4165/4393 [5:38:05<22:19, 5.87s/it] + Training...: 95% 4166/4393 [5:38:10<22:06, 5.84s/it] + Training...: 95% 4167/4393 [5:38:16<22:00, 5.84s/it] + Training...: 95% 4168/4393 [5:38:22<21:46, 5.81s/it] + Training...: 95% 4169/4393 [5:38:28<21:37, 5.79s/it] + Training...: 95% 4170/4393 [5:38:33<21:16, 5.72s/it] + Training...: 95% 4171/4393 [5:38:39<20:58, 5.67s/it] + Training...: 95% 4172/4393 [5:38:44<20:29, 5.56s/it] + Training...: 95% 4173/4393 [5:38:49<20:07, 5.49s/it] + Training...: 95% 4174/4393 [5:38:55<19:46, 5.42s/it] + Training...: 95% 4175/4393 [5:39:00<19:30, 5.37s/it] + Training...: 95% 4176/4393 [5:39:05<19:12, 5.31s/it] + Training...: 95% 4177/4393 [5:39:10<18:58, 5.27s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:00:05<46:43:02, 21022.76s/it] + Training...: 95% 4177/4393 [5:39:16<18:58, 5.27s/it] + Training...: 95% 4178/4393 [5:39:16<19:35, 5.47s/it] + Training...: 95% 4179/4393 [5:39:21<18:58, 5.32s/it] + Training...: 95% 4180/4393 [5:39:26<18:27, 5.20s/it] + Training...: 95% 4181/4393 [5:39:31<18:02, 5.11s/it] + Training...: 95% 4182/4393 [5:39:36<17:40, 5.03s/it] + Training...: 95% 4183/4393 [5:39:41<17:16, 4.94s/it] + Training...: 95% 4184/4393 [5:39:45<16:51, 4.84s/it] + Training...: 95% 4185/4393 [5:39:50<16:26, 4.74s/it] + Training...: 95% 4186/4393 [5:39:54<16:01, 4.64s/it] + Training...: 95% 4187/4393 [5:39:58<15:33, 4.53s/it] + Training...: 95% 4188/4393 [5:40:03<15:02, 4.40s/it] + Training...: 95% 4189/4393 [5:40:07<14:31, 4.27s/it] + Training...: 95% 4190/4393 [5:40:10<13:56, 4.12s/it] + Training...: 95% 4191/4393 [5:40:14<13:19, 3.96s/it] + Training...: 95% 4192/4393 [5:40:17<12:34, 3.75s/it] + Training...: 95% 4193/4393 [5:40:20<11:37, 3.49s/it] + Training...: 95% 4194/4393 [5:40:23<10:39, 3.21s/it] + Training...: 95% 4195/4393 [5:40:25<09:43, 2.95s/it] + Training...: 96% 4196/4393 [5:40:27<08:47, 2.68s/it] + Training...: 96% 4197/4393 [5:40:29<07:52, 2.41s/it] + Training...: 96% 4198/4393 [5:40:30<06:59, 2.15s/it] + Training...: 96% 4199/4393 [5:40:32<06:07, 1.89s/it] + Training...: 96% 4200/4393 [5:40:33<05:15, 1.63s/it] + Training...: 96% 4201/4393 [5:40:39<09:38, 3.01s/it] + Training...: 96% 4202/4393 [5:40:45<12:43, 4.00s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:01:41<46:43:02, 21022.76s/it] + Training...: 96% 4202/4393 [5:40:52<12:43, 4.00s/it] + Training...: 96% 4203/4393 [5:40:52<15:22, 4.86s/it] + Training...: 96% 4204/4393 [5:40:58<16:22, 5.20s/it] + Training...: 96% 4205/4393 [5:41:04<17:03, 5.45s/it] + Training...: 96% 4206/4393 [5:41:10<17:24, 5.58s/it] + Training...: 96% 4207/4393 [5:41:16<17:39, 5.70s/it] + Training...: 96% 4208/4393 [5:41:22<17:46, 5.77s/it] + Training...: 96% 4209/4393 [5:41:28<17:53, 5.83s/it] + Training...: 96% 4210/4393 [5:41:34<17:49, 5.85s/it] + Training...: 96% 4211/4393 [5:41:40<17:45, 5.85s/it] + Training...: 96% 4212/4393 [5:41:45<17:32, 5.82s/it] + Training...: 96% 4213/4393 [5:41:51<17:22, 5.79s/it] + Training...: 96% 4214/4393 [5:41:57<17:14, 5.78s/it] + Training...: 96% 4215/4393 [5:42:03<17:17, 5.83s/it] + Training...: 96% 4216/4393 [5:42:08<17:01, 5.77s/it] + Training...: 96% 4217/4393 [5:42:14<16:45, 5.71s/it] + Training...: 96% 4218/4393 [5:42:19<16:28, 5.65s/it] + Training...: 96% 4219/4393 [5:42:25<16:18, 5.62s/it] + Training...: 96% 4220/4393 [5:42:30<16:05, 5.58s/it] + Training...: 96% 4221/4393 [5:42:36<15:53, 5.54s/it] + Training...: 96% 4222/4393 [5:42:41<15:38, 5.49s/it] + Training...: 96% 4223/4393 [5:42:47<15:26, 5.45s/it] + Training...: 96% 4224/4393 [5:42:52<15:16, 5.42s/it] + Training...: 96% 4225/4393 [5:42:57<15:07, 5.40s/it] + Training...: 96% 4226/4393 [5:43:03<14:52, 5.34s/it] + Training...: 96% 4227/4393 [5:43:08<14:38, 5.29s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:04:03<46:43:02, 21022.76s/it] + Training...: 96% 4227/4393 [5:43:14<14:38, 5.29s/it] + Training...: 96% 4228/4393 [5:43:14<14:58, 5.45s/it] + Training...: 96% 4229/4393 [5:43:18<14:30, 5.31s/it] + Training...: 96% 4230/4393 [5:43:23<14:07, 5.20s/it] + Training...: 96% 4231/4393 [5:43:28<13:46, 5.10s/it] + Training...: 96% 4232/4393 [5:43:33<13:27, 5.01s/it] + Training...: 96% 4233/4393 [5:43:38<13:08, 4.93s/it] + Training...: 96% 4234/4393 [5:43:42<12:48, 4.83s/it] + Training...: 96% 4235/4393 [5:43:47<12:37, 4.79s/it] + Training...: 96% 4236/4393 [5:43:52<12:17, 4.69s/it] + Training...: 96% 4237/4393 [5:43:56<11:50, 4.55s/it] + Training...: 96% 4238/4393 [5:44:00<11:21, 4.39s/it] + Training...: 96% 4239/4393 [5:44:04<10:51, 4.23s/it] + Training...: 97% 4240/4393 [5:44:07<10:18, 4.04s/it] + Training...: 97% 4241/4393 [5:44:11<09:46, 3.86s/it] + Training...: 97% 4242/4393 [5:44:14<09:11, 3.65s/it] + Training...: 97% 4243/4393 [5:44:17<08:32, 3.42s/it] + Training...: 97% 4244/4393 [5:44:19<07:55, 3.19s/it] + Training...: 97% 4245/4393 [5:44:22<07:13, 2.93s/it] + Training...: 97% 4246/4393 [5:44:24<06:34, 2.69s/it] + Training...: 97% 4247/4393 [5:44:26<05:53, 2.42s/it] + Training...: 97% 4248/4393 [5:44:27<05:14, 2.17s/it] + Training...: 97% 4249/4393 [5:44:29<04:33, 1.90s/it] + Training...: 97% 4250/4393 [5:44:30<03:53, 1.63s/it] + Training...: 97% 4251/4393 [5:44:36<07:10, 3.03s/it] + Training...: 97% 4252/4393 [5:44:42<09:20, 3.97s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:05:38<46:43:02, 21022.76s/it] + Training...: 97% 4252/4393 [5:44:49<09:20, 3.97s/it] + Training...: 97% 4253/4393 [5:44:49<11:18, 4.85s/it] + Training...: 97% 4254/4393 [5:44:55<12:02, 5.20s/it] + Training...: 97% 4255/4393 [5:45:01<12:34, 5.47s/it] + Training...: 97% 4256/4393 [5:45:07<12:54, 5.65s/it] + Training...: 97% 4257/4393 [5:45:13<13:01, 5.74s/it] + Training...: 97% 4258/4393 [5:45:19<12:59, 5.77s/it] + Training...: 97% 4259/4393 [5:45:25<12:58, 5.81s/it] + Training...: 97% 4260/4393 [5:45:31<12:52, 5.81s/it] + Training...: 97% 4261/4393 [5:45:36<12:48, 5.82s/it] + Training...: 97% 4262/4393 [5:45:42<12:38, 5.79s/it] + Training...: 97% 4263/4393 [5:45:48<12:31, 5.78s/it] + Training...: 97% 4264/4393 [5:45:54<12:26, 5.79s/it] + Training...: 97% 4265/4393 [5:46:00<12:21, 5.79s/it] + Training...: 97% 4266/4393 [5:46:05<12:09, 5.74s/it] + Training...: 97% 4267/4393 [5:46:11<11:56, 5.68s/it] + Training...: 97% 4268/4393 [5:46:16<11:45, 5.65s/it] + Training...: 97% 4269/4393 [5:46:22<11:33, 5.59s/it] + Training...: 97% 4270/4393 [5:46:27<11:24, 5.56s/it] + Training...: 97% 4271/4393 [5:46:33<11:15, 5.54s/it] + Training...: 97% 4272/4393 [5:46:38<11:07, 5.52s/it] + Training...: 97% 4273/4393 [5:46:44<10:58, 5.49s/it] + Training...: 97% 4274/4393 [5:46:49<10:45, 5.43s/it] + Training...: 97% 4275/4393 [5:46:54<10:35, 5.39s/it] + Training...: 97% 4276/4393 [5:46:59<10:24, 5.34s/it] + Training...: 97% 4277/4393 [5:47:05<10:14, 5.29s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:08:00<46:43:02, 21022.76s/it] + Training...: 97% 4277/4393 [5:47:10<10:14, 5.29s/it] + Training...: 97% 4278/4393 [5:47:10<10:28, 5.46s/it] + Training...: 97% 4279/4393 [5:47:16<10:09, 5.35s/it] + Training...: 97% 4280/4393 [5:47:20<09:49, 5.22s/it] + Training...: 97% 4281/4393 [5:47:25<09:34, 5.13s/it] + Training...: 97% 4282/4393 [5:47:30<09:18, 5.03s/it] + Training...: 97% 4283/4393 [5:47:35<09:05, 4.96s/it] + Training...: 98% 4284/4393 [5:47:40<08:49, 4.86s/it] + Training...: 98% 4285/4393 [5:47:44<08:35, 4.78s/it] + Training...: 98% 4286/4393 [5:47:49<08:19, 4.67s/it] + Training...: 98% 4287/4393 [5:47:53<08:02, 4.55s/it] + Training...: 98% 4288/4393 [5:47:57<07:43, 4.41s/it] + Training...: 98% 4289/4393 [5:48:01<07:24, 4.27s/it] + Training...: 98% 4290/4393 [5:48:05<07:02, 4.10s/it] + Training...: 98% 4291/4393 [5:48:08<06:41, 3.93s/it] + Training...: 98% 4292/4393 [5:48:11<06:15, 3.72s/it] + Training...: 98% 4293/4393 [5:48:14<05:48, 3.49s/it] + Training...: 98% 4294/4393 [5:48:17<05:21, 3.24s/it] + Training...: 98% 4295/4393 [5:48:19<04:52, 2.98s/it] + Training...: 98% 4296/4393 [5:48:21<04:24, 2.73s/it] + Training...: 98% 4297/4393 [5:48:23<03:57, 2.47s/it] + Training...: 98% 4298/4393 [5:48:25<03:30, 2.21s/it] + Training...: 98% 4299/4393 [5:48:26<03:03, 1.95s/it] + Training...: 98% 4300/4393 [5:48:27<02:36, 1.69s/it] + Training...: 98% 4301/4393 [5:48:34<04:40, 3.04s/it] + Training...: 98% 4302/4393 [5:48:40<06:02, 3.99s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:09:36<46:43:02, 21022.76s/it] + Training...: 98% 4302/4393 [5:48:47<06:02, 3.99s/it] + Training...: 98% 4303/4393 [5:48:47<07:16, 4.85s/it] + Training...: 98% 4304/4393 [5:48:53<07:41, 5.19s/it] + Training...: 98% 4305/4393 [5:48:59<07:56, 5.42s/it] + Training...: 98% 4306/4393 [5:49:04<08:03, 5.56s/it] + Training...: 98% 4307/4393 [5:49:10<08:08, 5.68s/it] + Training...: 98% 4308/4393 [5:49:16<08:07, 5.73s/it] + Training...: 98% 4309/4393 [5:49:22<08:04, 5.77s/it] + Training...: 98% 4310/4393 [5:49:28<07:58, 5.77s/it] + Training...: 98% 4311/4393 [5:49:34<07:55, 5.79s/it] + Training...: 98% 4312/4393 [5:49:40<07:49, 5.80s/it] + Training...: 98% 4313/4393 [5:49:46<07:48, 5.85s/it] + Training...: 98% 4314/4393 [5:49:51<07:37, 5.79s/it] + Training...: 98% 4315/4393 [5:49:57<07:27, 5.73s/it] + Training...: 98% 4316/4393 [5:50:02<07:17, 5.68s/it] + Training...: 98% 4317/4393 [5:50:08<07:08, 5.63s/it] + Training...: 98% 4318/4393 [5:50:13<06:58, 5.59s/it] + Training...: 98% 4319/4393 [5:50:19<06:51, 5.56s/it] + Training...: 98% 4320/4393 [5:50:24<06:42, 5.51s/it] + Training...: 98% 4321/4393 [5:50:30<06:34, 5.48s/it] + Training...: 98% 4322/4393 [5:50:35<06:25, 5.44s/it] + Training...: 98% 4323/4393 [5:50:40<06:18, 5.41s/it] + Training...: 98% 4324/4393 [5:50:46<06:09, 5.36s/it] + Training...: 98% 4325/4393 [5:50:51<06:00, 5.31s/it] + Training...: 98% 4326/4393 [5:50:56<05:52, 5.26s/it] + Training...: 98% 4327/4393 [5:51:01<05:45, 5.23s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:11:56<46:43:02, 21022.76s/it] + Training...: 98% 4327/4393 [5:51:07<05:45, 5.23s/it] + Training...: 99% 4328/4393 [5:51:07<05:51, 5.41s/it] + Training...: 99% 4329/4393 [5:51:12<05:39, 5.31s/it] + Training...: 99% 4330/4393 [5:51:17<05:27, 5.19s/it] + Training...: 99% 4331/4393 [5:51:22<05:18, 5.13s/it] + Training...: 99% 4332/4393 [5:51:27<05:09, 5.07s/it] + Training...: 99% 4333/4393 [5:51:32<04:57, 4.96s/it] + Training...: 99% 4334/4393 [5:51:36<04:45, 4.84s/it] + Training...: 99% 4335/4393 [5:51:41<04:34, 4.73s/it] + Training...: 99% 4336/4393 [5:51:45<04:22, 4.61s/it] + Training...: 99% 4337/4393 [5:51:49<04:10, 4.47s/it] + Training...: 99% 4338/4393 [5:51:53<03:57, 4.31s/it] + Training...: 99% 4339/4393 [5:51:57<03:43, 4.14s/it] + Training...: 99% 4340/4393 [5:52:00<03:29, 3.95s/it] + Training...: 99% 4341/4393 [5:52:03<03:15, 3.76s/it] + Training...: 99% 4342/4393 [5:52:07<03:00, 3.54s/it] + Training...: 99% 4343/4393 [5:52:09<02:46, 3.32s/it] + Training...: 99% 4344/4393 [5:52:12<02:30, 3.07s/it] + Training...: 99% 4345/4393 [5:52:14<02:15, 2.82s/it] + Training...: 99% 4346/4393 [5:52:16<02:00, 2.56s/it] + Training...: 99% 4347/4393 [5:52:18<01:45, 2.30s/it] + Training...: 99% 4348/4393 [5:52:19<01:32, 2.05s/it] + Training...: 99% 4349/4393 [5:52:20<01:19, 1.80s/it] + Training...: 99% 4350/4393 [5:52:21<01:07, 1.56s/it] + Training...: 99% 4351/4393 [5:52:27<02:02, 2.92s/it] + Training...: 99% 4352/4393 [5:52:34<02:39, 3.88s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:13:30<46:43:02, 21022.76s/it] + Training...: 99% 4352/4393 [5:52:40<02:39, 3.88s/it] + Training...: 99% 4353/4393 [5:52:40<03:10, 4.77s/it] + Training...: 99% 4354/4393 [5:52:46<03:19, 5.12s/it] + Training...: 99% 4355/4393 [5:52:52<03:23, 5.36s/it] + Training...: 99% 4356/4393 [5:52:58<03:23, 5.50s/it] + Training...: 99% 4357/4393 [5:53:04<03:22, 5.61s/it] + Training...: 99% 4358/4393 [5:53:10<03:18, 5.67s/it] + Training...: 99% 4359/4393 [5:53:16<03:13, 5.70s/it] + Training...: 99% 4360/4393 [5:53:21<03:08, 5.71s/it] + Training...: 99% 4361/4393 [5:53:27<03:02, 5.72s/it] + Training...: 99% 4362/4393 [5:53:33<02:56, 5.69s/it] + Training...: 99% 4363/4393 [5:53:39<02:51, 5.73s/it] + Training...: 99% 4364/4393 [5:53:44<02:47, 5.76s/it] + Training...: 99% 4365/4393 [5:53:50<02:40, 5.72s/it] + Training...: 99% 4366/4393 [5:53:55<02:32, 5.64s/it] + Training...: 99% 4367/4393 [5:54:01<02:25, 5.58s/it] + Training...: 99% 4368/4393 [5:54:06<02:18, 5.53s/it] + Training...: 99% 4369/4393 [5:54:12<02:11, 5.49s/it] + Training...: 99% 4370/4393 [5:54:17<02:04, 5.43s/it] + Training...: 99% 4371/4393 [5:54:22<01:58, 5.38s/it] + Training...: 100% 4372/4393 [5:54:27<01:52, 5.34s/it] + Training...: 100% 4373/4393 [5:54:33<01:46, 5.35s/it] + Training...: 100% 4374/4393 [5:54:38<01:40, 5.30s/it] + Training...: 100% 4375/4393 [5:54:43<01:34, 5.24s/it] + Training...: 100% 4376/4393 [5:54:48<01:27, 5.15s/it] + Training...: 100% 4377/4393 [5:54:53<01:21, 5.07s/it] +  Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 33% 4/12 [29:15:48<46:43:02, 21022.76s/it] + Training...: 100% 4377/4393 [5:54:59<01:21, 5.07s/it] + Training...: 100% 4378/4393 [5:54:59<01:18, 5.21s/it] + Training...: 100% 4379/4393 [5:55:03<01:10, 5.05s/it] + Training...: 100% 4380/4393 [5:55:08<01:03, 4.87s/it] + Training...: 100% 4381/4393 [5:55:12<00:56, 4.71s/it] + Training...: 100% 4382/4393 [5:55:16<00:49, 4.54s/it] + Training...: 100% 4383/4393 [5:55:20<00:43, 4.37s/it] + Training...: 100% 4384/4393 [5:55:24<00:37, 4.18s/it] + Training...: 100% 4385/4393 [5:55:27<00:31, 3.98s/it] + Training...: 100% 4386/4393 [5:55:30<00:26, 3.74s/it] + Training...: 100% 4387/4393 [5:55:33<00:20, 3.48s/it] + Training...: 100% 4388/4393 [5:55:36<00:16, 3.22s/it] + Training...: 100% 4389/4393 [5:55:38<00:11, 2.95s/it] + Training...: 100% 4390/4393 [5:55:40<00:07, 2.66s/it] + Training...: 100% 4391/4393 [5:55:42<00:04, 2.36s/it] + Training...: 100% 4392/4393 [5:55:43<00:02, 2.09s/it] + Training...: 100% 4393/4393 [5:55:45<00:00, 1.82s/it] Training...: 100% 4393/4393 [5:55:45<00:00, 4.86s/it] + Step... (20000/50000 | Eval Loss: 1.021510124206543 | Eval wer: 0.05054961214661226 | Eval cer: 0.0362100285658818 |): 42% 5/12 [29:16:34<41:06:17, 21139.65s/it] + Training...: 0% 0/4393 [00:00