ghofrani commited on
Commit
57c9bd6
1 Parent(s): 090dfb2

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +3417 -327
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.3502330183982849,
4
- "eval_runtime": 245.1864,
5
  "eval_samples": 5212,
6
- "eval_samples_per_second": 21.257,
7
- "eval_steps_per_second": 1.33,
8
- "eval_wer": 0.35502681314104234,
9
- "train_loss": 0.9013227667808533,
10
- "train_runtime": 60806.5864,
11
  "train_samples": 12806,
12
- "train_samples_per_second": 21.06,
13
- "train_steps_per_second": 0.082
14
  }
1
  {
2
+ "epoch": 200.0,
3
+ "eval_loss": 0.37058258056640625,
4
+ "eval_runtime": 246.1486,
5
  "eval_samples": 5212,
6
+ "eval_samples_per_second": 21.174,
7
+ "eval_steps_per_second": 1.324,
8
+ "eval_wer": 0.3420888217837247,
9
+ "train_loss": 0.48956193776130674,
10
+ "train_runtime": 78626.4431,
11
  "train_samples": 12806,
12
+ "train_samples_per_second": 32.574,
13
+ "train_steps_per_second": 0.127
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.3502330183982849,
4
- "eval_runtime": 245.1864,
5
  "eval_samples": 5212,
6
- "eval_samples_per_second": 21.257,
7
- "eval_steps_per_second": 1.33,
8
- "eval_wer": 0.35502681314104234
9
  }
1
  {
2
+ "epoch": 200.0,
3
+ "eval_loss": 0.37058258056640625,
4
+ "eval_runtime": 246.1486,
5
  "eval_samples": 5212,
6
+ "eval_samples_per_second": 21.174,
7
+ "eval_steps_per_second": 1.324,
8
+ "eval_wer": 0.3420888217837247
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 100.0,
3
- "train_loss": 0.9013227667808533,
4
- "train_runtime": 60806.5864,
5
  "train_samples": 12806,
6
- "train_samples_per_second": 21.06,
7
- "train_steps_per_second": 0.082
8
  }
1
  {
2
+ "epoch": 200.0,
3
+ "train_loss": 0.48956193776130674,
4
+ "train_runtime": 78626.4431,
5
  "train_samples": 12806,
6
+ "train_samples_per_second": 32.574,
7
+ "train_steps_per_second": 0.127
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 99.99750623441396,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2172,944 +2172,4034 @@
2172
  },
2173
  {
2174
  "epoch": 70.2,
2175
- "learning_rate": 1.8269387755102042e-05,
2176
- "loss": 0.8579,
2177
  "step": 3510
2178
  },
2179
  {
2180
  "epoch": 70.4,
2181
- "learning_rate": 1.8146938775510205e-05,
2182
- "loss": 0.8511,
2183
  "step": 3520
2184
  },
2185
  {
2186
  "epoch": 70.6,
2187
- "learning_rate": 1.8024489795918368e-05,
2188
- "loss": 0.8437,
2189
  "step": 3530
2190
  },
2191
  {
2192
  "epoch": 70.8,
2193
- "learning_rate": 1.790204081632653e-05,
2194
- "loss": 0.8526,
2195
  "step": 3540
2196
  },
2197
  {
2198
  "epoch": 71.0,
2199
- "learning_rate": 1.7779591836734694e-05,
2200
- "loss": 0.8014,
2201
  "step": 3550
2202
  },
2203
  {
2204
  "epoch": 71.2,
2205
- "learning_rate": 1.7657142857142857e-05,
2206
- "loss": 0.868,
2207
  "step": 3560
2208
  },
2209
  {
2210
  "epoch": 71.4,
2211
- "learning_rate": 1.753469387755102e-05,
2212
- "loss": 0.8531,
2213
  "step": 3570
2214
  },
2215
  {
2216
  "epoch": 71.6,
2217
- "learning_rate": 1.7412244897959183e-05,
2218
- "loss": 0.8555,
2219
  "step": 3580
2220
  },
2221
  {
2222
  "epoch": 71.8,
2223
- "learning_rate": 1.728979591836735e-05,
2224
- "loss": 0.8413,
2225
  "step": 3590
2226
  },
2227
  {
2228
  "epoch": 72.0,
2229
- "learning_rate": 1.716734693877551e-05,
2230
- "loss": 0.8216,
2231
  "step": 3600
2232
  },
2233
  {
2234
  "epoch": 72.2,
2235
- "learning_rate": 1.7044897959183676e-05,
2236
- "loss": 0.8611,
2237
  "step": 3610
2238
  },
2239
  {
2240
  "epoch": 72.4,
2241
- "learning_rate": 1.6922448979591835e-05,
2242
- "loss": 0.8393,
2243
  "step": 3620
2244
  },
2245
  {
2246
  "epoch": 72.6,
2247
- "learning_rate": 1.6800000000000002e-05,
2248
- "loss": 0.8229,
2249
  "step": 3630
2250
  },
2251
  {
2252
  "epoch": 72.8,
2253
- "learning_rate": 1.6677551020408165e-05,
2254
- "loss": 0.8375,
2255
  "step": 3640
2256
  },
2257
  {
2258
  "epoch": 73.0,
2259
- "learning_rate": 1.6555102040816325e-05,
2260
- "loss": 0.8119,
2261
  "step": 3650
2262
  },
2263
  {
2264
  "epoch": 73.2,
2265
- "learning_rate": 1.643265306122449e-05,
2266
- "loss": 0.8706,
2267
  "step": 3660
2268
  },
2269
  {
2270
  "epoch": 73.4,
2271
- "learning_rate": 1.6310204081632654e-05,
2272
- "loss": 0.829,
2273
  "step": 3670
2274
  },
2275
  {
2276
  "epoch": 73.6,
2277
- "learning_rate": 1.6187755102040817e-05,
2278
- "loss": 0.8246,
2279
  "step": 3680
2280
  },
2281
  {
2282
  "epoch": 73.8,
2283
- "learning_rate": 1.606530612244898e-05,
2284
- "loss": 0.8462,
2285
  "step": 3690
2286
  },
2287
  {
2288
  "epoch": 74.0,
2289
- "learning_rate": 1.5942857142857143e-05,
2290
- "loss": 0.8305,
2291
  "step": 3700
2292
  },
2293
  {
2294
  "epoch": 74.2,
2295
- "learning_rate": 1.5820408163265306e-05,
2296
- "loss": 0.8568,
2297
  "step": 3710
2298
  },
2299
  {
2300
  "epoch": 74.4,
2301
- "learning_rate": 1.569795918367347e-05,
2302
- "loss": 0.8283,
2303
  "step": 3720
2304
  },
2305
  {
2306
  "epoch": 74.6,
2307
- "learning_rate": 1.5575510204081632e-05,
2308
- "loss": 0.8205,
2309
  "step": 3730
2310
  },
2311
  {
2312
  "epoch": 74.8,
2313
- "learning_rate": 1.5453061224489795e-05,
2314
- "loss": 0.8301,
2315
  "step": 3740
2316
  },
2317
  {
2318
  "epoch": 75.0,
2319
- "learning_rate": 1.5330612244897962e-05,
2320
- "loss": 0.8168,
2321
  "step": 3750
2322
  },
2323
  {
2324
  "epoch": 75.2,
2325
- "learning_rate": 1.5208163265306121e-05,
2326
- "loss": 0.8503,
2327
  "step": 3760
2328
  },
2329
  {
2330
  "epoch": 75.4,
2331
- "learning_rate": 1.5085714285714288e-05,
2332
- "loss": 0.825,
2333
  "step": 3770
2334
  },
2335
  {
2336
  "epoch": 75.6,
2337
- "learning_rate": 1.496326530612245e-05,
2338
- "loss": 0.8255,
2339
  "step": 3780
2340
  },
2341
  {
2342
  "epoch": 75.8,
2343
- "learning_rate": 1.4840816326530612e-05,
2344
- "loss": 0.827,
2345
  "step": 3790
2346
  },
2347
  {
2348
  "epoch": 76.0,
2349
- "learning_rate": 1.4718367346938775e-05,
2350
- "loss": 0.7891,
2351
  "step": 3800
2352
  },
2353
  {
2354
  "epoch": 76.2,
2355
- "learning_rate": 1.4595918367346938e-05,
2356
- "loss": 0.8504,
2357
  "step": 3810
2358
  },
2359
  {
2360
  "epoch": 76.4,
2361
- "learning_rate": 1.4473469387755103e-05,
2362
- "loss": 0.8318,
2363
  "step": 3820
2364
  },
2365
  {
2366
  "epoch": 76.6,
2367
- "learning_rate": 1.4351020408163266e-05,
2368
- "loss": 0.8401,
2369
  "step": 3830
2370
  },
2371
  {
2372
  "epoch": 76.8,
2373
- "learning_rate": 1.422857142857143e-05,
2374
- "loss": 0.8284,
2375
  "step": 3840
2376
  },
2377
  {
2378
  "epoch": 77.0,
2379
- "learning_rate": 1.410612244897959e-05,
2380
- "loss": 0.801,
2381
  "step": 3850
2382
  },
2383
  {
2384
  "epoch": 77.2,
2385
- "learning_rate": 1.3983673469387755e-05,
2386
- "loss": 0.8628,
2387
  "step": 3860
2388
  },
2389
  {
2390
  "epoch": 77.4,
2391
- "learning_rate": 1.3861224489795918e-05,
2392
- "loss": 0.8339,
2393
  "step": 3870
2394
  },
2395
  {
2396
  "epoch": 77.6,
2397
- "learning_rate": 1.3738775510204082e-05,
2398
- "loss": 0.817,
2399
  "step": 3880
2400
  },
2401
  {
2402
  "epoch": 77.8,
2403
- "learning_rate": 1.3616326530612245e-05,
2404
- "loss": 0.8374,
2405
  "step": 3890
2406
  },
2407
  {
2408
  "epoch": 78.0,
2409
- "learning_rate": 1.349387755102041e-05,
2410
- "loss": 0.7855,
2411
  "step": 3900
2412
  },
2413
  {
2414
  "epoch": 78.2,
2415
- "learning_rate": 1.3371428571428572e-05,
2416
- "loss": 0.8433,
2417
  "step": 3910
2418
  },
2419
  {
2420
  "epoch": 78.4,
2421
- "learning_rate": 1.3248979591836735e-05,
2422
- "loss": 0.821,
2423
  "step": 3920
2424
  },
2425
  {
2426
  "epoch": 78.6,
2427
- "learning_rate": 1.3126530612244897e-05,
2428
- "loss": 0.8174,
2429
  "step": 3930
2430
  },
2431
  {
2432
  "epoch": 78.8,
2433
- "learning_rate": 1.3004081632653062e-05,
2434
- "loss": 0.8496,
2435
  "step": 3940
2436
  },
2437
  {
2438
  "epoch": 79.0,
2439
- "learning_rate": 1.2881632653061225e-05,
2440
- "loss": 0.7955,
2441
  "step": 3950
2442
  },
2443
  {
2444
  "epoch": 79.2,
2445
- "learning_rate": 1.2759183673469388e-05,
2446
- "loss": 0.8538,
2447
  "step": 3960
2448
  },
2449
  {
2450
  "epoch": 79.4,
2451
- "learning_rate": 1.263673469387755e-05,
2452
- "loss": 0.8431,
2453
  "step": 3970
2454
  },
2455
  {
2456
  "epoch": 79.6,
2457
- "learning_rate": 1.2514285714285715e-05,
2458
- "loss": 0.84,
2459
  "step": 3980
2460
  },
2461
  {
2462
  "epoch": 79.8,
2463
- "learning_rate": 1.2391836734693879e-05,
2464
- "loss": 0.8186,
2465
  "step": 3990
2466
  },
2467
  {
2468
  "epoch": 80.0,
2469
- "learning_rate": 1.2269387755102042e-05,
2470
- "loss": 0.8045,
2471
  "step": 4000
2472
  },
2473
  {
2474
  "epoch": 80.0,
2475
- "eval_loss": 0.3606957793235779,
2476
- "eval_runtime": 246.7059,
2477
- "eval_samples_per_second": 21.126,
2478
- "eval_steps_per_second": 1.321,
2479
- "eval_wer": 0.3621335971260478,
2480
  "step": 4000
2481
  },
2482
  {
2483
  "epoch": 80.2,
2484
- "learning_rate": 1.2146938775510205e-05,
2485
- "loss": 0.8382,
2486
  "step": 4010
2487
  },
2488
  {
2489
  "epoch": 80.4,
2490
- "learning_rate": 1.2024489795918368e-05,
2491
- "loss": 0.8248,
2492
  "step": 4020
2493
  },
2494
  {
2495
  "epoch": 80.6,
2496
- "learning_rate": 1.190204081632653e-05,
2497
- "loss": 0.8309,
2498
  "step": 4030
2499
  },
2500
  {
2501
  "epoch": 80.8,
2502
- "learning_rate": 1.1779591836734694e-05,
2503
- "loss": 0.8149,
2504
  "step": 4040
2505
  },
2506
  {
2507
  "epoch": 81.0,
2508
- "learning_rate": 1.1657142857142857e-05,
2509
- "loss": 0.806,
2510
  "step": 4050
2511
  },
2512
  {
2513
  "epoch": 81.2,
2514
- "learning_rate": 1.1534693877551022e-05,
2515
- "loss": 0.8475,
2516
  "step": 4060
2517
  },
2518
  {
2519
  "epoch": 81.4,
2520
- "learning_rate": 1.1412244897959185e-05,
2521
- "loss": 0.8478,
2522
  "step": 4070
2523
  },
2524
  {
2525
  "epoch": 81.6,
2526
- "learning_rate": 1.1289795918367348e-05,
2527
- "loss": 0.813,
2528
  "step": 4080
2529
  },
2530
  {
2531
  "epoch": 81.8,
2532
- "learning_rate": 1.116734693877551e-05,
2533
- "loss": 0.8231,
2534
  "step": 4090
2535
  },
2536
  {
2537
  "epoch": 82.0,
2538
- "learning_rate": 1.1044897959183672e-05,
2539
- "loss": 0.8105,
2540
  "step": 4100
2541
  },
2542
  {
2543
  "epoch": 82.2,
2544
- "learning_rate": 1.0922448979591837e-05,
2545
- "loss": 0.8554,
2546
  "step": 4110
2547
  },
2548
  {
2549
  "epoch": 82.4,
2550
- "learning_rate": 1.08e-05,
2551
- "loss": 0.8084,
2552
  "step": 4120
2553
  },
2554
  {
2555
  "epoch": 82.6,
2556
- "learning_rate": 1.0677551020408163e-05,
2557
- "loss": 0.803,
2558
  "step": 4130
2559
  },
2560
  {
2561
  "epoch": 82.8,
2562
- "learning_rate": 1.0555102040816326e-05,
2563
- "loss": 0.833,
2564
  "step": 4140
2565
  },
2566
  {
2567
  "epoch": 83.0,
2568
- "learning_rate": 1.043265306122449e-05,
2569
- "loss": 0.781,
2570
  "step": 4150
2571
  },
2572
  {
2573
  "epoch": 83.2,
2574
- "learning_rate": 1.0310204081632654e-05,
2575
- "loss": 0.8371,
2576
  "step": 4160
2577
  },
2578
  {
2579
  "epoch": 83.4,
2580
- "learning_rate": 1.0187755102040817e-05,
2581
- "loss": 0.8149,
2582
  "step": 4170
2583
  },
2584
  {
2585
  "epoch": 83.6,
2586
- "learning_rate": 1.006530612244898e-05,
2587
- "loss": 0.8383,
2588
  "step": 4180
2589
  },
2590
  {
2591
  "epoch": 83.8,
2592
- "learning_rate": 9.942857142857143e-06,
2593
- "loss": 0.8085,
2594
  "step": 4190
2595
  },
2596
  {
2597
  "epoch": 84.0,
2598
- "learning_rate": 9.820408163265306e-06,
2599
- "loss": 0.7775,
2600
  "step": 4200
2601
  },
2602
  {
2603
  "epoch": 84.2,
2604
- "learning_rate": 9.697959183673469e-06,
2605
- "loss": 0.8459,
2606
  "step": 4210
2607
  },
2608
  {
2609
  "epoch": 84.4,
2610
- "learning_rate": 9.575510204081632e-06,
2611
- "loss": 0.7982,
2612
  "step": 4220
2613
  },
2614
  {
2615
  "epoch": 84.6,
2616
- "learning_rate": 9.453061224489797e-06,
2617
- "loss": 0.8121,
2618
  "step": 4230
2619
  },
2620
  {
2621
  "epoch": 84.8,
2622
- "learning_rate": 9.33061224489796e-06,
2623
- "loss": 0.848,
2624
  "step": 4240
2625
  },
2626
  {
2627
  "epoch": 85.0,
2628
- "learning_rate": 9.208163265306123e-06,
2629
- "loss": 0.7948,
2630
  "step": 4250
2631
  },
2632
  {
2633
  "epoch": 85.2,
2634
- "learning_rate": 9.085714285714286e-06,
2635
- "loss": 0.8709,
2636
  "step": 4260
2637
  },
2638
  {
2639
  "epoch": 85.4,
2640
- "learning_rate": 8.963265306122449e-06,
2641
- "loss": 0.837,
2642
  "step": 4270
2643
  },
2644
  {
2645
  "epoch": 85.6,
2646
- "learning_rate": 8.840816326530612e-06,
2647
- "loss": 0.8078,
2648
  "step": 4280
2649
  },
2650
  {
2651
  "epoch": 85.8,
2652
- "learning_rate": 8.718367346938775e-06,
2653
- "loss": 0.7987,
2654
  "step": 4290
2655
  },
2656
  {
2657
  "epoch": 86.0,
2658
- "learning_rate": 8.595918367346938e-06,
2659
- "loss": 0.8008,
2660
  "step": 4300
2661
  },
2662
  {
2663
  "epoch": 86.2,
2664
- "learning_rate": 8.473469387755103e-06,
2665
- "loss": 0.8481,
2666
  "step": 4310
2667
  },
2668
  {
2669
  "epoch": 86.4,
2670
- "learning_rate": 8.351020408163266e-06,
2671
- "loss": 0.8025,
2672
  "step": 4320
2673
  },
2674
  {
2675
  "epoch": 86.6,
2676
- "learning_rate": 8.22857142857143e-06,
2677
- "loss": 0.8173,
2678
  "step": 4330
2679
  },
2680
  {
2681
  "epoch": 86.8,
2682
- "learning_rate": 8.106122448979592e-06,
2683
- "loss": 0.8162,
2684
  "step": 4340
2685
  },
2686
  {
2687
  "epoch": 87.0,
2688
- "learning_rate": 7.983673469387755e-06,
2689
- "loss": 0.7663,
2690
  "step": 4350
2691
  },
2692
  {
2693
  "epoch": 87.2,
2694
- "learning_rate": 7.861224489795918e-06,
2695
- "loss": 0.8386,
2696
  "step": 4360
2697
  },
2698
  {
2699
  "epoch": 87.4,
2700
- "learning_rate": 7.738775510204081e-06,
2701
- "loss": 0.845,
2702
  "step": 4370
2703
  },
2704
  {
2705
  "epoch": 87.6,
2706
- "learning_rate": 7.6163265306122444e-06,
2707
- "loss": 0.8114,
2708
  "step": 4380
2709
  },
2710
  {
2711
  "epoch": 87.8,
2712
- "learning_rate": 7.493877551020408e-06,
2713
- "loss": 0.8129,
2714
  "step": 4390
2715
  },
2716
  {
2717
  "epoch": 88.0,
2718
- "learning_rate": 7.371428571428571e-06,
2719
- "loss": 0.7757,
2720
  "step": 4400
2721
  },
2722
  {
2723
  "epoch": 88.2,
2724
- "learning_rate": 7.248979591836735e-06,
2725
- "loss": 0.847,
2726
  "step": 4410
2727
  },
2728
  {
2729
  "epoch": 88.4,
2730
- "learning_rate": 7.1265306122448975e-06,
2731
- "loss": 0.8315,
2732
  "step": 4420
2733
  },
2734
  {
2735
  "epoch": 88.6,
2736
- "learning_rate": 7.004081632653061e-06,
2737
- "loss": 0.8118,
2738
  "step": 4430
2739
  },
2740
  {
2741
  "epoch": 88.8,
2742
- "learning_rate": 6.8816326530612245e-06,
2743
- "loss": 0.8117,
2744
  "step": 4440
2745
  },
2746
  {
2747
  "epoch": 89.0,
2748
- "learning_rate": 6.759183673469388e-06,
2749
- "loss": 0.7848,
2750
  "step": 4450
2751
  },
2752
  {
2753
  "epoch": 89.2,
2754
- "learning_rate": 6.6367346938775506e-06,
2755
- "loss": 0.8463,
2756
  "step": 4460
2757
  },
2758
  {
2759
  "epoch": 89.4,
2760
- "learning_rate": 6.5142857142857145e-06,
2761
- "loss": 0.8147,
2762
  "step": 4470
2763
  },
2764
  {
2765
  "epoch": 89.6,
2766
- "learning_rate": 6.3918367346938775e-06,
2767
- "loss": 0.8026,
2768
  "step": 4480
2769
  },
2770
  {
2771
  "epoch": 89.8,
2772
- "learning_rate": 6.2693877551020414e-06,
2773
- "loss": 0.8148,
2774
  "step": 4490
2775
  },
2776
  {
2777
  "epoch": 90.0,
2778
- "learning_rate": 6.146938775510204e-06,
2779
- "loss": 0.7799,
2780
  "step": 4500
2781
  },
2782
  {
2783
  "epoch": 90.0,
2784
- "eval_loss": 0.3501129448413849,
2785
- "eval_runtime": 245.2219,
2786
- "eval_samples_per_second": 21.254,
2787
- "eval_steps_per_second": 1.329,
2788
- "eval_wer": 0.35609413234758164,
2789
  "step": 4500
2790
  },
2791
  {
2792
  "epoch": 90.2,
2793
- "learning_rate": 6.0244897959183675e-06,
2794
- "loss": 0.8452,
2795
  "step": 4510
2796
  },
2797
  {
2798
  "epoch": 90.4,
2799
- "learning_rate": 5.902040816326531e-06,
2800
- "loss": 0.8101,
2801
  "step": 4520
2802
  },
2803
  {
2804
  "epoch": 90.6,
2805
- "learning_rate": 5.7795918367346945e-06,
2806
- "loss": 0.8223,
2807
  "step": 4530
2808
  },
2809
  {
2810
  "epoch": 90.8,
2811
- "learning_rate": 5.6571428571428576e-06,
2812
- "loss": 0.813,
2813
  "step": 4540
2814
  },
2815
  {
2816
  "epoch": 91.0,
2817
- "learning_rate": 5.534693877551021e-06,
2818
- "loss": 0.7841,
2819
  "step": 4550
2820
  },
2821
  {
2822
  "epoch": 91.2,
2823
- "learning_rate": 5.412244897959184e-06,
2824
- "loss": 0.8426,
2825
  "step": 4560
2826
  },
2827
  {
2828
  "epoch": 91.4,
2829
- "learning_rate": 5.2897959183673476e-06,
2830
- "loss": 0.8095,
2831
  "step": 4570
2832
  },
2833
  {
2834
  "epoch": 91.6,
2835
- "learning_rate": 5.167346938775511e-06,
2836
- "loss": 0.8121,
2837
  "step": 4580
2838
  },
2839
  {
2840
  "epoch": 91.8,
2841
- "learning_rate": 5.044897959183674e-06,
2842
- "loss": 0.8114,
2843
  "step": 4590
2844
  },
2845
  {
2846
  "epoch": 92.0,
2847
- "learning_rate": 4.922448979591837e-06,
2848
- "loss": 0.7884,
2849
  "step": 4600
2850
  },
2851
  {
2852
  "epoch": 92.2,
2853
- "learning_rate": 4.800000000000001e-06,
2854
- "loss": 0.8417,
2855
  "step": 4610
2856
  },
2857
  {
2858
  "epoch": 92.4,
2859
- "learning_rate": 4.677551020408164e-06,
2860
- "loss": 0.8047,
2861
  "step": 4620
2862
  },
2863
  {
2864
  "epoch": 92.6,
2865
- "learning_rate": 4.555102040816326e-06,
2866
- "loss": 0.8168,
2867
  "step": 4630
2868
  },
2869
  {
2870
  "epoch": 92.8,
2871
- "learning_rate": 4.43265306122449e-06,
2872
- "loss": 0.8138,
2873
  "step": 4640
2874
  },
2875
  {
2876
  "epoch": 93.0,
2877
- "learning_rate": 4.310204081632653e-06,
2878
- "loss": 0.7879,
2879
  "step": 4650
2880
  },
2881
  {
2882
  "epoch": 93.2,
2883
- "learning_rate": 4.187755102040817e-06,
2884
- "loss": 0.8256,
2885
  "step": 4660
2886
  },
2887
  {
2888
  "epoch": 93.4,
2889
- "learning_rate": 4.065306122448979e-06,
2890
- "loss": 0.8187,
2891
  "step": 4670
2892
  },
2893
  {
2894
  "epoch": 93.6,
2895
- "learning_rate": 3.942857142857143e-06,
2896
- "loss": 0.8089,
2897
  "step": 4680
2898
  },
2899
  {
2900
  "epoch": 93.8,
2901
- "learning_rate": 3.820408163265306e-06,
2902
- "loss": 0.8059,
2903
  "step": 4690
2904
  },
2905
  {
2906
  "epoch": 94.0,
2907
- "learning_rate": 3.6979591836734694e-06,
2908
- "loss": 0.7852,
2909
  "step": 4700
2910
  },
2911
  {
2912
  "epoch": 94.2,
2913
- "learning_rate": 3.575510204081633e-06,
2914
- "loss": 0.846,
2915
  "step": 4710
2916
  },
2917
  {
2918
  "epoch": 94.4,
2919
- "learning_rate": 3.4530612244897963e-06,
2920
- "loss": 0.8134,
2921
  "step": 4720
2922
  },
2923
  {
2924
  "epoch": 94.6,
2925
- "learning_rate": 3.3306122448979594e-06,
2926
- "loss": 0.7982,
2927
  "step": 4730
2928
  },
2929
  {
2930
  "epoch": 94.8,
2931
- "learning_rate": 3.208163265306123e-06,
2932
- "loss": 0.8181,
2933
  "step": 4740
2934
  },
2935
  {
2936
  "epoch": 95.0,
2937
- "learning_rate": 3.085714285714286e-06,
2938
- "loss": 0.7856,
2939
  "step": 4750
2940
  },
2941
  {
2942
  "epoch": 95.2,
2943
- "learning_rate": 2.963265306122449e-06,
2944
- "loss": 0.8306,
2945
  "step": 4760
2946
  },
2947
  {
2948
  "epoch": 95.4,
2949
- "learning_rate": 2.840816326530612e-06,
2950
- "loss": 0.8227,
2951
  "step": 4770
2952
  },
2953
  {
2954
  "epoch": 95.6,
2955
- "learning_rate": 2.7183673469387755e-06,
2956
- "loss": 0.8109,
2957
  "step": 4780
2958
  },
2959
  {
2960
  "epoch": 95.8,
2961
- "learning_rate": 2.5959183673469386e-06,
2962
- "loss": 0.787,
2963
  "step": 4790
2964
  },
2965
  {
2966
  "epoch": 96.0,
2967
- "learning_rate": 2.473469387755102e-06,
2968
- "loss": 0.7832,
2969
  "step": 4800
2970
  },
2971
  {
2972
  "epoch": 96.2,
2973
- "learning_rate": 2.351020408163265e-06,
2974
- "loss": 0.8228,
2975
  "step": 4810
2976
  },
2977
  {
2978
  "epoch": 96.4,
2979
- "learning_rate": 2.2285714285714286e-06,
2980
- "loss": 0.8168,
2981
  "step": 4820
2982
  },
2983
  {
2984
  "epoch": 96.6,
2985
- "learning_rate": 2.1061224489795916e-06,
2986
- "loss": 0.8122,
2987
  "step": 4830
2988
  },
2989
  {
2990
  "epoch": 96.8,
2991
- "learning_rate": 1.983673469387755e-06,
2992
- "loss": 0.7942,
2993
  "step": 4840
2994
  },
2995
  {
2996
  "epoch": 97.0,
2997
- "learning_rate": 1.8612244897959184e-06,
2998
- "loss": 0.7948,
2999
  "step": 4850
3000
  },
3001
  {
3002
  "epoch": 97.2,
3003
- "learning_rate": 1.7387755102040817e-06,
3004
- "loss": 0.8036,
3005
  "step": 4860
3006
  },
3007
  {
3008
  "epoch": 97.4,
3009
- "learning_rate": 1.616326530612245e-06,
3010
- "loss": 0.815,
3011
  "step": 4870
3012
  },
3013
  {
3014
  "epoch": 97.6,
3015
- "learning_rate": 1.4938775510204082e-06,
3016
- "loss": 0.8111,
3017
  "step": 4880
3018
  },
3019
  {
3020
  "epoch": 97.8,
3021
- "learning_rate": 1.3714285714285715e-06,
3022
- "loss": 0.813,
3023
  "step": 4890
3024
  },
3025
  {
3026
  "epoch": 98.0,
3027
- "learning_rate": 1.2489795918367347e-06,
3028
- "loss": 0.7897,
3029
  "step": 4900
3030
  },
3031
  {
3032
  "epoch": 98.2,
3033
- "learning_rate": 1.126530612244898e-06,
3034
- "loss": 0.8249,
3035
  "step": 4910
3036
  },
3037
  {
3038
  "epoch": 98.4,
3039
- "learning_rate": 1.0040816326530613e-06,
3040
- "loss": 0.802,
3041
  "step": 4920
3042
  },
3043
  {
3044
  "epoch": 98.6,
3045
- "learning_rate": 8.816326530612244e-07,
3046
- "loss": 0.7912,
3047
  "step": 4930
3048
  },
3049
  {
3050
  "epoch": 98.8,
3051
- "learning_rate": 7.591836734693878e-07,
3052
- "loss": 0.8246,
3053
  "step": 4940
3054
  },
3055
  {
3056
  "epoch": 99.0,
3057
- "learning_rate": 6.367346938775511e-07,
3058
- "loss": 0.774,
3059
  "step": 4950
3060
  },
3061
  {
3062
  "epoch": 99.2,
3063
- "learning_rate": 5.142857142857143e-07,
3064
- "loss": 0.831,
3065
  "step": 4960
3066
  },
3067
  {
3068
  "epoch": 99.4,
3069
- "learning_rate": 3.918367346938776e-07,
3070
- "loss": 0.8066,
3071
  "step": 4970
3072
  },
3073
  {
3074
  "epoch": 99.6,
3075
- "learning_rate": 2.693877551020408e-07,
3076
- "loss": 0.8148,
3077
  "step": 4980
3078
  },
3079
  {
3080
  "epoch": 99.8,
3081
- "learning_rate": 1.4693877551020407e-07,
3082
- "loss": 0.8274,
3083
  "step": 4990
3084
  },
3085
  {
3086
  "epoch": 100.0,
3087
- "learning_rate": 2.448979591836735e-08,
3088
- "loss": 0.7769,
3089
  "step": 5000
3090
  },
3091
  {
3092
  "epoch": 100.0,
3093
- "eval_loss": 0.3502330183982849,
3094
- "eval_runtime": 245.5235,
3095
- "eval_samples_per_second": 21.228,
3096
- "eval_steps_per_second": 1.328,
3097
- "eval_wer": 0.35502681314104234,
3098
  "step": 5000
3099
  },
3100
  {
3101
- "epoch": 100.0,
3102
- "step": 5000,
3103
- "total_flos": 1.6395774173071445e+20,
3104
- "train_loss": 0.9013227667808533,
3105
- "train_runtime": 60806.5864,
3106
- "train_samples_per_second": 21.06,
3107
- "train_steps_per_second": 0.082
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3108
  }
3109
  ],
3110
- "max_steps": 5000,
3111
- "num_train_epochs": 100,
3112
- "total_flos": 1.6395774173071445e+20,
3113
  "trial_name": null,
3114
  "trial_params": null
3115
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 199.99750623441398,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
2172
  },
2173
  {
2174
  "epoch": 70.2,
2175
+ "learning_rate": 3.934545454545455e-05,
2176
+ "loss": 0.8492,
2177
  "step": 3510
2178
  },
2179
  {
2180
  "epoch": 70.4,
2181
+ "learning_rate": 3.928484848484849e-05,
2182
+ "loss": 0.8477,
2183
  "step": 3520
2184
  },
2185
  {
2186
  "epoch": 70.6,
2187
+ "learning_rate": 3.9224242424242426e-05,
2188
+ "loss": 0.8657,
2189
  "step": 3530
2190
  },
2191
  {
2192
  "epoch": 70.8,
2193
+ "learning_rate": 3.9163636363636364e-05,
2194
+ "loss": 0.8734,
2195
  "step": 3540
2196
  },
2197
  {
2198
  "epoch": 71.0,
2199
+ "learning_rate": 3.91030303030303e-05,
2200
+ "loss": 0.8092,
2201
  "step": 3550
2202
  },
2203
  {
2204
  "epoch": 71.2,
2205
+ "learning_rate": 3.904242424242424e-05,
2206
+ "loss": 0.8904,
2207
  "step": 3560
2208
  },
2209
  {
2210
  "epoch": 71.4,
2211
+ "learning_rate": 3.898181818181818e-05,
2212
+ "loss": 0.8585,
2213
  "step": 3570
2214
  },
2215
  {
2216
  "epoch": 71.6,
2217
+ "learning_rate": 3.892121212121212e-05,
2218
+ "loss": 0.8565,
2219
  "step": 3580
2220
  },
2221
  {
2222
  "epoch": 71.8,
2223
+ "learning_rate": 3.8860606060606056e-05,
2224
+ "loss": 0.8823,
2225
  "step": 3590
2226
  },
2227
  {
2228
  "epoch": 72.0,
2229
+ "learning_rate": 3.8799999999999994e-05,
2230
+ "loss": 0.8282,
2231
  "step": 3600
2232
  },
2233
  {
2234
  "epoch": 72.2,
2235
+ "learning_rate": 3.8739393939393946e-05,
2236
+ "loss": 0.8659,
2237
  "step": 3610
2238
  },
2239
  {
2240
  "epoch": 72.4,
2241
+ "learning_rate": 3.8678787878787885e-05,
2242
+ "loss": 0.8746,
2243
  "step": 3620
2244
  },
2245
  {
2246
  "epoch": 72.6,
2247
+ "learning_rate": 3.861818181818182e-05,
2248
+ "loss": 0.8536,
2249
  "step": 3630
2250
  },
2251
  {
2252
  "epoch": 72.8,
2253
+ "learning_rate": 3.855757575757576e-05,
2254
+ "loss": 0.8535,
2255
  "step": 3640
2256
  },
2257
  {
2258
  "epoch": 73.0,
2259
+ "learning_rate": 3.84969696969697e-05,
2260
+ "loss": 0.8332,
2261
  "step": 3650
2262
  },
2263
  {
2264
  "epoch": 73.2,
2265
+ "learning_rate": 3.843636363636364e-05,
2266
+ "loss": 0.8779,
2267
  "step": 3660
2268
  },
2269
  {
2270
  "epoch": 73.4,
2271
+ "learning_rate": 3.8375757575757576e-05,
2272
+ "loss": 0.85,
2273
  "step": 3670
2274
  },
2275
  {
2276
  "epoch": 73.6,
2277
+ "learning_rate": 3.8315151515151515e-05,
2278
+ "loss": 0.8533,
2279
  "step": 3680
2280
  },
2281
  {
2282
  "epoch": 73.8,
2283
+ "learning_rate": 3.825454545454545e-05,
2284
+ "loss": 0.8542,
2285
  "step": 3690
2286
  },
2287
  {
2288
  "epoch": 74.0,
2289
+ "learning_rate": 3.819393939393939e-05,
2290
+ "loss": 0.8341,
2291
  "step": 3700
2292
  },
2293
  {
2294
  "epoch": 74.2,
2295
+ "learning_rate": 3.8133333333333336e-05,
2296
+ "loss": 0.8832,
2297
  "step": 3710
2298
  },
2299
  {
2300
  "epoch": 74.4,
2301
+ "learning_rate": 3.8072727272727275e-05,
2302
+ "loss": 0.8531,
2303
  "step": 3720
2304
  },
2305
  {
2306
  "epoch": 74.6,
2307
+ "learning_rate": 3.801212121212121e-05,
2308
+ "loss": 0.8413,
2309
  "step": 3730
2310
  },
2311
  {
2312
  "epoch": 74.8,
2313
+ "learning_rate": 3.795151515151515e-05,
2314
+ "loss": 0.8696,
2315
  "step": 3740
2316
  },
2317
  {
2318
  "epoch": 75.0,
2319
+ "learning_rate": 3.789090909090909e-05,
2320
+ "loss": 0.8196,
2321
  "step": 3750
2322
  },
2323
  {
2324
  "epoch": 75.2,
2325
+ "learning_rate": 3.783030303030303e-05,
2326
+ "loss": 0.8754,
2327
  "step": 3760
2328
  },
2329
  {
2330
  "epoch": 75.4,
2331
+ "learning_rate": 3.7769696969696966e-05,
2332
+ "loss": 0.8501,
2333
  "step": 3770
2334
  },
2335
  {
2336
  "epoch": 75.6,
2337
+ "learning_rate": 3.770909090909091e-05,
2338
+ "loss": 0.8606,
2339
  "step": 3780
2340
  },
2341
  {
2342
  "epoch": 75.8,
2343
+ "learning_rate": 3.764848484848485e-05,
2344
+ "loss": 0.836,
2345
  "step": 3790
2346
  },
2347
  {
2348
  "epoch": 76.0,
2349
+ "learning_rate": 3.758787878787879e-05,
2350
+ "loss": 0.814,
2351
  "step": 3800
2352
  },
2353
  {
2354
  "epoch": 76.2,
2355
+ "learning_rate": 3.752727272727273e-05,
2356
+ "loss": 0.8881,
2357
  "step": 3810
2358
  },
2359
  {
2360
  "epoch": 76.4,
2361
+ "learning_rate": 3.746666666666667e-05,
2362
+ "loss": 0.8602,
2363
  "step": 3820
2364
  },
2365
  {
2366
  "epoch": 76.6,
2367
+ "learning_rate": 3.740606060606061e-05,
2368
+ "loss": 0.8663,
2369
  "step": 3830
2370
  },
2371
  {
2372
  "epoch": 76.8,
2373
+ "learning_rate": 3.734545454545455e-05,
2374
+ "loss": 0.8413,
2375
  "step": 3840
2376
  },
2377
  {
2378
  "epoch": 77.0,
2379
+ "learning_rate": 3.7284848484848487e-05,
2380
+ "loss": 0.8251,
2381
  "step": 3850
2382
  },
2383
  {
2384
  "epoch": 77.2,
2385
+ "learning_rate": 3.7224242424242425e-05,
2386
+ "loss": 0.8757,
2387
  "step": 3860
2388
  },
2389
  {
2390
  "epoch": 77.4,
2391
+ "learning_rate": 3.716363636363636e-05,
2392
+ "loss": 0.8527,
2393
  "step": 3870
2394
  },
2395
  {
2396
  "epoch": 77.6,
2397
+ "learning_rate": 3.71030303030303e-05,
2398
+ "loss": 0.8572,
2399
  "step": 3880
2400
  },
2401
  {
2402
  "epoch": 77.8,
2403
+ "learning_rate": 3.704848484848485e-05,
2404
+ "loss": 0.8577,
2405
  "step": 3890
2406
  },
2407
  {
2408
  "epoch": 78.0,
2409
+ "learning_rate": 3.698787878787879e-05,
2410
+ "loss": 0.8164,
2411
  "step": 3900
2412
  },
2413
  {
2414
  "epoch": 78.2,
2415
+ "learning_rate": 3.692727272727273e-05,
2416
+ "loss": 0.8743,
2417
  "step": 3910
2418
  },
2419
  {
2420
  "epoch": 78.4,
2421
+ "learning_rate": 3.686666666666667e-05,
2422
+ "loss": 0.8553,
2423
  "step": 3920
2424
  },
2425
  {
2426
  "epoch": 78.6,
2427
+ "learning_rate": 3.680606060606061e-05,
2428
+ "loss": 0.8113,
2429
  "step": 3930
2430
  },
2431
  {
2432
  "epoch": 78.8,
2433
+ "learning_rate": 3.674545454545455e-05,
2434
+ "loss": 0.867,
2435
  "step": 3940
2436
  },
2437
  {
2438
  "epoch": 79.0,
2439
+ "learning_rate": 3.6684848484848486e-05,
2440
+ "loss": 0.8343,
2441
  "step": 3950
2442
  },
2443
  {
2444
  "epoch": 79.2,
2445
+ "learning_rate": 3.6624242424242424e-05,
2446
+ "loss": 0.8653,
2447
  "step": 3960
2448
  },
2449
  {
2450
  "epoch": 79.4,
2451
+ "learning_rate": 3.656363636363636e-05,
2452
+ "loss": 0.8443,
2453
  "step": 3970
2454
  },
2455
  {
2456
  "epoch": 79.6,
2457
+ "learning_rate": 3.65030303030303e-05,
2458
+ "loss": 0.8488,
2459
  "step": 3980
2460
  },
2461
  {
2462
  "epoch": 79.8,
2463
+ "learning_rate": 3.644242424242424e-05,
2464
+ "loss": 0.8573,
2465
  "step": 3990
2466
  },
2467
  {
2468
  "epoch": 80.0,
2469
+ "learning_rate": 3.638181818181818e-05,
2470
+ "loss": 0.8104,
2471
  "step": 4000
2472
  },
2473
  {
2474
  "epoch": 80.0,
2475
+ "eval_loss": 0.35952043533325195,
2476
+ "eval_runtime": 247.7629,
2477
+ "eval_samples_per_second": 21.036,
2478
+ "eval_steps_per_second": 1.316,
2479
+ "eval_wer": 0.36596032696412767,
2480
  "step": 4000
2481
  },
2482
  {
2483
  "epoch": 80.2,
2484
+ "learning_rate": 3.632121212121212e-05,
2485
+ "loss": 0.8759,
2486
  "step": 4010
2487
  },
2488
  {
2489
  "epoch": 80.4,
2490
+ "learning_rate": 3.626060606060606e-05,
2491
+ "loss": 0.835,
2492
  "step": 4020
2493
  },
2494
  {
2495
  "epoch": 80.6,
2496
+ "learning_rate": 3.6200000000000006e-05,
2497
+ "loss": 0.8466,
2498
  "step": 4030
2499
  },
2500
  {
2501
  "epoch": 80.8,
2502
+ "learning_rate": 3.6139393939393944e-05,
2503
+ "loss": 0.8545,
2504
  "step": 4040
2505
  },
2506
  {
2507
  "epoch": 81.0,
2508
+ "learning_rate": 3.607878787878788e-05,
2509
+ "loss": 0.8075,
2510
  "step": 4050
2511
  },
2512
  {
2513
  "epoch": 81.2,
2514
+ "learning_rate": 3.601818181818182e-05,
2515
+ "loss": 0.8679,
2516
  "step": 4060
2517
  },
2518
  {
2519
  "epoch": 81.4,
2520
+ "learning_rate": 3.595757575757576e-05,
2521
+ "loss": 0.8332,
2522
  "step": 4070
2523
  },
2524
  {
2525
  "epoch": 81.6,
2526
+ "learning_rate": 3.58969696969697e-05,
2527
+ "loss": 0.85,
2528
  "step": 4080
2529
  },
2530
  {
2531
  "epoch": 81.8,
2532
+ "learning_rate": 3.5836363636363636e-05,
2533
+ "loss": 0.861,
2534
  "step": 4090
2535
  },
2536
  {
2537
  "epoch": 82.0,
2538
+ "learning_rate": 3.5775757575757574e-05,
2539
+ "loss": 0.8356,
2540
  "step": 4100
2541
  },
2542
  {
2543
  "epoch": 82.2,
2544
+ "learning_rate": 3.571515151515152e-05,
2545
+ "loss": 0.8513,
2546
  "step": 4110
2547
  },
2548
  {
2549
  "epoch": 82.4,
2550
+ "learning_rate": 3.565454545454546e-05,
2551
+ "loss": 0.8467,
2552
  "step": 4120
2553
  },
2554
  {
2555
  "epoch": 82.6,
2556
+ "learning_rate": 3.5593939393939396e-05,
2557
+ "loss": 0.8141,
2558
  "step": 4130
2559
  },
2560
  {
2561
  "epoch": 82.8,
2562
+ "learning_rate": 3.5533333333333334e-05,
2563
+ "loss": 0.8386,
2564
  "step": 4140
2565
  },
2566
  {
2567
  "epoch": 83.0,
2568
+ "learning_rate": 3.547272727272727e-05,
2569
+ "loss": 0.8169,
2570
  "step": 4150
2571
  },
2572
  {
2573
  "epoch": 83.2,
2574
+ "learning_rate": 3.541212121212121e-05,
2575
+ "loss": 0.8609,
2576
  "step": 4160
2577
  },
2578
  {
2579
  "epoch": 83.4,
2580
+ "learning_rate": 3.535151515151515e-05,
2581
+ "loss": 0.8391,
2582
  "step": 4170
2583
  },
2584
  {
2585
  "epoch": 83.6,
2586
+ "learning_rate": 3.529090909090909e-05,
2587
+ "loss": 0.8345,
2588
  "step": 4180
2589
  },
2590
  {
2591
  "epoch": 83.8,
2592
+ "learning_rate": 3.5230303030303026e-05,
2593
+ "loss": 0.8368,
2594
  "step": 4190
2595
  },
2596
  {
2597
  "epoch": 84.0,
2598
+ "learning_rate": 3.516969696969697e-05,
2599
+ "loss": 0.8145,
2600
  "step": 4200
2601
  },
2602
  {
2603
  "epoch": 84.2,
2604
+ "learning_rate": 3.5109090909090916e-05,
2605
+ "loss": 0.8446,
2606
  "step": 4210
2607
  },
2608
  {
2609
  "epoch": 84.4,
2610
+ "learning_rate": 3.5048484848484854e-05,
2611
+ "loss": 0.8176,
2612
  "step": 4220
2613
  },
2614
  {
2615
  "epoch": 84.6,
2616
+ "learning_rate": 3.498787878787879e-05,
2617
+ "loss": 0.8356,
2618
  "step": 4230
2619
  },
2620
  {
2621
  "epoch": 84.8,
2622
+ "learning_rate": 3.492727272727273e-05,
2623
+ "loss": 0.8251,
2624
  "step": 4240
2625
  },
2626
  {
2627
  "epoch": 85.0,
2628
+ "learning_rate": 3.486666666666667e-05,
2629
+ "loss": 0.8151,
2630
  "step": 4250
2631
  },
2632
  {
2633
  "epoch": 85.2,
2634
+ "learning_rate": 3.480606060606061e-05,
2635
+ "loss": 0.8549,
2636
  "step": 4260
2637
  },
2638
  {
2639
  "epoch": 85.4,
2640
+ "learning_rate": 3.4745454545454546e-05,
2641
+ "loss": 0.8515,
2642
  "step": 4270
2643
  },
2644
  {
2645
  "epoch": 85.6,
2646
+ "learning_rate": 3.4684848484848484e-05,
2647
+ "loss": 0.8255,
2648
  "step": 4280
2649
  },
2650
  {
2651
  "epoch": 85.8,
2652
+ "learning_rate": 3.462424242424242e-05,
2653
+ "loss": 0.8325,
2654
  "step": 4290
2655
  },
2656
  {
2657
  "epoch": 86.0,
2658
+ "learning_rate": 3.456363636363636e-05,
2659
+ "loss": 0.8162,
2660
  "step": 4300
2661
  },
2662
  {
2663
  "epoch": 86.2,
2664
+ "learning_rate": 3.4503030303030306e-05,
2665
+ "loss": 0.8628,
2666
  "step": 4310
2667
  },
2668
  {
2669
  "epoch": 86.4,
2670
+ "learning_rate": 3.4442424242424244e-05,
2671
+ "loss": 0.8125,
2672
  "step": 4320
2673
  },
2674
  {
2675
  "epoch": 86.6,
2676
+ "learning_rate": 3.438181818181818e-05,
2677
+ "loss": 0.8436,
2678
  "step": 4330
2679
  },
2680
  {
2681
  "epoch": 86.8,
2682
+ "learning_rate": 3.432121212121212e-05,
2683
+ "loss": 0.8343,
2684
  "step": 4340
2685
  },
2686
  {
2687
  "epoch": 87.0,
2688
+ "learning_rate": 3.426060606060606e-05,
2689
+ "loss": 0.8063,
2690
  "step": 4350
2691
  },
2692
  {
2693
  "epoch": 87.2,
2694
+ "learning_rate": 3.42e-05,
2695
+ "loss": 0.8492,
2696
  "step": 4360
2697
  },
2698
  {
2699
  "epoch": 87.4,
2700
+ "learning_rate": 3.4139393939393936e-05,
2701
+ "loss": 0.8393,
2702
  "step": 4370
2703
  },
2704
  {
2705
  "epoch": 87.6,
2706
+ "learning_rate": 3.407878787878788e-05,
2707
+ "loss": 0.827,
2708
  "step": 4380
2709
  },
2710
  {
2711
  "epoch": 87.8,
2712
+ "learning_rate": 3.401818181818182e-05,
2713
+ "loss": 0.8371,
2714
  "step": 4390
2715
  },
2716
  {
2717
  "epoch": 88.0,
2718
+ "learning_rate": 3.395757575757576e-05,
2719
+ "loss": 0.8047,
2720
  "step": 4400
2721
  },
2722
  {
2723
  "epoch": 88.2,
2724
+ "learning_rate": 3.3896969696969696e-05,
2725
+ "loss": 0.8506,
2726
  "step": 4410
2727
  },
2728
  {
2729
  "epoch": 88.4,
2730
+ "learning_rate": 3.383636363636364e-05,
2731
+ "loss": 0.8203,
2732
  "step": 4420
2733
  },
2734
  {
2735
  "epoch": 88.6,
2736
+ "learning_rate": 3.377575757575758e-05,
2737
+ "loss": 0.8127,
2738
  "step": 4430
2739
  },
2740
  {
2741
  "epoch": 88.8,
2742
+ "learning_rate": 3.371515151515152e-05,
2743
+ "loss": 0.8318,
2744
  "step": 4440
2745
  },
2746
  {
2747
  "epoch": 89.0,
2748
+ "learning_rate": 3.3654545454545456e-05,
2749
+ "loss": 0.801,
2750
  "step": 4450
2751
  },
2752
  {
2753
  "epoch": 89.2,
2754
+ "learning_rate": 3.3593939393939395e-05,
2755
+ "loss": 0.8433,
2756
  "step": 4460
2757
  },
2758
  {
2759
  "epoch": 89.4,
2760
+ "learning_rate": 3.353333333333333e-05,
2761
+ "loss": 0.8188,
2762
  "step": 4470
2763
  },
2764
  {
2765
  "epoch": 89.6,
2766
+ "learning_rate": 3.347272727272727e-05,
2767
+ "loss": 0.8296,
2768
  "step": 4480
2769
  },
2770
  {
2771
  "epoch": 89.8,
2772
+ "learning_rate": 3.341212121212121e-05,
2773
+ "loss": 0.8313,
2774
  "step": 4490
2775
  },
2776
  {
2777
  "epoch": 90.0,
2778
+ "learning_rate": 3.335151515151515e-05,
2779
+ "loss": 0.8118,
2780
  "step": 4500
2781
  },
2782
  {
2783
  "epoch": 90.0,
2784
+ "eval_loss": 0.3459583818912506,
2785
+ "eval_runtime": 243.3931,
2786
+ "eval_samples_per_second": 21.414,
2787
+ "eval_steps_per_second": 1.339,
2788
+ "eval_wer": 0.3591659290883532,
2789
  "step": 4500
2790
  },
2791
  {
2792
  "epoch": 90.2,
2793
+ "learning_rate": 3.3290909090909086e-05,
2794
+ "loss": 0.8586,
2795
  "step": 4510
2796
  },
2797
  {
2798
  "epoch": 90.4,
2799
+ "learning_rate": 3.323030303030303e-05,
2800
+ "loss": 0.8303,
2801
  "step": 4520
2802
  },
2803
  {
2804
  "epoch": 90.6,
2805
+ "learning_rate": 3.316969696969697e-05,
2806
+ "loss": 0.8199,
2807
  "step": 4530
2808
  },
2809
  {
2810
  "epoch": 90.8,
2811
+ "learning_rate": 3.310909090909091e-05,
2812
+ "loss": 0.8272,
2813
  "step": 4540
2814
  },
2815
  {
2816
  "epoch": 91.0,
2817
+ "learning_rate": 3.304848484848485e-05,
2818
+ "loss": 0.7925,
2819
  "step": 4550
2820
  },
2821
  {
2822
  "epoch": 91.2,
2823
+ "learning_rate": 3.298787878787879e-05,
2824
+ "loss": 0.8542,
2825
  "step": 4560
2826
  },
2827
  {
2828
  "epoch": 91.4,
2829
+ "learning_rate": 3.292727272727273e-05,
2830
+ "loss": 0.8222,
2831
  "step": 4570
2832
  },
2833
  {
2834
  "epoch": 91.6,
2835
+ "learning_rate": 3.286666666666667e-05,
2836
+ "loss": 0.7901,
2837
  "step": 4580
2838
  },
2839
  {
2840
  "epoch": 91.8,
2841
+ "learning_rate": 3.2806060606060607e-05,
2842
+ "loss": 0.8227,
2843
  "step": 4590
2844
  },
2845
  {
2846
  "epoch": 92.0,
2847
+ "learning_rate": 3.2745454545454545e-05,
2848
+ "loss": 0.7861,
2849
  "step": 4600
2850
  },
2851
  {
2852
  "epoch": 92.2,
2853
+ "learning_rate": 3.268484848484848e-05,
2854
+ "loss": 0.8382,
2855
  "step": 4610
2856
  },
2857
  {
2858
  "epoch": 92.4,
2859
+ "learning_rate": 3.262424242424243e-05,
2860
+ "loss": 0.8279,
2861
  "step": 4620
2862
  },
2863
  {
2864
  "epoch": 92.6,
2865
+ "learning_rate": 3.256363636363637e-05,
2866
+ "loss": 0.8148,
2867
  "step": 4630
2868
  },
2869
  {
2870
  "epoch": 92.8,
2871
+ "learning_rate": 3.2503030303030305e-05,
2872
+ "loss": 0.8054,
2873
  "step": 4640
2874
  },
2875
  {
2876
  "epoch": 93.0,
2877
+ "learning_rate": 3.244242424242424e-05,
2878
+ "loss": 0.8003,
2879
  "step": 4650
2880
  },
2881
  {
2882
  "epoch": 93.2,
2883
+ "learning_rate": 3.238181818181818e-05,
2884
+ "loss": 0.8252,
2885
  "step": 4660
2886
  },
2887
  {
2888
  "epoch": 93.4,
2889
+ "learning_rate": 3.232121212121212e-05,
2890
+ "loss": 0.8,
2891
  "step": 4670
2892
  },
2893
  {
2894
  "epoch": 93.6,
2895
+ "learning_rate": 3.226060606060606e-05,
2896
+ "loss": 0.811,
2897
  "step": 4680
2898
  },
2899
  {
2900
  "epoch": 93.8,
2901
+ "learning_rate": 3.22e-05,
2902
+ "loss": 0.8202,
2903
  "step": 4690
2904
  },
2905
  {
2906
  "epoch": 94.0,
2907
+ "learning_rate": 3.2139393939393935e-05,
2908
+ "loss": 0.7872,
2909
  "step": 4700
2910
  },
2911
  {
2912
  "epoch": 94.2,
2913
+ "learning_rate": 3.207878787878787e-05,
2914
+ "loss": 0.8567,
2915
  "step": 4710
2916
  },
2917
  {
2918
  "epoch": 94.4,
2919
+ "learning_rate": 3.2018181818181825e-05,
2920
+ "loss": 0.8226,
2921
  "step": 4720
2922
  },
2923
  {
2924
  "epoch": 94.6,
2925
+ "learning_rate": 3.1957575757575764e-05,
2926
+ "loss": 0.7977,
2927
  "step": 4730
2928
  },
2929
  {
2930
  "epoch": 94.8,
2931
+ "learning_rate": 3.18969696969697e-05,
2932
+ "loss": 0.8119,
2933
  "step": 4740
2934
  },
2935
  {
2936
  "epoch": 95.0,
2937
+ "learning_rate": 3.183636363636364e-05,
2938
+ "loss": 0.7972,
2939
  "step": 4750
2940
  },
2941
  {
2942
  "epoch": 95.2,
2943
+ "learning_rate": 3.177575757575758e-05,
2944
+ "loss": 0.8297,
2945
  "step": 4760
2946
  },
2947
  {
2948
  "epoch": 95.4,
2949
+ "learning_rate": 3.171515151515152e-05,
2950
+ "loss": 0.8184,
2951
  "step": 4770
2952
  },
2953
  {
2954
  "epoch": 95.6,
2955
+ "learning_rate": 3.1654545454545455e-05,
2956
+ "loss": 0.8319,
2957
  "step": 4780
2958
  },
2959
  {
2960
  "epoch": 95.8,
2961
+ "learning_rate": 3.1593939393939394e-05,
2962
+ "loss": 0.7927,
2963
  "step": 4790
2964
  },
2965
  {
2966
  "epoch": 96.0,
2967
+ "learning_rate": 3.153333333333333e-05,
2968
+ "loss": 0.781,
2969
  "step": 4800
2970
  },
2971
  {
2972
  "epoch": 96.2,
2973
+ "learning_rate": 3.147272727272727e-05,
2974
+ "loss": 0.8324,
2975
  "step": 4810
2976
  },
2977
  {
2978
  "epoch": 96.4,
2979
+ "learning_rate": 3.1412121212121215e-05,
2980
+ "loss": 0.8089,
2981
  "step": 4820
2982
  },
2983
  {
2984
  "epoch": 96.6,
2985
+ "learning_rate": 3.1351515151515154e-05,
2986
+ "loss": 0.8066,
2987
  "step": 4830
2988
  },
2989
  {
2990
  "epoch": 96.8,
2991
+ "learning_rate": 3.129090909090909e-05,
2992
+ "loss": 0.8038,
2993
  "step": 4840
2994
  },
2995
  {
2996
  "epoch": 97.0,
2997
+ "learning_rate": 3.123030303030303e-05,
2998
+ "loss": 0.7848,
2999
  "step": 4850
3000
  },
3001
  {
3002
  "epoch": 97.2,
3003
+ "learning_rate": 3.116969696969697e-05,
3004
+ "loss": 0.7951,
3005
  "step": 4860
3006
  },
3007
  {
3008
  "epoch": 97.4,
3009
+ "learning_rate": 3.110909090909091e-05,
3010
+ "loss": 0.8023,
3011
  "step": 4870
3012
  },
3013
  {
3014
  "epoch": 97.6,
3015
+ "learning_rate": 3.1048484848484845e-05,
3016
+ "loss": 0.8058,
3017
  "step": 4880
3018
  },
3019
  {
3020
  "epoch": 97.8,
3021
+ "learning_rate": 3.098787878787879e-05,
3022
+ "loss": 0.8003,
3023
  "step": 4890
3024
  },
3025
  {
3026
  "epoch": 98.0,
3027
+ "learning_rate": 3.092727272727273e-05,
3028
+ "loss": 0.7844,
3029
  "step": 4900
3030
  },
3031
  {
3032
  "epoch": 98.2,
3033
+ "learning_rate": 3.086666666666667e-05,
3034
+ "loss": 0.8175,
3035
  "step": 4910
3036
  },
3037
  {
3038
  "epoch": 98.4,
3039
+ "learning_rate": 3.0806060606060605e-05,
3040
+ "loss": 0.8186,
3041
  "step": 4920
3042
  },
3043
  {
3044
  "epoch": 98.6,
3045
+ "learning_rate": 3.074545454545455e-05,
3046
+ "loss": 0.8055,
3047
  "step": 4930
3048
  },
3049
  {
3050
  "epoch": 98.8,
3051
+ "learning_rate": 3.068484848484849e-05,
3052
+ "loss": 0.8004,
3053
  "step": 4940
3054
  },
3055
  {
3056
  "epoch": 99.0,
3057
+ "learning_rate": 3.062424242424243e-05,
3058
+ "loss": 0.7751,
3059
  "step": 4950
3060
  },
3061
  {
3062
  "epoch": 99.2,
3063
+ "learning_rate": 3.0563636363636365e-05,
3064
+ "loss": 0.8077,
3065
  "step": 4960
3066
  },
3067
  {
3068
  "epoch": 99.4,
3069
+ "learning_rate": 3.0503030303030304e-05,
3070
+ "loss": 0.7806,
3071
  "step": 4970
3072
  },
3073
  {
3074
  "epoch": 99.6,
3075
+ "learning_rate": 3.0442424242424242e-05,
3076
+ "loss": 0.8005,
3077
  "step": 4980
3078
  },
3079
  {
3080
  "epoch": 99.8,
3081
+ "learning_rate": 3.038181818181818e-05,
3082
+ "loss": 0.7966,
3083
  "step": 4990
3084
  },
3085
  {
3086
  "epoch": 100.0,
3087
+ "learning_rate": 3.0321212121212122e-05,
3088
+ "loss": 0.7831,
3089
  "step": 5000
3090
  },
3091
  {
3092
  "epoch": 100.0,
3093
+ "eval_loss": 0.35662171244621277,
3094
+ "eval_runtime": 244.6026,
3095
+ "eval_samples_per_second": 21.308,
3096
+ "eval_steps_per_second": 1.333,
3097
+ "eval_wer": 0.35934815431873796,
3098
  "step": 5000
3099
  },
3100
  {
3101
+ "epoch": 100.2,
3102
+ "learning_rate": 3.026060606060606e-05,
3103
+ "loss": 0.8186,
3104
+ "step": 5010
3105
+ },
3106
+ {
3107
+ "epoch": 100.4,
3108
+ "learning_rate": 3.02e-05,
3109
+ "loss": 0.7988,
3110
+ "step": 5020
3111
+ },
3112
+ {
3113
+ "epoch": 100.6,
3114
+ "learning_rate": 3.0139393939393937e-05,
3115
+ "loss": 0.7912,
3116
+ "step": 5030
3117
+ },
3118
+ {
3119
+ "epoch": 100.8,
3120
+ "learning_rate": 3.0078787878787876e-05,
3121
+ "loss": 0.7922,
3122
+ "step": 5040
3123
+ },
3124
+ {
3125
+ "epoch": 101.0,
3126
+ "learning_rate": 3.0018181818181817e-05,
3127
+ "loss": 0.8013,
3128
+ "step": 5050
3129
+ },
3130
+ {
3131
+ "epoch": 101.2,
3132
+ "learning_rate": 2.995757575757576e-05,
3133
+ "loss": 0.8025,
3134
+ "step": 5060
3135
+ },
3136
+ {
3137
+ "epoch": 101.4,
3138
+ "learning_rate": 2.9896969696969697e-05,
3139
+ "loss": 0.8028,
3140
+ "step": 5070
3141
+ },
3142
+ {
3143
+ "epoch": 101.6,
3144
+ "learning_rate": 2.9836363636363636e-05,
3145
+ "loss": 0.8106,
3146
+ "step": 5080
3147
+ },
3148
+ {
3149
+ "epoch": 101.8,
3150
+ "learning_rate": 2.9775757575757574e-05,
3151
+ "loss": 0.8195,
3152
+ "step": 5090
3153
+ },
3154
+ {
3155
+ "epoch": 102.0,
3156
+ "learning_rate": 2.971515151515152e-05,
3157
+ "loss": 0.7637,
3158
+ "step": 5100
3159
+ },
3160
+ {
3161
+ "epoch": 102.2,
3162
+ "learning_rate": 2.9654545454545457e-05,
3163
+ "loss": 0.8283,
3164
+ "step": 5110
3165
+ },
3166
+ {
3167
+ "epoch": 102.4,
3168
+ "learning_rate": 2.9593939393939396e-05,
3169
+ "loss": 0.7809,
3170
+ "step": 5120
3171
+ },
3172
+ {
3173
+ "epoch": 102.6,
3174
+ "learning_rate": 2.9533333333333334e-05,
3175
+ "loss": 0.7841,
3176
+ "step": 5130
3177
+ },
3178
+ {
3179
+ "epoch": 102.8,
3180
+ "learning_rate": 2.9472727272727272e-05,
3181
+ "loss": 0.7974,
3182
+ "step": 5140
3183
+ },
3184
+ {
3185
+ "epoch": 103.0,
3186
+ "learning_rate": 2.9412121212121214e-05,
3187
+ "loss": 0.786,
3188
+ "step": 5150
3189
+ },
3190
+ {
3191
+ "epoch": 103.2,
3192
+ "learning_rate": 2.9351515151515152e-05,
3193
+ "loss": 0.8257,
3194
+ "step": 5160
3195
+ },
3196
+ {
3197
+ "epoch": 103.4,
3198
+ "learning_rate": 2.929090909090909e-05,
3199
+ "loss": 0.7662,
3200
+ "step": 5170
3201
+ },
3202
+ {
3203
+ "epoch": 103.6,
3204
+ "learning_rate": 2.923030303030303e-05,
3205
+ "loss": 0.7893,
3206
+ "step": 5180
3207
+ },
3208
+ {
3209
+ "epoch": 103.8,
3210
+ "learning_rate": 2.916969696969697e-05,
3211
+ "loss": 0.7976,
3212
+ "step": 5190
3213
+ },
3214
+ {
3215
+ "epoch": 104.0,
3216
+ "learning_rate": 2.910909090909091e-05,
3217
+ "loss": 0.7868,
3218
+ "step": 5200
3219
+ },
3220
+ {
3221
+ "epoch": 104.2,
3222
+ "learning_rate": 2.904848484848485e-05,
3223
+ "loss": 0.8219,
3224
+ "step": 5210
3225
+ },
3226
+ {
3227
+ "epoch": 104.4,
3228
+ "learning_rate": 2.898787878787879e-05,
3229
+ "loss": 0.7969,
3230
+ "step": 5220
3231
+ },
3232
+ {
3233
+ "epoch": 104.6,
3234
+ "learning_rate": 2.8927272727272728e-05,
3235
+ "loss": 0.7914,
3236
+ "step": 5230
3237
+ },
3238
+ {
3239
+ "epoch": 104.8,
3240
+ "learning_rate": 2.8866666666666666e-05,
3241
+ "loss": 0.7971,
3242
+ "step": 5240
3243
+ },
3244
+ {
3245
+ "epoch": 105.0,
3246
+ "learning_rate": 2.8806060606060604e-05,
3247
+ "loss": 0.7807,
3248
+ "step": 5250
3249
+ },
3250
+ {
3251
+ "epoch": 105.2,
3252
+ "learning_rate": 2.8745454545454546e-05,
3253
+ "loss": 0.803,
3254
+ "step": 5260
3255
+ },
3256
+ {
3257
+ "epoch": 105.4,
3258
+ "learning_rate": 2.8684848484848488e-05,
3259
+ "loss": 0.81,
3260
+ "step": 5270
3261
+ },
3262
+ {
3263
+ "epoch": 105.6,
3264
+ "learning_rate": 2.8624242424242426e-05,
3265
+ "loss": 0.7943,
3266
+ "step": 5280
3267
+ },
3268
+ {
3269
+ "epoch": 105.8,
3270
+ "learning_rate": 2.8563636363636364e-05,
3271
+ "loss": 0.7767,
3272
+ "step": 5290
3273
+ },
3274
+ {
3275
+ "epoch": 106.0,
3276
+ "learning_rate": 2.8503030303030303e-05,
3277
+ "loss": 0.7721,
3278
+ "step": 5300
3279
+ },
3280
+ {
3281
+ "epoch": 106.2,
3282
+ "learning_rate": 2.8442424242424244e-05,
3283
+ "loss": 0.7863,
3284
+ "step": 5310
3285
+ },
3286
+ {
3287
+ "epoch": 106.4,
3288
+ "learning_rate": 2.8381818181818183e-05,
3289
+ "loss": 0.7824,
3290
+ "step": 5320
3291
+ },
3292
+ {
3293
+ "epoch": 106.6,
3294
+ "learning_rate": 2.832121212121212e-05,
3295
+ "loss": 0.8002,
3296
+ "step": 5330
3297
+ },
3298
+ {
3299
+ "epoch": 106.8,
3300
+ "learning_rate": 2.826060606060606e-05,
3301
+ "loss": 0.7921,
3302
+ "step": 5340
3303
+ },
3304
+ {
3305
+ "epoch": 107.0,
3306
+ "learning_rate": 2.8199999999999998e-05,
3307
+ "loss": 0.7757,
3308
+ "step": 5350
3309
+ },
3310
+ {
3311
+ "epoch": 107.2,
3312
+ "learning_rate": 2.8139393939393943e-05,
3313
+ "loss": 0.8046,
3314
+ "step": 5360
3315
+ },
3316
+ {
3317
+ "epoch": 107.4,
3318
+ "learning_rate": 2.807878787878788e-05,
3319
+ "loss": 0.775,
3320
+ "step": 5370
3321
+ },
3322
+ {
3323
+ "epoch": 107.6,
3324
+ "learning_rate": 2.801818181818182e-05,
3325
+ "loss": 0.7975,
3326
+ "step": 5380
3327
+ },
3328
+ {
3329
+ "epoch": 107.8,
3330
+ "learning_rate": 2.7957575757575758e-05,
3331
+ "loss": 0.794,
3332
+ "step": 5390
3333
+ },
3334
+ {
3335
+ "epoch": 108.0,
3336
+ "learning_rate": 2.7896969696969696e-05,
3337
+ "loss": 0.7465,
3338
+ "step": 5400
3339
+ },
3340
+ {
3341
+ "epoch": 108.2,
3342
+ "learning_rate": 2.7836363636363638e-05,
3343
+ "loss": 0.8104,
3344
+ "step": 5410
3345
+ },
3346
+ {
3347
+ "epoch": 108.4,
3348
+ "learning_rate": 2.7775757575757576e-05,
3349
+ "loss": 0.7862,
3350
+ "step": 5420
3351
+ },
3352
+ {
3353
+ "epoch": 108.6,
3354
+ "learning_rate": 2.7715151515151514e-05,
3355
+ "loss": 0.7767,
3356
+ "step": 5430
3357
+ },
3358
+ {
3359
+ "epoch": 108.8,
3360
+ "learning_rate": 2.7654545454545456e-05,
3361
+ "loss": 0.7807,
3362
+ "step": 5440
3363
+ },
3364
+ {
3365
+ "epoch": 109.0,
3366
+ "learning_rate": 2.7593939393939395e-05,
3367
+ "loss": 0.7628,
3368
+ "step": 5450
3369
+ },
3370
+ {
3371
+ "epoch": 109.2,
3372
+ "learning_rate": 2.7533333333333336e-05,
3373
+ "loss": 0.8077,
3374
+ "step": 5460
3375
+ },
3376
+ {
3377
+ "epoch": 109.4,
3378
+ "learning_rate": 2.7472727272727275e-05,
3379
+ "loss": 0.8049,
3380
+ "step": 5470
3381
+ },
3382
+ {
3383
+ "epoch": 109.6,
3384
+ "learning_rate": 2.7412121212121213e-05,
3385
+ "loss": 0.777,
3386
+ "step": 5480
3387
+ },
3388
+ {
3389
+ "epoch": 109.8,
3390
+ "learning_rate": 2.735151515151515e-05,
3391
+ "loss": 0.7799,
3392
+ "step": 5490
3393
+ },
3394
+ {
3395
+ "epoch": 110.0,
3396
+ "learning_rate": 2.729090909090909e-05,
3397
+ "loss": 0.744,
3398
+ "step": 5500
3399
+ },
3400
+ {
3401
+ "epoch": 110.0,
3402
+ "eval_loss": 0.35784289240837097,
3403
+ "eval_runtime": 244.0889,
3404
+ "eval_samples_per_second": 21.353,
3405
+ "eval_steps_per_second": 1.336,
3406
+ "eval_wer": 0.35351694694642577,
3407
+ "step": 5500
3408
+ },
3409
+ {
3410
+ "epoch": 110.2,
3411
+ "learning_rate": 2.723030303030303e-05,
3412
+ "loss": 0.7943,
3413
+ "step": 5510
3414
+ },
3415
+ {
3416
+ "epoch": 110.4,
3417
+ "learning_rate": 2.716969696969697e-05,
3418
+ "loss": 0.7865,
3419
+ "step": 5520
3420
+ },
3421
+ {
3422
+ "epoch": 110.6,
3423
+ "learning_rate": 2.710909090909091e-05,
3424
+ "loss": 0.7929,
3425
+ "step": 5530
3426
+ },
3427
+ {
3428
+ "epoch": 110.8,
3429
+ "learning_rate": 2.704848484848485e-05,
3430
+ "loss": 0.7974,
3431
+ "step": 5540
3432
+ },
3433
+ {
3434
+ "epoch": 111.0,
3435
+ "learning_rate": 2.6987878787878788e-05,
3436
+ "loss": 0.765,
3437
+ "step": 5550
3438
+ },
3439
+ {
3440
+ "epoch": 111.2,
3441
+ "learning_rate": 2.692727272727273e-05,
3442
+ "loss": 0.7867,
3443
+ "step": 5560
3444
+ },
3445
+ {
3446
+ "epoch": 111.4,
3447
+ "learning_rate": 2.6866666666666668e-05,
3448
+ "loss": 0.7903,
3449
+ "step": 5570
3450
+ },
3451
+ {
3452
+ "epoch": 111.6,
3453
+ "learning_rate": 2.6806060606060606e-05,
3454
+ "loss": 0.7852,
3455
+ "step": 5580
3456
+ },
3457
+ {
3458
+ "epoch": 111.8,
3459
+ "learning_rate": 2.6745454545454545e-05,
3460
+ "loss": 0.7745,
3461
+ "step": 5590
3462
+ },
3463
+ {
3464
+ "epoch": 112.0,
3465
+ "learning_rate": 2.6684848484848483e-05,
3466
+ "loss": 0.7464,
3467
+ "step": 5600
3468
+ },
3469
+ {
3470
+ "epoch": 112.2,
3471
+ "learning_rate": 2.6624242424242428e-05,
3472
+ "loss": 0.8193,
3473
+ "step": 5610
3474
+ },
3475
+ {
3476
+ "epoch": 112.4,
3477
+ "learning_rate": 2.6563636363636366e-05,
3478
+ "loss": 0.7732,
3479
+ "step": 5620
3480
+ },
3481
+ {
3482
+ "epoch": 112.6,
3483
+ "learning_rate": 2.6503030303030305e-05,
3484
+ "loss": 0.7941,
3485
+ "step": 5630
3486
+ },
3487
+ {
3488
+ "epoch": 112.8,
3489
+ "learning_rate": 2.6442424242424243e-05,
3490
+ "loss": 0.7822,
3491
+ "step": 5640
3492
+ },
3493
+ {
3494
+ "epoch": 113.0,
3495
+ "learning_rate": 2.638181818181818e-05,
3496
+ "loss": 0.7487,
3497
+ "step": 5650
3498
+ },
3499
+ {
3500
+ "epoch": 113.2,
3501
+ "learning_rate": 2.632121212121212e-05,
3502
+ "loss": 0.7932,
3503
+ "step": 5660
3504
+ },
3505
+ {
3506
+ "epoch": 113.4,
3507
+ "learning_rate": 2.626060606060606e-05,
3508
+ "loss": 0.7677,
3509
+ "step": 5670
3510
+ },
3511
+ {
3512
+ "epoch": 113.6,
3513
+ "learning_rate": 2.62e-05,
3514
+ "loss": 0.7945,
3515
+ "step": 5680
3516
+ },
3517
+ {
3518
+ "epoch": 113.8,
3519
+ "learning_rate": 2.6139393939393938e-05,
3520
+ "loss": 0.781,
3521
+ "step": 5690
3522
+ },
3523
+ {
3524
+ "epoch": 114.0,
3525
+ "learning_rate": 2.607878787878788e-05,
3526
+ "loss": 0.7669,
3527
+ "step": 5700
3528
+ },
3529
+ {
3530
+ "epoch": 114.2,
3531
+ "learning_rate": 2.6018181818181818e-05,
3532
+ "loss": 0.7967,
3533
+ "step": 5710
3534
+ },
3535
+ {
3536
+ "epoch": 114.4,
3537
+ "learning_rate": 2.595757575757576e-05,
3538
+ "loss": 0.7637,
3539
+ "step": 5720
3540
+ },
3541
+ {
3542
+ "epoch": 114.6,
3543
+ "learning_rate": 2.58969696969697e-05,
3544
+ "loss": 0.766,
3545
+ "step": 5730
3546
+ },
3547
+ {
3548
+ "epoch": 114.8,
3549
+ "learning_rate": 2.5836363636363637e-05,
3550
+ "loss": 0.7611,
3551
+ "step": 5740
3552
+ },
3553
+ {
3554
+ "epoch": 115.0,
3555
+ "learning_rate": 2.5775757575757575e-05,
3556
+ "loss": 0.761,
3557
+ "step": 5750
3558
+ },
3559
+ {
3560
+ "epoch": 115.2,
3561
+ "learning_rate": 2.5715151515151513e-05,
3562
+ "loss": 0.7997,
3563
+ "step": 5760
3564
+ },
3565
+ {
3566
+ "epoch": 115.4,
3567
+ "learning_rate": 2.5654545454545455e-05,
3568
+ "loss": 0.773,
3569
+ "step": 5770
3570
+ },
3571
+ {
3572
+ "epoch": 115.6,
3573
+ "learning_rate": 2.5593939393939397e-05,
3574
+ "loss": 0.7727,
3575
+ "step": 5780
3576
+ },
3577
+ {
3578
+ "epoch": 115.8,
3579
+ "learning_rate": 2.5533333333333335e-05,
3580
+ "loss": 0.771,
3581
+ "step": 5790
3582
+ },
3583
+ {
3584
+ "epoch": 116.0,
3585
+ "learning_rate": 2.5472727272727273e-05,
3586
+ "loss": 0.7392,
3587
+ "step": 5800
3588
+ },
3589
+ {
3590
+ "epoch": 116.2,
3591
+ "learning_rate": 2.5412121212121212e-05,
3592
+ "loss": 0.795,
3593
+ "step": 5810
3594
+ },
3595
+ {
3596
+ "epoch": 116.4,
3597
+ "learning_rate": 2.5351515151515153e-05,
3598
+ "loss": 0.7673,
3599
+ "step": 5820
3600
+ },
3601
+ {
3602
+ "epoch": 116.6,
3603
+ "learning_rate": 2.5290909090909092e-05,
3604
+ "loss": 0.7486,
3605
+ "step": 5830
3606
+ },
3607
+ {
3608
+ "epoch": 116.8,
3609
+ "learning_rate": 2.523030303030303e-05,
3610
+ "loss": 0.78,
3611
+ "step": 5840
3612
+ },
3613
+ {
3614
+ "epoch": 117.0,
3615
+ "learning_rate": 2.516969696969697e-05,
3616
+ "loss": 0.7339,
3617
+ "step": 5850
3618
+ },
3619
+ {
3620
+ "epoch": 117.2,
3621
+ "learning_rate": 2.5109090909090907e-05,
3622
+ "loss": 0.8006,
3623
+ "step": 5860
3624
+ },
3625
+ {
3626
+ "epoch": 117.4,
3627
+ "learning_rate": 2.5048484848484852e-05,
3628
+ "loss": 0.7578,
3629
+ "step": 5870
3630
+ },
3631
+ {
3632
+ "epoch": 117.6,
3633
+ "learning_rate": 2.498787878787879e-05,
3634
+ "loss": 0.765,
3635
+ "step": 5880
3636
+ },
3637
+ {
3638
+ "epoch": 117.8,
3639
+ "learning_rate": 2.492727272727273e-05,
3640
+ "loss": 0.7907,
3641
+ "step": 5890
3642
+ },
3643
+ {
3644
+ "epoch": 118.0,
3645
+ "learning_rate": 2.4866666666666667e-05,
3646
+ "loss": 0.7417,
3647
+ "step": 5900
3648
+ },
3649
+ {
3650
+ "epoch": 118.2,
3651
+ "learning_rate": 2.4806060606060605e-05,
3652
+ "loss": 0.7902,
3653
+ "step": 5910
3654
+ },
3655
+ {
3656
+ "epoch": 118.4,
3657
+ "learning_rate": 2.4745454545454547e-05,
3658
+ "loss": 0.7617,
3659
+ "step": 5920
3660
+ },
3661
+ {
3662
+ "epoch": 118.6,
3663
+ "learning_rate": 2.4684848484848485e-05,
3664
+ "loss": 0.771,
3665
+ "step": 5930
3666
+ },
3667
+ {
3668
+ "epoch": 118.8,
3669
+ "learning_rate": 2.4624242424242424e-05,
3670
+ "loss": 0.7884,
3671
+ "step": 5940
3672
+ },
3673
+ {
3674
+ "epoch": 119.0,
3675
+ "learning_rate": 2.4563636363636365e-05,
3676
+ "loss": 0.7506,
3677
+ "step": 5950
3678
+ },
3679
+ {
3680
+ "epoch": 119.2,
3681
+ "learning_rate": 2.4503030303030304e-05,
3682
+ "loss": 0.7876,
3683
+ "step": 5960
3684
+ },
3685
+ {
3686
+ "epoch": 119.4,
3687
+ "learning_rate": 2.4442424242424245e-05,
3688
+ "loss": 0.7781,
3689
+ "step": 5970
3690
+ },
3691
+ {
3692
+ "epoch": 119.6,
3693
+ "learning_rate": 2.4381818181818184e-05,
3694
+ "loss": 0.7787,
3695
+ "step": 5980
3696
+ },
3697
+ {
3698
+ "epoch": 119.8,
3699
+ "learning_rate": 2.4321212121212122e-05,
3700
+ "loss": 0.7615,
3701
+ "step": 5990
3702
+ },
3703
+ {
3704
+ "epoch": 120.0,
3705
+ "learning_rate": 2.426060606060606e-05,
3706
+ "loss": 0.7388,
3707
+ "step": 6000
3708
+ },
3709
+ {
3710
+ "epoch": 120.0,
3711
+ "eval_loss": 0.353779673576355,
3712
+ "eval_runtime": 243.4641,
3713
+ "eval_samples_per_second": 21.408,
3714
+ "eval_steps_per_second": 1.339,
3715
+ "eval_wer": 0.3520331129275785,
3716
+ "step": 6000
3717
+ },
3718
+ {
3719
+ "epoch": 120.2,
3720
+ "learning_rate": 2.42e-05,
3721
+ "loss": 0.7812,
3722
+ "step": 6010
3723
+ },
3724
+ {
3725
+ "epoch": 120.4,
3726
+ "learning_rate": 2.413939393939394e-05,
3727
+ "loss": 0.7723,
3728
+ "step": 6020
3729
+ },
3730
+ {
3731
+ "epoch": 120.6,
3732
+ "learning_rate": 2.407878787878788e-05,
3733
+ "loss": 0.7685,
3734
+ "step": 6030
3735
+ },
3736
+ {
3737
+ "epoch": 120.8,
3738
+ "learning_rate": 2.401818181818182e-05,
3739
+ "loss": 0.7561,
3740
+ "step": 6040
3741
+ },
3742
+ {
3743
+ "epoch": 121.0,
3744
+ "learning_rate": 2.395757575757576e-05,
3745
+ "loss": 0.7497,
3746
+ "step": 6050
3747
+ },
3748
+ {
3749
+ "epoch": 121.2,
3750
+ "learning_rate": 2.3896969696969697e-05,
3751
+ "loss": 0.8065,
3752
+ "step": 6060
3753
+ },
3754
+ {
3755
+ "epoch": 121.4,
3756
+ "learning_rate": 2.383636363636364e-05,
3757
+ "loss": 0.7661,
3758
+ "step": 6070
3759
+ },
3760
+ {
3761
+ "epoch": 121.6,
3762
+ "learning_rate": 2.3775757575757577e-05,
3763
+ "loss": 0.7622,
3764
+ "step": 6080
3765
+ },
3766
+ {
3767
+ "epoch": 121.8,
3768
+ "learning_rate": 2.3715151515151516e-05,
3769
+ "loss": 0.7465,
3770
+ "step": 6090
3771
+ },
3772
+ {
3773
+ "epoch": 122.0,
3774
+ "learning_rate": 2.3654545454545454e-05,
3775
+ "loss": 0.7463,
3776
+ "step": 6100
3777
+ },
3778
+ {
3779
+ "epoch": 122.2,
3780
+ "learning_rate": 2.3593939393939392e-05,
3781
+ "loss": 0.7849,
3782
+ "step": 6110
3783
+ },
3784
+ {
3785
+ "epoch": 122.4,
3786
+ "learning_rate": 2.3533333333333337e-05,
3787
+ "loss": 0.7639,
3788
+ "step": 6120
3789
+ },
3790
+ {
3791
+ "epoch": 122.6,
3792
+ "learning_rate": 2.3472727272727276e-05,
3793
+ "loss": 0.7712,
3794
+ "step": 6130
3795
+ },
3796
+ {
3797
+ "epoch": 122.8,
3798
+ "learning_rate": 2.3412121212121214e-05,
3799
+ "loss": 0.7513,
3800
+ "step": 6140
3801
+ },
3802
+ {
3803
+ "epoch": 123.0,
3804
+ "learning_rate": 2.3351515151515152e-05,
3805
+ "loss": 0.7454,
3806
+ "step": 6150
3807
+ },
3808
+ {
3809
+ "epoch": 123.2,
3810
+ "learning_rate": 2.329090909090909e-05,
3811
+ "loss": 0.7733,
3812
+ "step": 6160
3813
+ },
3814
+ {
3815
+ "epoch": 123.4,
3816
+ "learning_rate": 2.323030303030303e-05,
3817
+ "loss": 0.7506,
3818
+ "step": 6170
3819
+ },
3820
+ {
3821
+ "epoch": 123.6,
3822
+ "learning_rate": 2.316969696969697e-05,
3823
+ "loss": 0.7561,
3824
+ "step": 6180
3825
+ },
3826
+ {
3827
+ "epoch": 123.8,
3828
+ "learning_rate": 2.310909090909091e-05,
3829
+ "loss": 0.7694,
3830
+ "step": 6190
3831
+ },
3832
+ {
3833
+ "epoch": 124.0,
3834
+ "learning_rate": 2.3048484848484847e-05,
3835
+ "loss": 0.7404,
3836
+ "step": 6200
3837
+ },
3838
+ {
3839
+ "epoch": 124.2,
3840
+ "learning_rate": 2.298787878787879e-05,
3841
+ "loss": 0.7819,
3842
+ "step": 6210
3843
+ },
3844
+ {
3845
+ "epoch": 124.4,
3846
+ "learning_rate": 2.2927272727272727e-05,
3847
+ "loss": 0.7499,
3848
+ "step": 6220
3849
+ },
3850
+ {
3851
+ "epoch": 124.6,
3852
+ "learning_rate": 2.286666666666667e-05,
3853
+ "loss": 0.756,
3854
+ "step": 6230
3855
+ },
3856
+ {
3857
+ "epoch": 124.8,
3858
+ "learning_rate": 2.2806060606060607e-05,
3859
+ "loss": 0.7716,
3860
+ "step": 6240
3861
+ },
3862
+ {
3863
+ "epoch": 125.0,
3864
+ "learning_rate": 2.2745454545454546e-05,
3865
+ "loss": 0.7427,
3866
+ "step": 6250
3867
+ },
3868
+ {
3869
+ "epoch": 125.2,
3870
+ "learning_rate": 2.2684848484848484e-05,
3871
+ "loss": 0.7816,
3872
+ "step": 6260
3873
+ },
3874
+ {
3875
+ "epoch": 125.4,
3876
+ "learning_rate": 2.2624242424242422e-05,
3877
+ "loss": 0.7623,
3878
+ "step": 6270
3879
+ },
3880
+ {
3881
+ "epoch": 125.6,
3882
+ "learning_rate": 2.2563636363636364e-05,
3883
+ "loss": 0.7561,
3884
+ "step": 6280
3885
+ },
3886
+ {
3887
+ "epoch": 125.8,
3888
+ "learning_rate": 2.2503030303030302e-05,
3889
+ "loss": 0.7409,
3890
+ "step": 6290
3891
+ },
3892
+ {
3893
+ "epoch": 126.0,
3894
+ "learning_rate": 2.2442424242424244e-05,
3895
+ "loss": 0.7169,
3896
+ "step": 6300
3897
+ },
3898
+ {
3899
+ "epoch": 126.2,
3900
+ "learning_rate": 2.2381818181818183e-05,
3901
+ "loss": 0.7972,
3902
+ "step": 6310
3903
+ },
3904
+ {
3905
+ "epoch": 126.4,
3906
+ "learning_rate": 2.232121212121212e-05,
3907
+ "loss": 0.7426,
3908
+ "step": 6320
3909
+ },
3910
+ {
3911
+ "epoch": 126.6,
3912
+ "learning_rate": 2.2260606060606063e-05,
3913
+ "loss": 0.7575,
3914
+ "step": 6330
3915
+ },
3916
+ {
3917
+ "epoch": 126.8,
3918
+ "learning_rate": 2.22e-05,
3919
+ "loss": 0.7446,
3920
+ "step": 6340
3921
+ },
3922
+ {
3923
+ "epoch": 127.0,
3924
+ "learning_rate": 2.213939393939394e-05,
3925
+ "loss": 0.7381,
3926
+ "step": 6350
3927
+ },
3928
+ {
3929
+ "epoch": 127.2,
3930
+ "learning_rate": 2.2078787878787878e-05,
3931
+ "loss": 0.7664,
3932
+ "step": 6360
3933
+ },
3934
+ {
3935
+ "epoch": 127.4,
3936
+ "learning_rate": 2.2018181818181816e-05,
3937
+ "loss": 0.7522,
3938
+ "step": 6370
3939
+ },
3940
+ {
3941
+ "epoch": 127.6,
3942
+ "learning_rate": 2.195757575757576e-05,
3943
+ "loss": 0.7696,
3944
+ "step": 6380
3945
+ },
3946
+ {
3947
+ "epoch": 127.8,
3948
+ "learning_rate": 2.18969696969697e-05,
3949
+ "loss": 0.7475,
3950
+ "step": 6390
3951
+ },
3952
+ {
3953
+ "epoch": 128.0,
3954
+ "learning_rate": 2.1836363636363638e-05,
3955
+ "loss": 0.7163,
3956
+ "step": 6400
3957
+ },
3958
+ {
3959
+ "epoch": 128.2,
3960
+ "learning_rate": 2.1775757575757576e-05,
3961
+ "loss": 0.7693,
3962
+ "step": 6410
3963
+ },
3964
+ {
3965
+ "epoch": 128.4,
3966
+ "learning_rate": 2.1715151515151514e-05,
3967
+ "loss": 0.7437,
3968
+ "step": 6420
3969
+ },
3970
+ {
3971
+ "epoch": 128.6,
3972
+ "learning_rate": 2.1654545454545456e-05,
3973
+ "loss": 0.7631,
3974
+ "step": 6430
3975
+ },
3976
+ {
3977
+ "epoch": 128.8,
3978
+ "learning_rate": 2.1593939393939394e-05,
3979
+ "loss": 0.7367,
3980
+ "step": 6440
3981
+ },
3982
+ {
3983
+ "epoch": 129.0,
3984
+ "learning_rate": 2.1533333333333333e-05,
3985
+ "loss": 0.7289,
3986
+ "step": 6450
3987
+ },
3988
+ {
3989
+ "epoch": 129.2,
3990
+ "learning_rate": 2.147272727272727e-05,
3991
+ "loss": 0.7635,
3992
+ "step": 6460
3993
+ },
3994
+ {
3995
+ "epoch": 129.4,
3996
+ "learning_rate": 2.1412121212121213e-05,
3997
+ "loss": 0.7493,
3998
+ "step": 6470
3999
+ },
4000
+ {
4001
+ "epoch": 129.6,
4002
+ "learning_rate": 2.1351515151515154e-05,
4003
+ "loss": 0.769,
4004
+ "step": 6480
4005
+ },
4006
+ {
4007
+ "epoch": 129.8,
4008
+ "learning_rate": 2.1290909090909093e-05,
4009
+ "loss": 0.7307,
4010
+ "step": 6490
4011
+ },
4012
+ {
4013
+ "epoch": 130.0,
4014
+ "learning_rate": 2.123030303030303e-05,
4015
+ "loss": 0.714,
4016
+ "step": 6500
4017
+ },
4018
+ {
4019
+ "epoch": 130.0,
4020
+ "eval_loss": 0.3682139217853546,
4021
+ "eval_runtime": 243.0361,
4022
+ "eval_samples_per_second": 21.445,
4023
+ "eval_steps_per_second": 1.341,
4024
+ "eval_wer": 0.3506013432602697,
4025
+ "step": 6500
4026
+ },
4027
+ {
4028
+ "epoch": 130.2,
4029
+ "learning_rate": 2.116969696969697e-05,
4030
+ "loss": 0.7817,
4031
+ "step": 6510
4032
+ },
4033
+ {
4034
+ "epoch": 130.4,
4035
+ "learning_rate": 2.1109090909090908e-05,
4036
+ "loss": 0.7611,
4037
+ "step": 6520
4038
+ },
4039
+ {
4040
+ "epoch": 130.6,
4041
+ "learning_rate": 2.104848484848485e-05,
4042
+ "loss": 0.7488,
4043
+ "step": 6530
4044
+ },
4045
+ {
4046
+ "epoch": 130.8,
4047
+ "learning_rate": 2.0987878787878788e-05,
4048
+ "loss": 0.7409,
4049
+ "step": 6540
4050
+ },
4051
+ {
4052
+ "epoch": 131.0,
4053
+ "learning_rate": 2.092727272727273e-05,
4054
+ "loss": 0.7171,
4055
+ "step": 6550
4056
+ },
4057
+ {
4058
+ "epoch": 131.2,
4059
+ "learning_rate": 2.0866666666666668e-05,
4060
+ "loss": 0.7772,
4061
+ "step": 6560
4062
+ },
4063
+ {
4064
+ "epoch": 131.4,
4065
+ "learning_rate": 2.0806060606060606e-05,
4066
+ "loss": 0.7418,
4067
+ "step": 6570
4068
+ },
4069
+ {
4070
+ "epoch": 131.6,
4071
+ "learning_rate": 2.0745454545454548e-05,
4072
+ "loss": 0.7403,
4073
+ "step": 6580
4074
+ },
4075
+ {
4076
+ "epoch": 131.8,
4077
+ "learning_rate": 2.0684848484848486e-05,
4078
+ "loss": 0.7406,
4079
+ "step": 6590
4080
+ },
4081
+ {
4082
+ "epoch": 132.0,
4083
+ "learning_rate": 2.0624242424242425e-05,
4084
+ "loss": 0.7281,
4085
+ "step": 6600
4086
+ },
4087
+ {
4088
+ "epoch": 132.2,
4089
+ "learning_rate": 2.0563636363636363e-05,
4090
+ "loss": 0.7583,
4091
+ "step": 6610
4092
+ },
4093
+ {
4094
+ "epoch": 132.4,
4095
+ "learning_rate": 2.05030303030303e-05,
4096
+ "loss": 0.7586,
4097
+ "step": 6620
4098
+ },
4099
+ {
4100
+ "epoch": 132.6,
4101
+ "learning_rate": 2.0442424242424243e-05,
4102
+ "loss": 0.7361,
4103
+ "step": 6630
4104
+ },
4105
+ {
4106
+ "epoch": 132.8,
4107
+ "learning_rate": 2.0381818181818185e-05,
4108
+ "loss": 0.7476,
4109
+ "step": 6640
4110
+ },
4111
+ {
4112
+ "epoch": 133.0,
4113
+ "learning_rate": 2.0321212121212123e-05,
4114
+ "loss": 0.7112,
4115
+ "step": 6650
4116
+ },
4117
+ {
4118
+ "epoch": 133.2,
4119
+ "learning_rate": 2.026060606060606e-05,
4120
+ "loss": 0.762,
4121
+ "step": 6660
4122
+ },
4123
+ {
4124
+ "epoch": 133.4,
4125
+ "learning_rate": 2.02e-05,
4126
+ "loss": 0.7406,
4127
+ "step": 6670
4128
+ },
4129
+ {
4130
+ "epoch": 133.6,
4131
+ "learning_rate": 2.0139393939393938e-05,
4132
+ "loss": 0.7421,
4133
+ "step": 6680
4134
+ },
4135
+ {
4136
+ "epoch": 133.8,
4137
+ "learning_rate": 2.007878787878788e-05,
4138
+ "loss": 0.7531,
4139
+ "step": 6690
4140
+ },
4141
+ {
4142
+ "epoch": 134.0,
4143
+ "learning_rate": 2.0018181818181818e-05,
4144
+ "loss": 0.7052,
4145
+ "step": 6700
4146
+ },
4147
+ {
4148
+ "epoch": 134.2,
4149
+ "learning_rate": 1.9957575757575756e-05,
4150
+ "loss": 0.7579,
4151
+ "step": 6710
4152
+ },
4153
+ {
4154
+ "epoch": 134.4,
4155
+ "learning_rate": 1.9896969696969698e-05,
4156
+ "loss": 0.7309,
4157
+ "step": 6720
4158
+ },
4159
+ {
4160
+ "epoch": 134.6,
4161
+ "learning_rate": 1.9836363636363636e-05,
4162
+ "loss": 0.7491,
4163
+ "step": 6730
4164
+ },
4165
+ {
4166
+ "epoch": 134.8,
4167
+ "learning_rate": 1.9775757575757578e-05,
4168
+ "loss": 0.7368,
4169
+ "step": 6740
4170
+ },
4171
+ {
4172
+ "epoch": 135.0,
4173
+ "learning_rate": 1.9715151515151517e-05,
4174
+ "loss": 0.6959,
4175
+ "step": 6750
4176
+ },
4177
+ {
4178
+ "epoch": 135.2,
4179
+ "learning_rate": 1.9654545454545455e-05,
4180
+ "loss": 0.7675,
4181
+ "step": 6760
4182
+ },
4183
+ {
4184
+ "epoch": 135.4,
4185
+ "learning_rate": 1.9593939393939393e-05,
4186
+ "loss": 0.7377,
4187
+ "step": 6770
4188
+ },
4189
+ {
4190
+ "epoch": 135.6,
4191
+ "learning_rate": 1.953333333333333e-05,
4192
+ "loss": 0.7589,
4193
+ "step": 6780
4194
+ },
4195
+ {
4196
+ "epoch": 135.8,
4197
+ "learning_rate": 1.9472727272727273e-05,
4198
+ "loss": 0.7398,
4199
+ "step": 6790
4200
+ },
4201
+ {
4202
+ "epoch": 136.0,
4203
+ "learning_rate": 1.941212121212121e-05,
4204
+ "loss": 0.7044,
4205
+ "step": 6800
4206
+ },
4207
+ {
4208
+ "epoch": 136.2,
4209
+ "learning_rate": 1.9351515151515153e-05,
4210
+ "loss": 0.759,
4211
+ "step": 6810
4212
+ },
4213
+ {
4214
+ "epoch": 136.4,
4215
+ "learning_rate": 1.929090909090909e-05,
4216
+ "loss": 0.7356,
4217
+ "step": 6820
4218
+ },
4219
+ {
4220
+ "epoch": 136.6,
4221
+ "learning_rate": 1.923030303030303e-05,
4222
+ "loss": 0.7181,
4223
+ "step": 6830
4224
+ },
4225
+ {
4226
+ "epoch": 136.8,
4227
+ "learning_rate": 1.916969696969697e-05,
4228
+ "loss": 0.7349,
4229
+ "step": 6840
4230
+ },
4231
+ {
4232
+ "epoch": 137.0,
4233
+ "learning_rate": 1.910909090909091e-05,
4234
+ "loss": 0.7004,
4235
+ "step": 6850
4236
+ },
4237
+ {
4238
+ "epoch": 137.2,
4239
+ "learning_rate": 1.904848484848485e-05,
4240
+ "loss": 0.7531,
4241
+ "step": 6860
4242
+ },
4243
+ {
4244
+ "epoch": 137.4,
4245
+ "learning_rate": 1.8987878787878787e-05,
4246
+ "loss": 0.749,
4247
+ "step": 6870
4248
+ },
4249
+ {
4250
+ "epoch": 137.6,
4251
+ "learning_rate": 1.8927272727272725e-05,
4252
+ "loss": 0.7306,
4253
+ "step": 6880
4254
+ },
4255
+ {
4256
+ "epoch": 137.8,
4257
+ "learning_rate": 1.886666666666667e-05,
4258
+ "loss": 0.7281,
4259
+ "step": 6890
4260
+ },
4261
+ {
4262
+ "epoch": 138.0,
4263
+ "learning_rate": 1.880606060606061e-05,
4264
+ "loss": 0.7322,
4265
+ "step": 6900
4266
+ },
4267
+ {
4268
+ "epoch": 138.2,
4269
+ "learning_rate": 1.8745454545454547e-05,
4270
+ "loss": 0.7754,
4271
+ "step": 6910
4272
+ },
4273
+ {
4274
+ "epoch": 138.4,
4275
+ "learning_rate": 1.8684848484848485e-05,
4276
+ "loss": 0.7235,
4277
+ "step": 6920
4278
+ },
4279
+ {
4280
+ "epoch": 138.6,
4281
+ "learning_rate": 1.8624242424242423e-05,
4282
+ "loss": 0.7408,
4283
+ "step": 6930
4284
+ },
4285
+ {
4286
+ "epoch": 138.8,
4287
+ "learning_rate": 1.8563636363636365e-05,
4288
+ "loss": 0.7376,
4289
+ "step": 6940
4290
+ },
4291
+ {
4292
+ "epoch": 139.0,
4293
+ "learning_rate": 1.8503030303030303e-05,
4294
+ "loss": 0.7067,
4295
+ "step": 6950
4296
+ },
4297
+ {
4298
+ "epoch": 139.2,
4299
+ "learning_rate": 1.8442424242424242e-05,
4300
+ "loss": 0.7594,
4301
+ "step": 6960
4302
+ },
4303
+ {
4304
+ "epoch": 139.4,
4305
+ "learning_rate": 1.838181818181818e-05,
4306
+ "loss": 0.7245,
4307
+ "step": 6970
4308
+ },
4309
+ {
4310
+ "epoch": 139.6,
4311
+ "learning_rate": 1.8321212121212122e-05,
4312
+ "loss": 0.7527,
4313
+ "step": 6980
4314
+ },
4315
+ {
4316
+ "epoch": 139.8,
4317
+ "learning_rate": 1.8260606060606064e-05,
4318
+ "loss": 0.7291,
4319
+ "step": 6990
4320
+ },
4321
+ {
4322
+ "epoch": 140.0,
4323
+ "learning_rate": 1.8200000000000002e-05,
4324
+ "loss": 0.7291,
4325
+ "step": 7000
4326
+ },
4327
+ {
4328
+ "epoch": 140.0,
4329
+ "eval_loss": 0.36252087354660034,
4330
+ "eval_runtime": 244.2417,
4331
+ "eval_samples_per_second": 21.34,
4332
+ "eval_steps_per_second": 1.335,
4333
+ "eval_wer": 0.3504972145571927,
4334
+ "step": 7000
4335
+ },
4336
+ {
4337
+ "epoch": 140.2,
4338
+ "learning_rate": 1.813939393939394e-05,
4339
+ "loss": 0.7564,
4340
+ "step": 7010
4341
+ },
4342
+ {
4343
+ "epoch": 140.4,
4344
+ "learning_rate": 1.807878787878788e-05,
4345
+ "loss": 0.7686,
4346
+ "step": 7020
4347
+ },
4348
+ {
4349
+ "epoch": 140.6,
4350
+ "learning_rate": 1.8018181818181817e-05,
4351
+ "loss": 0.7334,
4352
+ "step": 7030
4353
+ },
4354
+ {
4355
+ "epoch": 140.8,
4356
+ "learning_rate": 1.795757575757576e-05,
4357
+ "loss": 0.7459,
4358
+ "step": 7040
4359
+ },
4360
+ {
4361
+ "epoch": 141.0,
4362
+ "learning_rate": 1.7896969696969697e-05,
4363
+ "loss": 0.7179,
4364
+ "step": 7050
4365
+ },
4366
+ {
4367
+ "epoch": 141.2,
4368
+ "learning_rate": 1.783636363636364e-05,
4369
+ "loss": 0.7633,
4370
+ "step": 7060
4371
+ },
4372
+ {
4373
+ "epoch": 141.4,
4374
+ "learning_rate": 1.7775757575757577e-05,
4375
+ "loss": 0.7249,
4376
+ "step": 7070
4377
+ },
4378
+ {
4379
+ "epoch": 141.6,
4380
+ "learning_rate": 1.7715151515151515e-05,
4381
+ "loss": 0.726,
4382
+ "step": 7080
4383
+ },
4384
+ {
4385
+ "epoch": 141.8,
4386
+ "learning_rate": 1.7654545454545457e-05,
4387
+ "loss": 0.7401,
4388
+ "step": 7090
4389
+ },
4390
+ {
4391
+ "epoch": 142.0,
4392
+ "learning_rate": 1.7593939393939395e-05,
4393
+ "loss": 0.7075,
4394
+ "step": 7100
4395
+ },
4396
+ {
4397
+ "epoch": 142.2,
4398
+ "learning_rate": 1.7533333333333334e-05,
4399
+ "loss": 0.7549,
4400
+ "step": 7110
4401
+ },
4402
+ {
4403
+ "epoch": 142.4,
4404
+ "learning_rate": 1.7472727272727272e-05,
4405
+ "loss": 0.7279,
4406
+ "step": 7120
4407
+ },
4408
+ {
4409
+ "epoch": 142.6,
4410
+ "learning_rate": 1.741212121212121e-05,
4411
+ "loss": 0.7364,
4412
+ "step": 7130
4413
+ },
4414
+ {
4415
+ "epoch": 142.8,
4416
+ "learning_rate": 1.7351515151515152e-05,
4417
+ "loss": 0.7469,
4418
+ "step": 7140
4419
+ },
4420
+ {
4421
+ "epoch": 143.0,
4422
+ "learning_rate": 1.7290909090909094e-05,
4423
+ "loss": 0.7131,
4424
+ "step": 7150
4425
+ },
4426
+ {
4427
+ "epoch": 143.2,
4428
+ "learning_rate": 1.7230303030303032e-05,
4429
+ "loss": 0.751,
4430
+ "step": 7160
4431
+ },
4432
+ {
4433
+ "epoch": 143.4,
4434
+ "learning_rate": 1.716969696969697e-05,
4435
+ "loss": 0.7274,
4436
+ "step": 7170
4437
+ },
4438
+ {
4439
+ "epoch": 143.6,
4440
+ "learning_rate": 1.710909090909091e-05,
4441
+ "loss": 0.7217,
4442
+ "step": 7180
4443
+ },
4444
+ {
4445
+ "epoch": 143.8,
4446
+ "learning_rate": 1.7048484848484847e-05,
4447
+ "loss": 0.7416,
4448
+ "step": 7190
4449
+ },
4450
+ {
4451
+ "epoch": 144.0,
4452
+ "learning_rate": 1.698787878787879e-05,
4453
+ "loss": 0.6873,
4454
+ "step": 7200
4455
+ },
4456
+ {
4457
+ "epoch": 144.2,
4458
+ "learning_rate": 1.6927272727272727e-05,
4459
+ "loss": 0.7562,
4460
+ "step": 7210
4461
+ },
4462
+ {
4463
+ "epoch": 144.4,
4464
+ "learning_rate": 1.6866666666666666e-05,
4465
+ "loss": 0.7104,
4466
+ "step": 7220
4467
+ },
4468
+ {
4469
+ "epoch": 144.6,
4470
+ "learning_rate": 1.6806060606060607e-05,
4471
+ "loss": 0.7403,
4472
+ "step": 7230
4473
+ },
4474
+ {
4475
+ "epoch": 144.8,
4476
+ "learning_rate": 1.6745454545454546e-05,
4477
+ "loss": 0.7242,
4478
+ "step": 7240
4479
+ },
4480
+ {
4481
+ "epoch": 145.0,
4482
+ "learning_rate": 1.6684848484848487e-05,
4483
+ "loss": 0.7091,
4484
+ "step": 7250
4485
+ },
4486
+ {
4487
+ "epoch": 145.2,
4488
+ "learning_rate": 1.6624242424242426e-05,
4489
+ "loss": 0.7634,
4490
+ "step": 7260
4491
+ },
4492
+ {
4493
+ "epoch": 145.4,
4494
+ "learning_rate": 1.6563636363636364e-05,
4495
+ "loss": 0.7249,
4496
+ "step": 7270
4497
+ },
4498
+ {
4499
+ "epoch": 145.6,
4500
+ "learning_rate": 1.6503030303030302e-05,
4501
+ "loss": 0.7354,
4502
+ "step": 7280
4503
+ },
4504
+ {
4505
+ "epoch": 145.8,
4506
+ "learning_rate": 1.644242424242424e-05,
4507
+ "loss": 0.7336,
4508
+ "step": 7290
4509
+ },
4510
+ {
4511
+ "epoch": 146.0,
4512
+ "learning_rate": 1.6381818181818182e-05,
4513
+ "loss": 0.69,
4514
+ "step": 7300
4515
+ },
4516
+ {
4517
+ "epoch": 146.2,
4518
+ "learning_rate": 1.632121212121212e-05,
4519
+ "loss": 0.7587,
4520
+ "step": 7310
4521
+ },
4522
+ {
4523
+ "epoch": 146.4,
4524
+ "learning_rate": 1.6260606060606062e-05,
4525
+ "loss": 0.7295,
4526
+ "step": 7320
4527
+ },
4528
+ {
4529
+ "epoch": 146.6,
4530
+ "learning_rate": 1.62e-05,
4531
+ "loss": 0.733,
4532
+ "step": 7330
4533
+ },
4534
+ {
4535
+ "epoch": 146.8,
4536
+ "learning_rate": 1.613939393939394e-05,
4537
+ "loss": 0.7043,
4538
+ "step": 7340
4539
+ },
4540
+ {
4541
+ "epoch": 147.0,
4542
+ "learning_rate": 1.607878787878788e-05,
4543
+ "loss": 0.6902,
4544
+ "step": 7350
4545
+ },
4546
+ {
4547
+ "epoch": 147.2,
4548
+ "learning_rate": 1.601818181818182e-05,
4549
+ "loss": 0.7559,
4550
+ "step": 7360
4551
+ },
4552
+ {
4553
+ "epoch": 147.4,
4554
+ "learning_rate": 1.5957575757575757e-05,
4555
+ "loss": 0.733,
4556
+ "step": 7370
4557
+ },
4558
+ {
4559
+ "epoch": 147.6,
4560
+ "learning_rate": 1.5896969696969696e-05,
4561
+ "loss": 0.7107,
4562
+ "step": 7380
4563
+ },
4564
+ {
4565
+ "epoch": 147.8,
4566
+ "learning_rate": 1.5836363636363634e-05,
4567
+ "loss": 0.7186,
4568
+ "step": 7390
4569
+ },
4570
+ {
4571
+ "epoch": 148.0,
4572
+ "learning_rate": 1.577575757575758e-05,
4573
+ "loss": 0.6978,
4574
+ "step": 7400
4575
+ },
4576
+ {
4577
+ "epoch": 148.2,
4578
+ "learning_rate": 1.5715151515151518e-05,
4579
+ "loss": 0.7545,
4580
+ "step": 7410
4581
+ },
4582
+ {
4583
+ "epoch": 148.4,
4584
+ "learning_rate": 1.5654545454545456e-05,
4585
+ "loss": 0.7321,
4586
+ "step": 7420
4587
+ },
4588
+ {
4589
+ "epoch": 148.6,
4590
+ "learning_rate": 1.5593939393939394e-05,
4591
+ "loss": 0.7223,
4592
+ "step": 7430
4593
+ },
4594
+ {
4595
+ "epoch": 148.8,
4596
+ "learning_rate": 1.5533333333333333e-05,
4597
+ "loss": 0.7216,
4598
+ "step": 7440
4599
+ },
4600
+ {
4601
+ "epoch": 149.0,
4602
+ "learning_rate": 1.5472727272727274e-05,
4603
+ "loss": 0.7029,
4604
+ "step": 7450
4605
+ },
4606
+ {
4607
+ "epoch": 149.2,
4608
+ "learning_rate": 1.5412121212121213e-05,
4609
+ "loss": 0.7595,
4610
+ "step": 7460
4611
+ },
4612
+ {
4613
+ "epoch": 149.4,
4614
+ "learning_rate": 1.535151515151515e-05,
4615
+ "loss": 0.7406,
4616
+ "step": 7470
4617
+ },
4618
+ {
4619
+ "epoch": 149.6,
4620
+ "learning_rate": 1.529090909090909e-05,
4621
+ "loss": 0.7215,
4622
+ "step": 7480
4623
+ },
4624
+ {
4625
+ "epoch": 149.8,
4626
+ "learning_rate": 1.5230303030303033e-05,
4627
+ "loss": 0.7085,
4628
+ "step": 7490
4629
+ },
4630
+ {
4631
+ "epoch": 150.0,
4632
+ "learning_rate": 1.5169696969696971e-05,
4633
+ "loss": 0.697,
4634
+ "step": 7500
4635
+ },
4636
+ {
4637
+ "epoch": 150.0,
4638
+ "eval_loss": 0.36194926500320435,
4639
+ "eval_runtime": 244.3442,
4640
+ "eval_samples_per_second": 21.331,
4641
+ "eval_steps_per_second": 1.334,
4642
+ "eval_wer": 0.3479460613318061,
4643
+ "step": 7500
4644
+ },
4645
+ {
4646
+ "epoch": 150.2,
4647
+ "learning_rate": 1.510909090909091e-05,
4648
+ "loss": 0.7518,
4649
+ "step": 7510
4650
+ },
4651
+ {
4652
+ "epoch": 150.4,
4653
+ "learning_rate": 1.504848484848485e-05,
4654
+ "loss": 0.7236,
4655
+ "step": 7520
4656
+ },
4657
+ {
4658
+ "epoch": 150.6,
4659
+ "learning_rate": 1.4987878787878788e-05,
4660
+ "loss": 0.7105,
4661
+ "step": 7530
4662
+ },
4663
+ {
4664
+ "epoch": 150.8,
4665
+ "learning_rate": 1.4927272727272728e-05,
4666
+ "loss": 0.7035,
4667
+ "step": 7540
4668
+ },
4669
+ {
4670
+ "epoch": 151.0,
4671
+ "learning_rate": 1.4866666666666668e-05,
4672
+ "loss": 0.7026,
4673
+ "step": 7550
4674
+ },
4675
+ {
4676
+ "epoch": 151.2,
4677
+ "learning_rate": 1.4806060606060606e-05,
4678
+ "loss": 0.7266,
4679
+ "step": 7560
4680
+ },
4681
+ {
4682
+ "epoch": 151.4,
4683
+ "learning_rate": 1.4745454545454546e-05,
4684
+ "loss": 0.7391,
4685
+ "step": 7570
4686
+ },
4687
+ {
4688
+ "epoch": 151.6,
4689
+ "learning_rate": 1.4684848484848484e-05,
4690
+ "loss": 0.7343,
4691
+ "step": 7580
4692
+ },
4693
+ {
4694
+ "epoch": 151.8,
4695
+ "learning_rate": 1.4624242424242424e-05,
4696
+ "loss": 0.7262,
4697
+ "step": 7590
4698
+ },
4699
+ {
4700
+ "epoch": 152.0,
4701
+ "learning_rate": 1.4563636363636364e-05,
4702
+ "loss": 0.697,
4703
+ "step": 7600
4704
+ },
4705
+ {
4706
+ "epoch": 152.2,
4707
+ "learning_rate": 1.4503030303030303e-05,
4708
+ "loss": 0.7542,
4709
+ "step": 7610
4710
+ },
4711
+ {
4712
+ "epoch": 152.4,
4713
+ "learning_rate": 1.4442424242424243e-05,
4714
+ "loss": 0.716,
4715
+ "step": 7620
4716
+ },
4717
+ {
4718
+ "epoch": 152.6,
4719
+ "learning_rate": 1.4381818181818181e-05,
4720
+ "loss": 0.7246,
4721
+ "step": 7630
4722
+ },
4723
+ {
4724
+ "epoch": 152.8,
4725
+ "learning_rate": 1.4321212121212123e-05,
4726
+ "loss": 0.7253,
4727
+ "step": 7640
4728
+ },
4729
+ {
4730
+ "epoch": 153.0,
4731
+ "learning_rate": 1.4260606060606061e-05,
4732
+ "loss": 0.684,
4733
+ "step": 7650
4734
+ },
4735
+ {
4736
+ "epoch": 153.2,
4737
+ "learning_rate": 1.42e-05,
4738
+ "loss": 0.7596,
4739
+ "step": 7660
4740
+ },
4741
+ {
4742
+ "epoch": 153.4,
4743
+ "learning_rate": 1.413939393939394e-05,
4744
+ "loss": 0.7173,
4745
+ "step": 7670
4746
+ },
4747
+ {
4748
+ "epoch": 153.6,
4749
+ "learning_rate": 1.407878787878788e-05,
4750
+ "loss": 0.7008,
4751
+ "step": 7680
4752
+ },
4753
+ {
4754
+ "epoch": 153.8,
4755
+ "learning_rate": 1.401818181818182e-05,
4756
+ "loss": 0.7028,
4757
+ "step": 7690
4758
+ },
4759
+ {
4760
+ "epoch": 154.0,
4761
+ "learning_rate": 1.3957575757575758e-05,
4762
+ "loss": 0.6925,
4763
+ "step": 7700
4764
+ },
4765
+ {
4766
+ "epoch": 154.2,
4767
+ "learning_rate": 1.3896969696969696e-05,
4768
+ "loss": 0.7455,
4769
+ "step": 7710
4770
+ },
4771
+ {
4772
+ "epoch": 154.4,
4773
+ "learning_rate": 1.3836363636363636e-05,
4774
+ "loss": 0.7204,
4775
+ "step": 7720
4776
+ },
4777
+ {
4778
+ "epoch": 154.6,
4779
+ "learning_rate": 1.3775757575757576e-05,
4780
+ "loss": 0.7157,
4781
+ "step": 7730
4782
+ },
4783
+ {
4784
+ "epoch": 154.8,
4785
+ "learning_rate": 1.3715151515151516e-05,
4786
+ "loss": 0.7088,
4787
+ "step": 7740
4788
+ },
4789
+ {
4790
+ "epoch": 155.0,
4791
+ "learning_rate": 1.3654545454545455e-05,
4792
+ "loss": 0.7002,
4793
+ "step": 7750
4794
+ },
4795
+ {
4796
+ "epoch": 155.2,
4797
+ "learning_rate": 1.3593939393939393e-05,
4798
+ "loss": 0.7425,
4799
+ "step": 7760
4800
+ },
4801
+ {
4802
+ "epoch": 155.4,
4803
+ "learning_rate": 1.3533333333333335e-05,
4804
+ "loss": 0.737,
4805
+ "step": 7770
4806
+ },
4807
+ {
4808
+ "epoch": 155.6,
4809
+ "learning_rate": 1.3472727272727273e-05,
4810
+ "loss": 0.7224,
4811
+ "step": 7780
4812
+ },
4813
+ {
4814
+ "epoch": 155.8,
4815
+ "learning_rate": 1.3412121212121211e-05,
4816
+ "loss": 0.7324,
4817
+ "step": 7790
4818
+ },
4819
+ {
4820
+ "epoch": 156.0,
4821
+ "learning_rate": 1.3351515151515151e-05,
4822
+ "loss": 0.6667,
4823
+ "step": 7800
4824
+ },
4825
+ {
4826
+ "epoch": 156.2,
4827
+ "learning_rate": 1.3290909090909091e-05,
4828
+ "loss": 0.7378,
4829
+ "step": 7810
4830
+ },
4831
+ {
4832
+ "epoch": 156.4,
4833
+ "learning_rate": 1.3230303030303032e-05,
4834
+ "loss": 0.7136,
4835
+ "step": 7820
4836
+ },
4837
+ {
4838
+ "epoch": 156.6,
4839
+ "learning_rate": 1.316969696969697e-05,
4840
+ "loss": 0.7082,
4841
+ "step": 7830
4842
+ },
4843
+ {
4844
+ "epoch": 156.8,
4845
+ "learning_rate": 1.3109090909090908e-05,
4846
+ "loss": 0.6986,
4847
+ "step": 7840
4848
+ },
4849
+ {
4850
+ "epoch": 157.0,
4851
+ "learning_rate": 1.304848484848485e-05,
4852
+ "loss": 0.6915,
4853
+ "step": 7850
4854
+ },
4855
+ {
4856
+ "epoch": 157.2,
4857
+ "learning_rate": 1.2987878787878788e-05,
4858
+ "loss": 0.7195,
4859
+ "step": 7860
4860
+ },
4861
+ {
4862
+ "epoch": 157.4,
4863
+ "learning_rate": 1.2927272727272728e-05,
4864
+ "loss": 0.7176,
4865
+ "step": 7870
4866
+ },
4867
+ {
4868
+ "epoch": 157.6,
4869
+ "learning_rate": 1.2866666666666667e-05,
4870
+ "loss": 0.7083,
4871
+ "step": 7880
4872
+ },
4873
+ {
4874
+ "epoch": 157.8,
4875
+ "learning_rate": 1.2806060606060605e-05,
4876
+ "loss": 0.7098,
4877
+ "step": 7890
4878
+ },
4879
+ {
4880
+ "epoch": 158.0,
4881
+ "learning_rate": 1.2745454545454547e-05,
4882
+ "loss": 0.6857,
4883
+ "step": 7900
4884
+ },
4885
+ {
4886
+ "epoch": 158.2,
4887
+ "learning_rate": 1.2684848484848485e-05,
4888
+ "loss": 0.7282,
4889
+ "step": 7910
4890
+ },
4891
+ {
4892
+ "epoch": 158.4,
4893
+ "learning_rate": 1.2624242424242425e-05,
4894
+ "loss": 0.7154,
4895
+ "step": 7920
4896
+ },
4897
+ {
4898
+ "epoch": 158.6,
4899
+ "learning_rate": 1.2563636363636363e-05,
4900
+ "loss": 0.706,
4901
+ "step": 7930
4902
+ },
4903
+ {
4904
+ "epoch": 158.8,
4905
+ "learning_rate": 1.2503030303030303e-05,
4906
+ "loss": 0.722,
4907
+ "step": 7940
4908
+ },
4909
+ {
4910
+ "epoch": 159.0,
4911
+ "learning_rate": 1.2442424242424243e-05,
4912
+ "loss": 0.6904,
4913
+ "step": 7950
4914
+ },
4915
+ {
4916
+ "epoch": 159.2,
4917
+ "learning_rate": 1.2381818181818182e-05,
4918
+ "loss": 0.7324,
4919
+ "step": 7960
4920
+ },
4921
+ {
4922
+ "epoch": 159.4,
4923
+ "learning_rate": 1.2321212121212122e-05,
4924
+ "loss": 0.723,
4925
+ "step": 7970
4926
+ },
4927
+ {
4928
+ "epoch": 159.6,
4929
+ "learning_rate": 1.2260606060606062e-05,
4930
+ "loss": 0.7141,
4931
+ "step": 7980
4932
+ },
4933
+ {
4934
+ "epoch": 159.8,
4935
+ "learning_rate": 1.22e-05,
4936
+ "loss": 0.7177,
4937
+ "step": 7990
4938
+ },
4939
+ {
4940
+ "epoch": 160.0,
4941
+ "learning_rate": 1.213939393939394e-05,
4942
+ "loss": 0.6811,
4943
+ "step": 8000
4944
+ },
4945
+ {
4946
+ "epoch": 160.0,
4947
+ "eval_loss": 0.36307966709136963,
4948
+ "eval_runtime": 242.5601,
4949
+ "eval_samples_per_second": 21.487,
4950
+ "eval_steps_per_second": 1.344,
4951
+ "eval_wer": 0.3440412349664185,
4952
+ "step": 8000
4953
+ },
4954
+ {
4955
+ "epoch": 160.2,
4956
+ "learning_rate": 1.2078787878787878e-05,
4957
+ "loss": 0.7477,
4958
+ "step": 8010
4959
+ },
4960
+ {
4961
+ "epoch": 160.4,
4962
+ "learning_rate": 1.2018181818181818e-05,
4963
+ "loss": 0.7098,
4964
+ "step": 8020
4965
+ },
4966
+ {
4967
+ "epoch": 160.6,
4968
+ "learning_rate": 1.1957575757575758e-05,
4969
+ "loss": 0.6994,
4970
+ "step": 8030
4971
+ },
4972
+ {
4973
+ "epoch": 160.8,
4974
+ "learning_rate": 1.1896969696969697e-05,
4975
+ "loss": 0.7192,
4976
+ "step": 8040
4977
+ },
4978
+ {
4979
+ "epoch": 161.0,
4980
+ "learning_rate": 1.1836363636363637e-05,
4981
+ "loss": 0.6757,
4982
+ "step": 8050
4983
+ },
4984
+ {
4985
+ "epoch": 161.2,
4986
+ "learning_rate": 1.1775757575757575e-05,
4987
+ "loss": 0.7521,
4988
+ "step": 8060
4989
+ },
4990
+ {
4991
+ "epoch": 161.4,
4992
+ "learning_rate": 1.1715151515151515e-05,
4993
+ "loss": 0.6944,
4994
+ "step": 8070
4995
+ },
4996
+ {
4997
+ "epoch": 161.6,
4998
+ "learning_rate": 1.1654545454545455e-05,
4999
+ "loss": 0.7216,
5000
+ "step": 8080
5001
+ },
5002
+ {
5003
+ "epoch": 161.8,
5004
+ "learning_rate": 1.1593939393939394e-05,
5005
+ "loss": 0.7034,
5006
+ "step": 8090
5007
+ },
5008
+ {
5009
+ "epoch": 162.0,
5010
+ "learning_rate": 1.1533333333333334e-05,
5011
+ "loss": 0.6685,
5012
+ "step": 8100
5013
+ },
5014
+ {
5015
+ "epoch": 162.2,
5016
+ "learning_rate": 1.1478787878787879e-05,
5017
+ "loss": 0.7395,
5018
+ "step": 8110
5019
+ },
5020
+ {
5021
+ "epoch": 162.4,
5022
+ "learning_rate": 1.1418181818181819e-05,
5023
+ "loss": 0.7139,
5024
+ "step": 8120
5025
+ },
5026
+ {
5027
+ "epoch": 162.6,
5028
+ "learning_rate": 1.1357575757575757e-05,
5029
+ "loss": 0.6898,
5030
+ "step": 8130
5031
+ },
5032
+ {
5033
+ "epoch": 162.8,
5034
+ "learning_rate": 1.1296969696969697e-05,
5035
+ "loss": 0.7013,
5036
+ "step": 8140
5037
+ },
5038
+ {
5039
+ "epoch": 163.0,
5040
+ "learning_rate": 1.1236363636363638e-05,
5041
+ "loss": 0.6784,
5042
+ "step": 8150
5043
+ },
5044
+ {
5045
+ "epoch": 163.2,
5046
+ "learning_rate": 1.1175757575757576e-05,
5047
+ "loss": 0.7114,
5048
+ "step": 8160
5049
+ },
5050
+ {
5051
+ "epoch": 163.4,
5052
+ "learning_rate": 1.1115151515151516e-05,
5053
+ "loss": 0.7094,
5054
+ "step": 8170
5055
+ },
5056
+ {
5057
+ "epoch": 163.6,
5058
+ "learning_rate": 1.1054545454545454e-05,
5059
+ "loss": 0.7064,
5060
+ "step": 8180
5061
+ },
5062
+ {
5063
+ "epoch": 163.8,
5064
+ "learning_rate": 1.0993939393939394e-05,
5065
+ "loss": 0.6946,
5066
+ "step": 8190
5067
+ },
5068
+ {
5069
+ "epoch": 164.0,
5070
+ "learning_rate": 1.0933333333333334e-05,
5071
+ "loss": 0.6638,
5072
+ "step": 8200
5073
+ },
5074
+ {
5075
+ "epoch": 164.2,
5076
+ "learning_rate": 1.0872727272727273e-05,
5077
+ "loss": 0.7257,
5078
+ "step": 8210
5079
+ },
5080
+ {
5081
+ "epoch": 164.4,
5082
+ "learning_rate": 1.0812121212121213e-05,
5083
+ "loss": 0.7131,
5084
+ "step": 8220
5085
+ },
5086
+ {
5087
+ "epoch": 164.6,
5088
+ "learning_rate": 1.0751515151515151e-05,
5089
+ "loss": 0.7032,
5090
+ "step": 8230
5091
+ },
5092
+ {
5093
+ "epoch": 164.8,
5094
+ "learning_rate": 1.0690909090909091e-05,
5095
+ "loss": 0.707,
5096
+ "step": 8240
5097
+ },
5098
+ {
5099
+ "epoch": 165.0,
5100
+ "learning_rate": 1.0630303030303031e-05,
5101
+ "loss": 0.6797,
5102
+ "step": 8250
5103
+ },
5104
+ {
5105
+ "epoch": 165.2,
5106
+ "learning_rate": 1.056969696969697e-05,
5107
+ "loss": 0.7334,
5108
+ "step": 8260
5109
+ },
5110
+ {
5111
+ "epoch": 165.4,
5112
+ "learning_rate": 1.050909090909091e-05,
5113
+ "loss": 0.7339,
5114
+ "step": 8270
5115
+ },
5116
+ {
5117
+ "epoch": 165.6,
5118
+ "learning_rate": 1.044848484848485e-05,
5119
+ "loss": 0.7148,
5120
+ "step": 8280
5121
+ },
5122
+ {
5123
+ "epoch": 165.8,
5124
+ "learning_rate": 1.0387878787878788e-05,
5125
+ "loss": 0.7074,
5126
+ "step": 8290
5127
+ },
5128
+ {
5129
+ "epoch": 166.0,
5130
+ "learning_rate": 1.0327272727272728e-05,
5131
+ "loss": 0.6874,
5132
+ "step": 8300
5133
+ },
5134
+ {
5135
+ "epoch": 166.2,
5136
+ "learning_rate": 1.0266666666666666e-05,
5137
+ "loss": 0.7472,
5138
+ "step": 8310
5139
+ },
5140
+ {
5141
+ "epoch": 166.4,
5142
+ "learning_rate": 1.0206060606060608e-05,
5143
+ "loss": 0.711,
5144
+ "step": 8320
5145
+ },
5146
+ {
5147
+ "epoch": 166.6,
5148
+ "learning_rate": 1.0145454545454546e-05,
5149
+ "loss": 0.707,
5150
+ "step": 8330
5151
+ },
5152
+ {
5153
+ "epoch": 166.8,
5154
+ "learning_rate": 1.0084848484848484e-05,
5155
+ "loss": 0.7023,
5156
+ "step": 8340
5157
+ },
5158
+ {
5159
+ "epoch": 167.0,
5160
+ "learning_rate": 1.0024242424242424e-05,
5161
+ "loss": 0.6812,
5162
+ "step": 8350
5163
+ },
5164
+ {
5165
+ "epoch": 167.2,
5166
+ "learning_rate": 9.963636363636363e-06,
5167
+ "loss": 0.7549,
5168
+ "step": 8360
5169
+ },
5170
+ {
5171
+ "epoch": 167.4,
5172
+ "learning_rate": 9.903030303030305e-06,
5173
+ "loss": 0.69,
5174
+ "step": 8370
5175
+ },
5176
+ {
5177
+ "epoch": 167.6,
5178
+ "learning_rate": 9.842424242424243e-06,
5179
+ "loss": 0.6981,
5180
+ "step": 8380
5181
+ },
5182
+ {
5183
+ "epoch": 167.8,
5184
+ "learning_rate": 9.781818181818181e-06,
5185
+ "loss": 0.7245,
5186
+ "step": 8390
5187
+ },
5188
+ {
5189
+ "epoch": 168.0,
5190
+ "learning_rate": 9.721212121212121e-06,
5191
+ "loss": 0.6736,
5192
+ "step": 8400
5193
+ },
5194
+ {
5195
+ "epoch": 168.2,
5196
+ "learning_rate": 9.660606060606061e-06,
5197
+ "loss": 0.7403,
5198
+ "step": 8410
5199
+ },
5200
+ {
5201
+ "epoch": 168.4,
5202
+ "learning_rate": 9.600000000000001e-06,
5203
+ "loss": 0.68,
5204
+ "step": 8420
5205
+ },
5206
+ {
5207
+ "epoch": 168.6,
5208
+ "learning_rate": 9.53939393939394e-06,
5209
+ "loss": 0.7077,
5210
+ "step": 8430
5211
+ },
5212
+ {
5213
+ "epoch": 168.8,
5214
+ "learning_rate": 9.478787878787878e-06,
5215
+ "loss": 0.6924,
5216
+ "step": 8440
5217
+ },
5218
+ {
5219
+ "epoch": 169.0,
5220
+ "learning_rate": 9.41818181818182e-06,
5221
+ "loss": 0.679,
5222
+ "step": 8450
5223
+ },
5224
+ {
5225
+ "epoch": 169.2,
5226
+ "learning_rate": 9.357575757575758e-06,
5227
+ "loss": 0.7259,
5228
+ "step": 8460
5229
+ },
5230
+ {
5231
+ "epoch": 169.4,
5232
+ "learning_rate": 9.296969696969696e-06,
5233
+ "loss": 0.7021,
5234
+ "step": 8470
5235
+ },
5236
+ {
5237
+ "epoch": 169.6,
5238
+ "learning_rate": 9.236363636363636e-06,
5239
+ "loss": 0.6986,
5240
+ "step": 8480
5241
+ },
5242
+ {
5243
+ "epoch": 169.8,
5244
+ "learning_rate": 9.175757575757576e-06,
5245
+ "loss": 0.6945,
5246
+ "step": 8490
5247
+ },
5248
+ {
5249
+ "epoch": 170.0,
5250
+ "learning_rate": 9.115151515151516e-06,
5251
+ "loss": 0.6841,
5252
+ "step": 8500
5253
+ },
5254
+ {
5255
+ "epoch": 170.0,
5256
+ "eval_loss": 0.3671566843986511,
5257
+ "eval_runtime": 243.7376,
5258
+ "eval_samples_per_second": 21.384,
5259
+ "eval_steps_per_second": 1.338,
5260
+ "eval_wer": 0.3460457125006508,
5261
+ "step": 8500
5262
+ },
5263
+ {
5264
+ "epoch": 170.2,
5265
+ "learning_rate": 9.054545454545455e-06,
5266
+ "loss": 0.7282,
5267
+ "step": 8510
5268
+ },
5269
+ {
5270
+ "epoch": 170.4,
5271
+ "learning_rate": 8.993939393939393e-06,
5272
+ "loss": 0.6899,
5273
+ "step": 8520
5274
+ },
5275
+ {
5276
+ "epoch": 170.6,
5277
+ "learning_rate": 8.933333333333333e-06,
5278
+ "loss": 0.7181,
5279
+ "step": 8530
5280
+ },
5281
+ {
5282
+ "epoch": 170.8,
5283
+ "learning_rate": 8.872727272727273e-06,
5284
+ "loss": 0.6958,
5285
+ "step": 8540
5286
+ },
5287
+ {
5288
+ "epoch": 171.0,
5289
+ "learning_rate": 8.812121212121213e-06,
5290
+ "loss": 0.6931,
5291
+ "step": 8550
5292
+ },
5293
+ {
5294
+ "epoch": 171.2,
5295
+ "learning_rate": 8.751515151515151e-06,
5296
+ "loss": 0.7227,
5297
+ "step": 8560
5298
+ },
5299
+ {
5300
+ "epoch": 171.4,
5301
+ "learning_rate": 8.69090909090909e-06,
5302
+ "loss": 0.7281,
5303
+ "step": 8570
5304
+ },
5305
+ {
5306
+ "epoch": 171.6,
5307
+ "learning_rate": 8.630303030303032e-06,
5308
+ "loss": 0.7049,
5309
+ "step": 8580
5310
+ },
5311
+ {
5312
+ "epoch": 171.8,
5313
+ "learning_rate": 8.56969696969697e-06,
5314
+ "loss": 0.7046,
5315
+ "step": 8590
5316
+ },
5317
+ {
5318
+ "epoch": 172.0,
5319
+ "learning_rate": 8.50909090909091e-06,
5320
+ "loss": 0.661,
5321
+ "step": 8600
5322
+ },
5323
+ {
5324
+ "epoch": 172.2,
5325
+ "learning_rate": 8.448484848484848e-06,
5326
+ "loss": 0.723,
5327
+ "step": 8610
5328
+ },
5329
+ {
5330
+ "epoch": 172.4,
5331
+ "learning_rate": 8.387878787878788e-06,
5332
+ "loss": 0.7063,
5333
+ "step": 8620
5334
+ },
5335
+ {
5336
+ "epoch": 172.6,
5337
+ "learning_rate": 8.327272727272728e-06,
5338
+ "loss": 0.7094,
5339
+ "step": 8630
5340
+ },
5341
+ {
5342
+ "epoch": 172.8,
5343
+ "learning_rate": 8.266666666666667e-06,
5344
+ "loss": 0.7137,
5345
+ "step": 8640
5346
+ },
5347
+ {
5348
+ "epoch": 173.0,
5349
+ "learning_rate": 8.206060606060607e-06,
5350
+ "loss": 0.6544,
5351
+ "step": 8650
5352
+ },
5353
+ {
5354
+ "epoch": 173.2,
5355
+ "learning_rate": 8.145454545454547e-06,
5356
+ "loss": 0.7202,
5357
+ "step": 8660
5358
+ },
5359
+ {
5360
+ "epoch": 173.4,
5361
+ "learning_rate": 8.084848484848485e-06,
5362
+ "loss": 0.6964,
5363
+ "step": 8670
5364
+ },
5365
+ {
5366
+ "epoch": 173.6,
5367
+ "learning_rate": 8.024242424242425e-06,
5368
+ "loss": 0.7093,
5369
+ "step": 8680
5370
+ },
5371
+ {
5372
+ "epoch": 173.8,
5373
+ "learning_rate": 7.963636363636363e-06,
5374
+ "loss": 0.6938,
5375
+ "step": 8690
5376
+ },
5377
+ {
5378
+ "epoch": 174.0,
5379
+ "learning_rate": 7.903030303030303e-06,
5380
+ "loss": 0.6669,
5381
+ "step": 8700
5382
+ },
5383
+ {
5384
+ "epoch": 174.2,
5385
+ "learning_rate": 7.842424242424243e-06,
5386
+ "loss": 0.7225,
5387
+ "step": 8710
5388
+ },
5389
+ {
5390
+ "epoch": 174.4,
5391
+ "learning_rate": 7.781818181818182e-06,
5392
+ "loss": 0.7163,
5393
+ "step": 8720
5394
+ },
5395
+ {
5396
+ "epoch": 174.6,
5397
+ "learning_rate": 7.721212121212122e-06,
5398
+ "loss": 0.6901,
5399
+ "step": 8730
5400
+ },
5401
+ {
5402
+ "epoch": 174.8,
5403
+ "learning_rate": 7.66060606060606e-06,
5404
+ "loss": 0.6937,
5405
+ "step": 8740
5406
+ },
5407
+ {
5408
+ "epoch": 175.0,
5409
+ "learning_rate": 7.600000000000001e-06,
5410
+ "loss": 0.6705,
5411
+ "step": 8750
5412
+ },
5413
+ {
5414
+ "epoch": 175.2,
5415
+ "learning_rate": 7.53939393939394e-06,
5416
+ "loss": 0.7241,
5417
+ "step": 8760
5418
+ },
5419
+ {
5420
+ "epoch": 175.4,
5421
+ "learning_rate": 7.4787878787878784e-06,
5422
+ "loss": 0.6951,
5423
+ "step": 8770
5424
+ },
5425
+ {
5426
+ "epoch": 175.6,
5427
+ "learning_rate": 7.4181818181818185e-06,
5428
+ "loss": 0.6993,
5429
+ "step": 8780
5430
+ },
5431
+ {
5432
+ "epoch": 175.8,
5433
+ "learning_rate": 7.357575757575758e-06,
5434
+ "loss": 0.7076,
5435
+ "step": 8790
5436
+ },
5437
+ {
5438
+ "epoch": 176.0,
5439
+ "learning_rate": 7.296969696969698e-06,
5440
+ "loss": 0.6692,
5441
+ "step": 8800
5442
+ },
5443
+ {
5444
+ "epoch": 176.2,
5445
+ "learning_rate": 7.236363636363637e-06,
5446
+ "loss": 0.7284,
5447
+ "step": 8810
5448
+ },
5449
+ {
5450
+ "epoch": 176.4,
5451
+ "learning_rate": 7.175757575757576e-06,
5452
+ "loss": 0.6959,
5453
+ "step": 8820
5454
+ },
5455
+ {
5456
+ "epoch": 176.6,
5457
+ "learning_rate": 7.115151515151515e-06,
5458
+ "loss": 0.678,
5459
+ "step": 8830
5460
+ },
5461
+ {
5462
+ "epoch": 176.8,
5463
+ "learning_rate": 7.054545454545454e-06,
5464
+ "loss": 0.7012,
5465
+ "step": 8840
5466
+ },
5467
+ {
5468
+ "epoch": 177.0,
5469
+ "learning_rate": 6.993939393939394e-06,
5470
+ "loss": 0.6797,
5471
+ "step": 8850
5472
+ },
5473
+ {
5474
+ "epoch": 177.2,
5475
+ "learning_rate": 6.933333333333334e-06,
5476
+ "loss": 0.7238,
5477
+ "step": 8860
5478
+ },
5479
+ {
5480
+ "epoch": 177.4,
5481
+ "learning_rate": 6.872727272727273e-06,
5482
+ "loss": 0.6986,
5483
+ "step": 8870
5484
+ },
5485
+ {
5486
+ "epoch": 177.6,
5487
+ "learning_rate": 6.812121212121212e-06,
5488
+ "loss": 0.7004,
5489
+ "step": 8880
5490
+ },
5491
+ {
5492
+ "epoch": 177.8,
5493
+ "learning_rate": 6.751515151515152e-06,
5494
+ "loss": 0.6681,
5495
+ "step": 8890
5496
+ },
5497
+ {
5498
+ "epoch": 178.0,
5499
+ "learning_rate": 6.690909090909091e-06,
5500
+ "loss": 0.6817,
5501
+ "step": 8900
5502
+ },
5503
+ {
5504
+ "epoch": 178.2,
5505
+ "learning_rate": 6.63030303030303e-06,
5506
+ "loss": 0.7241,
5507
+ "step": 8910
5508
+ },
5509
+ {
5510
+ "epoch": 178.4,
5511
+ "learning_rate": 6.5696969696969695e-06,
5512
+ "loss": 0.7102,
5513
+ "step": 8920
5514
+ },
5515
+ {
5516
+ "epoch": 178.6,
5517
+ "learning_rate": 6.5090909090909095e-06,
5518
+ "loss": 0.6847,
5519
+ "step": 8930
5520
+ },
5521
+ {
5522
+ "epoch": 178.8,
5523
+ "learning_rate": 6.448484848484849e-06,
5524
+ "loss": 0.7004,
5525
+ "step": 8940
5526
+ },
5527
+ {
5528
+ "epoch": 179.0,
5529
+ "learning_rate": 6.387878787878789e-06,
5530
+ "loss": 0.6661,
5531
+ "step": 8950
5532
+ },
5533
+ {
5534
+ "epoch": 179.2,
5535
+ "learning_rate": 6.327272727272727e-06,
5536
+ "loss": 0.7154,
5537
+ "step": 8960
5538
+ },
5539
+ {
5540
+ "epoch": 179.4,
5541
+ "learning_rate": 6.266666666666667e-06,
5542
+ "loss": 0.69,
5543
+ "step": 8970
5544
+ },
5545
+ {
5546
+ "epoch": 179.6,
5547
+ "learning_rate": 6.206060606060606e-06,
5548
+ "loss": 0.6799,
5549
+ "step": 8980
5550
+ },
5551
+ {
5552
+ "epoch": 179.8,
5553
+ "learning_rate": 6.1454545454545454e-06,
5554
+ "loss": 0.6946,
5555
+ "step": 8990
5556
+ },
5557
+ {
5558
+ "epoch": 180.0,
5559
+ "learning_rate": 6.0848484848484855e-06,
5560
+ "loss": 0.6616,
5561
+ "step": 9000
5562
+ },
5563
+ {
5564
+ "epoch": 180.0,
5565
+ "eval_loss": 0.36765044927597046,
5566
+ "eval_runtime": 243.8885,
5567
+ "eval_samples_per_second": 21.37,
5568
+ "eval_steps_per_second": 1.337,
5569
+ "eval_wer": 0.34099547040141615,
5570
+ "step": 9000
5571
+ },
5572
+ {
5573
+ "epoch": 180.2,
5574
+ "learning_rate": 6.024242424242424e-06,
5575
+ "loss": 0.718,
5576
+ "step": 9010
5577
+ },
5578
+ {
5579
+ "epoch": 180.4,
5580
+ "learning_rate": 5.963636363636364e-06,
5581
+ "loss": 0.7075,
5582
+ "step": 9020
5583
+ },
5584
+ {
5585
+ "epoch": 180.6,
5586
+ "learning_rate": 5.903030303030303e-06,
5587
+ "loss": 0.7133,
5588
+ "step": 9030
5589
+ },
5590
+ {
5591
+ "epoch": 180.8,
5592
+ "learning_rate": 5.842424242424243e-06,
5593
+ "loss": 0.698,
5594
+ "step": 9040
5595
+ },
5596
+ {
5597
+ "epoch": 181.0,
5598
+ "learning_rate": 5.781818181818181e-06,
5599
+ "loss": 0.6749,
5600
+ "step": 9050
5601
+ },
5602
+ {
5603
+ "epoch": 181.2,
5604
+ "learning_rate": 5.721212121212121e-06,
5605
+ "loss": 0.7197,
5606
+ "step": 9060
5607
+ },
5608
+ {
5609
+ "epoch": 181.4,
5610
+ "learning_rate": 5.6606060606060606e-06,
5611
+ "loss": 0.7206,
5612
+ "step": 9070
5613
+ },
5614
+ {
5615
+ "epoch": 181.6,
5616
+ "learning_rate": 5.600000000000001e-06,
5617
+ "loss": 0.6867,
5618
+ "step": 9080
5619
+ },
5620
+ {
5621
+ "epoch": 181.8,
5622
+ "learning_rate": 5.53939393939394e-06,
5623
+ "loss": 0.6846,
5624
+ "step": 9090
5625
+ },
5626
+ {
5627
+ "epoch": 182.0,
5628
+ "learning_rate": 5.478787878787879e-06,
5629
+ "loss": 0.6683,
5630
+ "step": 9100
5631
+ },
5632
+ {
5633
+ "epoch": 182.2,
5634
+ "learning_rate": 5.418181818181818e-06,
5635
+ "loss": 0.7129,
5636
+ "step": 9110
5637
+ },
5638
+ {
5639
+ "epoch": 182.4,
5640
+ "learning_rate": 5.357575757575758e-06,
5641
+ "loss": 0.6931,
5642
+ "step": 9120
5643
+ },
5644
+ {
5645
+ "epoch": 182.6,
5646
+ "learning_rate": 5.296969696969697e-06,
5647
+ "loss": 0.6892,
5648
+ "step": 9130
5649
+ },
5650
+ {
5651
+ "epoch": 182.8,
5652
+ "learning_rate": 5.2363636363636365e-06,
5653
+ "loss": 0.6927,
5654
+ "step": 9140
5655
+ },
5656
+ {
5657
+ "epoch": 183.0,
5658
+ "learning_rate": 5.175757575757576e-06,
5659
+ "loss": 0.6858,
5660
+ "step": 9150
5661
+ },
5662
+ {
5663
+ "epoch": 183.2,
5664
+ "learning_rate": 5.115151515151515e-06,
5665
+ "loss": 0.726,
5666
+ "step": 9160
5667
+ },
5668
+ {
5669
+ "epoch": 183.4,
5670
+ "learning_rate": 5.054545454545455e-06,
5671
+ "loss": 0.6908,
5672
+ "step": 9170
5673
+ },
5674
+ {
5675
+ "epoch": 183.6,
5676
+ "learning_rate": 4.993939393939394e-06,
5677
+ "loss": 0.7123,
5678
+ "step": 9180
5679
+ },
5680
+ {
5681
+ "epoch": 183.8,
5682
+ "learning_rate": 4.933333333333333e-06,
5683
+ "loss": 0.7041,
5684
+ "step": 9190
5685
+ },
5686
+ {
5687
+ "epoch": 184.0,
5688
+ "learning_rate": 4.8727272727272724e-06,
5689
+ "loss": 0.6502,
5690
+ "step": 9200
5691
+ },
5692
+ {
5693
+ "epoch": 184.2,
5694
+ "learning_rate": 4.8121212121212125e-06,
5695
+ "loss": 0.7302,
5696
+ "step": 9210
5697
+ },
5698
+ {
5699
+ "epoch": 184.4,
5700
+ "learning_rate": 4.751515151515152e-06,
5701
+ "loss": 0.6944,
5702
+ "step": 9220
5703
+ },
5704
+ {
5705
+ "epoch": 184.6,
5706
+ "learning_rate": 4.690909090909092e-06,
5707
+ "loss": 0.6774,
5708
+ "step": 9230
5709
+ },
5710
+ {
5711
+ "epoch": 184.8,
5712
+ "learning_rate": 4.63030303030303e-06,
5713
+ "loss": 0.6884,
5714
+ "step": 9240
5715
+ },
5716
+ {
5717
+ "epoch": 185.0,
5718
+ "learning_rate": 4.56969696969697e-06,
5719
+ "loss": 0.6506,
5720
+ "step": 9250
5721
+ },
5722
+ {
5723
+ "epoch": 185.2,
5724
+ "learning_rate": 4.509090909090909e-06,
5725
+ "loss": 0.7361,
5726
+ "step": 9260
5727
+ },
5728
+ {
5729
+ "epoch": 185.4,
5730
+ "learning_rate": 4.448484848484849e-06,
5731
+ "loss": 0.6978,
5732
+ "step": 9270
5733
+ },
5734
+ {
5735
+ "epoch": 185.6,
5736
+ "learning_rate": 4.3878787878787876e-06,
5737
+ "loss": 0.68,
5738
+ "step": 9280
5739
+ },
5740
+ {
5741
+ "epoch": 185.8,
5742
+ "learning_rate": 4.327272727272728e-06,
5743
+ "loss": 0.6859,
5744
+ "step": 9290
5745
+ },
5746
+ {
5747
+ "epoch": 186.0,
5748
+ "learning_rate": 4.266666666666667e-06,
5749
+ "loss": 0.6716,
5750
+ "step": 9300
5751
+ },
5752
+ {
5753
+ "epoch": 186.2,
5754
+ "learning_rate": 4.206060606060606e-06,
5755
+ "loss": 0.6995,
5756
+ "step": 9310
5757
+ },
5758
+ {
5759
+ "epoch": 186.4,
5760
+ "learning_rate": 4.145454545454546e-06,
5761
+ "loss": 0.7181,
5762
+ "step": 9320
5763
+ },
5764
+ {
5765
+ "epoch": 186.6,
5766
+ "learning_rate": 4.084848484848484e-06,
5767
+ "loss": 0.7069,
5768
+ "step": 9330
5769
+ },
5770
+ {
5771
+ "epoch": 186.8,
5772
+ "learning_rate": 4.024242424242424e-06,
5773
+ "loss": 0.7083,
5774
+ "step": 9340
5775
+ },
5776
+ {
5777
+ "epoch": 187.0,
5778
+ "learning_rate": 3.9636363636363635e-06,
5779
+ "loss": 0.662,
5780
+ "step": 9350
5781
+ },
5782
+ {
5783
+ "epoch": 187.2,
5784
+ "learning_rate": 3.9030303030303035e-06,
5785
+ "loss": 0.7155,
5786
+ "step": 9360
5787
+ },
5788
+ {
5789
+ "epoch": 187.4,
5790
+ "learning_rate": 3.842424242424243e-06,
5791
+ "loss": 0.7061,
5792
+ "step": 9370
5793
+ },
5794
+ {
5795
+ "epoch": 187.6,
5796
+ "learning_rate": 3.7818181818181823e-06,
5797
+ "loss": 0.6878,
5798
+ "step": 9380
5799
+ },
5800
+ {
5801
+ "epoch": 187.8,
5802
+ "learning_rate": 3.721212121212121e-06,
5803
+ "loss": 0.687,
5804
+ "step": 9390
5805
+ },
5806
+ {
5807
+ "epoch": 188.0,
5808
+ "learning_rate": 3.6606060606060607e-06,
5809
+ "loss": 0.6586,
5810
+ "step": 9400
5811
+ },
5812
+ {
5813
+ "epoch": 188.2,
5814
+ "learning_rate": 3.6e-06,
5815
+ "loss": 0.6959,
5816
+ "step": 9410
5817
+ },
5818
+ {
5819
+ "epoch": 188.4,
5820
+ "learning_rate": 3.5393939393939394e-06,
5821
+ "loss": 0.698,
5822
+ "step": 9420
5823
+ },
5824
+ {
5825
+ "epoch": 188.6,
5826
+ "learning_rate": 3.4787878787878786e-06,
5827
+ "loss": 0.6989,
5828
+ "step": 9430
5829
+ },
5830
+ {
5831
+ "epoch": 188.8,
5832
+ "learning_rate": 3.4181818181818182e-06,
5833
+ "loss": 0.6927,
5834
+ "step": 9440
5835
+ },
5836
+ {
5837
+ "epoch": 189.0,
5838
+ "learning_rate": 3.357575757575758e-06,
5839
+ "loss": 0.6649,
5840
+ "step": 9450
5841
+ },
5842
+ {
5843
+ "epoch": 189.2,
5844
+ "learning_rate": 3.296969696969697e-06,
5845
+ "loss": 0.715,
5846
+ "step": 9460
5847
+ },
5848
+ {
5849
+ "epoch": 189.4,
5850
+ "learning_rate": 3.2363636363636366e-06,
5851
+ "loss": 0.694,
5852
+ "step": 9470
5853
+ },
5854
+ {
5855
+ "epoch": 189.6,
5856
+ "learning_rate": 3.1757575757575758e-06,
5857
+ "loss": 0.7162,
5858
+ "step": 9480
5859
+ },
5860
+ {
5861
+ "epoch": 189.8,
5862
+ "learning_rate": 3.1151515151515154e-06,
5863
+ "loss": 0.685,
5864
+ "step": 9490
5865
+ },
5866
+ {
5867
+ "epoch": 190.0,
5868
+ "learning_rate": 3.0545454545454546e-06,
5869
+ "loss": 0.6471,
5870
+ "step": 9500
5871
+ },
5872
+ {
5873
+ "epoch": 190.0,
5874
+ "eval_loss": 0.37067291140556335,
5875
+ "eval_runtime": 243.2712,
5876
+ "eval_samples_per_second": 21.425,
5877
+ "eval_steps_per_second": 1.34,
5878
+ "eval_wer": 0.3420367574321862,
5879
+ "step": 9500
5880
+ },
5881
+ {
5882
+ "epoch": 190.2,
5883
+ "learning_rate": 2.993939393939394e-06,
5884
+ "loss": 0.7389,
5885
+ "step": 9510
5886
+ },
5887
+ {
5888
+ "epoch": 190.4,
5889
+ "learning_rate": 2.9333333333333338e-06,
5890
+ "loss": 0.6763,
5891
+ "step": 9520
5892
+ },
5893
+ {
5894
+ "epoch": 190.6,
5895
+ "learning_rate": 2.872727272727273e-06,
5896
+ "loss": 0.6837,
5897
+ "step": 9530
5898
+ },
5899
+ {
5900
+ "epoch": 190.8,
5901
+ "learning_rate": 2.8121212121212125e-06,
5902
+ "loss": 0.6793,
5903
+ "step": 9540
5904
+ },
5905
+ {
5906
+ "epoch": 191.0,
5907
+ "learning_rate": 2.7515151515151513e-06,
5908
+ "loss": 0.6621,
5909
+ "step": 9550
5910
+ },
5911
+ {
5912
+ "epoch": 191.2,
5913
+ "learning_rate": 2.690909090909091e-06,
5914
+ "loss": 0.7213,
5915
+ "step": 9560
5916
+ },
5917
+ {
5918
+ "epoch": 191.4,
5919
+ "learning_rate": 2.63030303030303e-06,
5920
+ "loss": 0.6993,
5921
+ "step": 9570
5922
+ },
5923
+ {
5924
+ "epoch": 191.6,
5925
+ "learning_rate": 2.5696969696969697e-06,
5926
+ "loss": 0.6682,
5927
+ "step": 9580
5928
+ },
5929
+ {
5930
+ "epoch": 191.8,
5931
+ "learning_rate": 2.5090909090909093e-06,
5932
+ "loss": 0.6759,
5933
+ "step": 9590
5934
+ },
5935
+ {
5936
+ "epoch": 192.0,
5937
+ "learning_rate": 2.4484848484848485e-06,
5938
+ "loss": 0.6718,
5939
+ "step": 9600
5940
+ },
5941
+ {
5942
+ "epoch": 192.2,
5943
+ "learning_rate": 2.387878787878788e-06,
5944
+ "loss": 0.7276,
5945
+ "step": 9610
5946
+ },
5947
+ {
5948
+ "epoch": 192.4,
5949
+ "learning_rate": 2.3272727272727272e-06,
5950
+ "loss": 0.6857,
5951
+ "step": 9620
5952
+ },
5953
+ {
5954
+ "epoch": 192.6,
5955
+ "learning_rate": 2.266666666666667e-06,
5956
+ "loss": 0.6914,
5957
+ "step": 9630
5958
+ },
5959
+ {
5960
+ "epoch": 192.8,
5961
+ "learning_rate": 2.206060606060606e-06,
5962
+ "loss": 0.7176,
5963
+ "step": 9640
5964
+ },
5965
+ {
5966
+ "epoch": 193.0,
5967
+ "learning_rate": 2.1454545454545456e-06,
5968
+ "loss": 0.6491,
5969
+ "step": 9650
5970
+ },
5971
+ {
5972
+ "epoch": 193.2,
5973
+ "learning_rate": 2.084848484848485e-06,
5974
+ "loss": 0.6961,
5975
+ "step": 9660
5976
+ },
5977
+ {
5978
+ "epoch": 193.4,
5979
+ "learning_rate": 2.0242424242424244e-06,
5980
+ "loss": 0.7066,
5981
+ "step": 9670
5982
+ },
5983
+ {
5984
+ "epoch": 193.6,
5985
+ "learning_rate": 1.963636363636364e-06,
5986
+ "loss": 0.6874,
5987
+ "step": 9680
5988
+ },
5989
+ {
5990
+ "epoch": 193.8,
5991
+ "learning_rate": 1.9030303030303032e-06,
5992
+ "loss": 0.7069,
5993
+ "step": 9690
5994
+ },
5995
+ {
5996
+ "epoch": 194.0,
5997
+ "learning_rate": 1.8424242424242424e-06,
5998
+ "loss": 0.6636,
5999
+ "step": 9700
6000
+ },
6001
+ {
6002
+ "epoch": 194.2,
6003
+ "learning_rate": 1.781818181818182e-06,
6004
+ "loss": 0.7159,
6005
+ "step": 9710
6006
+ },
6007
+ {
6008
+ "epoch": 194.4,
6009
+ "learning_rate": 1.7212121212121214e-06,
6010
+ "loss": 0.6926,
6011
+ "step": 9720
6012
+ },
6013
+ {
6014
+ "epoch": 194.6,
6015
+ "learning_rate": 1.6606060606060607e-06,
6016
+ "loss": 0.6831,
6017
+ "step": 9730
6018
+ },
6019
+ {
6020
+ "epoch": 194.8,
6021
+ "learning_rate": 1.6000000000000001e-06,
6022
+ "loss": 0.6727,
6023
+ "step": 9740
6024
+ },
6025
+ {
6026
+ "epoch": 195.0,
6027
+ "learning_rate": 1.5393939393939393e-06,
6028
+ "loss": 0.6714,
6029
+ "step": 9750
6030
+ },
6031
+ {
6032
+ "epoch": 195.2,
6033
+ "learning_rate": 1.4787878787878787e-06,
6034
+ "loss": 0.7056,
6035
+ "step": 9760
6036
+ },
6037
+ {
6038
+ "epoch": 195.4,
6039
+ "learning_rate": 1.418181818181818e-06,
6040
+ "loss": 0.68,
6041
+ "step": 9770
6042
+ },
6043
+ {
6044
+ "epoch": 195.6,
6045
+ "learning_rate": 1.3575757575757577e-06,
6046
+ "loss": 0.6768,
6047
+ "step": 9780
6048
+ },
6049
+ {
6050
+ "epoch": 195.8,
6051
+ "learning_rate": 1.296969696969697e-06,
6052
+ "loss": 0.6806,
6053
+ "step": 9790
6054
+ },
6055
+ {
6056
+ "epoch": 196.0,
6057
+ "learning_rate": 1.2363636363636365e-06,
6058
+ "loss": 0.6654,
6059
+ "step": 9800
6060
+ },
6061
+ {
6062
+ "epoch": 196.2,
6063
+ "learning_rate": 1.1757575757575759e-06,
6064
+ "loss": 0.7139,
6065
+ "step": 9810
6066
+ },
6067
+ {
6068
+ "epoch": 196.4,
6069
+ "learning_rate": 1.1151515151515153e-06,
6070
+ "loss": 0.6718,
6071
+ "step": 9820
6072
+ },
6073
+ {
6074
+ "epoch": 196.6,
6075
+ "learning_rate": 1.0545454545454544e-06,
6076
+ "loss": 0.6785,
6077
+ "step": 9830
6078
+ },
6079
+ {
6080
+ "epoch": 196.8,
6081
+ "learning_rate": 9.939393939393938e-07,
6082
+ "loss": 0.6821,
6083
+ "step": 9840
6084
+ },
6085
+ {
6086
+ "epoch": 197.0,
6087
+ "learning_rate": 9.333333333333333e-07,
6088
+ "loss": 0.6711,
6089
+ "step": 9850
6090
+ },
6091
+ {
6092
+ "epoch": 197.2,
6093
+ "learning_rate": 8.727272727272727e-07,
6094
+ "loss": 0.7221,
6095
+ "step": 9860
6096
+ },
6097
+ {
6098
+ "epoch": 197.4,
6099
+ "learning_rate": 8.121212121212122e-07,
6100
+ "loss": 0.6933,
6101
+ "step": 9870
6102
+ },
6103
+ {
6104
+ "epoch": 197.6,
6105
+ "learning_rate": 7.515151515151516e-07,
6106
+ "loss": 0.6855,
6107
+ "step": 9880
6108
+ },
6109
+ {
6110
+ "epoch": 197.8,
6111
+ "learning_rate": 6.909090909090909e-07,
6112
+ "loss": 0.6902,
6113
+ "step": 9890
6114
+ },
6115
+ {
6116
+ "epoch": 198.0,
6117
+ "learning_rate": 6.303030303030303e-07,
6118
+ "loss": 0.6686,
6119
+ "step": 9900
6120
+ },
6121
+ {
6122
+ "epoch": 198.2,
6123
+ "learning_rate": 5.696969696969698e-07,
6124
+ "loss": 0.7282,
6125
+ "step": 9910
6126
+ },
6127
+ {
6128
+ "epoch": 198.4,
6129
+ "learning_rate": 5.090909090909092e-07,
6130
+ "loss": 0.6848,
6131
+ "step": 9920
6132
+ },
6133
+ {
6134
+ "epoch": 198.6,
6135
+ "learning_rate": 4.484848484848485e-07,
6136
+ "loss": 0.6939,
6137
+ "step": 9930
6138
+ },
6139
+ {
6140
+ "epoch": 198.8,
6141
+ "learning_rate": 3.878787878787879e-07,
6142
+ "loss": 0.6961,
6143
+ "step": 9940
6144
+ },
6145
+ {
6146
+ "epoch": 199.0,
6147
+ "learning_rate": 3.3333333333333335e-07,
6148
+ "loss": 0.6564,
6149
+ "step": 9950
6150
+ },
6151
+ {
6152
+ "epoch": 199.2,
6153
+ "learning_rate": 2.7272727272727274e-07,
6154
+ "loss": 0.7132,
6155
+ "step": 9960
6156
+ },
6157
+ {
6158
+ "epoch": 199.4,
6159
+ "learning_rate": 2.1212121212121213e-07,
6160
+ "loss": 0.6813,
6161
+ "step": 9970
6162
+ },
6163
+ {
6164
+ "epoch": 199.6,
6165
+ "learning_rate": 1.5151515151515152e-07,
6166
+ "loss": 0.6865,
6167
+ "step": 9980
6168
+ },
6169
+ {
6170
+ "epoch": 199.8,
6171
+ "learning_rate": 9.090909090909091e-08,
6172
+ "loss": 0.6773,
6173
+ "step": 9990
6174
+ },
6175
+ {
6176
+ "epoch": 200.0,
6177
+ "learning_rate": 3.0303030303030305e-08,
6178
+ "loss": 0.6759,
6179
+ "step": 10000
6180
+ },
6181
+ {
6182
+ "epoch": 200.0,
6183
+ "eval_loss": 0.37058258056640625,
6184
+ "eval_runtime": 244.1231,
6185
+ "eval_samples_per_second": 21.35,
6186
+ "eval_steps_per_second": 1.335,
6187
+ "eval_wer": 0.3420888217837247,
6188
+ "step": 10000
6189
+ },
6190
+ {
6191
+ "epoch": 200.0,
6192
+ "step": 10000,
6193
+ "total_flos": 3.2791120983682476e+20,
6194
+ "train_loss": 0.48956193776130674,
6195
+ "train_runtime": 78626.4431,
6196
+ "train_samples_per_second": 32.574,
6197
+ "train_steps_per_second": 0.127
6198
  }
6199
  ],
6200
+ "max_steps": 10000,
6201
+ "num_train_epochs": 200,
6202
+ "total_flos": 3.2791120983682476e+20,
6203
  "trial_name": null,
6204
  "trial_params": null
6205
  }