{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.9989417989418, "global_step": 5900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.85, "learning_rate": 0.0003, "loss": 0.0549, "step": 100 }, { "epoch": 0.85, "eval_loss": 0.3047053813934326, "eval_runtime": 213.654, "eval_samples_per_second": 15.778, "eval_steps_per_second": 1.975, "eval_wer": 0.26644545348701826, "step": 100 }, { "epoch": 1.69, "learning_rate": 0.00029974554707379135, "loss": 0.0916, "step": 200 }, { "epoch": 1.69, "eval_loss": 0.3038010597229004, "eval_runtime": 151.8957, "eval_samples_per_second": 22.193, "eval_steps_per_second": 2.778, "eval_wer": 0.26597974152986376, "step": 200 }, { "epoch": 2.54, "learning_rate": 0.00029949109414758267, "loss": 0.0917, "step": 300 }, { "epoch": 2.54, "eval_loss": 0.31602439284324646, "eval_runtime": 152.2031, "eval_samples_per_second": 22.148, "eval_steps_per_second": 2.773, "eval_wer": 0.28134823611596227, "step": 300 }, { "epoch": 3.39, "learning_rate": 0.00029923664122137405, "loss": 0.0968, "step": 400 }, { "epoch": 3.39, "eval_loss": 0.34380972385406494, "eval_runtime": 153.4688, "eval_samples_per_second": 21.965, "eval_steps_per_second": 2.75, "eval_wer": 0.2737222028175573, "step": 400 }, { "epoch": 4.24, "learning_rate": 0.00029898218829516537, "loss": 0.0977, "step": 500 }, { "epoch": 4.24, "eval_loss": 0.36153408885002136, "eval_runtime": 166.8287, "eval_samples_per_second": 20.206, "eval_steps_per_second": 2.53, "eval_wer": 0.2702875771335429, "step": 500 }, { "epoch": 5.08, "learning_rate": 0.00029872773536895674, "loss": 0.0916, "step": 600 }, { "epoch": 5.08, "eval_loss": 0.3286847770214081, "eval_runtime": 171.9341, "eval_samples_per_second": 19.606, "eval_steps_per_second": 2.454, "eval_wer": 0.2669693794388171, "step": 600 }, { "epoch": 5.93, "learning_rate": 0.00029847328244274806, "loss": 0.0943, "step": 700 }, { "epoch": 5.93, "eval_loss": 0.3330075442790985, "eval_runtime": 152.0172, "eval_samples_per_second": 22.175, "eval_steps_per_second": 2.776, "eval_wer": 0.2651647456048434, "step": 700 }, { "epoch": 6.78, "learning_rate": 0.0002982213740458015, "loss": 0.0959, "step": 800 }, { "epoch": 6.78, "eval_loss": 0.3155308663845062, "eval_runtime": 176.3551, "eval_samples_per_second": 19.115, "eval_steps_per_second": 2.393, "eval_wer": 0.2723250669460938, "step": 800 }, { "epoch": 7.63, "learning_rate": 0.00029796692111959286, "loss": 0.0953, "step": 900 }, { "epoch": 7.63, "eval_loss": 0.3184454143047333, "eval_runtime": 179.411, "eval_samples_per_second": 18.789, "eval_steps_per_second": 2.352, "eval_wer": 0.25940156013505644, "step": 900 }, { "epoch": 8.47, "learning_rate": 0.0002977124681933842, "loss": 0.0989, "step": 1000 }, { "epoch": 8.47, "eval_loss": 0.3282919228076935, "eval_runtime": 177.9041, "eval_samples_per_second": 18.948, "eval_steps_per_second": 2.372, "eval_wer": 0.28070788217487486, "step": 1000 }, { "epoch": 9.32, "learning_rate": 0.00029745801526717556, "loss": 0.0962, "step": 1100 }, { "epoch": 9.32, "eval_loss": 0.3113383948802948, "eval_runtime": 183.182, "eval_samples_per_second": 18.402, "eval_steps_per_second": 2.304, "eval_wer": 0.2656886715566422, "step": 1100 }, { "epoch": 10.17, "learning_rate": 0.0002972035623409669, "loss": 0.0911, "step": 1200 }, { "epoch": 10.17, "eval_loss": 0.31265875697135925, "eval_runtime": 178.4394, "eval_samples_per_second": 18.892, "eval_steps_per_second": 2.365, "eval_wer": 0.2594597741297008, "step": 1200 }, { "epoch": 11.02, "learning_rate": 0.00029694910941475825, "loss": 0.093, "step": 1300 }, { "epoch": 11.02, "eval_loss": 0.33750081062316895, "eval_runtime": 185.9856, "eval_samples_per_second": 18.125, "eval_steps_per_second": 2.269, "eval_wer": 0.2635347537548027, "step": 1300 }, { "epoch": 11.86, "learning_rate": 0.0002966946564885496, "loss": 0.0908, "step": 1400 }, { "epoch": 11.86, "eval_loss": 0.31224948167800903, "eval_runtime": 182.1874, "eval_samples_per_second": 18.503, "eval_steps_per_second": 2.316, "eval_wer": 0.2616136919315403, "step": 1400 }, { "epoch": 12.71, "learning_rate": 0.00029644020356234095, "loss": 0.1039, "step": 1500 }, { "epoch": 12.71, "eval_loss": 0.33441564440727234, "eval_runtime": 187.7233, "eval_samples_per_second": 17.957, "eval_steps_per_second": 2.248, "eval_wer": 0.2726161369193154, "step": 1500 }, { "epoch": 13.56, "learning_rate": 0.00029618575063613227, "loss": 0.0921, "step": 1600 }, { "epoch": 13.56, "eval_loss": 0.3115340769290924, "eval_runtime": 189.6708, "eval_samples_per_second": 17.773, "eval_steps_per_second": 2.225, "eval_wer": 0.26859937128885786, "step": 1600 }, { "epoch": 14.41, "learning_rate": 0.00029593129770992364, "loss": 0.0995, "step": 1700 }, { "epoch": 14.41, "eval_loss": 0.3103960156440735, "eval_runtime": 183.1481, "eval_samples_per_second": 18.406, "eval_steps_per_second": 2.304, "eval_wer": 0.2650483176155548, "step": 1700 }, { "epoch": 15.25, "learning_rate": 0.00029567684478371497, "loss": 0.1027, "step": 1800 }, { "epoch": 15.25, "eval_loss": 0.33657944202423096, "eval_runtime": 185.2922, "eval_samples_per_second": 18.193, "eval_steps_per_second": 2.277, "eval_wer": 0.28891605541972293, "step": 1800 }, { "epoch": 16.1, "learning_rate": 0.00029542239185750634, "loss": 0.1001, "step": 1900 }, { "epoch": 16.1, "eval_loss": 0.32664933800697327, "eval_runtime": 182.6597, "eval_samples_per_second": 18.455, "eval_steps_per_second": 2.31, "eval_wer": 0.2692979392245896, "step": 1900 }, { "epoch": 16.95, "learning_rate": 0.0002951679389312977, "loss": 0.0955, "step": 2000 }, { "epoch": 16.95, "eval_loss": 0.32146599888801575, "eval_runtime": 175.25, "eval_samples_per_second": 19.235, "eval_steps_per_second": 2.408, "eval_wer": 0.25986727209221094, "step": 2000 }, { "epoch": 17.8, "learning_rate": 0.00029491348600508904, "loss": 0.0872, "step": 2100 }, { "epoch": 17.8, "eval_loss": 0.31995928287506104, "eval_runtime": 168.2812, "eval_samples_per_second": 20.032, "eval_steps_per_second": 2.508, "eval_wer": 0.2623704738619164, "step": 2100 }, { "epoch": 18.64, "learning_rate": 0.0002946590330788804, "loss": 0.0919, "step": 2200 }, { "epoch": 18.64, "eval_loss": 0.3285907208919525, "eval_runtime": 176.4154, "eval_samples_per_second": 19.108, "eval_steps_per_second": 2.392, "eval_wer": 0.26405867970660146, "step": 2200 }, { "epoch": 19.49, "learning_rate": 0.00029440458015267173, "loss": 0.0953, "step": 2300 }, { "epoch": 19.49, "eval_loss": 0.35332390666007996, "eval_runtime": 173.8594, "eval_samples_per_second": 19.389, "eval_steps_per_second": 2.427, "eval_wer": 0.2674350913959716, "step": 2300 }, { "epoch": 20.34, "learning_rate": 0.0002941501272264631, "loss": 0.0923, "step": 2400 }, { "epoch": 20.34, "eval_loss": 0.30950167775154114, "eval_runtime": 173.4531, "eval_samples_per_second": 19.435, "eval_steps_per_second": 2.433, "eval_wer": 0.2600419140761439, "step": 2400 }, { "epoch": 21.19, "learning_rate": 0.00029389567430025443, "loss": 0.0961, "step": 2500 }, { "epoch": 21.19, "eval_loss": 0.3377102315425873, "eval_runtime": 183.4219, "eval_samples_per_second": 18.378, "eval_steps_per_second": 2.301, "eval_wer": 0.255210152520666, "step": 2500 }, { "epoch": 22.03, "learning_rate": 0.00029364122137404575, "loss": 0.0919, "step": 2600 }, { "epoch": 22.03, "eval_loss": 0.3226545751094818, "eval_runtime": 183.0312, "eval_samples_per_second": 18.418, "eval_steps_per_second": 2.306, "eval_wer": 0.2614390499476074, "step": 2600 }, { "epoch": 22.88, "learning_rate": 0.00029338676844783713, "loss": 0.0859, "step": 2700 }, { "epoch": 22.88, "eval_loss": 0.30848973989486694, "eval_runtime": 176.2969, "eval_samples_per_second": 19.121, "eval_steps_per_second": 2.394, "eval_wer": 0.25416230061706835, "step": 2700 }, { "epoch": 23.73, "learning_rate": 0.00029313231552162845, "loss": 0.0915, "step": 2800 }, { "epoch": 23.73, "eval_loss": 0.3403824269771576, "eval_runtime": 172.6719, "eval_samples_per_second": 19.523, "eval_steps_per_second": 2.444, "eval_wer": 0.2610315519850972, "step": 2800 }, { "epoch": 24.58, "learning_rate": 0.00029288040712468187, "loss": 0.0917, "step": 2900 }, { "epoch": 24.58, "eval_loss": 0.2996799647808075, "eval_runtime": 178.4531, "eval_samples_per_second": 18.89, "eval_steps_per_second": 2.365, "eval_wer": 0.2529980207241821, "step": 2900 }, { "epoch": 25.42, "learning_rate": 0.00029262595419847324, "loss": 0.0967, "step": 3000 }, { "epoch": 25.42, "eval_loss": 0.3144609332084656, "eval_runtime": 177.4531, "eval_samples_per_second": 18.997, "eval_steps_per_second": 2.378, "eval_wer": 0.25555943648853185, "step": 3000 }, { "epoch": 26.27, "learning_rate": 0.0002923715012722646, "loss": 0.0973, "step": 3100 }, { "epoch": 26.27, "eval_loss": 0.3294685482978821, "eval_runtime": 178.7969, "eval_samples_per_second": 18.854, "eval_steps_per_second": 2.36, "eval_wer": 0.25940156013505644, "step": 3100 }, { "epoch": 27.12, "learning_rate": 0.00029211704834605594, "loss": 0.0932, "step": 3200 }, { "epoch": 27.12, "eval_loss": 0.3125886023044586, "eval_runtime": 168.7969, "eval_samples_per_second": 19.971, "eval_steps_per_second": 2.5, "eval_wer": 0.25684014437070674, "step": 3200 }, { "epoch": 27.97, "learning_rate": 0.0002918625954198473, "loss": 0.0945, "step": 3300 }, { "epoch": 27.97, "eval_loss": 0.3468785583972931, "eval_runtime": 179.6094, "eval_samples_per_second": 18.769, "eval_steps_per_second": 2.35, "eval_wer": 0.2523576667830947, "step": 3300 }, { "epoch": 28.81, "learning_rate": 0.00029160814249363864, "loss": 0.0852, "step": 3400 }, { "epoch": 28.81, "eval_loss": 0.31798404455184937, "eval_runtime": 173.2031, "eval_samples_per_second": 19.463, "eval_steps_per_second": 2.436, "eval_wer": 0.254104086622424, "step": 3400 }, { "epoch": 29.66, "learning_rate": 0.00029135368956743, "loss": 0.0866, "step": 3500 }, { "epoch": 29.66, "eval_loss": 0.31360727548599243, "eval_runtime": 167.4063, "eval_samples_per_second": 20.137, "eval_steps_per_second": 2.521, "eval_wer": 0.255210152520666, "step": 3500 }, { "epoch": 30.51, "learning_rate": 0.00029109923664122133, "loss": 0.0844, "step": 3600 }, { "epoch": 30.51, "eval_loss": 0.33361586928367615, "eval_runtime": 166.3125, "eval_samples_per_second": 20.269, "eval_steps_per_second": 2.537, "eval_wer": 0.2660379555245081, "step": 3600 }, { "epoch": 31.36, "learning_rate": 0.0002908447837150127, "loss": 0.0847, "step": 3700 }, { "epoch": 31.36, "eval_loss": 0.31821873784065247, "eval_runtime": 178.8437, "eval_samples_per_second": 18.849, "eval_steps_per_second": 2.36, "eval_wer": 0.2507858889276982, "step": 3700 }, { "epoch": 32.2, "learning_rate": 0.0002905903307888041, "loss": 0.0885, "step": 3800 }, { "epoch": 32.2, "eval_loss": 0.32577720284461975, "eval_runtime": 175.0625, "eval_samples_per_second": 19.256, "eval_steps_per_second": 2.411, "eval_wer": 0.2674933053906159, "step": 3800 }, { "epoch": 33.05, "learning_rate": 0.0002903358778625954, "loss": 0.0855, "step": 3900 }, { "epoch": 33.05, "eval_loss": 0.31836631894111633, "eval_runtime": 169.0469, "eval_samples_per_second": 19.941, "eval_steps_per_second": 2.496, "eval_wer": 0.25375480265455813, "step": 3900 }, { "epoch": 33.9, "learning_rate": 0.0002900814249363867, "loss": 0.0813, "step": 4000 }, { "epoch": 33.9, "eval_loss": 0.30345430970191956, "eval_runtime": 175.0938, "eval_samples_per_second": 19.253, "eval_steps_per_second": 2.41, "eval_wer": 0.2506112469437653, "step": 4000 }, { "epoch": 34.74, "learning_rate": 0.0002898269720101781, "loss": 0.0822, "step": 4100 }, { "epoch": 34.74, "eval_loss": 0.3159136176109314, "eval_runtime": 174.1406, "eval_samples_per_second": 19.358, "eval_steps_per_second": 2.423, "eval_wer": 0.2572476423332169, "step": 4100 }, { "epoch": 35.59, "learning_rate": 0.0002895725190839694, "loss": 0.0849, "step": 4200 }, { "epoch": 35.59, "eval_loss": 0.2940651774406433, "eval_runtime": 173.233, "eval_samples_per_second": 19.459, "eval_steps_per_second": 2.436, "eval_wer": 0.2512516008848527, "step": 4200 }, { "epoch": 36.44, "learning_rate": 0.0002893180661577608, "loss": 0.0885, "step": 4300 }, { "epoch": 36.44, "eval_loss": 0.32734107971191406, "eval_runtime": 193.9206, "eval_samples_per_second": 17.383, "eval_steps_per_second": 2.176, "eval_wer": 0.26423332169053443, "step": 4300 }, { "epoch": 37.29, "learning_rate": 0.0002890636132315521, "loss": 0.0866, "step": 4400 }, { "epoch": 37.29, "eval_loss": 0.33303678035736084, "eval_runtime": 197.0429, "eval_samples_per_second": 17.108, "eval_steps_per_second": 2.142, "eval_wer": 0.255966934451042, "step": 4400 }, { "epoch": 38.14, "learning_rate": 0.0002888091603053435, "loss": 0.0841, "step": 4500 }, { "epoch": 38.14, "eval_loss": 0.32818496227264404, "eval_runtime": 192.3874, "eval_samples_per_second": 17.522, "eval_steps_per_second": 2.193, "eval_wer": 0.24997089300267786, "step": 4500 }, { "epoch": 38.98, "learning_rate": 0.0002885547073791348, "loss": 0.0848, "step": 4600 }, { "epoch": 38.98, "eval_loss": 0.32277733087539673, "eval_runtime": 188.7845, "eval_samples_per_second": 17.856, "eval_steps_per_second": 2.235, "eval_wer": 0.2605658400279427, "step": 4600 }, { "epoch": 39.83, "learning_rate": 0.0002883002544529262, "loss": 0.0752, "step": 4700 }, { "epoch": 39.83, "eval_loss": 0.3181003928184509, "eval_runtime": 190.576, "eval_samples_per_second": 17.688, "eval_steps_per_second": 2.214, "eval_wer": 0.2516008848527186, "step": 4700 }, { "epoch": 40.68, "learning_rate": 0.0002880458015267175, "loss": 0.0827, "step": 4800 }, { "epoch": 40.68, "eval_loss": 0.3244548439979553, "eval_runtime": 196.2976, "eval_samples_per_second": 17.173, "eval_steps_per_second": 2.15, "eval_wer": 0.2487483991151473, "step": 4800 }, { "epoch": 41.52, "learning_rate": 0.0002877913486005089, "loss": 0.0765, "step": 4900 }, { "epoch": 41.52, "eval_loss": 0.31394141912460327, "eval_runtime": 194.1554, "eval_samples_per_second": 17.362, "eval_steps_per_second": 2.174, "eval_wer": 0.24508091745255559, "step": 4900 }, { "epoch": 42.37, "learning_rate": 0.0002875368956743002, "loss": 0.0777, "step": 5000 }, { "epoch": 42.37, "eval_loss": 0.3149695098400116, "eval_runtime": 192.9814, "eval_samples_per_second": 17.468, "eval_steps_per_second": 2.187, "eval_wer": 0.24508091745255559, "step": 5000 }, { "epoch": 43.22, "learning_rate": 0.0002872824427480916, "loss": 0.0804, "step": 5100 }, { "epoch": 43.22, "eval_loss": 0.3207753598690033, "eval_runtime": 190.3397, "eval_samples_per_second": 17.71, "eval_steps_per_second": 2.217, "eval_wer": 0.25381301664920247, "step": 5100 }, { "epoch": 44.07, "learning_rate": 0.000287030534351145, "loss": 0.0838, "step": 5200 }, { "epoch": 44.07, "eval_loss": 0.3102128505706787, "eval_runtime": 175.1719, "eval_samples_per_second": 19.244, "eval_steps_per_second": 2.409, "eval_wer": 0.24845732914192573, "step": 5200 }, { "epoch": 44.91, "learning_rate": 0.0002867760814249364, "loss": 0.0731, "step": 5300 }, { "epoch": 44.91, "eval_loss": 0.29943132400512695, "eval_runtime": 173.7343, "eval_samples_per_second": 19.403, "eval_steps_per_second": 2.429, "eval_wer": 0.24449877750611246, "step": 5300 }, { "epoch": 45.76, "learning_rate": 0.0002865216284987277, "loss": 0.0736, "step": 5400 }, { "epoch": 45.76, "eval_loss": 0.3185470998287201, "eval_runtime": 174.5938, "eval_samples_per_second": 19.308, "eval_steps_per_second": 2.417, "eval_wer": 0.2591687041564792, "step": 5400 }, { "epoch": 46.61, "learning_rate": 0.0002862671755725191, "loss": 0.0795, "step": 5500 }, { "epoch": 46.61, "eval_loss": 0.3023243546485901, "eval_runtime": 174.2188, "eval_samples_per_second": 19.349, "eval_steps_per_second": 2.422, "eval_wer": 0.24583769938293165, "step": 5500 }, { "epoch": 47.46, "learning_rate": 0.0002860127226463104, "loss": 0.0753, "step": 5600 }, { "epoch": 47.46, "eval_loss": 0.32648247480392456, "eval_runtime": 179.8281, "eval_samples_per_second": 18.746, "eval_steps_per_second": 2.347, "eval_wer": 0.2464780533240191, "step": 5600 }, { "epoch": 48.3, "learning_rate": 0.0002857582697201018, "loss": 0.0716, "step": 5700 }, { "epoch": 48.3, "eval_loss": 0.3370068073272705, "eval_runtime": 190.9301, "eval_samples_per_second": 17.656, "eval_steps_per_second": 2.21, "eval_wer": 0.24903946908836885, "step": 5700 }, { "epoch": 49.15, "learning_rate": 0.0002855038167938931, "loss": 0.074, "step": 5800 }, { "epoch": 49.15, "eval_loss": 0.299947589635849, "eval_runtime": 192.7259, "eval_samples_per_second": 17.491, "eval_steps_per_second": 2.19, "eval_wer": 0.2431598556292933, "step": 5800 }, { "epoch": 50.0, "learning_rate": 0.00028524936386768447, "loss": 0.0678, "step": 5900 }, { "epoch": 50.0, "eval_loss": 0.32643795013427734, "eval_runtime": 195.0205, "eval_samples_per_second": 17.285, "eval_steps_per_second": 2.164, "eval_wer": 0.2549190825474444, "step": 5900 } ], "max_steps": 118000, "num_train_epochs": 1000, "total_flos": 1.3321101567847278e+20, "trial_name": null, "trial_params": null }