{ "best_metric": null, "best_model_checkpoint": null, "epoch": 72.72103004291846, "global_step": 5600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.3, "learning_rate": 0.0001, "loss": 0.0529, "step": 100 }, { "epoch": 1.3, "eval_loss": 0.3332812786102295, "eval_runtime": 188.2511, "eval_samples_per_second": 16.558, "eval_steps_per_second": 2.072, "eval_wer": 0.2371088585279859, "step": 100 }, { "epoch": 2.59, "learning_rate": 9.98699609882965e-05, "loss": 0.0471, "step": 200 }, { "epoch": 2.59, "eval_loss": 0.35051167011260986, "eval_runtime": 136.2299, "eval_samples_per_second": 22.88, "eval_steps_per_second": 2.863, "eval_wer": 0.23723477932380532, "step": 200 }, { "epoch": 3.89, "learning_rate": 9.973992197659299e-05, "loss": 0.0463, "step": 300 }, { "epoch": 3.89, "eval_loss": 0.3505023121833801, "eval_runtime": 142.4796, "eval_samples_per_second": 21.877, "eval_steps_per_second": 2.737, "eval_wer": 0.23931247245482593, "step": 300 }, { "epoch": 5.19, "learning_rate": 9.960988296488946e-05, "loss": 0.0461, "step": 400 }, { "epoch": 5.19, "eval_loss": 0.4004528224468231, "eval_runtime": 143.9556, "eval_samples_per_second": 21.653, "eval_steps_per_second": 2.709, "eval_wer": 0.2405716804130202, "step": 400 }, { "epoch": 6.49, "learning_rate": 9.947984395318596e-05, "loss": 0.0442, "step": 500 }, { "epoch": 6.49, "eval_loss": 0.4172374904155731, "eval_runtime": 151.5028, "eval_samples_per_second": 20.574, "eval_steps_per_second": 2.574, "eval_wer": 0.24863061134546371, "step": 500 }, { "epoch": 7.79, "learning_rate": 9.934980494148245e-05, "loss": 0.0431, "step": 600 }, { "epoch": 7.79, "eval_loss": 0.3723874092102051, "eval_runtime": 155.4666, "eval_samples_per_second": 20.049, "eval_steps_per_second": 2.509, "eval_wer": 0.24006799722974248, "step": 600 }, { "epoch": 9.09, "learning_rate": 9.921976592977894e-05, "loss": 0.0413, "step": 700 }, { "epoch": 9.09, "eval_loss": 0.38365304470062256, "eval_runtime": 157.1121, "eval_samples_per_second": 19.839, "eval_steps_per_second": 2.482, "eval_wer": 0.24466410627715168, "step": 700 }, { "epoch": 10.39, "learning_rate": 9.908972691807543e-05, "loss": 0.0417, "step": 800 }, { "epoch": 10.39, "eval_loss": 0.37991076707839966, "eval_runtime": 160.431, "eval_samples_per_second": 19.429, "eval_steps_per_second": 2.431, "eval_wer": 0.23786438330290247, "step": 800 }, { "epoch": 11.68, "learning_rate": 9.89596879063719e-05, "loss": 0.0423, "step": 900 }, { "epoch": 11.68, "eval_loss": 0.41009148955345154, "eval_runtime": 161.1546, "eval_samples_per_second": 19.342, "eval_steps_per_second": 2.42, "eval_wer": 0.24485298747088083, "step": 900 }, { "epoch": 12.98, "learning_rate": 9.88296488946684e-05, "loss": 0.0425, "step": 1000 }, { "epoch": 12.98, "eval_loss": 0.38417309522628784, "eval_runtime": 162.4688, "eval_samples_per_second": 19.185, "eval_steps_per_second": 2.4, "eval_wer": 0.23931247245482593, "step": 1000 }, { "epoch": 14.28, "learning_rate": 9.869960988296489e-05, "loss": 0.0413, "step": 1100 }, { "epoch": 14.28, "eval_loss": 0.37902718782424927, "eval_runtime": 164.4062, "eval_samples_per_second": 18.959, "eval_steps_per_second": 2.372, "eval_wer": 0.24491594786879053, "step": 1100 }, { "epoch": 15.58, "learning_rate": 9.856957087126138e-05, "loss": 0.0416, "step": 1200 }, { "epoch": 15.58, "eval_loss": 0.38784804940223694, "eval_runtime": 163.9531, "eval_samples_per_second": 19.012, "eval_steps_per_second": 2.379, "eval_wer": 0.23931247245482593, "step": 1200 }, { "epoch": 16.88, "learning_rate": 9.844083224967491e-05, "loss": 0.0436, "step": 1300 }, { "epoch": 16.88, "eval_loss": 0.36406952142715454, "eval_runtime": 162.4063, "eval_samples_per_second": 19.193, "eval_steps_per_second": 2.401, "eval_wer": 0.23817918529245105, "step": 1300 }, { "epoch": 18.18, "learning_rate": 9.83107932379714e-05, "loss": 0.0424, "step": 1400 }, { "epoch": 18.18, "eval_loss": 0.3773825764656067, "eval_runtime": 161.8281, "eval_samples_per_second": 19.261, "eval_steps_per_second": 2.41, "eval_wer": 0.2359126109677013, "step": 1400 }, { "epoch": 19.48, "learning_rate": 9.818075422626789e-05, "loss": 0.0379, "step": 1500 }, { "epoch": 19.48, "eval_loss": 0.39104992151260376, "eval_runtime": 153.8281, "eval_samples_per_second": 20.263, "eval_steps_per_second": 2.535, "eval_wer": 0.23396083863250017, "step": 1500 }, { "epoch": 20.77, "learning_rate": 9.805071521456437e-05, "loss": 0.0401, "step": 1600 }, { "epoch": 20.77, "eval_loss": 0.4012731611728668, "eval_runtime": 166.1564, "eval_samples_per_second": 18.759, "eval_steps_per_second": 2.347, "eval_wer": 0.23786438330290247, "step": 1600 }, { "epoch": 22.08, "learning_rate": 9.792067620286086e-05, "loss": 0.0397, "step": 1700 }, { "epoch": 22.08, "eval_loss": 0.3917081952095032, "eval_runtime": 151.5157, "eval_samples_per_second": 20.572, "eval_steps_per_second": 2.574, "eval_wer": 0.23981615563810363, "step": 1700 }, { "epoch": 23.37, "learning_rate": 9.779063719115735e-05, "loss": 0.0399, "step": 1800 }, { "epoch": 23.37, "eval_loss": 0.41352856159210205, "eval_runtime": 169.9898, "eval_samples_per_second": 18.336, "eval_steps_per_second": 2.294, "eval_wer": 0.24151608638166594, "step": 1800 }, { "epoch": 24.67, "learning_rate": 9.766059817945384e-05, "loss": 0.0407, "step": 1900 }, { "epoch": 24.67, "eval_loss": 0.3818851113319397, "eval_runtime": 176.1576, "eval_samples_per_second": 17.694, "eval_steps_per_second": 2.214, "eval_wer": 0.23717181892589562, "step": 1900 }, { "epoch": 25.97, "learning_rate": 9.753055916775033e-05, "loss": 0.0392, "step": 2000 }, { "epoch": 25.97, "eval_loss": 0.3882431983947754, "eval_runtime": 167.0577, "eval_samples_per_second": 18.658, "eval_steps_per_second": 2.335, "eval_wer": 0.23786438330290247, "step": 2000 }, { "epoch": 27.27, "learning_rate": 9.740052015604681e-05, "loss": 0.0381, "step": 2100 }, { "epoch": 27.27, "eval_loss": 0.3893887996673584, "eval_runtime": 171.6954, "eval_samples_per_second": 18.154, "eval_steps_per_second": 2.271, "eval_wer": 0.23943839325064534, "step": 2100 }, { "epoch": 28.57, "learning_rate": 9.72704811443433e-05, "loss": 0.0401, "step": 2200 }, { "epoch": 28.57, "eval_loss": 0.3673301041126251, "eval_runtime": 174.7303, "eval_samples_per_second": 17.839, "eval_steps_per_second": 2.232, "eval_wer": 0.2357866901718819, "step": 2200 }, { "epoch": 29.86, "learning_rate": 9.71404421326398e-05, "loss": 0.0391, "step": 2300 }, { "epoch": 29.86, "eval_loss": 0.3780101239681244, "eval_runtime": 157.4375, "eval_samples_per_second": 19.798, "eval_steps_per_second": 2.477, "eval_wer": 0.2354089277844236, "step": 2300 }, { "epoch": 31.17, "learning_rate": 9.701040312093628e-05, "loss": 0.0377, "step": 2400 }, { "epoch": 31.17, "eval_loss": 0.3910522758960724, "eval_runtime": 159.5625, "eval_samples_per_second": 19.535, "eval_steps_per_second": 2.444, "eval_wer": 0.23654221494679847, "step": 2400 }, { "epoch": 32.46, "learning_rate": 9.688036410923278e-05, "loss": 0.0378, "step": 2500 }, { "epoch": 32.46, "eval_loss": 0.3806402087211609, "eval_runtime": 170.7978, "eval_samples_per_second": 18.25, "eval_steps_per_second": 2.283, "eval_wer": 0.23918655165900649, "step": 2500 }, { "epoch": 33.76, "learning_rate": 9.675032509752925e-05, "loss": 0.0347, "step": 2600 }, { "epoch": 33.76, "eval_loss": 0.3844529390335083, "eval_runtime": 160.3906, "eval_samples_per_second": 19.434, "eval_steps_per_second": 2.432, "eval_wer": 0.23515708619278475, "step": 2600 }, { "epoch": 35.06, "learning_rate": 9.662028608582574e-05, "loss": 0.0369, "step": 2700 }, { "epoch": 35.06, "eval_loss": 0.39440667629241943, "eval_runtime": 167.7032, "eval_samples_per_second": 18.586, "eval_steps_per_second": 2.326, "eval_wer": 0.23931247245482593, "step": 2700 }, { "epoch": 36.36, "learning_rate": 9.649024707412224e-05, "loss": 0.0375, "step": 2800 }, { "epoch": 36.36, "eval_loss": 0.37219446897506714, "eval_runtime": 165.2969, "eval_samples_per_second": 18.857, "eval_steps_per_second": 2.359, "eval_wer": 0.23012025436000755, "step": 2800 }, { "epoch": 37.66, "learning_rate": 9.636020806241873e-05, "loss": 0.0363, "step": 2900 }, { "epoch": 37.66, "eval_loss": 0.3664211332798004, "eval_runtime": 167.8076, "eval_samples_per_second": 18.575, "eval_steps_per_second": 2.324, "eval_wer": 0.23112762072656298, "step": 2900 }, { "epoch": 38.95, "learning_rate": 9.623016905071522e-05, "loss": 0.034, "step": 3000 }, { "epoch": 38.95, "eval_loss": 0.392531156539917, "eval_runtime": 173.8435, "eval_samples_per_second": 17.93, "eval_steps_per_second": 2.243, "eval_wer": 0.23081281873701442, "step": 3000 }, { "epoch": 40.26, "learning_rate": 9.610013003901171e-05, "loss": 0.0331, "step": 3100 }, { "epoch": 40.26, "eval_loss": 0.3893636465072632, "eval_runtime": 167.8125, "eval_samples_per_second": 18.574, "eval_steps_per_second": 2.324, "eval_wer": 0.2323868286847573, "step": 3100 }, { "epoch": 41.55, "learning_rate": 9.59700910273082e-05, "loss": 0.0339, "step": 3200 }, { "epoch": 41.55, "eval_loss": 0.39426469802856445, "eval_runtime": 168.7188, "eval_samples_per_second": 18.475, "eval_steps_per_second": 2.312, "eval_wer": 0.233331234653403, "step": 3200 }, { "epoch": 42.85, "learning_rate": 9.584005201560469e-05, "loss": 0.033, "step": 3300 }, { "epoch": 42.85, "eval_loss": 0.3865768313407898, "eval_runtime": 161.5781, "eval_samples_per_second": 19.291, "eval_steps_per_second": 2.414, "eval_wer": 0.2350311653969653, "step": 3300 }, { "epoch": 44.15, "learning_rate": 9.571001300390118e-05, "loss": 0.0334, "step": 3400 }, { "epoch": 44.15, "eval_loss": 0.38849422335624695, "eval_runtime": 166.5778, "eval_samples_per_second": 18.712, "eval_steps_per_second": 2.341, "eval_wer": 0.2320720266952087, "step": 3400 }, { "epoch": 45.45, "learning_rate": 9.557997399219767e-05, "loss": 0.0331, "step": 3500 }, { "epoch": 45.45, "eval_loss": 0.41077303886413574, "eval_runtime": 165.0937, "eval_samples_per_second": 18.88, "eval_steps_per_second": 2.362, "eval_wer": 0.232009066297299, "step": 3500 }, { "epoch": 46.75, "learning_rate": 9.544993498049415e-05, "loss": 0.0326, "step": 3600 }, { "epoch": 46.75, "eval_loss": 0.3925323784351349, "eval_runtime": 159.9375, "eval_samples_per_second": 19.489, "eval_steps_per_second": 2.438, "eval_wer": 0.2414531259837562, "step": 3600 }, { "epoch": 48.05, "learning_rate": 9.531989596879064e-05, "loss": 0.036, "step": 3700 }, { "epoch": 48.05, "eval_loss": 0.3832598030567169, "eval_runtime": 171.125, "eval_samples_per_second": 18.215, "eval_steps_per_second": 2.279, "eval_wer": 0.2387458288736385, "step": 3700 }, { "epoch": 49.35, "learning_rate": 9.518985695708713e-05, "loss": 0.0346, "step": 3800 }, { "epoch": 49.35, "eval_loss": 0.3931749761104584, "eval_runtime": 166.4063, "eval_samples_per_second": 18.731, "eval_steps_per_second": 2.344, "eval_wer": 0.23263867027639615, "step": 3800 }, { "epoch": 50.64, "learning_rate": 9.505981794538362e-05, "loss": 0.0349, "step": 3900 }, { "epoch": 50.64, "eval_loss": 0.3744593858718872, "eval_runtime": 157.9687, "eval_samples_per_second": 19.732, "eval_steps_per_second": 2.469, "eval_wer": 0.2354089277844236, "step": 3900 }, { "epoch": 51.94, "learning_rate": 9.492977893368012e-05, "loss": 0.034, "step": 4000 }, { "epoch": 51.94, "eval_loss": 0.39862367510795593, "eval_runtime": 155.8749, "eval_samples_per_second": 19.997, "eval_steps_per_second": 2.502, "eval_wer": 0.2328275514701253, "step": 4000 }, { "epoch": 53.24, "learning_rate": 9.479973992197659e-05, "loss": 0.0334, "step": 4100 }, { "epoch": 53.24, "eval_loss": 0.39466869831085205, "eval_runtime": 162.0467, "eval_samples_per_second": 19.235, "eval_steps_per_second": 2.407, "eval_wer": 0.23352011584713214, "step": 4100 }, { "epoch": 54.54, "learning_rate": 9.466970091027308e-05, "loss": 0.0325, "step": 4200 }, { "epoch": 54.54, "eval_loss": 0.39441126585006714, "eval_runtime": 162.4844, "eval_samples_per_second": 19.183, "eval_steps_per_second": 2.4, "eval_wer": 0.23616445255934018, "step": 4200 }, { "epoch": 55.84, "learning_rate": 9.453966189856957e-05, "loss": 0.0308, "step": 4300 }, { "epoch": 55.84, "eval_loss": 0.39918699860572815, "eval_runtime": 155.5625, "eval_samples_per_second": 20.037, "eval_steps_per_second": 2.507, "eval_wer": 0.23477932380532646, "step": 4300 }, { "epoch": 57.14, "learning_rate": 9.440962288686607e-05, "loss": 0.0316, "step": 4400 }, { "epoch": 57.14, "eval_loss": 0.39010030031204224, "eval_runtime": 163.0782, "eval_samples_per_second": 19.114, "eval_steps_per_second": 2.391, "eval_wer": 0.229805452370459, "step": 4400 }, { "epoch": 58.44, "learning_rate": 9.427958387516256e-05, "loss": 0.0308, "step": 4500 }, { "epoch": 58.44, "eval_loss": 0.404751718044281, "eval_runtime": 161.3906, "eval_samples_per_second": 19.313, "eval_steps_per_second": 2.416, "eval_wer": 0.23194610589938927, "step": 4500 }, { "epoch": 59.73, "learning_rate": 9.414954486345903e-05, "loss": 0.028, "step": 4600 }, { "epoch": 59.73, "eval_loss": 0.41103655099868774, "eval_runtime": 154.3905, "eval_samples_per_second": 20.189, "eval_steps_per_second": 2.526, "eval_wer": 0.2277277592394384, "step": 4600 }, { "epoch": 61.04, "learning_rate": 9.401950585175553e-05, "loss": 0.0299, "step": 4700 }, { "epoch": 61.04, "eval_loss": 0.40084338188171387, "eval_runtime": 168.5468, "eval_samples_per_second": 18.493, "eval_steps_per_second": 2.314, "eval_wer": 0.23295347226594473, "step": 4700 }, { "epoch": 62.33, "learning_rate": 9.388946684005202e-05, "loss": 0.033, "step": 4800 }, { "epoch": 62.33, "eval_loss": 0.3985605835914612, "eval_runtime": 168.5159, "eval_samples_per_second": 18.497, "eval_steps_per_second": 2.314, "eval_wer": 0.23490524460114587, "step": 4800 }, { "epoch": 63.63, "learning_rate": 9.375942782834851e-05, "loss": 0.0345, "step": 4900 }, { "epoch": 63.63, "eval_loss": 0.3911936581134796, "eval_runtime": 158.9531, "eval_samples_per_second": 19.61, "eval_steps_per_second": 2.454, "eval_wer": 0.23559780897815275, "step": 4900 }, { "epoch": 64.93, "learning_rate": 9.3629388816645e-05, "loss": 0.0322, "step": 5000 }, { "epoch": 64.93, "eval_loss": 0.3985706567764282, "eval_runtime": 175.1875, "eval_samples_per_second": 17.792, "eval_steps_per_second": 2.226, "eval_wer": 0.231568343511931, "step": 5000 }, { "epoch": 66.23, "learning_rate": 9.349934980494148e-05, "loss": 0.0313, "step": 5100 }, { "epoch": 66.23, "eval_loss": 0.4163081645965576, "eval_runtime": 179.4796, "eval_samples_per_second": 17.367, "eval_steps_per_second": 2.173, "eval_wer": 0.23049801674746584, "step": 5100 }, { "epoch": 67.53, "learning_rate": 9.336931079323797e-05, "loss": 0.0315, "step": 5200 }, { "epoch": 67.53, "eval_loss": 0.3985958993434906, "eval_runtime": 170.0289, "eval_samples_per_second": 18.332, "eval_steps_per_second": 2.294, "eval_wer": 0.23528300698860416, "step": 5200 }, { "epoch": 68.82, "learning_rate": 9.323927178153446e-05, "loss": 0.0294, "step": 5300 }, { "epoch": 68.82, "eval_loss": 0.40526434779167175, "eval_runtime": 177.9803, "eval_samples_per_second": 17.513, "eval_steps_per_second": 2.191, "eval_wer": 0.2295536107788201, "step": 5300 }, { "epoch": 70.13, "learning_rate": 9.310923276983095e-05, "loss": 0.0302, "step": 5400 }, { "epoch": 70.13, "eval_loss": 0.3818342089653015, "eval_runtime": 177.545, "eval_samples_per_second": 17.556, "eval_steps_per_second": 2.197, "eval_wer": 0.23131650192029213, "step": 5400 }, { "epoch": 71.42, "learning_rate": 9.297919375812744e-05, "loss": 0.0318, "step": 5500 }, { "epoch": 71.42, "eval_loss": 0.3933159410953522, "eval_runtime": 168.7969, "eval_samples_per_second": 18.466, "eval_steps_per_second": 2.31, "eval_wer": 0.23301643266385444, "step": 5500 }, { "epoch": 72.72, "learning_rate": 9.284915474642393e-05, "loss": 0.0289, "step": 5600 }, { "epoch": 72.72, "eval_loss": 0.38790163397789, "eval_runtime": 167.7031, "eval_samples_per_second": 18.586, "eval_steps_per_second": 2.326, "eval_wer": 0.23396083863250017, "step": 5600 } ], "max_steps": 77000, "num_train_epochs": 1000, "total_flos": 1.80069386799743e+20, "trial_name": null, "trial_params": null }