|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.99916317991632, |
|
"global_step": 11940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000194, |
|
"loss": 4.6647, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00039400000000000004, |
|
"loss": 3.2495, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000594, |
|
"loss": 2.8044, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007940000000000001, |
|
"loss": 2.6636, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000994, |
|
"loss": 2.6638, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.3851921558380127, |
|
"eval_runtime": 415.3375, |
|
"eval_samples_per_second": 25.011, |
|
"eval_steps_per_second": 3.128, |
|
"eval_wer": 0.9974286401391124, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0009822669104204754, |
|
"loss": 2.6437, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0009639853747714809, |
|
"loss": 2.6394, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0009457038391224862, |
|
"loss": 2.6303, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0009274223034734918, |
|
"loss": 2.6351, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0009091407678244972, |
|
"loss": 2.6578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 2.2796220779418945, |
|
"eval_runtime": 416.0072, |
|
"eval_samples_per_second": 24.971, |
|
"eval_steps_per_second": 3.123, |
|
"eval_wer": 0.9970586603030135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0008908592321755028, |
|
"loss": 2.6462, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0008725776965265082, |
|
"loss": 2.6498, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0008542961608775137, |
|
"loss": 2.614, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0008360146252285192, |
|
"loss": 2.6209, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0008177330895795247, |
|
"loss": 2.6016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 2.004575252532959, |
|
"eval_runtime": 417.037, |
|
"eval_samples_per_second": 24.909, |
|
"eval_steps_per_second": 3.115, |
|
"eval_wer": 0.9960782137373513, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0007994515539305302, |
|
"loss": 2.5937, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0007811700182815357, |
|
"loss": 2.5909, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0007628884826325412, |
|
"loss": 2.5913, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0007446069469835466, |
|
"loss": 2.5828, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0007263254113345521, |
|
"loss": 2.5752, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 1.960595726966858, |
|
"eval_runtime": 416.9162, |
|
"eval_samples_per_second": 24.916, |
|
"eval_steps_per_second": 3.116, |
|
"eval_wer": 0.9961152117209612, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0007080438756855576, |
|
"loss": 2.5663, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0006897623400365632, |
|
"loss": 2.5729, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0006714808043875685, |
|
"loss": 2.5767, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0006531992687385741, |
|
"loss": 2.5661, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0006349177330895795, |
|
"loss": 2.539, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_loss": 1.8835679292678833, |
|
"eval_runtime": 417.6452, |
|
"eval_samples_per_second": 24.873, |
|
"eval_steps_per_second": 3.11, |
|
"eval_wer": 0.9939693286715874, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.000616636197440585, |
|
"loss": 2.5237, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0005983546617915904, |
|
"loss": 2.5464, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.000580073126142596, |
|
"loss": 2.5135, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0005617915904936015, |
|
"loss": 2.5058, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0005435100548446069, |
|
"loss": 2.5214, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 1.859293818473816, |
|
"eval_runtime": 418.88, |
|
"eval_samples_per_second": 24.799, |
|
"eval_steps_per_second": 3.101, |
|
"eval_wer": 0.9933033649666093, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0005252285191956125, |
|
"loss": 2.4984, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0005069469835466179, |
|
"loss": 2.4812, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.0004886654478976234, |
|
"loss": 2.4626, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.0004703839122486289, |
|
"loss": 2.476, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00045210237659963436, |
|
"loss": 2.4684, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 1.781636357307434, |
|
"eval_runtime": 415.6525, |
|
"eval_samples_per_second": 24.992, |
|
"eval_steps_per_second": 3.125, |
|
"eval_wer": 0.9884566291137133, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.0004338208409506398, |
|
"loss": 2.4739, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0004155393053016453, |
|
"loss": 2.4494, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00039725776965265084, |
|
"loss": 2.4263, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00037897623400365635, |
|
"loss": 2.4187, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0003606946983546618, |
|
"loss": 2.4134, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 1.7167690992355347, |
|
"eval_runtime": 416.8699, |
|
"eval_samples_per_second": 24.919, |
|
"eval_steps_per_second": 3.116, |
|
"eval_wer": 0.9808165454982704, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0003424131627056673, |
|
"loss": 2.4008, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00032413162705667277, |
|
"loss": 2.4048, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0003058500914076783, |
|
"loss": 2.3795, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00028756855575868374, |
|
"loss": 2.3803, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.0002692870201096892, |
|
"loss": 2.3732, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 1.6406092643737793, |
|
"eval_runtime": 415.1084, |
|
"eval_samples_per_second": 25.025, |
|
"eval_steps_per_second": 3.129, |
|
"eval_wer": 0.976432284440498, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0002510054844606947, |
|
"loss": 2.3657, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0002327239488117002, |
|
"loss": 2.3565, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.00021462522851919562, |
|
"loss": 2.3679, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0001963436928702011, |
|
"loss": 2.34, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00017806215722120658, |
|
"loss": 2.3371, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 1.6087424755096436, |
|
"eval_runtime": 417.7716, |
|
"eval_samples_per_second": 24.865, |
|
"eval_steps_per_second": 3.109, |
|
"eval_wer": 0.9739349205468302, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 0.00015978062157221207, |
|
"loss": 2.3216, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.00014149908592321755, |
|
"loss": 2.3004, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.00012321755027422303, |
|
"loss": 2.3028, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.00010493601462522852, |
|
"loss": 2.3099, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 8.6654478976234e-05, |
|
"loss": 2.2824, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_loss": 1.5476473569869995, |
|
"eval_runtime": 417.8751, |
|
"eval_samples_per_second": 24.859, |
|
"eval_steps_per_second": 3.109, |
|
"eval_wer": 0.9695691584808628, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0005545454545454546, |
|
"loss": 2.3577, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.0005458041958041959, |
|
"loss": 2.3723, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.000537062937062937, |
|
"loss": 2.3758, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0005283216783216783, |
|
"loss": 2.3833, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0005195804195804196, |
|
"loss": 2.3771, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_loss": 1.6468309164047241, |
|
"eval_runtime": 414.4359, |
|
"eval_samples_per_second": 25.065, |
|
"eval_steps_per_second": 3.134, |
|
"eval_wer": 0.9773017370553305, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.0005108391608391608, |
|
"loss": 2.3673, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0005020979020979021, |
|
"loss": 2.3555, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.0004933566433566434, |
|
"loss": 2.3645, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0004846153846153846, |
|
"loss": 2.3617, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.0004758741258741259, |
|
"loss": 2.3499, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_loss": 1.6116454601287842, |
|
"eval_runtime": 413.4109, |
|
"eval_samples_per_second": 25.128, |
|
"eval_steps_per_second": 3.142, |
|
"eval_wer": 0.9737314316369757, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.0004671328671328671, |
|
"loss": 2.3634, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0004583916083916084, |
|
"loss": 2.3573, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 0.0004496503496503497, |
|
"loss": 2.355, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 0.00044090909090909093, |
|
"loss": 2.3543, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.00043216783216783216, |
|
"loss": 2.3283, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"eval_loss": 1.6059322357177734, |
|
"eval_runtime": 409.6633, |
|
"eval_samples_per_second": 25.357, |
|
"eval_steps_per_second": 3.171, |
|
"eval_wer": 0.9743973953419539, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 0.00042342657342657344, |
|
"loss": 2.3277, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 0.0004146853146853147, |
|
"loss": 2.3361, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 0.00040594405594405596, |
|
"loss": 2.3199, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.00039720279720279725, |
|
"loss": 2.3216, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 0.0003884615384615385, |
|
"loss": 2.3153, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"eval_loss": 1.5888867378234863, |
|
"eval_runtime": 416.4655, |
|
"eval_samples_per_second": 24.943, |
|
"eval_steps_per_second": 3.119, |
|
"eval_wer": 0.9758218177109348, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 0.0003797202797202797, |
|
"loss": 2.3192, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.000370979020979021, |
|
"loss": 2.3053, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 0.00036223776223776223, |
|
"loss": 2.3185, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 0.0003534965034965035, |
|
"loss": 2.3101, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.0003447552447552448, |
|
"loss": 2.3016, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"eval_loss": 1.5663487911224365, |
|
"eval_runtime": 415.4876, |
|
"eval_samples_per_second": 25.002, |
|
"eval_steps_per_second": 3.126, |
|
"eval_wer": 0.9727509850713136, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 0.00033601398601398603, |
|
"loss": 2.2916, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.00032727272727272726, |
|
"loss": 2.2904, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.00031853146853146855, |
|
"loss": 2.2708, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 0.0003097902097902098, |
|
"loss": 2.2876, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 0.00030104895104895107, |
|
"loss": 2.2731, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"eval_loss": 1.567448377609253, |
|
"eval_runtime": 407.9046, |
|
"eval_samples_per_second": 25.467, |
|
"eval_steps_per_second": 3.185, |
|
"eval_wer": 0.9626135375622029, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.00029230769230769235, |
|
"loss": 2.2482, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 0.0002835664335664336, |
|
"loss": 2.2559, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 0.0002748251748251748, |
|
"loss": 2.2726, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 0.0002660839160839161, |
|
"loss": 2.2508, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 0.00025743006993006993, |
|
"loss": 2.2617, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"eval_loss": 1.5032401084899902, |
|
"eval_runtime": 409.6274, |
|
"eval_samples_per_second": 25.36, |
|
"eval_steps_per_second": 3.171, |
|
"eval_wer": 0.9583402704552602, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.0002486888111888112, |
|
"loss": 2.2396, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 0.00023994755244755245, |
|
"loss": 2.2448, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 0.0002312062937062937, |
|
"loss": 2.225, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 0.00022246503496503497, |
|
"loss": 2.2319, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.00021372377622377623, |
|
"loss": 2.2252, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"eval_loss": 1.466213345527649, |
|
"eval_runtime": 409.5049, |
|
"eval_samples_per_second": 25.367, |
|
"eval_steps_per_second": 3.172, |
|
"eval_wer": 0.9516436354218695, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 0.00020498251748251749, |
|
"loss": 2.2324, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 0.00019624125874125875, |
|
"loss": 2.2197, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 0.0001875, |
|
"loss": 2.2061, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 0.00017875874125874126, |
|
"loss": 2.2062, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 0.00017001748251748252, |
|
"loss": 2.2048, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"eval_loss": 1.4410929679870605, |
|
"eval_runtime": 408.38, |
|
"eval_samples_per_second": 25.437, |
|
"eval_steps_per_second": 3.181, |
|
"eval_wer": 0.9561018924468616, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"learning_rate": 0.00016127622377622378, |
|
"loss": 2.1942, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 0.00015253496503496504, |
|
"loss": 2.2158, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 0.0001437937062937063, |
|
"loss": 2.1851, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"learning_rate": 0.00013505244755244756, |
|
"loss": 2.1798, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 0.00012631118881118882, |
|
"loss": 2.1731, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"eval_loss": 1.422843337059021, |
|
"eval_runtime": 412.7138, |
|
"eval_samples_per_second": 25.17, |
|
"eval_steps_per_second": 3.147, |
|
"eval_wer": 0.9521061102169932, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 0.00011756993006993007, |
|
"loss": 2.1736, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.00010882867132867133, |
|
"loss": 2.182, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 0.00010008741258741259, |
|
"loss": 2.1741, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 9.134615384615384e-05, |
|
"loss": 2.1636, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 8.260489510489511e-05, |
|
"loss": 2.1732, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"eval_loss": 1.4052633047103882, |
|
"eval_runtime": 410.158, |
|
"eval_samples_per_second": 25.327, |
|
"eval_steps_per_second": 3.167, |
|
"eval_wer": 0.9428566143145198, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 7.386363636363637e-05, |
|
"loss": 2.1666, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 18.76, |
|
"learning_rate": 6.512237762237761e-05, |
|
"loss": 2.1612, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 5.638111888111888e-05, |
|
"loss": 2.1616, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 4.763986013986014e-05, |
|
"loss": 2.1752, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 3.88986013986014e-05, |
|
"loss": 2.1502, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"eval_loss": 1.3827834129333496, |
|
"eval_runtime": 410.5205, |
|
"eval_samples_per_second": 25.304, |
|
"eval_steps_per_second": 3.164, |
|
"eval_wer": 0.9399522726011432, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 3.0157342657342658e-05, |
|
"loss": 2.1506, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 2.1416083916083917e-05, |
|
"loss": 2.1489, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 1.2674825174825174e-05, |
|
"loss": 2.1472, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"learning_rate": 3.933566433566434e-06, |
|
"loss": 2.1453, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 11940, |
|
"total_flos": 2.027398301943103e+20, |
|
"train_loss": 1.2195568717304786, |
|
"train_runtime": 26512.2441, |
|
"train_samples_per_second": 28.824, |
|
"train_steps_per_second": 0.45 |
|
} |
|
], |
|
"max_steps": 11940, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.027398301943103e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|