|
{ |
|
"best_metric": 36.4960604331718, |
|
"best_model_checkpoint": "./whisper-medium-ka/checkpoint-10000", |
|
"epoch": 2.5131942699170646, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02535496957403651, |
|
"grad_norm": 10.789830207824707, |
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 1.2488, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05070993914807302, |
|
"grad_norm": 6.402094841003418, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 0.9647, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07606490872210954, |
|
"grad_norm": 6.477363586425781, |
|
"learning_rate": 1.46e-06, |
|
"loss": 0.7572, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.10141987829614604, |
|
"grad_norm": 5.079893589019775, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 0.636, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12677484787018256, |
|
"grad_norm": 4.092740535736084, |
|
"learning_rate": 2.46e-06, |
|
"loss": 0.5668, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.15212981744421908, |
|
"grad_norm": 5.041500091552734, |
|
"learning_rate": 2.96e-06, |
|
"loss": 0.5045, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17748478701825557, |
|
"grad_norm": 5.795814514160156, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.4991, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2028397565922921, |
|
"grad_norm": 5.117966651916504, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.4602, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2281947261663286, |
|
"grad_norm": 5.343120574951172, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.4458, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.2535496957403651, |
|
"grad_norm": 3.3774187564849854, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.4174, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2789046653144016, |
|
"grad_norm": 3.576996326446533, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.4296, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.30425963488843816, |
|
"grad_norm": 3.852977991104126, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.3889, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32961460446247465, |
|
"grad_norm": 4.133119583129883, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.3864, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.35496957403651114, |
|
"grad_norm": 4.041072368621826, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.3787, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3803245436105477, |
|
"grad_norm": 4.2009806632995605, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.3722, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4056795131845842, |
|
"grad_norm": 3.9817683696746826, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.366, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43103448275862066, |
|
"grad_norm": 3.8101816177368164, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.3712, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4563894523326572, |
|
"grad_norm": 3.7913081645965576, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.3743, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4817444219066937, |
|
"grad_norm": 4.321682929992676, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.3467, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5070993914807302, |
|
"grad_norm": 3.346144199371338, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.3429, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5324543610547667, |
|
"grad_norm": 4.033421516418457, |
|
"learning_rate": 9.975789473684211e-06, |
|
"loss": 0.3521, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5578093306288032, |
|
"grad_norm": 4.106046676635742, |
|
"learning_rate": 9.949473684210526e-06, |
|
"loss": 0.3219, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5831643002028397, |
|
"grad_norm": 3.8651669025421143, |
|
"learning_rate": 9.923157894736844e-06, |
|
"loss": 0.3323, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6085192697768763, |
|
"grad_norm": 4.054211616516113, |
|
"learning_rate": 9.89684210526316e-06, |
|
"loss": 0.3443, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6338742393509128, |
|
"grad_norm": 3.2193450927734375, |
|
"learning_rate": 9.870526315789474e-06, |
|
"loss": 0.3159, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6592292089249493, |
|
"grad_norm": 2.698641300201416, |
|
"learning_rate": 9.84421052631579e-06, |
|
"loss": 0.3197, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6845841784989858, |
|
"grad_norm": 2.68990159034729, |
|
"learning_rate": 9.817894736842106e-06, |
|
"loss": 0.3151, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7099391480730223, |
|
"grad_norm": 3.144531726837158, |
|
"learning_rate": 9.791578947368422e-06, |
|
"loss": 0.3094, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 3.1064047813415527, |
|
"learning_rate": 9.765263157894737e-06, |
|
"loss": 0.3026, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7606490872210954, |
|
"grad_norm": 2.2833447456359863, |
|
"learning_rate": 9.738947368421054e-06, |
|
"loss": 0.3152, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7860040567951319, |
|
"grad_norm": 2.9134676456451416, |
|
"learning_rate": 9.712631578947369e-06, |
|
"loss": 0.3018, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.8113590263691683, |
|
"grad_norm": 3.249375820159912, |
|
"learning_rate": 9.686315789473684e-06, |
|
"loss": 0.306, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8367139959432048, |
|
"grad_norm": 3.1664018630981445, |
|
"learning_rate": 9.66e-06, |
|
"loss": 0.3053, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 2.8798232078552246, |
|
"learning_rate": 9.633684210526316e-06, |
|
"loss": 0.2919, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8874239350912779, |
|
"grad_norm": 3.306312322616577, |
|
"learning_rate": 9.607368421052632e-06, |
|
"loss": 0.3008, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.9127789046653144, |
|
"grad_norm": 3.241096258163452, |
|
"learning_rate": 9.581052631578947e-06, |
|
"loss": 0.3066, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9381338742393509, |
|
"grad_norm": 3.2305490970611572, |
|
"learning_rate": 9.554736842105264e-06, |
|
"loss": 0.293, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9634888438133874, |
|
"grad_norm": 4.4702229499816895, |
|
"learning_rate": 9.52842105263158e-06, |
|
"loss": 0.2901, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9888438133874239, |
|
"grad_norm": 3.1923646926879883, |
|
"learning_rate": 9.502105263157896e-06, |
|
"loss": 0.29, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.0141987829614605, |
|
"grad_norm": 2.5901284217834473, |
|
"learning_rate": 9.475789473684212e-06, |
|
"loss": 0.2611, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0141987829614605, |
|
"eval_loss": 0.2935636639595032, |
|
"eval_runtime": 7046.6021, |
|
"eval_samples_per_second": 0.69, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 62.766441678696374, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.039553752535497, |
|
"grad_norm": 2.9228506088256836, |
|
"learning_rate": 9.449473684210527e-06, |
|
"loss": 0.2488, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.0649087221095335, |
|
"grad_norm": 2.1438565254211426, |
|
"learning_rate": 9.423157894736842e-06, |
|
"loss": 0.2441, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09026369168357, |
|
"grad_norm": 3.6437671184539795, |
|
"learning_rate": 9.396842105263159e-06, |
|
"loss": 0.2512, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.1156186612576064, |
|
"grad_norm": 2.413125991821289, |
|
"learning_rate": 9.370526315789474e-06, |
|
"loss": 0.2447, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.140973630831643, |
|
"grad_norm": 2.880699872970581, |
|
"learning_rate": 9.34421052631579e-06, |
|
"loss": 0.2463, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.1663286004056794, |
|
"grad_norm": 2.283259630203247, |
|
"learning_rate": 9.317894736842105e-06, |
|
"loss": 0.2488, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.1916835699797161, |
|
"grad_norm": 3.4118285179138184, |
|
"learning_rate": 9.291578947368422e-06, |
|
"loss": 0.2482, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.2170385395537526, |
|
"grad_norm": 2.1249563694000244, |
|
"learning_rate": 9.265263157894737e-06, |
|
"loss": 0.243, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2423935091277891, |
|
"grad_norm": 2.6854898929595947, |
|
"learning_rate": 9.238947368421052e-06, |
|
"loss": 0.2563, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.2677484787018256, |
|
"grad_norm": 2.975567102432251, |
|
"learning_rate": 9.21263157894737e-06, |
|
"loss": 0.2529, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.293103448275862, |
|
"grad_norm": 2.6156153678894043, |
|
"learning_rate": 9.186315789473685e-06, |
|
"loss": 0.2378, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.3184584178498986, |
|
"grad_norm": 2.3254947662353516, |
|
"learning_rate": 9.16e-06, |
|
"loss": 0.2457, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.343813387423935, |
|
"grad_norm": 3.162607431411743, |
|
"learning_rate": 9.133684210526317e-06, |
|
"loss": 0.2452, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.3691683569979716, |
|
"grad_norm": 2.94197678565979, |
|
"learning_rate": 9.107368421052632e-06, |
|
"loss": 0.2411, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.394523326572008, |
|
"grad_norm": 2.53021502494812, |
|
"learning_rate": 9.081052631578949e-06, |
|
"loss": 0.2497, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.4198782961460445, |
|
"grad_norm": 2.740830421447754, |
|
"learning_rate": 9.054736842105264e-06, |
|
"loss": 0.228, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.445233265720081, |
|
"grad_norm": 3.1709723472595215, |
|
"learning_rate": 9.02842105263158e-06, |
|
"loss": 0.2598, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 2.508091688156128, |
|
"learning_rate": 9.002105263157895e-06, |
|
"loss": 0.2508, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.495943204868154, |
|
"grad_norm": 2.7562551498413086, |
|
"learning_rate": 8.97578947368421e-06, |
|
"loss": 0.2462, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.5212981744421907, |
|
"grad_norm": 3.4288785457611084, |
|
"learning_rate": 8.949473684210527e-06, |
|
"loss": 0.2351, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5466531440162272, |
|
"grad_norm": 2.4875781536102295, |
|
"learning_rate": 8.923157894736842e-06, |
|
"loss": 0.249, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.5720081135902637, |
|
"grad_norm": 2.5765998363494873, |
|
"learning_rate": 8.896842105263159e-06, |
|
"loss": 0.2354, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.5973630831643002, |
|
"grad_norm": 2.0511434078216553, |
|
"learning_rate": 8.870526315789474e-06, |
|
"loss": 0.248, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.6227180527383367, |
|
"grad_norm": 3.0492985248565674, |
|
"learning_rate": 8.84421052631579e-06, |
|
"loss": 0.2443, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.6480730223123732, |
|
"grad_norm": 3.98416805267334, |
|
"learning_rate": 8.817894736842107e-06, |
|
"loss": 0.2348, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.6734279918864097, |
|
"grad_norm": 2.4991466999053955, |
|
"learning_rate": 8.791578947368422e-06, |
|
"loss": 0.2514, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.6987829614604464, |
|
"grad_norm": 2.768167734146118, |
|
"learning_rate": 8.765263157894739e-06, |
|
"loss": 0.2517, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 3.1816017627716064, |
|
"learning_rate": 8.738947368421053e-06, |
|
"loss": 0.2421, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.7494929006085194, |
|
"grad_norm": 3.7011590003967285, |
|
"learning_rate": 8.712631578947368e-06, |
|
"loss": 0.2376, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.7748478701825556, |
|
"grad_norm": 2.3581457138061523, |
|
"learning_rate": 8.686315789473685e-06, |
|
"loss": 0.2281, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.8002028397565923, |
|
"grad_norm": 2.812277317047119, |
|
"learning_rate": 8.66e-06, |
|
"loss": 0.2357, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.8255578093306288, |
|
"grad_norm": 3.1732356548309326, |
|
"learning_rate": 8.633684210526317e-06, |
|
"loss": 0.237, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8509127789046653, |
|
"grad_norm": 1.9399900436401367, |
|
"learning_rate": 8.607368421052632e-06, |
|
"loss": 0.2395, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.8762677484787018, |
|
"grad_norm": 1.8843597173690796, |
|
"learning_rate": 8.581052631578948e-06, |
|
"loss": 0.2282, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9016227180527383, |
|
"grad_norm": 2.314880132675171, |
|
"learning_rate": 8.554736842105263e-06, |
|
"loss": 0.2454, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.9269776876267748, |
|
"grad_norm": 4.113000392913818, |
|
"learning_rate": 8.528421052631578e-06, |
|
"loss": 0.2342, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.9523326572008113, |
|
"grad_norm": 2.076021432876587, |
|
"learning_rate": 8.502105263157897e-06, |
|
"loss": 0.2313, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.977687626774848, |
|
"grad_norm": 3.887740135192871, |
|
"learning_rate": 8.475789473684212e-06, |
|
"loss": 0.245, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.0030425963488843, |
|
"grad_norm": 2.359464406967163, |
|
"learning_rate": 8.449473684210527e-06, |
|
"loss": 0.2539, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.028397565922921, |
|
"grad_norm": 2.299802303314209, |
|
"learning_rate": 8.423157894736843e-06, |
|
"loss": 0.18, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.028397565922921, |
|
"eval_loss": 0.27181410789489746, |
|
"eval_runtime": 7074.1416, |
|
"eval_samples_per_second": 0.688, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 59.46705161718232, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0537525354969572, |
|
"grad_norm": 4.299188613891602, |
|
"learning_rate": 8.396842105263158e-06, |
|
"loss": 0.1803, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.079107505070994, |
|
"grad_norm": 2.5189449787139893, |
|
"learning_rate": 8.370526315789475e-06, |
|
"loss": 0.1927, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.1044624746450302, |
|
"grad_norm": 2.5737998485565186, |
|
"learning_rate": 8.34421052631579e-06, |
|
"loss": 0.1818, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.129817444219067, |
|
"grad_norm": 3.098142623901367, |
|
"learning_rate": 8.317894736842107e-06, |
|
"loss": 0.1907, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.1551724137931036, |
|
"grad_norm": 1.9791940450668335, |
|
"learning_rate": 8.291578947368422e-06, |
|
"loss": 0.1809, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.18052738336714, |
|
"grad_norm": 1.9798074960708618, |
|
"learning_rate": 8.265263157894737e-06, |
|
"loss": 0.1842, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 2.5827345848083496, |
|
"learning_rate": 8.238947368421053e-06, |
|
"loss": 0.1769, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.231237322515213, |
|
"grad_norm": 2.1781139373779297, |
|
"learning_rate": 8.212631578947368e-06, |
|
"loss": 0.1904, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.2565922920892496, |
|
"grad_norm": 2.2573533058166504, |
|
"learning_rate": 8.186315789473685e-06, |
|
"loss": 0.1892, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.281947261663286, |
|
"grad_norm": 2.1169838905334473, |
|
"learning_rate": 8.16e-06, |
|
"loss": 0.1868, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.3073022312373226, |
|
"grad_norm": 3.3498687744140625, |
|
"learning_rate": 8.133684210526316e-06, |
|
"loss": 0.197, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.332657200811359, |
|
"grad_norm": 2.25311541557312, |
|
"learning_rate": 8.107368421052633e-06, |
|
"loss": 0.1869, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.3580121703853956, |
|
"grad_norm": 2.396975040435791, |
|
"learning_rate": 8.081052631578948e-06, |
|
"loss": 0.189, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.3833671399594323, |
|
"grad_norm": 2.147547960281372, |
|
"learning_rate": 8.054736842105265e-06, |
|
"loss": 0.1893, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.4087221095334685, |
|
"grad_norm": 1.9963374137878418, |
|
"learning_rate": 8.02842105263158e-06, |
|
"loss": 0.2003, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.4340770791075053, |
|
"grad_norm": 3.571808099746704, |
|
"learning_rate": 8.002105263157895e-06, |
|
"loss": 0.1904, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.4594320486815415, |
|
"grad_norm": 2.6122543811798096, |
|
"learning_rate": 7.975789473684211e-06, |
|
"loss": 0.1855, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.4847870182555782, |
|
"grad_norm": 2.9478933811187744, |
|
"learning_rate": 7.949473684210526e-06, |
|
"loss": 0.1822, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.5101419878296145, |
|
"grad_norm": 2.368997097015381, |
|
"learning_rate": 7.923157894736843e-06, |
|
"loss": 0.186, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.535496957403651, |
|
"grad_norm": 2.529630661010742, |
|
"learning_rate": 7.896842105263158e-06, |
|
"loss": 0.191, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.5608519269776875, |
|
"grad_norm": 2.9214324951171875, |
|
"learning_rate": 7.870526315789475e-06, |
|
"loss": 0.1885, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 2.141688823699951, |
|
"learning_rate": 7.84421052631579e-06, |
|
"loss": 0.1847, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.6115618661257605, |
|
"grad_norm": 2.1608083248138428, |
|
"learning_rate": 7.817894736842105e-06, |
|
"loss": 0.1873, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.636916835699797, |
|
"grad_norm": 2.489492416381836, |
|
"learning_rate": 7.791578947368423e-06, |
|
"loss": 0.1959, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.662271805273834, |
|
"grad_norm": 2.31791353225708, |
|
"learning_rate": 7.765263157894738e-06, |
|
"loss": 0.1915, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.68762677484787, |
|
"grad_norm": 2.779777765274048, |
|
"learning_rate": 7.738947368421053e-06, |
|
"loss": 0.1878, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.7129817444219064, |
|
"grad_norm": 2.5656425952911377, |
|
"learning_rate": 7.71263157894737e-06, |
|
"loss": 0.1925, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.738336713995943, |
|
"grad_norm": 3.0531349182128906, |
|
"learning_rate": 7.686315789473685e-06, |
|
"loss": 0.189, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.76369168356998, |
|
"grad_norm": 2.912122964859009, |
|
"learning_rate": 7.660000000000001e-06, |
|
"loss": 0.1906, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.789046653144016, |
|
"grad_norm": 3.0555505752563477, |
|
"learning_rate": 7.633684210526316e-06, |
|
"loss": 0.1834, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.814401622718053, |
|
"grad_norm": 2.2603671550750732, |
|
"learning_rate": 7.607368421052632e-06, |
|
"loss": 0.1872, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.839756592292089, |
|
"grad_norm": 2.1887290477752686, |
|
"learning_rate": 7.581052631578948e-06, |
|
"loss": 0.1787, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.865111561866126, |
|
"grad_norm": 2.393935203552246, |
|
"learning_rate": 7.554736842105264e-06, |
|
"loss": 0.1883, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.890466531440162, |
|
"grad_norm": 2.3543920516967773, |
|
"learning_rate": 7.5284210526315794e-06, |
|
"loss": 0.1883, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.915821501014199, |
|
"grad_norm": 2.139833927154541, |
|
"learning_rate": 7.502105263157895e-06, |
|
"loss": 0.1997, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 2.270552158355713, |
|
"learning_rate": 7.475789473684211e-06, |
|
"loss": 0.1932, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.9665314401622718, |
|
"grad_norm": 2.4720232486724854, |
|
"learning_rate": 7.449473684210526e-06, |
|
"loss": 0.1926, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.991886409736308, |
|
"grad_norm": 2.318767786026001, |
|
"learning_rate": 7.4231578947368436e-06, |
|
"loss": 0.1837, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.0172413793103448, |
|
"grad_norm": 1.8572547435760498, |
|
"learning_rate": 7.3968421052631585e-06, |
|
"loss": 0.1584, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 3.0425963488843815, |
|
"grad_norm": 2.6684017181396484, |
|
"learning_rate": 7.370526315789474e-06, |
|
"loss": 0.15, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.0425963488843815, |
|
"eval_loss": 0.2813817858695984, |
|
"eval_runtime": 7104.648, |
|
"eval_samples_per_second": 0.685, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 59.24206364688593, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.760241266649912, |
|
"grad_norm": 2.2316994667053223, |
|
"learning_rate": 8.26e-06, |
|
"loss": 0.1649, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.7665242523247047, |
|
"grad_norm": 1.9445712566375732, |
|
"learning_rate": 8.242758620689655e-06, |
|
"loss": 0.1373, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7728072379994974, |
|
"grad_norm": 1.9695370197296143, |
|
"learning_rate": 8.22551724137931e-06, |
|
"loss": 0.1303, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.77909022367429, |
|
"grad_norm": 2.19242262840271, |
|
"learning_rate": 8.208275862068967e-06, |
|
"loss": 0.1204, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7853732093490827, |
|
"grad_norm": 1.6107076406478882, |
|
"learning_rate": 8.191034482758622e-06, |
|
"loss": 0.1206, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.7916561950238753, |
|
"grad_norm": 1.8804924488067627, |
|
"learning_rate": 8.173793103448277e-06, |
|
"loss": 0.116, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.797939180698668, |
|
"grad_norm": 1.8132656812667847, |
|
"learning_rate": 8.156551724137931e-06, |
|
"loss": 0.1193, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.8042221663734607, |
|
"grad_norm": 1.9557982683181763, |
|
"learning_rate": 8.139310344827586e-06, |
|
"loss": 0.1139, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8105051520482534, |
|
"grad_norm": 1.4428989887237549, |
|
"learning_rate": 8.122068965517243e-06, |
|
"loss": 0.1033, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.816788137723046, |
|
"grad_norm": 1.4548839330673218, |
|
"learning_rate": 8.104827586206898e-06, |
|
"loss": 0.0975, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8230711233978386, |
|
"grad_norm": 1.8930418491363525, |
|
"learning_rate": 8.087586206896553e-06, |
|
"loss": 0.0997, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.8293541090726313, |
|
"grad_norm": 1.905731439590454, |
|
"learning_rate": 8.070344827586207e-06, |
|
"loss": 0.1069, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.835637094747424, |
|
"grad_norm": 1.7036992311477661, |
|
"learning_rate": 8.053103448275862e-06, |
|
"loss": 0.0921, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.8419200804222167, |
|
"grad_norm": 1.7952214479446411, |
|
"learning_rate": 8.035862068965517e-06, |
|
"loss": 0.1027, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8482030660970094, |
|
"grad_norm": 2.0625522136688232, |
|
"learning_rate": 8.018620689655174e-06, |
|
"loss": 0.1051, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.8544860517718019, |
|
"grad_norm": 1.8595600128173828, |
|
"learning_rate": 8.001379310344829e-06, |
|
"loss": 0.0968, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8607690374465946, |
|
"grad_norm": 1.5569204092025757, |
|
"learning_rate": 7.984137931034484e-06, |
|
"loss": 0.099, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.8670520231213873, |
|
"grad_norm": 1.6187876462936401, |
|
"learning_rate": 7.966896551724138e-06, |
|
"loss": 0.0942, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.87333500879618, |
|
"grad_norm": 1.494591474533081, |
|
"learning_rate": 7.949655172413793e-06, |
|
"loss": 0.096, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.8796179944709726, |
|
"grad_norm": 1.5578557252883911, |
|
"learning_rate": 7.932413793103448e-06, |
|
"loss": 0.0885, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8859009801457652, |
|
"grad_norm": 2.047339677810669, |
|
"learning_rate": 7.915172413793105e-06, |
|
"loss": 0.098, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.8921839658205579, |
|
"grad_norm": 1.7731865644454956, |
|
"learning_rate": 7.89793103448276e-06, |
|
"loss": 0.0889, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8984669514953506, |
|
"grad_norm": 2.1644279956817627, |
|
"learning_rate": 7.880689655172414e-06, |
|
"loss": 0.0864, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.9047499371701433, |
|
"grad_norm": 1.8717072010040283, |
|
"learning_rate": 7.86344827586207e-06, |
|
"loss": 0.0847, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9110329228449359, |
|
"grad_norm": 1.481933832168579, |
|
"learning_rate": 7.846206896551724e-06, |
|
"loss": 0.0892, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.9173159085197286, |
|
"grad_norm": 2.3373663425445557, |
|
"learning_rate": 7.82896551724138e-06, |
|
"loss": 0.0904, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9235988941945212, |
|
"grad_norm": 1.509282112121582, |
|
"learning_rate": 7.811724137931036e-06, |
|
"loss": 0.0973, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.9298818798693139, |
|
"grad_norm": 1.7900352478027344, |
|
"learning_rate": 7.79448275862069e-06, |
|
"loss": 0.0815, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9361648655441066, |
|
"grad_norm": 1.6436471939086914, |
|
"learning_rate": 7.777241379310345e-06, |
|
"loss": 0.0853, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.9424478512188992, |
|
"grad_norm": 1.4675796031951904, |
|
"learning_rate": 7.76e-06, |
|
"loss": 0.0876, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9487308368936919, |
|
"grad_norm": 1.8452798128128052, |
|
"learning_rate": 7.742758620689655e-06, |
|
"loss": 0.0808, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.9550138225684845, |
|
"grad_norm": 1.3618487119674683, |
|
"learning_rate": 7.725517241379312e-06, |
|
"loss": 0.086, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9612968082432772, |
|
"grad_norm": 1.2715941667556763, |
|
"learning_rate": 7.708275862068967e-06, |
|
"loss": 0.088, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.9675797939180699, |
|
"grad_norm": 1.8105791807174683, |
|
"learning_rate": 7.691034482758621e-06, |
|
"loss": 0.0899, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9738627795928625, |
|
"grad_norm": 2.1864423751831055, |
|
"learning_rate": 7.673793103448276e-06, |
|
"loss": 0.0857, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.9801457652676552, |
|
"grad_norm": 1.5500792264938354, |
|
"learning_rate": 7.656551724137931e-06, |
|
"loss": 0.0859, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9864287509424479, |
|
"grad_norm": 1.8707691431045532, |
|
"learning_rate": 7.639310344827588e-06, |
|
"loss": 0.0882, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.9927117366172405, |
|
"grad_norm": 1.5884675979614258, |
|
"learning_rate": 7.622068965517242e-06, |
|
"loss": 0.0832, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9989947222920331, |
|
"grad_norm": 1.444044828414917, |
|
"learning_rate": 7.6048275862068975e-06, |
|
"loss": 0.0786, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.0052777079668258, |
|
"grad_norm": 1.6613413095474243, |
|
"learning_rate": 7.588275862068966e-06, |
|
"loss": 0.0694, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0052777079668258, |
|
"eval_loss": 0.13177034258842468, |
|
"eval_runtime": 28232.4806, |
|
"eval_samples_per_second": 0.692, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 40.84150014082778, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0115606936416186, |
|
"grad_norm": 1.5078247785568237, |
|
"learning_rate": 7.571034482758622e-06, |
|
"loss": 0.0786, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.0178436793164112, |
|
"grad_norm": 2.1607069969177246, |
|
"learning_rate": 7.553793103448277e-06, |
|
"loss": 0.0748, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.0241266649912038, |
|
"grad_norm": 1.3016276359558105, |
|
"learning_rate": 7.5365517241379315e-06, |
|
"loss": 0.0782, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.0304096506659965, |
|
"grad_norm": 1.1447079181671143, |
|
"learning_rate": 7.519310344827587e-06, |
|
"loss": 0.0708, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.0366926363407891, |
|
"grad_norm": 1.6340376138687134, |
|
"learning_rate": 7.502068965517242e-06, |
|
"loss": 0.0705, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.042975622015582, |
|
"grad_norm": 1.5793544054031372, |
|
"learning_rate": 7.484827586206898e-06, |
|
"loss": 0.0723, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.0492586076903745, |
|
"grad_norm": 1.2882245779037476, |
|
"learning_rate": 7.467586206896552e-06, |
|
"loss": 0.0692, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.055541593365167, |
|
"grad_norm": 1.822688102722168, |
|
"learning_rate": 7.4503448275862075e-06, |
|
"loss": 0.071, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0618245790399599, |
|
"grad_norm": 1.410294771194458, |
|
"learning_rate": 7.433103448275862e-06, |
|
"loss": 0.0666, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.0681075647147524, |
|
"grad_norm": 1.4143624305725098, |
|
"learning_rate": 7.415862068965518e-06, |
|
"loss": 0.0662, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.0743905503895452, |
|
"grad_norm": 1.5564229488372803, |
|
"learning_rate": 7.398620689655173e-06, |
|
"loss": 0.0752, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.0806735360643378, |
|
"grad_norm": 2.0174007415771484, |
|
"learning_rate": 7.381379310344829e-06, |
|
"loss": 0.0647, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 1.2202295064926147, |
|
"learning_rate": 7.364137931034483e-06, |
|
"loss": 0.0728, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.0932395074139232, |
|
"grad_norm": 1.7252171039581299, |
|
"learning_rate": 7.346896551724138e-06, |
|
"loss": 0.0613, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.0995224930887157, |
|
"grad_norm": 1.6477744579315186, |
|
"learning_rate": 7.329655172413793e-06, |
|
"loss": 0.0762, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.1058054787635083, |
|
"grad_norm": 2.01273512840271, |
|
"learning_rate": 7.312413793103449e-06, |
|
"loss": 0.0723, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.1120884644383011, |
|
"grad_norm": 2.0177805423736572, |
|
"learning_rate": 7.295172413793105e-06, |
|
"loss": 0.0713, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.1183714501130937, |
|
"grad_norm": 1.0823686122894287, |
|
"learning_rate": 7.27793103448276e-06, |
|
"loss": 0.0699, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.1246544357878865, |
|
"grad_norm": 1.8306968212127686, |
|
"learning_rate": 7.2606896551724145e-06, |
|
"loss": 0.0692, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.130937421462679, |
|
"grad_norm": 1.9577009677886963, |
|
"learning_rate": 7.243448275862069e-06, |
|
"loss": 0.0707, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.1372204071374716, |
|
"grad_norm": 1.8456298112869263, |
|
"learning_rate": 7.226206896551725e-06, |
|
"loss": 0.0702, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.1435033928122644, |
|
"grad_norm": 1.369918942451477, |
|
"learning_rate": 7.20896551724138e-06, |
|
"loss": 0.0716, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.149786378487057, |
|
"grad_norm": 1.7100856304168701, |
|
"learning_rate": 7.191724137931036e-06, |
|
"loss": 0.0773, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.1560693641618498, |
|
"grad_norm": 1.2440359592437744, |
|
"learning_rate": 7.17448275862069e-06, |
|
"loss": 0.0736, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.1623523498366424, |
|
"grad_norm": 1.26316237449646, |
|
"learning_rate": 7.157241379310345e-06, |
|
"loss": 0.0683, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.168635335511435, |
|
"grad_norm": 1.6966075897216797, |
|
"learning_rate": 7.14e-06, |
|
"loss": 0.064, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.1749183211862277, |
|
"grad_norm": 1.7377158403396606, |
|
"learning_rate": 7.122758620689656e-06, |
|
"loss": 0.0685, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.1812013068610203, |
|
"grad_norm": 1.851913571357727, |
|
"learning_rate": 7.105517241379311e-06, |
|
"loss": 0.0586, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.1874842925358131, |
|
"grad_norm": 1.6973158121109009, |
|
"learning_rate": 7.0882758620689666e-06, |
|
"loss": 0.0696, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.1937672782106057, |
|
"grad_norm": 1.451185703277588, |
|
"learning_rate": 7.0710344827586206e-06, |
|
"loss": 0.0671, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.2000502638853983, |
|
"grad_norm": 1.7117061614990234, |
|
"learning_rate": 7.053793103448276e-06, |
|
"loss": 0.0563, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.206333249560191, |
|
"grad_norm": 1.4349240064620972, |
|
"learning_rate": 7.036551724137931e-06, |
|
"loss": 0.0696, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.2126162352349836, |
|
"grad_norm": 1.5705769062042236, |
|
"learning_rate": 7.019310344827587e-06, |
|
"loss": 0.0634, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.2188992209097762, |
|
"grad_norm": 1.5555791854858398, |
|
"learning_rate": 7.002068965517243e-06, |
|
"loss": 0.0665, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.225182206584569, |
|
"grad_norm": 1.4251762628555298, |
|
"learning_rate": 6.9848275862068975e-06, |
|
"loss": 0.0636, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.2314651922593616, |
|
"grad_norm": 1.7479225397109985, |
|
"learning_rate": 6.967586206896552e-06, |
|
"loss": 0.0736, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.2377481779341544, |
|
"grad_norm": 1.4017658233642578, |
|
"learning_rate": 6.950344827586207e-06, |
|
"loss": 0.0642, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.244031163608947, |
|
"grad_norm": 1.34666907787323, |
|
"learning_rate": 6.933103448275863e-06, |
|
"loss": 0.0647, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.2503141492837395, |
|
"grad_norm": 1.6432591676712036, |
|
"learning_rate": 6.915862068965518e-06, |
|
"loss": 0.069, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.2565971349585323, |
|
"grad_norm": 1.2850168943405151, |
|
"learning_rate": 6.8986206896551735e-06, |
|
"loss": 0.0638, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2565971349585323, |
|
"eval_loss": 0.12251746654510498, |
|
"eval_runtime": 28262.6172, |
|
"eval_samples_per_second": 0.691, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 38.8171910996844, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2628801206333249, |
|
"grad_norm": 1.6795750856399536, |
|
"learning_rate": 6.8813793103448275e-06, |
|
"loss": 0.069, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.2691631063081177, |
|
"grad_norm": 1.418845772743225, |
|
"learning_rate": 6.864137931034483e-06, |
|
"loss": 0.0609, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.2754460919829103, |
|
"grad_norm": 1.2165697813034058, |
|
"learning_rate": 6.846896551724138e-06, |
|
"loss": 0.0638, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.2817290776577028, |
|
"grad_norm": 2.068269729614258, |
|
"learning_rate": 6.829655172413794e-06, |
|
"loss": 0.0673, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.2880120633324956, |
|
"grad_norm": 1.2388827800750732, |
|
"learning_rate": 6.812413793103449e-06, |
|
"loss": 0.0701, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.2942950490072882, |
|
"grad_norm": 1.2347630262374878, |
|
"learning_rate": 6.7951724137931044e-06, |
|
"loss": 0.065, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.300578034682081, |
|
"grad_norm": 1.15070378780365, |
|
"learning_rate": 6.7779310344827585e-06, |
|
"loss": 0.0637, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.3068610203568736, |
|
"grad_norm": 1.2261390686035156, |
|
"learning_rate": 6.760689655172414e-06, |
|
"loss": 0.0676, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.3131440060316661, |
|
"grad_norm": 1.9254522323608398, |
|
"learning_rate": 6.74344827586207e-06, |
|
"loss": 0.0594, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.319426991706459, |
|
"grad_norm": 1.3794054985046387, |
|
"learning_rate": 6.726206896551725e-06, |
|
"loss": 0.0649, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.3257099773812515, |
|
"grad_norm": 1.6740128993988037, |
|
"learning_rate": 6.7089655172413805e-06, |
|
"loss": 0.0619, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.3319929630560443, |
|
"grad_norm": 1.2363388538360596, |
|
"learning_rate": 6.691724137931035e-06, |
|
"loss": 0.0646, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.3382759487308369, |
|
"grad_norm": 1.433228850364685, |
|
"learning_rate": 6.67448275862069e-06, |
|
"loss": 0.0663, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.3445589344056295, |
|
"grad_norm": 1.898812174797058, |
|
"learning_rate": 6.657241379310345e-06, |
|
"loss": 0.0574, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.3508419200804223, |
|
"grad_norm": 1.3165233135223389, |
|
"learning_rate": 6.640000000000001e-06, |
|
"loss": 0.0597, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.3571249057552148, |
|
"grad_norm": 1.8794306516647339, |
|
"learning_rate": 6.622758620689656e-06, |
|
"loss": 0.0697, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.3634078914300076, |
|
"grad_norm": 1.1872018575668335, |
|
"learning_rate": 6.605517241379311e-06, |
|
"loss": 0.0587, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.3696908771048002, |
|
"grad_norm": 1.639711856842041, |
|
"learning_rate": 6.588275862068965e-06, |
|
"loss": 0.0645, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.3759738627795928, |
|
"grad_norm": 1.2173725366592407, |
|
"learning_rate": 6.571034482758621e-06, |
|
"loss": 0.055, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.3822568484543856, |
|
"grad_norm": 1.2602193355560303, |
|
"learning_rate": 6.553793103448276e-06, |
|
"loss": 0.0543, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.3885398341291781, |
|
"grad_norm": 1.350376009941101, |
|
"learning_rate": 6.536551724137932e-06, |
|
"loss": 0.0675, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.394822819803971, |
|
"grad_norm": 1.314760446548462, |
|
"learning_rate": 6.519310344827587e-06, |
|
"loss": 0.0597, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.4011058054787635, |
|
"grad_norm": 1.0545654296875, |
|
"learning_rate": 6.502068965517242e-06, |
|
"loss": 0.0553, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.407388791153556, |
|
"grad_norm": 1.2332383394241333, |
|
"learning_rate": 6.484827586206896e-06, |
|
"loss": 0.0607, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.4136717768283489, |
|
"grad_norm": 1.3052699565887451, |
|
"learning_rate": 6.467586206896552e-06, |
|
"loss": 0.0689, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.4199547625031415, |
|
"grad_norm": 1.505159854888916, |
|
"learning_rate": 6.450344827586208e-06, |
|
"loss": 0.0585, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.4262377481779343, |
|
"grad_norm": 1.2076035737991333, |
|
"learning_rate": 6.433103448275863e-06, |
|
"loss": 0.0624, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.4325207338527268, |
|
"grad_norm": 0.716097354888916, |
|
"learning_rate": 6.415862068965518e-06, |
|
"loss": 0.056, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.4388037195275194, |
|
"grad_norm": 0.8870618939399719, |
|
"learning_rate": 6.398620689655173e-06, |
|
"loss": 0.0592, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.4450867052023122, |
|
"grad_norm": 2.086239814758301, |
|
"learning_rate": 6.381379310344828e-06, |
|
"loss": 0.0647, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.4513696908771048, |
|
"grad_norm": 1.2493882179260254, |
|
"learning_rate": 6.364137931034483e-06, |
|
"loss": 0.0664, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.4576526765518976, |
|
"grad_norm": 0.78863924741745, |
|
"learning_rate": 6.346896551724139e-06, |
|
"loss": 0.0545, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.4639356622266901, |
|
"grad_norm": 1.2974257469177246, |
|
"learning_rate": 6.3296551724137935e-06, |
|
"loss": 0.0643, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.4702186479014827, |
|
"grad_norm": 1.220800518989563, |
|
"learning_rate": 6.312413793103449e-06, |
|
"loss": 0.0664, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.4765016335762755, |
|
"grad_norm": 1.9717583656311035, |
|
"learning_rate": 6.295172413793103e-06, |
|
"loss": 0.0585, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.482784619251068, |
|
"grad_norm": 1.9224556684494019, |
|
"learning_rate": 6.277931034482759e-06, |
|
"loss": 0.0607, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.4890676049258609, |
|
"grad_norm": 1.3678847551345825, |
|
"learning_rate": 6.260689655172414e-06, |
|
"loss": 0.0619, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.4953505906006535, |
|
"grad_norm": 1.2710736989974976, |
|
"learning_rate": 6.24344827586207e-06, |
|
"loss": 0.0615, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.501633576275446, |
|
"grad_norm": 1.1889654397964478, |
|
"learning_rate": 6.2262068965517245e-06, |
|
"loss": 0.0595, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.5079165619502386, |
|
"grad_norm": 1.165711760520935, |
|
"learning_rate": 6.20896551724138e-06, |
|
"loss": 0.0529, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.5079165619502386, |
|
"eval_loss": 0.12242772430181503, |
|
"eval_runtime": 28299.0323, |
|
"eval_samples_per_second": 0.69, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 37.89097763366001, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.5141995476250314, |
|
"grad_norm": 1.0195108652114868, |
|
"learning_rate": 6.191724137931034e-06, |
|
"loss": 0.0586, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.5204825332998242, |
|
"grad_norm": 1.1056115627288818, |
|
"learning_rate": 6.17448275862069e-06, |
|
"loss": 0.0576, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.5267655189746168, |
|
"grad_norm": 1.2018024921417236, |
|
"learning_rate": 6.157241379310346e-06, |
|
"loss": 0.0641, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.5330485046494093, |
|
"grad_norm": 1.426589846611023, |
|
"learning_rate": 6.1400000000000005e-06, |
|
"loss": 0.0575, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.539331490324202, |
|
"grad_norm": 1.0339651107788086, |
|
"learning_rate": 6.122758620689656e-06, |
|
"loss": 0.0459, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.5456144759989947, |
|
"grad_norm": 1.3441721200942993, |
|
"learning_rate": 6.105517241379311e-06, |
|
"loss": 0.0588, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.5518974616737875, |
|
"grad_norm": 1.2048940658569336, |
|
"learning_rate": 6.088275862068966e-06, |
|
"loss": 0.0573, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.55818044734858, |
|
"grad_norm": 1.5876215696334839, |
|
"learning_rate": 6.071034482758621e-06, |
|
"loss": 0.0616, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.5644634330233727, |
|
"grad_norm": 1.1515843868255615, |
|
"learning_rate": 6.0537931034482766e-06, |
|
"loss": 0.0566, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.5707464186981652, |
|
"grad_norm": 1.2697322368621826, |
|
"learning_rate": 6.036551724137931e-06, |
|
"loss": 0.0654, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.577029404372958, |
|
"grad_norm": 0.8662827014923096, |
|
"learning_rate": 6.019310344827587e-06, |
|
"loss": 0.0568, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.5833123900477508, |
|
"grad_norm": 1.5702407360076904, |
|
"learning_rate": 6.002068965517241e-06, |
|
"loss": 0.0563, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.5895953757225434, |
|
"grad_norm": 1.2121763229370117, |
|
"learning_rate": 5.984827586206897e-06, |
|
"loss": 0.0638, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.595878361397336, |
|
"grad_norm": 1.257488489151001, |
|
"learning_rate": 5.967586206896552e-06, |
|
"loss": 0.0577, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.6021613470721285, |
|
"grad_norm": 1.1616463661193848, |
|
"learning_rate": 5.9503448275862075e-06, |
|
"loss": 0.0604, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.6084443327469213, |
|
"grad_norm": 1.3494690656661987, |
|
"learning_rate": 5.933103448275862e-06, |
|
"loss": 0.0628, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.6147273184217141, |
|
"grad_norm": 1.5194650888442993, |
|
"learning_rate": 5.915862068965518e-06, |
|
"loss": 0.0549, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.6210103040965067, |
|
"grad_norm": 1.3437527418136597, |
|
"learning_rate": 5.898620689655174e-06, |
|
"loss": 0.0579, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.6272932897712993, |
|
"grad_norm": 1.445145845413208, |
|
"learning_rate": 5.881379310344828e-06, |
|
"loss": 0.0619, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.6335762754460919, |
|
"grad_norm": 1.3654954433441162, |
|
"learning_rate": 5.8641379310344835e-06, |
|
"loss": 0.0572, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.6398592611208846, |
|
"grad_norm": 1.4988460540771484, |
|
"learning_rate": 5.846896551724138e-06, |
|
"loss": 0.0599, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.6461422467956774, |
|
"grad_norm": 1.3341230154037476, |
|
"learning_rate": 5.829655172413794e-06, |
|
"loss": 0.0555, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.65242523247047, |
|
"grad_norm": 1.2729185819625854, |
|
"learning_rate": 5.812413793103449e-06, |
|
"loss": 0.0542, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.6587082181452626, |
|
"grad_norm": 1.7550246715545654, |
|
"learning_rate": 5.795172413793104e-06, |
|
"loss": 0.0582, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.6649912038200552, |
|
"grad_norm": 1.5241115093231201, |
|
"learning_rate": 5.777931034482759e-06, |
|
"loss": 0.063, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.671274189494848, |
|
"grad_norm": 1.8420275449752808, |
|
"learning_rate": 5.7606896551724144e-06, |
|
"loss": 0.0656, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.6775571751696408, |
|
"grad_norm": 1.0507006645202637, |
|
"learning_rate": 5.743448275862069e-06, |
|
"loss": 0.0562, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.6838401608444333, |
|
"grad_norm": 2.0767569541931152, |
|
"learning_rate": 5.726206896551725e-06, |
|
"loss": 0.0582, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.690123146519226, |
|
"grad_norm": 0.8954183459281921, |
|
"learning_rate": 5.708965517241379e-06, |
|
"loss": 0.0602, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.6964061321940185, |
|
"grad_norm": 0.9078446626663208, |
|
"learning_rate": 5.691724137931035e-06, |
|
"loss": 0.0529, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.7026891178688113, |
|
"grad_norm": 1.581921935081482, |
|
"learning_rate": 5.67448275862069e-06, |
|
"loss": 0.0548, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.708972103543604, |
|
"grad_norm": 1.4554569721221924, |
|
"learning_rate": 5.657241379310345e-06, |
|
"loss": 0.0563, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.7152550892183966, |
|
"grad_norm": 0.9179530739784241, |
|
"learning_rate": 5.64e-06, |
|
"loss": 0.0544, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.7215380748931892, |
|
"grad_norm": 1.1374155282974243, |
|
"learning_rate": 5.622758620689656e-06, |
|
"loss": 0.057, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.7278210605679818, |
|
"grad_norm": 1.0349596738815308, |
|
"learning_rate": 5.605517241379312e-06, |
|
"loss": 0.0573, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.7341040462427746, |
|
"grad_norm": 1.4229092597961426, |
|
"learning_rate": 5.588275862068966e-06, |
|
"loss": 0.0487, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.7403870319175674, |
|
"grad_norm": 1.2893837690353394, |
|
"learning_rate": 5.571034482758621e-06, |
|
"loss": 0.0605, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.74667001759236, |
|
"grad_norm": 1.1475664377212524, |
|
"learning_rate": 5.553793103448276e-06, |
|
"loss": 0.0577, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.7529530032671525, |
|
"grad_norm": 1.1052597761154175, |
|
"learning_rate": 5.536551724137932e-06, |
|
"loss": 0.0531, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.759235988941945, |
|
"grad_norm": 1.0279254913330078, |
|
"learning_rate": 5.519310344827587e-06, |
|
"loss": 0.0624, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.759235988941945, |
|
"eval_loss": 0.12031704932451248, |
|
"eval_runtime": 28303.9606, |
|
"eval_samples_per_second": 0.69, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 37.24605862768746, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.765518974616738, |
|
"grad_norm": 1.0434989929199219, |
|
"learning_rate": 5.502068965517242e-06, |
|
"loss": 0.0488, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.7718019602915307, |
|
"grad_norm": 1.1990073919296265, |
|
"learning_rate": 5.484827586206897e-06, |
|
"loss": 0.0525, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.7780849459663233, |
|
"grad_norm": 1.026079535484314, |
|
"learning_rate": 5.467586206896552e-06, |
|
"loss": 0.0531, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.7843679316411158, |
|
"grad_norm": 0.9900615215301514, |
|
"learning_rate": 5.450344827586207e-06, |
|
"loss": 0.0545, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.7906509173159084, |
|
"grad_norm": 1.5279738903045654, |
|
"learning_rate": 5.433103448275863e-06, |
|
"loss": 0.0566, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.7969339029907012, |
|
"grad_norm": 0.8226191401481628, |
|
"learning_rate": 5.415862068965517e-06, |
|
"loss": 0.056, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.8032168886654938, |
|
"grad_norm": 1.3544007539749146, |
|
"learning_rate": 5.398620689655173e-06, |
|
"loss": 0.061, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.8094998743402866, |
|
"grad_norm": 1.2771939039230347, |
|
"learning_rate": 5.3813793103448275e-06, |
|
"loss": 0.0542, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.8157828600150792, |
|
"grad_norm": 1.200951099395752, |
|
"learning_rate": 5.364137931034483e-06, |
|
"loss": 0.0544, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.8220658456898717, |
|
"grad_norm": 0.9072504639625549, |
|
"learning_rate": 5.346896551724139e-06, |
|
"loss": 0.0538, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.8283488313646645, |
|
"grad_norm": 1.088958978652954, |
|
"learning_rate": 5.329655172413794e-06, |
|
"loss": 0.054, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.834631817039457, |
|
"grad_norm": 1.359937071800232, |
|
"learning_rate": 5.3124137931034495e-06, |
|
"loss": 0.0623, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.84091480271425, |
|
"grad_norm": 1.467264175415039, |
|
"learning_rate": 5.2951724137931035e-06, |
|
"loss": 0.0536, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.8471977883890425, |
|
"grad_norm": 1.4082632064819336, |
|
"learning_rate": 5.277931034482759e-06, |
|
"loss": 0.051, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.853480774063835, |
|
"grad_norm": 1.472396969795227, |
|
"learning_rate": 5.260689655172414e-06, |
|
"loss": 0.0662, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.8597637597386278, |
|
"grad_norm": 1.587661862373352, |
|
"learning_rate": 5.24344827586207e-06, |
|
"loss": 0.055, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.8660467454134204, |
|
"grad_norm": 1.0919044017791748, |
|
"learning_rate": 5.226206896551725e-06, |
|
"loss": 0.0509, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.8723297310882132, |
|
"grad_norm": 0.9456779956817627, |
|
"learning_rate": 5.20896551724138e-06, |
|
"loss": 0.0557, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.8786127167630058, |
|
"grad_norm": 1.6846345663070679, |
|
"learning_rate": 5.1917241379310345e-06, |
|
"loss": 0.0531, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.8848957024377984, |
|
"grad_norm": 0.5773513317108154, |
|
"learning_rate": 5.17448275862069e-06, |
|
"loss": 0.0554, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.8911786881125912, |
|
"grad_norm": 0.977917492389679, |
|
"learning_rate": 5.157241379310345e-06, |
|
"loss": 0.0561, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.8974616737873837, |
|
"grad_norm": 1.2408355474472046, |
|
"learning_rate": 5.140000000000001e-06, |
|
"loss": 0.0615, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.9037446594621765, |
|
"grad_norm": 1.273364543914795, |
|
"learning_rate": 5.122758620689655e-06, |
|
"loss": 0.0572, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.910027645136969, |
|
"grad_norm": 1.2105774879455566, |
|
"learning_rate": 5.1055172413793105e-06, |
|
"loss": 0.0469, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.9163106308117617, |
|
"grad_norm": 0.6686076521873474, |
|
"learning_rate": 5.088275862068965e-06, |
|
"loss": 0.0596, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.9225936164865542, |
|
"grad_norm": 1.473767876625061, |
|
"learning_rate": 5.071034482758621e-06, |
|
"loss": 0.0558, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.928876602161347, |
|
"grad_norm": 1.183693289756775, |
|
"learning_rate": 5.053793103448277e-06, |
|
"loss": 0.0578, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.9351595878361398, |
|
"grad_norm": 1.661081075668335, |
|
"learning_rate": 5.036551724137932e-06, |
|
"loss": 0.0577, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.9414425735109324, |
|
"grad_norm": 1.035583734512329, |
|
"learning_rate": 5.019310344827587e-06, |
|
"loss": 0.0504, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.947725559185725, |
|
"grad_norm": 1.2706879377365112, |
|
"learning_rate": 5.002068965517241e-06, |
|
"loss": 0.0523, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.9540085448605176, |
|
"grad_norm": 1.558969497680664, |
|
"learning_rate": 4.984827586206897e-06, |
|
"loss": 0.0527, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.9602915305353104, |
|
"grad_norm": 2.107837677001953, |
|
"learning_rate": 4.967586206896552e-06, |
|
"loss": 0.0496, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.9665745162101032, |
|
"grad_norm": 1.1281065940856934, |
|
"learning_rate": 4.950344827586207e-06, |
|
"loss": 0.0495, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.9728575018848957, |
|
"grad_norm": 0.92071133852005, |
|
"learning_rate": 4.933103448275863e-06, |
|
"loss": 0.0543, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.9791404875596883, |
|
"grad_norm": 1.5125892162322998, |
|
"learning_rate": 4.9158620689655175e-06, |
|
"loss": 0.0567, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.9854234732344809, |
|
"grad_norm": 1.4018179178237915, |
|
"learning_rate": 4.898620689655173e-06, |
|
"loss": 0.0577, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.9917064589092737, |
|
"grad_norm": 1.599665880203247, |
|
"learning_rate": 4.881379310344828e-06, |
|
"loss": 0.0511, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.9979894445840665, |
|
"grad_norm": 1.3747309446334839, |
|
"learning_rate": 4.864137931034483e-06, |
|
"loss": 0.0545, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.004272430258859, |
|
"grad_norm": 1.2372041940689087, |
|
"learning_rate": 4.846896551724139e-06, |
|
"loss": 0.0434, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 2.0105554159336516, |
|
"grad_norm": 1.0974595546722412, |
|
"learning_rate": 4.8296551724137935e-06, |
|
"loss": 0.0426, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.0105554159336516, |
|
"eval_loss": 0.1260567456483841, |
|
"eval_runtime": 28488.8565, |
|
"eval_samples_per_second": 0.686, |
|
"eval_steps_per_second": 0.086, |
|
"eval_wer": 36.9741526861996, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.016838401608444, |
|
"grad_norm": 0.5434245467185974, |
|
"learning_rate": 4.812413793103448e-06, |
|
"loss": 0.0372, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 2.023121387283237, |
|
"grad_norm": 1.4904873371124268, |
|
"learning_rate": 4.795172413793104e-06, |
|
"loss": 0.0322, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.02940437295803, |
|
"grad_norm": 0.8786129355430603, |
|
"learning_rate": 4.777931034482759e-06, |
|
"loss": 0.0378, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 2.0356873586328224, |
|
"grad_norm": 1.062193512916565, |
|
"learning_rate": 4.760689655172414e-06, |
|
"loss": 0.0335, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.041970344307615, |
|
"grad_norm": 1.1229702234268188, |
|
"learning_rate": 4.7434482758620696e-06, |
|
"loss": 0.0341, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 2.0482533299824075, |
|
"grad_norm": 1.5959960222244263, |
|
"learning_rate": 4.726206896551724e-06, |
|
"loss": 0.0326, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.0545363156572005, |
|
"grad_norm": 1.3407766819000244, |
|
"learning_rate": 4.708965517241379e-06, |
|
"loss": 0.0282, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 2.060819301331993, |
|
"grad_norm": 0.8177748322486877, |
|
"learning_rate": 4.691724137931035e-06, |
|
"loss": 0.0334, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.0671022870067857, |
|
"grad_norm": 0.5434231162071228, |
|
"learning_rate": 4.67448275862069e-06, |
|
"loss": 0.0332, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 2.0733852726815782, |
|
"grad_norm": 0.6810811758041382, |
|
"learning_rate": 4.657241379310346e-06, |
|
"loss": 0.0343, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.079668258356371, |
|
"grad_norm": 1.3621633052825928, |
|
"learning_rate": 4.6400000000000005e-06, |
|
"loss": 0.0358, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 2.085951244031164, |
|
"grad_norm": 0.8485309481620789, |
|
"learning_rate": 4.622758620689655e-06, |
|
"loss": 0.0283, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.0922342297059564, |
|
"grad_norm": 1.2133398056030273, |
|
"learning_rate": 4.605517241379311e-06, |
|
"loss": 0.0342, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 2.098517215380749, |
|
"grad_norm": 1.9074926376342773, |
|
"learning_rate": 4.588275862068966e-06, |
|
"loss": 0.0372, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.1048002010555416, |
|
"grad_norm": 1.3371448516845703, |
|
"learning_rate": 4.571034482758621e-06, |
|
"loss": 0.0356, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 2.111083186730334, |
|
"grad_norm": 1.3409150838851929, |
|
"learning_rate": 4.5537931034482765e-06, |
|
"loss": 0.0354, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.117366172405127, |
|
"grad_norm": 1.1407537460327148, |
|
"learning_rate": 4.536551724137931e-06, |
|
"loss": 0.0338, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 2.1236491580799197, |
|
"grad_norm": 1.0432764291763306, |
|
"learning_rate": 4.519310344827586e-06, |
|
"loss": 0.0325, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.1299321437547123, |
|
"grad_norm": 1.2592930793762207, |
|
"learning_rate": 4.502068965517242e-06, |
|
"loss": 0.0374, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 2.136215129429505, |
|
"grad_norm": 0.9935320019721985, |
|
"learning_rate": 4.484827586206897e-06, |
|
"loss": 0.0349, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.1424981151042974, |
|
"grad_norm": 1.1453524827957153, |
|
"learning_rate": 4.467586206896552e-06, |
|
"loss": 0.0357, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 2.1487811007790905, |
|
"grad_norm": 0.8577796816825867, |
|
"learning_rate": 4.4503448275862074e-06, |
|
"loss": 0.0349, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.155064086453883, |
|
"grad_norm": 1.0337741374969482, |
|
"learning_rate": 4.433103448275862e-06, |
|
"loss": 0.0321, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 2.1613470721286756, |
|
"grad_norm": 1.4780592918395996, |
|
"learning_rate": 4.415862068965517e-06, |
|
"loss": 0.0359, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.167630057803468, |
|
"grad_norm": 1.6528609991073608, |
|
"learning_rate": 4.398620689655173e-06, |
|
"loss": 0.0384, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 0.7156565189361572, |
|
"learning_rate": 4.381379310344829e-06, |
|
"loss": 0.0353, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.1801960291530533, |
|
"grad_norm": 1.1753544807434082, |
|
"learning_rate": 4.3641379310344835e-06, |
|
"loss": 0.031, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 2.1864790148278463, |
|
"grad_norm": 0.7453944087028503, |
|
"learning_rate": 4.346896551724138e-06, |
|
"loss": 0.0348, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.192762000502639, |
|
"grad_norm": 1.872745156288147, |
|
"learning_rate": 4.329655172413793e-06, |
|
"loss": 0.0351, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 2.1990449861774315, |
|
"grad_norm": 0.6683670282363892, |
|
"learning_rate": 4.312413793103449e-06, |
|
"loss": 0.0351, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.205327971852224, |
|
"grad_norm": 1.3862112760543823, |
|
"learning_rate": 4.295172413793104e-06, |
|
"loss": 0.0333, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 2.2116109575270166, |
|
"grad_norm": 1.027766466140747, |
|
"learning_rate": 4.277931034482759e-06, |
|
"loss": 0.0287, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.2178939432018097, |
|
"grad_norm": 0.8764299154281616, |
|
"learning_rate": 4.260689655172414e-06, |
|
"loss": 0.0324, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 2.2241769288766022, |
|
"grad_norm": 0.8723062872886658, |
|
"learning_rate": 4.243448275862069e-06, |
|
"loss": 0.0283, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.230459914551395, |
|
"grad_norm": 1.1235435009002686, |
|
"learning_rate": 4.226206896551724e-06, |
|
"loss": 0.0376, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 2.2367429002261874, |
|
"grad_norm": 1.0615513324737549, |
|
"learning_rate": 4.20896551724138e-06, |
|
"loss": 0.0354, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.24302588590098, |
|
"grad_norm": 0.8142825365066528, |
|
"learning_rate": 4.191724137931035e-06, |
|
"loss": 0.0274, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 2.249308871575773, |
|
"grad_norm": 1.7816015481948853, |
|
"learning_rate": 4.17448275862069e-06, |
|
"loss": 0.0349, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.2555918572505655, |
|
"grad_norm": 1.0881839990615845, |
|
"learning_rate": 4.157241379310345e-06, |
|
"loss": 0.0344, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 2.261874842925358, |
|
"grad_norm": 0.554862916469574, |
|
"learning_rate": 4.14e-06, |
|
"loss": 0.0305, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.261874842925358, |
|
"eval_loss": 0.1346791386604309, |
|
"eval_runtime": 28846.9785, |
|
"eval_samples_per_second": 0.677, |
|
"eval_steps_per_second": 0.085, |
|
"eval_wer": 36.786382314919805, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.2681578286001507, |
|
"grad_norm": 1.2704734802246094, |
|
"learning_rate": 4.122758620689655e-06, |
|
"loss": 0.0334, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 2.2744408142749433, |
|
"grad_norm": 1.3331466913223267, |
|
"learning_rate": 4.105517241379311e-06, |
|
"loss": 0.0343, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.2807237999497363, |
|
"grad_norm": 0.3723588287830353, |
|
"learning_rate": 4.0882758620689665e-06, |
|
"loss": 0.0343, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 2.287006785624529, |
|
"grad_norm": 2.03139591217041, |
|
"learning_rate": 4.071034482758621e-06, |
|
"loss": 0.034, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.2932897712993214, |
|
"grad_norm": 1.230726718902588, |
|
"learning_rate": 4.053793103448276e-06, |
|
"loss": 0.0319, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 2.299572756974114, |
|
"grad_norm": 1.408710241317749, |
|
"learning_rate": 4.036551724137931e-06, |
|
"loss": 0.0267, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.3058557426489066, |
|
"grad_norm": 1.5532382726669312, |
|
"learning_rate": 4.019310344827587e-06, |
|
"loss": 0.0335, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 2.3121387283236996, |
|
"grad_norm": 1.1856962442398071, |
|
"learning_rate": 4.002068965517242e-06, |
|
"loss": 0.0346, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.318421713998492, |
|
"grad_norm": 1.0790759325027466, |
|
"learning_rate": 3.9848275862068965e-06, |
|
"loss": 0.0339, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 2.3247046996732847, |
|
"grad_norm": 0.7154790759086609, |
|
"learning_rate": 3.967586206896552e-06, |
|
"loss": 0.0353, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.3309876853480773, |
|
"grad_norm": 0.8203781843185425, |
|
"learning_rate": 3.950344827586207e-06, |
|
"loss": 0.0392, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 2.33727067102287, |
|
"grad_norm": 1.3644154071807861, |
|
"learning_rate": 3.933103448275862e-06, |
|
"loss": 0.0333, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.343553656697663, |
|
"grad_norm": 0.7084758877754211, |
|
"learning_rate": 3.915862068965518e-06, |
|
"loss": 0.0309, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 2.3498366423724555, |
|
"grad_norm": 0.7974054217338562, |
|
"learning_rate": 3.898620689655173e-06, |
|
"loss": 0.0337, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.356119628047248, |
|
"grad_norm": 0.9612919092178345, |
|
"learning_rate": 3.8813793103448275e-06, |
|
"loss": 0.0358, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 2.3624026137220406, |
|
"grad_norm": 1.3854459524154663, |
|
"learning_rate": 3.864137931034483e-06, |
|
"loss": 0.0299, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.368685599396833, |
|
"grad_norm": 1.1167787313461304, |
|
"learning_rate": 3.846896551724138e-06, |
|
"loss": 0.0337, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 2.3749685850716262, |
|
"grad_norm": 0.9742668271064758, |
|
"learning_rate": 3.829655172413793e-06, |
|
"loss": 0.0286, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.381251570746419, |
|
"grad_norm": 1.435309886932373, |
|
"learning_rate": 3.8124137931034486e-06, |
|
"loss": 0.0305, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 2.3875345564212114, |
|
"grad_norm": 1.1362321376800537, |
|
"learning_rate": 3.795172413793104e-06, |
|
"loss": 0.0313, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.393817542096004, |
|
"grad_norm": 1.0465309619903564, |
|
"learning_rate": 3.7779310344827592e-06, |
|
"loss": 0.0364, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 2.4001005277707965, |
|
"grad_norm": 1.4722024202346802, |
|
"learning_rate": 3.760689655172414e-06, |
|
"loss": 0.0451, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.406383513445589, |
|
"grad_norm": 1.1084930896759033, |
|
"learning_rate": 3.7434482758620694e-06, |
|
"loss": 0.0349, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 2.412666499120382, |
|
"grad_norm": 1.4382020235061646, |
|
"learning_rate": 3.7262068965517247e-06, |
|
"loss": 0.0338, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.4189494847951747, |
|
"grad_norm": 1.6590332984924316, |
|
"learning_rate": 3.7089655172413795e-06, |
|
"loss": 0.0335, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 2.4252324704699673, |
|
"grad_norm": 0.7975425720214844, |
|
"learning_rate": 3.691724137931035e-06, |
|
"loss": 0.0371, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.43151545614476, |
|
"grad_norm": 0.9135144352912903, |
|
"learning_rate": 3.67448275862069e-06, |
|
"loss": 0.0384, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 2.4377984418195524, |
|
"grad_norm": 1.74324369430542, |
|
"learning_rate": 3.657241379310345e-06, |
|
"loss": 0.029, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.4440814274943454, |
|
"grad_norm": 0.9986597299575806, |
|
"learning_rate": 3.6400000000000003e-06, |
|
"loss": 0.0363, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 2.450364413169138, |
|
"grad_norm": 0.8304340839385986, |
|
"learning_rate": 3.622758620689655e-06, |
|
"loss": 0.0337, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.4566473988439306, |
|
"grad_norm": 0.6401971578598022, |
|
"learning_rate": 3.6055172413793105e-06, |
|
"loss": 0.0309, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 2.462930384518723, |
|
"grad_norm": 1.2303663492202759, |
|
"learning_rate": 3.5882758620689658e-06, |
|
"loss": 0.0333, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.4692133701935157, |
|
"grad_norm": 1.2973604202270508, |
|
"learning_rate": 3.5710344827586206e-06, |
|
"loss": 0.0346, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 2.4754963558683087, |
|
"grad_norm": 1.0538204908370972, |
|
"learning_rate": 3.553793103448276e-06, |
|
"loss": 0.0335, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.4817793415431013, |
|
"grad_norm": 1.006469964981079, |
|
"learning_rate": 3.5365517241379316e-06, |
|
"loss": 0.0319, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 2.488062327217894, |
|
"grad_norm": 1.2951979637145996, |
|
"learning_rate": 3.5193103448275865e-06, |
|
"loss": 0.0309, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.4943453128926865, |
|
"grad_norm": 1.205333948135376, |
|
"learning_rate": 3.502068965517242e-06, |
|
"loss": 0.0322, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 2.500628298567479, |
|
"grad_norm": 1.051314115524292, |
|
"learning_rate": 3.484827586206897e-06, |
|
"loss": 0.0347, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.506911284242272, |
|
"grad_norm": 1.1379516124725342, |
|
"learning_rate": 3.467586206896552e-06, |
|
"loss": 0.0297, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 2.5131942699170646, |
|
"grad_norm": 1.4470369815826416, |
|
"learning_rate": 3.4503448275862073e-06, |
|
"loss": 0.0344, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.5131942699170646, |
|
"eval_loss": 0.13530105352401733, |
|
"eval_runtime": 28721.0802, |
|
"eval_samples_per_second": 0.68, |
|
"eval_steps_per_second": 0.085, |
|
"eval_wer": 36.4960604331718, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6324881677254656e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|