|
{ |
|
"best_metric": 15.246076710047603, |
|
"best_model_checkpoint": "./whisper-turbo/checkpoint-8000", |
|
"epoch": 3.4057045551298426, |
|
"eval_steps": 1000, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010642826734780758, |
|
"grad_norm": 24.32292938232422, |
|
"learning_rate": 2.4000000000000003e-07, |
|
"loss": 2.3553, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.021285653469561516, |
|
"grad_norm": 13.848426818847656, |
|
"learning_rate": 4.900000000000001e-07, |
|
"loss": 1.8027, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031928480204342274, |
|
"grad_norm": 12.110209465026855, |
|
"learning_rate": 7.4e-07, |
|
"loss": 1.43, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04257130693912303, |
|
"grad_norm": 11.48365306854248, |
|
"learning_rate": 9.9e-07, |
|
"loss": 1.2826, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05321413367390379, |
|
"grad_norm": 12.327783584594727, |
|
"learning_rate": 1.2400000000000002e-06, |
|
"loss": 1.0584, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.06385696040868455, |
|
"grad_norm": 10.366923332214355, |
|
"learning_rate": 1.4900000000000001e-06, |
|
"loss": 0.9736, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07449978714346531, |
|
"grad_norm": 12.267701148986816, |
|
"learning_rate": 1.74e-06, |
|
"loss": 0.9528, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.08514261387824607, |
|
"grad_norm": 10.672560691833496, |
|
"learning_rate": 1.9900000000000004e-06, |
|
"loss": 0.8758, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09578544061302682, |
|
"grad_norm": 11.000222206115723, |
|
"learning_rate": 2.24e-06, |
|
"loss": 0.7971, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10642826734780758, |
|
"grad_norm": 10.404646873474121, |
|
"learning_rate": 2.4900000000000003e-06, |
|
"loss": 0.7646, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11707109408258834, |
|
"grad_norm": 8.721226692199707, |
|
"learning_rate": 2.7400000000000004e-06, |
|
"loss": 0.7578, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1277139208173691, |
|
"grad_norm": 9.198404312133789, |
|
"learning_rate": 2.99e-06, |
|
"loss": 0.6623, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13835674755214986, |
|
"grad_norm": 9.394553184509277, |
|
"learning_rate": 3.2400000000000003e-06, |
|
"loss": 0.669, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14899957428693061, |
|
"grad_norm": 9.5685453414917, |
|
"learning_rate": 3.49e-06, |
|
"loss": 0.6431, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15964240102171137, |
|
"grad_norm": 10.325225830078125, |
|
"learning_rate": 3.74e-06, |
|
"loss": 0.6444, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.17028522775649213, |
|
"grad_norm": 8.178572654724121, |
|
"learning_rate": 3.990000000000001e-06, |
|
"loss": 0.609, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1809280544912729, |
|
"grad_norm": 9.746500015258789, |
|
"learning_rate": 4.24e-06, |
|
"loss": 0.5755, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.19157088122605365, |
|
"grad_norm": 8.11845874786377, |
|
"learning_rate": 4.49e-06, |
|
"loss": 0.5589, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2022137079608344, |
|
"grad_norm": 7.166477203369141, |
|
"learning_rate": 4.74e-06, |
|
"loss": 0.5578, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.21285653469561516, |
|
"grad_norm": 7.681941032409668, |
|
"learning_rate": 4.9900000000000005e-06, |
|
"loss": 0.5076, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22349936143039592, |
|
"grad_norm": 8.438258171081543, |
|
"learning_rate": 5.240000000000001e-06, |
|
"loss": 0.5445, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23414218816517668, |
|
"grad_norm": 9.802384376525879, |
|
"learning_rate": 5.490000000000001e-06, |
|
"loss": 0.5098, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24478501489995744, |
|
"grad_norm": 7.197368144989014, |
|
"learning_rate": 5.74e-06, |
|
"loss": 0.4969, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.2554278416347382, |
|
"grad_norm": 7.666371822357178, |
|
"learning_rate": 5.99e-06, |
|
"loss": 0.4861, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2660706683695189, |
|
"grad_norm": 7.324782371520996, |
|
"learning_rate": 6.24e-06, |
|
"loss": 0.481, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2767134951042997, |
|
"grad_norm": 8.649055480957031, |
|
"learning_rate": 6.4900000000000005e-06, |
|
"loss": 0.447, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28735632183908044, |
|
"grad_norm": 7.136589050292969, |
|
"learning_rate": 6.740000000000001e-06, |
|
"loss": 0.4778, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.29799914857386123, |
|
"grad_norm": 7.153022289276123, |
|
"learning_rate": 6.99e-06, |
|
"loss": 0.4674, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.30864197530864196, |
|
"grad_norm": 6.950058937072754, |
|
"learning_rate": 7.24e-06, |
|
"loss": 0.4434, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.31928480204342274, |
|
"grad_norm": 7.339558124542236, |
|
"learning_rate": 7.49e-06, |
|
"loss": 0.4439, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3299276287782035, |
|
"grad_norm": 6.6849541664123535, |
|
"learning_rate": 7.74e-06, |
|
"loss": 0.442, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.34057045551298426, |
|
"grad_norm": 7.065944194793701, |
|
"learning_rate": 7.990000000000001e-06, |
|
"loss": 0.4147, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.351213282247765, |
|
"grad_norm": 6.242930889129639, |
|
"learning_rate": 8.24e-06, |
|
"loss": 0.3905, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3618561089825458, |
|
"grad_norm": 6.885308742523193, |
|
"learning_rate": 8.48e-06, |
|
"loss": 0.4077, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3724989357173265, |
|
"grad_norm": 5.589861869812012, |
|
"learning_rate": 8.730000000000001e-06, |
|
"loss": 0.4074, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3831417624521073, |
|
"grad_norm": 6.651442050933838, |
|
"learning_rate": 8.98e-06, |
|
"loss": 0.4013, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.393784589186888, |
|
"grad_norm": 5.705496311187744, |
|
"learning_rate": 9.230000000000001e-06, |
|
"loss": 0.3897, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.4044274159216688, |
|
"grad_norm": 6.6162333488464355, |
|
"learning_rate": 9.48e-06, |
|
"loss": 0.365, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.41507024265644954, |
|
"grad_norm": 7.273537635803223, |
|
"learning_rate": 9.73e-06, |
|
"loss": 0.3924, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.4257130693912303, |
|
"grad_norm": 6.101346969604492, |
|
"learning_rate": 9.980000000000001e-06, |
|
"loss": 0.3715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4257130693912303, |
|
"eval_loss": 0.3456858992576599, |
|
"eval_runtime": 2451.0815, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 40.46916574655637, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.43635589612601106, |
|
"grad_norm": 5.119203567504883, |
|
"learning_rate": 9.967142857142858e-06, |
|
"loss": 0.3875, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.44699872286079184, |
|
"grad_norm": 5.488610744476318, |
|
"learning_rate": 9.931428571428571e-06, |
|
"loss": 0.3741, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.45764154959557257, |
|
"grad_norm": 5.710753917694092, |
|
"learning_rate": 9.895714285714287e-06, |
|
"loss": 0.3622, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.46828437633035336, |
|
"grad_norm": 5.7168192863464355, |
|
"learning_rate": 9.86e-06, |
|
"loss": 0.3652, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4789272030651341, |
|
"grad_norm": 5.204087734222412, |
|
"learning_rate": 9.824285714285716e-06, |
|
"loss": 0.3696, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.4895700297999149, |
|
"grad_norm": 5.014431476593018, |
|
"learning_rate": 9.78857142857143e-06, |
|
"loss": 0.347, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5002128565346956, |
|
"grad_norm": 5.643191814422607, |
|
"learning_rate": 9.752857142857143e-06, |
|
"loss": 0.3681, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5108556832694764, |
|
"grad_norm": 6.40764045715332, |
|
"learning_rate": 9.717142857142858e-06, |
|
"loss": 0.34, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5214985100042572, |
|
"grad_norm": 5.89484167098999, |
|
"learning_rate": 9.681428571428572e-06, |
|
"loss": 0.3257, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5321413367390379, |
|
"grad_norm": 6.474817752838135, |
|
"learning_rate": 9.645714285714286e-06, |
|
"loss": 0.3398, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5427841634738186, |
|
"grad_norm": 6.272877216339111, |
|
"learning_rate": 9.610000000000001e-06, |
|
"loss": 0.3315, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5534269902085994, |
|
"grad_norm": 4.669580936431885, |
|
"learning_rate": 9.574285714285715e-06, |
|
"loss": 0.3296, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5640698169433802, |
|
"grad_norm": 4.576137065887451, |
|
"learning_rate": 9.538571428571428e-06, |
|
"loss": 0.3334, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"grad_norm": 4.5423665046691895, |
|
"learning_rate": 9.502857142857144e-06, |
|
"loss": 0.3087, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5853554704129417, |
|
"grad_norm": 4.673890113830566, |
|
"learning_rate": 9.467142857142857e-06, |
|
"loss": 0.3286, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.5959982971477225, |
|
"grad_norm": 4.775241374969482, |
|
"learning_rate": 9.431428571428573e-06, |
|
"loss": 0.3258, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6066411238825032, |
|
"grad_norm": 5.259005069732666, |
|
"learning_rate": 9.395714285714287e-06, |
|
"loss": 0.29, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"grad_norm": 4.7707014083862305, |
|
"learning_rate": 9.360000000000002e-06, |
|
"loss": 0.2879, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6279267773520647, |
|
"grad_norm": 4.998105525970459, |
|
"learning_rate": 9.324285714285714e-06, |
|
"loss": 0.2957, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.6385696040868455, |
|
"grad_norm": 4.137457370758057, |
|
"learning_rate": 9.28857142857143e-06, |
|
"loss": 0.2897, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6492124308216263, |
|
"grad_norm": 4.685913562774658, |
|
"learning_rate": 9.252857142857143e-06, |
|
"loss": 0.3282, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.659855257556407, |
|
"grad_norm": 5.36374044418335, |
|
"learning_rate": 9.217142857142858e-06, |
|
"loss": 0.2747, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6704980842911877, |
|
"grad_norm": 4.616824150085449, |
|
"learning_rate": 9.181428571428572e-06, |
|
"loss": 0.285, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.6811409110259685, |
|
"grad_norm": 4.848719120025635, |
|
"learning_rate": 9.145714285714287e-06, |
|
"loss": 0.2971, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6917837377607493, |
|
"grad_norm": 4.435796737670898, |
|
"learning_rate": 9.110000000000001e-06, |
|
"loss": 0.2993, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.70242656449553, |
|
"grad_norm": 4.055502414703369, |
|
"learning_rate": 9.074285714285716e-06, |
|
"loss": 0.2713, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7130693912303108, |
|
"grad_norm": 5.476015090942383, |
|
"learning_rate": 9.038571428571428e-06, |
|
"loss": 0.2553, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.7237122179650916, |
|
"grad_norm": 4.443753242492676, |
|
"learning_rate": 9.002857142857144e-06, |
|
"loss": 0.2772, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7343550446998723, |
|
"grad_norm": 4.617072105407715, |
|
"learning_rate": 8.967142857142857e-06, |
|
"loss": 0.2745, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.744997871434653, |
|
"grad_norm": 4.322467803955078, |
|
"learning_rate": 8.931428571428573e-06, |
|
"loss": 0.2756, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7556406981694338, |
|
"grad_norm": 5.194156169891357, |
|
"learning_rate": 8.895714285714286e-06, |
|
"loss": 0.2571, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7662835249042146, |
|
"grad_norm": 5.350680828094482, |
|
"learning_rate": 8.860000000000002e-06, |
|
"loss": 0.2705, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7769263516389954, |
|
"grad_norm": 5.343641757965088, |
|
"learning_rate": 8.824285714285715e-06, |
|
"loss": 0.2499, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.787569178373776, |
|
"grad_norm": 4.356059551239014, |
|
"learning_rate": 8.788571428571429e-06, |
|
"loss": 0.2767, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.7982120051085568, |
|
"grad_norm": 4.316229820251465, |
|
"learning_rate": 8.752857142857144e-06, |
|
"loss": 0.2484, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8088548318433376, |
|
"grad_norm": 4.627383232116699, |
|
"learning_rate": 8.717142857142858e-06, |
|
"loss": 0.2559, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8194976585781183, |
|
"grad_norm": 4.916121006011963, |
|
"learning_rate": 8.681428571428572e-06, |
|
"loss": 0.2755, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8301404853128991, |
|
"grad_norm": 5.244263172149658, |
|
"learning_rate": 8.645714285714287e-06, |
|
"loss": 0.2334, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8407833120476799, |
|
"grad_norm": 4.568859100341797, |
|
"learning_rate": 8.61e-06, |
|
"loss": 0.2542, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8514261387824607, |
|
"grad_norm": 3.6536848545074463, |
|
"learning_rate": 8.574285714285714e-06, |
|
"loss": 0.251, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8514261387824607, |
|
"eval_loss": 0.21811740100383759, |
|
"eval_runtime": 2451.0584, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 27.706481799916737, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 4.318572521209717, |
|
"learning_rate": 8.53857142857143e-06, |
|
"loss": 0.2319, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.8727117922520221, |
|
"grad_norm": 4.489058494567871, |
|
"learning_rate": 8.502857142857143e-06, |
|
"loss": 0.2531, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.8833546189868029, |
|
"grad_norm": 4.232712745666504, |
|
"learning_rate": 8.467142857142859e-06, |
|
"loss": 0.2491, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.8939974457215837, |
|
"grad_norm": 4.031393051147461, |
|
"learning_rate": 8.431428571428572e-06, |
|
"loss": 0.2485, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9046402724563644, |
|
"grad_norm": 3.8136720657348633, |
|
"learning_rate": 8.395714285714286e-06, |
|
"loss": 0.2412, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9152830991911451, |
|
"grad_norm": 4.3343505859375, |
|
"learning_rate": 8.36e-06, |
|
"loss": 0.2378, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 3.6388914585113525, |
|
"learning_rate": 8.324285714285715e-06, |
|
"loss": 0.2409, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9365687526607067, |
|
"grad_norm": 5.596227169036865, |
|
"learning_rate": 8.288571428571429e-06, |
|
"loss": 0.2363, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9472115793954874, |
|
"grad_norm": 4.016772747039795, |
|
"learning_rate": 8.252857142857144e-06, |
|
"loss": 0.2281, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9578544061302682, |
|
"grad_norm": 5.106402397155762, |
|
"learning_rate": 8.217142857142858e-06, |
|
"loss": 0.224, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.968497232865049, |
|
"grad_norm": 3.714061975479126, |
|
"learning_rate": 8.181428571428573e-06, |
|
"loss": 0.2306, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.9791400595998297, |
|
"grad_norm": 4.1780009269714355, |
|
"learning_rate": 8.145714285714287e-06, |
|
"loss": 0.2284, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9897828863346104, |
|
"grad_norm": 4.007058143615723, |
|
"learning_rate": 8.110000000000002e-06, |
|
"loss": 0.237, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.0004257130693912, |
|
"grad_norm": 2.91274094581604, |
|
"learning_rate": 8.074285714285714e-06, |
|
"loss": 0.227, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0110685398041719, |
|
"grad_norm": 4.321012496948242, |
|
"learning_rate": 8.03857142857143e-06, |
|
"loss": 0.1917, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0217113665389528, |
|
"grad_norm": 4.613705635070801, |
|
"learning_rate": 8.002857142857143e-06, |
|
"loss": 0.1861, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0323541932737335, |
|
"grad_norm": 3.9575023651123047, |
|
"learning_rate": 7.967142857142858e-06, |
|
"loss": 0.1931, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.0429970200085144, |
|
"grad_norm": 4.651571273803711, |
|
"learning_rate": 7.931428571428572e-06, |
|
"loss": 0.1784, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.053639846743295, |
|
"grad_norm": 4.0472412109375, |
|
"learning_rate": 7.895714285714287e-06, |
|
"loss": 0.2002, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.0642826734780757, |
|
"grad_norm": 3.4641237258911133, |
|
"learning_rate": 7.860000000000001e-06, |
|
"loss": 0.1885, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0749255002128566, |
|
"grad_norm": 3.260540008544922, |
|
"learning_rate": 7.824285714285715e-06, |
|
"loss": 0.1924, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.0855683269476373, |
|
"grad_norm": 4.416691303253174, |
|
"learning_rate": 7.788571428571428e-06, |
|
"loss": 0.181, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.096211153682418, |
|
"grad_norm": 3.7334911823272705, |
|
"learning_rate": 7.752857142857144e-06, |
|
"loss": 0.1667, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.1068539804171988, |
|
"grad_norm": 4.4988555908203125, |
|
"learning_rate": 7.717142857142857e-06, |
|
"loss": 0.1864, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1174968071519795, |
|
"grad_norm": 4.6382222175598145, |
|
"learning_rate": 7.681428571428573e-06, |
|
"loss": 0.1805, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1281396338867604, |
|
"grad_norm": 4.512842178344727, |
|
"learning_rate": 7.645714285714286e-06, |
|
"loss": 0.1848, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.138782460621541, |
|
"grad_norm": 3.889390468597412, |
|
"learning_rate": 7.610000000000001e-06, |
|
"loss": 0.1846, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.1494252873563218, |
|
"grad_norm": 4.247312068939209, |
|
"learning_rate": 7.574285714285715e-06, |
|
"loss": 0.1799, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1600681140911027, |
|
"grad_norm": 4.321536540985107, |
|
"learning_rate": 7.53857142857143e-06, |
|
"loss": 0.1764, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.1707109408258833, |
|
"grad_norm": 4.06414794921875, |
|
"learning_rate": 7.502857142857144e-06, |
|
"loss": 0.1903, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.181353767560664, |
|
"grad_norm": 3.314551591873169, |
|
"learning_rate": 7.467142857142857e-06, |
|
"loss": 0.1614, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.191996594295445, |
|
"grad_norm": 4.245212078094482, |
|
"learning_rate": 7.431428571428572e-06, |
|
"loss": 0.1867, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2026394210302256, |
|
"grad_norm": 3.1465117931365967, |
|
"learning_rate": 7.395714285714286e-06, |
|
"loss": 0.1584, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.2132822477650063, |
|
"grad_norm": 4.1284637451171875, |
|
"learning_rate": 7.360000000000001e-06, |
|
"loss": 0.1856, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2239250744997872, |
|
"grad_norm": 3.685889720916748, |
|
"learning_rate": 7.324285714285715e-06, |
|
"loss": 0.171, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"grad_norm": 4.70512580871582, |
|
"learning_rate": 7.28857142857143e-06, |
|
"loss": 0.1752, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.2452107279693487, |
|
"grad_norm": 3.846862316131592, |
|
"learning_rate": 7.252857142857143e-06, |
|
"loss": 0.1697, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.2558535547041294, |
|
"grad_norm": 3.7466206550598145, |
|
"learning_rate": 7.217142857142858e-06, |
|
"loss": 0.1796, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2664963814389103, |
|
"grad_norm": 3.8162903785705566, |
|
"learning_rate": 7.182857142857144e-06, |
|
"loss": 0.1591, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.277139208173691, |
|
"grad_norm": 3.880910873413086, |
|
"learning_rate": 7.147142857142858e-06, |
|
"loss": 0.1569, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.277139208173691, |
|
"eval_loss": 0.1813717633485794, |
|
"eval_runtime": 2447.146, |
|
"eval_samples_per_second": 1.952, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 24.153347693087408, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2877820349084717, |
|
"grad_norm": 3.98262882232666, |
|
"learning_rate": 7.111428571428572e-06, |
|
"loss": 0.1804, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.2984248616432525, |
|
"grad_norm": 3.6790521144866943, |
|
"learning_rate": 7.075714285714286e-06, |
|
"loss": 0.1647, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.3090676883780332, |
|
"grad_norm": 3.431762456893921, |
|
"learning_rate": 7.04e-06, |
|
"loss": 0.1662, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.319710515112814, |
|
"grad_norm": 4.0635247230529785, |
|
"learning_rate": 7.004285714285715e-06, |
|
"loss": 0.1726, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3303533418475948, |
|
"grad_norm": 3.1607766151428223, |
|
"learning_rate": 6.968571428571429e-06, |
|
"loss": 0.1544, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.3409961685823755, |
|
"grad_norm": 4.5737385749816895, |
|
"learning_rate": 6.932857142857143e-06, |
|
"loss": 0.1644, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.3516389953171561, |
|
"grad_norm": 4.182763576507568, |
|
"learning_rate": 6.8971428571428575e-06, |
|
"loss": 0.167, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.362281822051937, |
|
"grad_norm": 3.3566439151763916, |
|
"learning_rate": 6.861428571428572e-06, |
|
"loss": 0.1631, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3729246487867177, |
|
"grad_norm": 3.771667718887329, |
|
"learning_rate": 6.8257142857142866e-06, |
|
"loss": 0.1675, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.3835674755214984, |
|
"grad_norm": 4.14226770401001, |
|
"learning_rate": 6.790000000000001e-06, |
|
"loss": 0.1785, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.3942103022562793, |
|
"grad_norm": 4.599484443664551, |
|
"learning_rate": 6.754285714285715e-06, |
|
"loss": 0.1832, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.40485312899106, |
|
"grad_norm": 3.78108286857605, |
|
"learning_rate": 6.718571428571428e-06, |
|
"loss": 0.1765, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4154959557258409, |
|
"grad_norm": 3.3249051570892334, |
|
"learning_rate": 6.682857142857143e-06, |
|
"loss": 0.1517, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.4261387824606215, |
|
"grad_norm": 3.299750804901123, |
|
"learning_rate": 6.647142857142857e-06, |
|
"loss": 0.1632, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.4367816091954024, |
|
"grad_norm": 4.0860066413879395, |
|
"learning_rate": 6.611428571428572e-06, |
|
"loss": 0.1457, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.447424435930183, |
|
"grad_norm": 4.305485725402832, |
|
"learning_rate": 6.575714285714286e-06, |
|
"loss": 0.1638, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.4580672626649638, |
|
"grad_norm": 3.656642436981201, |
|
"learning_rate": 6.540000000000001e-06, |
|
"loss": 0.1681, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.4687100893997447, |
|
"grad_norm": 3.596554756164551, |
|
"learning_rate": 6.504285714285715e-06, |
|
"loss": 0.1473, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.4793529161345254, |
|
"grad_norm": 3.35798716545105, |
|
"learning_rate": 6.46857142857143e-06, |
|
"loss": 0.1443, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.489995742869306, |
|
"grad_norm": 3.782789468765259, |
|
"learning_rate": 6.432857142857143e-06, |
|
"loss": 0.1399, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.500638569604087, |
|
"grad_norm": 3.556546926498413, |
|
"learning_rate": 6.397142857142857e-06, |
|
"loss": 0.1657, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.5112813963388676, |
|
"grad_norm": 4.0330657958984375, |
|
"learning_rate": 6.361428571428572e-06, |
|
"loss": 0.1455, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5219242230736483, |
|
"grad_norm": 3.4194424152374268, |
|
"learning_rate": 6.325714285714286e-06, |
|
"loss": 0.1558, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.5325670498084292, |
|
"grad_norm": 3.4053897857666016, |
|
"learning_rate": 6.290000000000001e-06, |
|
"loss": 0.1667, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"grad_norm": 3.4398772716522217, |
|
"learning_rate": 6.254285714285715e-06, |
|
"loss": 0.1704, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.5538527032779905, |
|
"grad_norm": 3.950698137283325, |
|
"learning_rate": 6.21857142857143e-06, |
|
"loss": 0.1587, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.5644955300127714, |
|
"grad_norm": 3.5105514526367188, |
|
"learning_rate": 6.1828571428571434e-06, |
|
"loss": 0.1662, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.5751383567475523, |
|
"grad_norm": 3.1570792198181152, |
|
"learning_rate": 6.147142857142858e-06, |
|
"loss": 0.1542, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.5857811834823328, |
|
"grad_norm": 3.395730495452881, |
|
"learning_rate": 6.111428571428572e-06, |
|
"loss": 0.1419, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.5964240102171137, |
|
"grad_norm": 3.692760944366455, |
|
"learning_rate": 6.075714285714286e-06, |
|
"loss": 0.1515, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6070668369518946, |
|
"grad_norm": 4.292817115783691, |
|
"learning_rate": 6.040000000000001e-06, |
|
"loss": 0.1558, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.617709663686675, |
|
"grad_norm": 2.7795393466949463, |
|
"learning_rate": 6.004285714285715e-06, |
|
"loss": 0.1603, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.628352490421456, |
|
"grad_norm": 3.6494193077087402, |
|
"learning_rate": 5.968571428571429e-06, |
|
"loss": 0.1527, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.6389953171562368, |
|
"grad_norm": 3.185007333755493, |
|
"learning_rate": 5.932857142857143e-06, |
|
"loss": 0.1415, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6496381438910175, |
|
"grad_norm": 4.0278143882751465, |
|
"learning_rate": 5.897142857142858e-06, |
|
"loss": 0.1595, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.6602809706257982, |
|
"grad_norm": 3.8083670139312744, |
|
"learning_rate": 5.861428571428572e-06, |
|
"loss": 0.1596, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.670923797360579, |
|
"grad_norm": 5.412234783172607, |
|
"learning_rate": 5.825714285714286e-06, |
|
"loss": 0.1418, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.6815666240953597, |
|
"grad_norm": 3.8275325298309326, |
|
"learning_rate": 5.7900000000000005e-06, |
|
"loss": 0.1725, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.6922094508301404, |
|
"grad_norm": 3.4874017238616943, |
|
"learning_rate": 5.754285714285714e-06, |
|
"loss": 0.1334, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.7028522775649213, |
|
"grad_norm": 2.9034647941589355, |
|
"learning_rate": 5.718571428571429e-06, |
|
"loss": 0.1436, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7028522775649213, |
|
"eval_loss": 0.1530725359916687, |
|
"eval_runtime": 2470.8785, |
|
"eval_samples_per_second": 1.933, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 20.381197169077055, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.713495104299702, |
|
"grad_norm": 3.192444086074829, |
|
"learning_rate": 5.682857142857143e-06, |
|
"loss": 0.1391, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 3.376185655593872, |
|
"learning_rate": 5.647142857142858e-06, |
|
"loss": 0.1447, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7347807577692635, |
|
"grad_norm": 3.2235193252563477, |
|
"learning_rate": 5.611428571428572e-06, |
|
"loss": 0.1473, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.7454235845040442, |
|
"grad_norm": 3.4376378059387207, |
|
"learning_rate": 5.575714285714287e-06, |
|
"loss": 0.1526, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.756066411238825, |
|
"grad_norm": 3.4150240421295166, |
|
"learning_rate": 5.540000000000001e-06, |
|
"loss": 0.1503, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.7667092379736058, |
|
"grad_norm": 3.757262706756592, |
|
"learning_rate": 5.504285714285714e-06, |
|
"loss": 0.1311, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.7773520647083867, |
|
"grad_norm": 3.725192070007324, |
|
"learning_rate": 5.4685714285714285e-06, |
|
"loss": 0.1506, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.7879948914431671, |
|
"grad_norm": 3.243486166000366, |
|
"learning_rate": 5.432857142857143e-06, |
|
"loss": 0.1529, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.798637718177948, |
|
"grad_norm": 3.1005189418792725, |
|
"learning_rate": 5.3971428571428575e-06, |
|
"loss": 0.1592, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.809280544912729, |
|
"grad_norm": 2.6923441886901855, |
|
"learning_rate": 5.361428571428572e-06, |
|
"loss": 0.1373, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.8199233716475096, |
|
"grad_norm": 3.4601283073425293, |
|
"learning_rate": 5.3257142857142865e-06, |
|
"loss": 0.1358, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.8305661983822903, |
|
"grad_norm": 4.46110200881958, |
|
"learning_rate": 5.290000000000001e-06, |
|
"loss": 0.1406, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8412090251170712, |
|
"grad_norm": 3.4556360244750977, |
|
"learning_rate": 5.254285714285715e-06, |
|
"loss": 0.1314, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 2.851836919784546, |
|
"learning_rate": 5.218571428571429e-06, |
|
"loss": 0.1263, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.8624946785866325, |
|
"grad_norm": 3.1507768630981445, |
|
"learning_rate": 5.182857142857143e-06, |
|
"loss": 0.1263, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.8731375053214134, |
|
"grad_norm": 3.7861220836639404, |
|
"learning_rate": 5.147142857142857e-06, |
|
"loss": 0.1423, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.883780332056194, |
|
"grad_norm": 2.670792818069458, |
|
"learning_rate": 5.111428571428572e-06, |
|
"loss": 0.1378, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.8944231587909748, |
|
"grad_norm": 3.21482515335083, |
|
"learning_rate": 5.075714285714286e-06, |
|
"loss": 0.1305, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9050659855257557, |
|
"grad_norm": 3.0958456993103027, |
|
"learning_rate": 5.04e-06, |
|
"loss": 0.1312, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.9157088122605364, |
|
"grad_norm": 3.2010111808776855, |
|
"learning_rate": 5.0042857142857145e-06, |
|
"loss": 0.1358, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.926351638995317, |
|
"grad_norm": 4.211108684539795, |
|
"learning_rate": 4.968571428571429e-06, |
|
"loss": 0.149, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.936994465730098, |
|
"grad_norm": 3.6158218383789062, |
|
"learning_rate": 4.932857142857143e-06, |
|
"loss": 0.1456, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.9476372924648788, |
|
"grad_norm": 3.1304032802581787, |
|
"learning_rate": 4.897142857142857e-06, |
|
"loss": 0.1474, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.9582801191996593, |
|
"grad_norm": 3.7992565631866455, |
|
"learning_rate": 4.861428571428572e-06, |
|
"loss": 0.1252, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.9689229459344402, |
|
"grad_norm": 3.0859761238098145, |
|
"learning_rate": 4.825714285714286e-06, |
|
"loss": 0.1459, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.979565772669221, |
|
"grad_norm": 4.332040309906006, |
|
"learning_rate": 4.79e-06, |
|
"loss": 0.129, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.9902085994040017, |
|
"grad_norm": 4.2954816818237305, |
|
"learning_rate": 4.754285714285714e-06, |
|
"loss": 0.1566, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.0008514261387824, |
|
"grad_norm": 2.788947105407715, |
|
"learning_rate": 4.718571428571429e-06, |
|
"loss": 0.145, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0114942528735633, |
|
"grad_norm": 3.2599875926971436, |
|
"learning_rate": 4.682857142857143e-06, |
|
"loss": 0.1063, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.0221370796083438, |
|
"grad_norm": 3.0225577354431152, |
|
"learning_rate": 4.647142857142857e-06, |
|
"loss": 0.0877, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.0327799063431247, |
|
"grad_norm": 3.564682960510254, |
|
"learning_rate": 4.6114285714285716e-06, |
|
"loss": 0.1014, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.0434227330779056, |
|
"grad_norm": 2.5339510440826416, |
|
"learning_rate": 4.575714285714286e-06, |
|
"loss": 0.0906, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.0540655598126865, |
|
"grad_norm": 2.7343597412109375, |
|
"learning_rate": 4.540000000000001e-06, |
|
"loss": 0.0994, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.064708386547467, |
|
"grad_norm": 2.6490981578826904, |
|
"learning_rate": 4.504285714285715e-06, |
|
"loss": 0.0979, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.075351213282248, |
|
"grad_norm": 1.9775068759918213, |
|
"learning_rate": 4.468571428571429e-06, |
|
"loss": 0.1023, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.0859940400170287, |
|
"grad_norm": 2.2302167415618896, |
|
"learning_rate": 4.432857142857143e-06, |
|
"loss": 0.101, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.096636866751809, |
|
"grad_norm": 2.7685494422912598, |
|
"learning_rate": 4.397142857142858e-06, |
|
"loss": 0.0934, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.10727969348659, |
|
"grad_norm": 2.8027827739715576, |
|
"learning_rate": 4.361428571428572e-06, |
|
"loss": 0.0962, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.117922520221371, |
|
"grad_norm": 2.9173505306243896, |
|
"learning_rate": 4.325714285714286e-06, |
|
"loss": 0.0934, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.1285653469561514, |
|
"grad_norm": 2.7315633296966553, |
|
"learning_rate": 4.2900000000000004e-06, |
|
"loss": 0.0931, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.1285653469561514, |
|
"eval_loss": 0.13744878768920898, |
|
"eval_runtime": 2441.1543, |
|
"eval_samples_per_second": 1.957, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 18.466161058519013, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.1392081736909323, |
|
"grad_norm": 2.411224126815796, |
|
"learning_rate": 4.254285714285715e-06, |
|
"loss": 0.1058, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.149851000425713, |
|
"grad_norm": 2.7599411010742188, |
|
"learning_rate": 4.2185714285714294e-06, |
|
"loss": 0.105, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.1604938271604937, |
|
"grad_norm": 2.873077392578125, |
|
"learning_rate": 4.182857142857143e-06, |
|
"loss": 0.1122, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.1711366538952745, |
|
"grad_norm": 2.4859185218811035, |
|
"learning_rate": 4.147142857142858e-06, |
|
"loss": 0.0956, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.1817794806300554, |
|
"grad_norm": 2.307053565979004, |
|
"learning_rate": 4.111428571428572e-06, |
|
"loss": 0.0936, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.192422307364836, |
|
"grad_norm": 2.692552328109741, |
|
"learning_rate": 4.075714285714286e-06, |
|
"loss": 0.0814, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.203065134099617, |
|
"grad_norm": 2.640380382537842, |
|
"learning_rate": 4.04e-06, |
|
"loss": 0.0961, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.2137079608343977, |
|
"grad_norm": 1.9715120792388916, |
|
"learning_rate": 4.004285714285715e-06, |
|
"loss": 0.0911, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.224350787569178, |
|
"grad_norm": 2.4855728149414062, |
|
"learning_rate": 3.9685714285714284e-06, |
|
"loss": 0.0871, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.234993614303959, |
|
"grad_norm": 2.190443992614746, |
|
"learning_rate": 3.932857142857143e-06, |
|
"loss": 0.0923, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.24563644103874, |
|
"grad_norm": 2.5768940448760986, |
|
"learning_rate": 3.8971428571428575e-06, |
|
"loss": 0.1033, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.256279267773521, |
|
"grad_norm": 2.527087926864624, |
|
"learning_rate": 3.861428571428571e-06, |
|
"loss": 0.081, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.2669220945083013, |
|
"grad_norm": 3.3411247730255127, |
|
"learning_rate": 3.825714285714286e-06, |
|
"loss": 0.0931, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.277564921243082, |
|
"grad_norm": 2.852933645248413, |
|
"learning_rate": 3.79e-06, |
|
"loss": 0.0736, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.288207747977863, |
|
"grad_norm": 3.596585512161255, |
|
"learning_rate": 3.7542857142857146e-06, |
|
"loss": 0.0892, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.2988505747126435, |
|
"grad_norm": 3.5326387882232666, |
|
"learning_rate": 3.7185714285714287e-06, |
|
"loss": 0.0827, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.3094934014474244, |
|
"grad_norm": 2.5857245922088623, |
|
"learning_rate": 3.682857142857143e-06, |
|
"loss": 0.0957, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.3201362281822053, |
|
"grad_norm": 2.7961575984954834, |
|
"learning_rate": 3.6471428571428573e-06, |
|
"loss": 0.0968, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.330779054916986, |
|
"grad_norm": 2.5830881595611572, |
|
"learning_rate": 3.611428571428572e-06, |
|
"loss": 0.086, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.3414218816517667, |
|
"grad_norm": 3.009079694747925, |
|
"learning_rate": 3.5757142857142863e-06, |
|
"loss": 0.0821, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.3520647083865476, |
|
"grad_norm": 3.2206666469573975, |
|
"learning_rate": 3.54e-06, |
|
"loss": 0.1, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.362707535121328, |
|
"grad_norm": 2.6536972522735596, |
|
"learning_rate": 3.5042857142857145e-06, |
|
"loss": 0.0911, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.373350361856109, |
|
"grad_norm": 2.0286781787872314, |
|
"learning_rate": 3.468571428571429e-06, |
|
"loss": 0.083, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.38399318859089, |
|
"grad_norm": 3.5354936122894287, |
|
"learning_rate": 3.4328571428571435e-06, |
|
"loss": 0.0994, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.3946360153256707, |
|
"grad_norm": 2.823812246322632, |
|
"learning_rate": 3.397142857142857e-06, |
|
"loss": 0.0921, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.405278842060451, |
|
"grad_norm": 3.5603067874908447, |
|
"learning_rate": 3.3614285714285717e-06, |
|
"loss": 0.1015, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.415921668795232, |
|
"grad_norm": 2.4219422340393066, |
|
"learning_rate": 3.325714285714286e-06, |
|
"loss": 0.098, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.4265644955300125, |
|
"grad_norm": 3.9650704860687256, |
|
"learning_rate": 3.2900000000000003e-06, |
|
"loss": 0.0914, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.4372073222647934, |
|
"grad_norm": 2.7661550045013428, |
|
"learning_rate": 3.2542857142857148e-06, |
|
"loss": 0.0733, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.4478501489995743, |
|
"grad_norm": 2.8396358489990234, |
|
"learning_rate": 3.218571428571429e-06, |
|
"loss": 0.0954, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.458492975734355, |
|
"grad_norm": 2.8353986740112305, |
|
"learning_rate": 3.182857142857143e-06, |
|
"loss": 0.0848, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"grad_norm": 2.9679837226867676, |
|
"learning_rate": 3.1471428571428574e-06, |
|
"loss": 0.084, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.4797786292039166, |
|
"grad_norm": 2.0554795265197754, |
|
"learning_rate": 3.111428571428572e-06, |
|
"loss": 0.0894, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.4904214559386975, |
|
"grad_norm": 2.5439860820770264, |
|
"learning_rate": 3.0757142857142856e-06, |
|
"loss": 0.0836, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.501064282673478, |
|
"grad_norm": 2.93955135345459, |
|
"learning_rate": 3.04e-06, |
|
"loss": 0.0915, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.511707109408259, |
|
"grad_norm": 2.3502097129821777, |
|
"learning_rate": 3.0042857142857146e-06, |
|
"loss": 0.0963, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.5223499361430397, |
|
"grad_norm": 2.289599895477295, |
|
"learning_rate": 2.968571428571429e-06, |
|
"loss": 0.0892, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.5329927628778206, |
|
"grad_norm": 4.718634128570557, |
|
"learning_rate": 2.932857142857143e-06, |
|
"loss": 0.089, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.543635589612601, |
|
"grad_norm": 2.9124553203582764, |
|
"learning_rate": 2.8971428571428573e-06, |
|
"loss": 0.0943, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.554278416347382, |
|
"grad_norm": 3.2406508922576904, |
|
"learning_rate": 2.861428571428572e-06, |
|
"loss": 0.0891, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.554278416347382, |
|
"eval_loss": 0.1251918077468872, |
|
"eval_runtime": 2435.0048, |
|
"eval_samples_per_second": 1.962, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 16.934856191286404, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.5649212430821624, |
|
"grad_norm": 2.5758533477783203, |
|
"learning_rate": 2.825714285714286e-06, |
|
"loss": 0.0909, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.5755640698169433, |
|
"grad_norm": 2.308535575866699, |
|
"learning_rate": 2.7900000000000004e-06, |
|
"loss": 0.0903, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 3.0140132904052734, |
|
"learning_rate": 2.7542857142857145e-06, |
|
"loss": 0.1005, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.596849723286505, |
|
"grad_norm": 3.0237767696380615, |
|
"learning_rate": 2.7185714285714286e-06, |
|
"loss": 0.1032, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.6074925500212855, |
|
"grad_norm": 2.413677930831909, |
|
"learning_rate": 2.682857142857143e-06, |
|
"loss": 0.0753, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.6181353767560664, |
|
"grad_norm": 2.406214475631714, |
|
"learning_rate": 2.6471428571428576e-06, |
|
"loss": 0.0744, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.628778203490847, |
|
"grad_norm": 2.9371650218963623, |
|
"learning_rate": 2.6114285714285712e-06, |
|
"loss": 0.0795, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.639421030225628, |
|
"grad_norm": 3.0647592544555664, |
|
"learning_rate": 2.5757142857142857e-06, |
|
"loss": 0.0885, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.6500638569604087, |
|
"grad_norm": 2.245195150375366, |
|
"learning_rate": 2.5400000000000002e-06, |
|
"loss": 0.0951, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.6607066836951896, |
|
"grad_norm": 3.212939977645874, |
|
"learning_rate": 2.5042857142857148e-06, |
|
"loss": 0.1081, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.67134951042997, |
|
"grad_norm": 2.987602949142456, |
|
"learning_rate": 2.468571428571429e-06, |
|
"loss": 0.0694, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.681992337164751, |
|
"grad_norm": 2.6746339797973633, |
|
"learning_rate": 2.4328571428571433e-06, |
|
"loss": 0.0879, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.692635163899532, |
|
"grad_norm": 2.3074121475219727, |
|
"learning_rate": 2.3971428571428574e-06, |
|
"loss": 0.0771, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.7032779906343123, |
|
"grad_norm": 2.62947940826416, |
|
"learning_rate": 2.361428571428572e-06, |
|
"loss": 0.0882, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.713920817369093, |
|
"grad_norm": 2.5452988147735596, |
|
"learning_rate": 2.325714285714286e-06, |
|
"loss": 0.081, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.724563644103874, |
|
"grad_norm": 1.9240838289260864, |
|
"learning_rate": 2.29e-06, |
|
"loss": 0.0672, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.735206470838655, |
|
"grad_norm": 2.3632349967956543, |
|
"learning_rate": 2.2542857142857146e-06, |
|
"loss": 0.0716, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.7458492975734354, |
|
"grad_norm": 1.9626713991165161, |
|
"learning_rate": 2.2185714285714287e-06, |
|
"loss": 0.0857, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.7564921243082163, |
|
"grad_norm": 1.8497956991195679, |
|
"learning_rate": 2.1828571428571428e-06, |
|
"loss": 0.0774, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.767134951042997, |
|
"grad_norm": 1.9737045764923096, |
|
"learning_rate": 2.1471428571428573e-06, |
|
"loss": 0.0884, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 3.017702102661133, |
|
"learning_rate": 2.1114285714285714e-06, |
|
"loss": 0.0894, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.7884206045125586, |
|
"grad_norm": 2.41921067237854, |
|
"learning_rate": 2.075714285714286e-06, |
|
"loss": 0.0855, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.7990634312473395, |
|
"grad_norm": 2.0304954051971436, |
|
"learning_rate": 2.04e-06, |
|
"loss": 0.0802, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.80970625798212, |
|
"grad_norm": 2.724147319793701, |
|
"learning_rate": 2.0042857142857145e-06, |
|
"loss": 0.0892, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.820349084716901, |
|
"grad_norm": 1.7320371866226196, |
|
"learning_rate": 1.968571428571429e-06, |
|
"loss": 0.1036, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.8309919114516817, |
|
"grad_norm": 2.932657241821289, |
|
"learning_rate": 1.932857142857143e-06, |
|
"loss": 0.0902, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.841634738186462, |
|
"grad_norm": 2.653630256652832, |
|
"learning_rate": 1.8971428571428573e-06, |
|
"loss": 0.0807, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.852277564921243, |
|
"grad_norm": 2.851041078567505, |
|
"learning_rate": 1.8614285714285714e-06, |
|
"loss": 0.0908, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.862920391656024, |
|
"grad_norm": 3.30446720123291, |
|
"learning_rate": 1.825714285714286e-06, |
|
"loss": 0.1001, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.873563218390805, |
|
"grad_norm": 3.250701427459717, |
|
"learning_rate": 1.79e-06, |
|
"loss": 0.0825, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.8842060451255853, |
|
"grad_norm": 2.4845850467681885, |
|
"learning_rate": 1.7542857142857145e-06, |
|
"loss": 0.0705, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.894848871860366, |
|
"grad_norm": 2.6934683322906494, |
|
"learning_rate": 1.7185714285714286e-06, |
|
"loss": 0.0667, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.9054916985951467, |
|
"grad_norm": 2.785459518432617, |
|
"learning_rate": 1.6828571428571431e-06, |
|
"loss": 0.0746, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.9161345253299276, |
|
"grad_norm": 2.5107369422912598, |
|
"learning_rate": 1.6471428571428572e-06, |
|
"loss": 0.077, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.9267773520647085, |
|
"grad_norm": 3.4977328777313232, |
|
"learning_rate": 1.6114285714285715e-06, |
|
"loss": 0.0857, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.9374201787994894, |
|
"grad_norm": 2.6151537895202637, |
|
"learning_rate": 1.575714285714286e-06, |
|
"loss": 0.0729, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.94806300553427, |
|
"grad_norm": 2.975446939468384, |
|
"learning_rate": 1.54e-06, |
|
"loss": 0.0581, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.9587058322690507, |
|
"grad_norm": 2.03027606010437, |
|
"learning_rate": 1.5042857142857146e-06, |
|
"loss": 0.1017, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.969348659003831, |
|
"grad_norm": 3.547647476196289, |
|
"learning_rate": 1.4685714285714287e-06, |
|
"loss": 0.0603, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.979991485738612, |
|
"grad_norm": 1.8231449127197266, |
|
"learning_rate": 1.432857142857143e-06, |
|
"loss": 0.0738, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.979991485738612, |
|
"eval_loss": 0.11986401677131653, |
|
"eval_runtime": 2460.4728, |
|
"eval_samples_per_second": 1.941, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 15.561025938059986, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.990634312473393, |
|
"grad_norm": 2.170557737350464, |
|
"learning_rate": 1.3971428571428573e-06, |
|
"loss": 0.0847, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 3.001277139208174, |
|
"grad_norm": 1.225490689277649, |
|
"learning_rate": 1.3614285714285716e-06, |
|
"loss": 0.0737, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.0119199659429543, |
|
"grad_norm": 2.1241679191589355, |
|
"learning_rate": 1.3257142857142856e-06, |
|
"loss": 0.0595, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 3.022562792677735, |
|
"grad_norm": 2.3180058002471924, |
|
"learning_rate": 1.2900000000000001e-06, |
|
"loss": 0.0615, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.033205619412516, |
|
"grad_norm": 2.4434351921081543, |
|
"learning_rate": 1.2542857142857142e-06, |
|
"loss": 0.0536, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 3.0438484461472965, |
|
"grad_norm": 2.712207317352295, |
|
"learning_rate": 1.2185714285714287e-06, |
|
"loss": 0.0558, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.0544912728820774, |
|
"grad_norm": 2.7258520126342773, |
|
"learning_rate": 1.182857142857143e-06, |
|
"loss": 0.0727, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 3.0651340996168583, |
|
"grad_norm": 2.103072166442871, |
|
"learning_rate": 1.1471428571428573e-06, |
|
"loss": 0.0479, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.075776926351639, |
|
"grad_norm": 1.9003605842590332, |
|
"learning_rate": 1.1114285714285714e-06, |
|
"loss": 0.0554, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"grad_norm": 1.4967641830444336, |
|
"learning_rate": 1.0757142857142857e-06, |
|
"loss": 0.0409, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.0970625798212006, |
|
"grad_norm": 1.389493703842163, |
|
"learning_rate": 1.04e-06, |
|
"loss": 0.0474, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 3.107705406555981, |
|
"grad_norm": 1.4253233671188354, |
|
"learning_rate": 1.0042857142857143e-06, |
|
"loss": 0.0445, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.118348233290762, |
|
"grad_norm": 2.6737582683563232, |
|
"learning_rate": 9.685714285714288e-07, |
|
"loss": 0.0584, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 3.128991060025543, |
|
"grad_norm": 2.5511069297790527, |
|
"learning_rate": 9.32857142857143e-07, |
|
"loss": 0.0557, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.1396338867603237, |
|
"grad_norm": 2.139846086502075, |
|
"learning_rate": 8.971428571428573e-07, |
|
"loss": 0.0467, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 3.150276713495104, |
|
"grad_norm": 1.826206088066101, |
|
"learning_rate": 8.614285714285716e-07, |
|
"loss": 0.054, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.160919540229885, |
|
"grad_norm": 2.8576643466949463, |
|
"learning_rate": 8.257142857142858e-07, |
|
"loss": 0.0517, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 3.171562366964666, |
|
"grad_norm": 2.1208717823028564, |
|
"learning_rate": 7.900000000000001e-07, |
|
"loss": 0.0667, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.1822051936994464, |
|
"grad_norm": 1.534239649772644, |
|
"learning_rate": 7.542857142857144e-07, |
|
"loss": 0.0592, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 3.1928480204342273, |
|
"grad_norm": 2.3605740070343018, |
|
"learning_rate": 7.185714285714286e-07, |
|
"loss": 0.063, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.2034908471690082, |
|
"grad_norm": 2.1266493797302246, |
|
"learning_rate": 6.842857142857143e-07, |
|
"loss": 0.0567, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 3.2141336739037887, |
|
"grad_norm": 1.5303648710250854, |
|
"learning_rate": 6.485714285714287e-07, |
|
"loss": 0.0619, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.2247765006385696, |
|
"grad_norm": 2.740006446838379, |
|
"learning_rate": 6.128571428571429e-07, |
|
"loss": 0.0773, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 3.2354193273733505, |
|
"grad_norm": 1.5786134004592896, |
|
"learning_rate": 5.771428571428572e-07, |
|
"loss": 0.0629, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.246062154108131, |
|
"grad_norm": 1.3754280805587769, |
|
"learning_rate": 5.414285714285715e-07, |
|
"loss": 0.0614, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 3.256704980842912, |
|
"grad_norm": 0.8814867734909058, |
|
"learning_rate": 5.057142857142858e-07, |
|
"loss": 0.0574, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.2673478075776927, |
|
"grad_norm": 2.909646511077881, |
|
"learning_rate": 4.7000000000000005e-07, |
|
"loss": 0.0408, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 3.2779906343124736, |
|
"grad_norm": 2.272367238998413, |
|
"learning_rate": 4.342857142857143e-07, |
|
"loss": 0.0539, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.288633461047254, |
|
"grad_norm": 2.039271831512451, |
|
"learning_rate": 3.985714285714286e-07, |
|
"loss": 0.0688, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 3.299276287782035, |
|
"grad_norm": 2.0516164302825928, |
|
"learning_rate": 3.6285714285714283e-07, |
|
"loss": 0.0546, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.3099191145168154, |
|
"grad_norm": 1.9131453037261963, |
|
"learning_rate": 3.271428571428572e-07, |
|
"loss": 0.0532, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 3.3205619412515963, |
|
"grad_norm": 1.66374933719635, |
|
"learning_rate": 2.914285714285715e-07, |
|
"loss": 0.0536, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.331204767986377, |
|
"grad_norm": 1.596907615661621, |
|
"learning_rate": 2.557142857142857e-07, |
|
"loss": 0.0456, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 3.341847594721158, |
|
"grad_norm": 2.430992603302002, |
|
"learning_rate": 2.2e-07, |
|
"loss": 0.0635, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.3524904214559386, |
|
"grad_norm": 2.4150683879852295, |
|
"learning_rate": 1.842857142857143e-07, |
|
"loss": 0.0638, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 3.3631332481907195, |
|
"grad_norm": 1.5517698526382446, |
|
"learning_rate": 1.4857142857142857e-07, |
|
"loss": 0.0615, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.3737760749255004, |
|
"grad_norm": 1.8786826133728027, |
|
"learning_rate": 1.1285714285714287e-07, |
|
"loss": 0.0607, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 3.384418901660281, |
|
"grad_norm": 2.849170446395874, |
|
"learning_rate": 7.714285714285715e-08, |
|
"loss": 0.0567, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.3950617283950617, |
|
"grad_norm": 2.8803513050079346, |
|
"learning_rate": 4.1428571428571426e-08, |
|
"loss": 0.0507, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 3.4057045551298426, |
|
"grad_norm": 2.7549145221710205, |
|
"learning_rate": 5.714285714285715e-09, |
|
"loss": 0.0544, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4057045551298426, |
|
"eval_loss": 0.1155887097120285, |
|
"eval_runtime": 2444.7471, |
|
"eval_samples_per_second": 1.954, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 15.246076710047603, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 8000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.364112316878029e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
}