{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1060723371308483,
  "eval_steps": 500,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011060723371308484,
      "grad_norm": 275.2231140136719,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 101.1244,
      "step": 100
    },
    {
      "epoch": 0.022121446742616967,
      "grad_norm": 1128.2623291015625,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 123.0051,
      "step": 200
    },
    {
      "epoch": 0.03318217011392545,
      "grad_norm": 77.56747436523438,
      "learning_rate": 3e-06,
      "loss": 62.0741,
      "step": 300
    },
    {
      "epoch": 0.044242893485233935,
      "grad_norm": 181.99227905273438,
      "learning_rate": 4.000000000000001e-06,
      "loss": 132.6745,
      "step": 400
    },
    {
      "epoch": 0.05530361685654242,
      "grad_norm": 2387.94189453125,
      "learning_rate": 5e-06,
      "loss": 56.9033,
      "step": 500
    },
    {
      "epoch": 0.0663643402278509,
      "grad_norm": 223.02427673339844,
      "learning_rate": 6e-06,
      "loss": 88.674,
      "step": 600
    },
    {
      "epoch": 0.07742506359915939,
      "grad_norm": 1268.653076171875,
      "learning_rate": 7e-06,
      "loss": 29.1095,
      "step": 700
    },
    {
      "epoch": 0.08848578697046787,
      "grad_norm": 813.103759765625,
      "learning_rate": 8.000000000000001e-06,
      "loss": 37.2381,
      "step": 800
    },
    {
      "epoch": 0.09954651034177635,
      "grad_norm": 310.0917053222656,
      "learning_rate": 9e-06,
      "loss": 46.942,
      "step": 900
    },
    {
      "epoch": 0.11060723371308484,
      "grad_norm": 384.6047058105469,
      "learning_rate": 1e-05,
      "loss": 54.5569,
      "step": 1000
    },
    {
      "epoch": 0.12166795708439332,
      "grad_norm": 653.60693359375,
      "learning_rate": 9.88888888888889e-06,
      "loss": 27.13,
      "step": 1100
    },
    {
      "epoch": 0.1327286804557018,
      "grad_norm": 169.83457946777344,
      "learning_rate": 9.777777777777779e-06,
      "loss": 30.8385,
      "step": 1200
    },
    {
      "epoch": 0.14378940382701028,
      "grad_norm": 365.39520263671875,
      "learning_rate": 9.666666666666667e-06,
      "loss": 41.3138,
      "step": 1300
    },
    {
      "epoch": 0.15485012719831878,
      "grad_norm": 589.5994873046875,
      "learning_rate": 9.555555555555556e-06,
      "loss": 30.9588,
      "step": 1400
    },
    {
      "epoch": 0.16591085056962726,
      "grad_norm": 278.96026611328125,
      "learning_rate": 9.444444444444445e-06,
      "loss": 30.8774,
      "step": 1500
    },
    {
      "epoch": 0.17697157394093574,
      "grad_norm": 308.6740417480469,
      "learning_rate": 9.333333333333334e-06,
      "loss": 34.5223,
      "step": 1600
    },
    {
      "epoch": 0.18803229731224422,
      "grad_norm": 290.6882629394531,
      "learning_rate": 9.222222222222224e-06,
      "loss": 73.9331,
      "step": 1700
    },
    {
      "epoch": 0.1990930206835527,
      "grad_norm": 162.80252075195312,
      "learning_rate": 9.111111111111112e-06,
      "loss": 75.7575,
      "step": 1800
    },
    {
      "epoch": 0.21015374405486117,
      "grad_norm": 614.7833251953125,
      "learning_rate": 9e-06,
      "loss": 46.2102,
      "step": 1900
    },
    {
      "epoch": 0.22121446742616968,
      "grad_norm": 90.11128997802734,
      "learning_rate": 8.888888888888888e-06,
      "loss": 32.4061,
      "step": 2000
    },
    {
      "epoch": 0.23227519079747816,
      "grad_norm": 466.54559326171875,
      "learning_rate": 8.777777777777778e-06,
      "loss": 40.7987,
      "step": 2100
    },
    {
      "epoch": 0.24333591416878664,
      "grad_norm": 103.32861328125,
      "learning_rate": 8.666666666666668e-06,
      "loss": 123.4267,
      "step": 2200
    },
    {
      "epoch": 0.2543966375400951,
      "grad_norm": 87.52120971679688,
      "learning_rate": 8.555555555555556e-06,
      "loss": 47.7221,
      "step": 2300
    },
    {
      "epoch": 0.2654573609114036,
      "grad_norm": 222.90623474121094,
      "learning_rate": 8.444444444444446e-06,
      "loss": 50.3554,
      "step": 2400
    },
    {
      "epoch": 0.2765180842827121,
      "grad_norm": 223.30189514160156,
      "learning_rate": 8.333333333333334e-06,
      "loss": 83.7681,
      "step": 2500
    },
    {
      "epoch": 0.28757880765402055,
      "grad_norm": 428.62408447265625,
      "learning_rate": 8.222222222222222e-06,
      "loss": 55.3063,
      "step": 2600
    },
    {
      "epoch": 0.29863953102532903,
      "grad_norm": 531.6271362304688,
      "learning_rate": 8.111111111111112e-06,
      "loss": 37.9162,
      "step": 2700
    },
    {
      "epoch": 0.30970025439663756,
      "grad_norm": 311.58929443359375,
      "learning_rate": 8.000000000000001e-06,
      "loss": 59.5817,
      "step": 2800
    },
    {
      "epoch": 0.32076097776794604,
      "grad_norm": 375.3035583496094,
      "learning_rate": 7.88888888888889e-06,
      "loss": 52.9975,
      "step": 2900
    },
    {
      "epoch": 0.3318217011392545,
      "grad_norm": 516.3969116210938,
      "learning_rate": 7.77777777777778e-06,
      "loss": 42.9525,
      "step": 3000
    },
    {
      "epoch": 0.342882424510563,
      "grad_norm": 379.7697448730469,
      "learning_rate": 7.666666666666667e-06,
      "loss": 81.1183,
      "step": 3100
    },
    {
      "epoch": 0.3539431478818715,
      "grad_norm": 312.02691650390625,
      "learning_rate": 7.555555555555556e-06,
      "loss": 70.5851,
      "step": 3200
    },
    {
      "epoch": 0.36500387125317996,
      "grad_norm": 13544.4599609375,
      "learning_rate": 7.444444444444445e-06,
      "loss": 89.4958,
      "step": 3300
    },
    {
      "epoch": 0.37606459462448844,
      "grad_norm": 272.00518798828125,
      "learning_rate": 7.333333333333333e-06,
      "loss": 84.8861,
      "step": 3400
    },
    {
      "epoch": 0.3871253179957969,
      "grad_norm": 2974.26416015625,
      "learning_rate": 7.222222222222223e-06,
      "loss": 38.6106,
      "step": 3500
    },
    {
      "epoch": 0.3981860413671054,
      "grad_norm": 1803.48095703125,
      "learning_rate": 7.111111111111112e-06,
      "loss": 40.9991,
      "step": 3600
    },
    {
      "epoch": 0.40924676473841387,
      "grad_norm": 4818.87890625,
      "learning_rate": 7e-06,
      "loss": 32.4676,
      "step": 3700
    },
    {
      "epoch": 0.42030748810972235,
      "grad_norm": 104.36139678955078,
      "learning_rate": 6.88888888888889e-06,
      "loss": 31.6814,
      "step": 3800
    },
    {
      "epoch": 0.4313682114810309,
      "grad_norm": 612.83349609375,
      "learning_rate": 6.777777777777779e-06,
      "loss": 34.3181,
      "step": 3900
    },
    {
      "epoch": 0.44242893485233936,
      "grad_norm": 288.1559143066406,
      "learning_rate": 6.666666666666667e-06,
      "loss": 41.178,
      "step": 4000
    },
    {
      "epoch": 0.45348965822364784,
      "grad_norm": 912.2635498046875,
      "learning_rate": 6.555555555555556e-06,
      "loss": 45.9302,
      "step": 4100
    },
    {
      "epoch": 0.4645503815949563,
      "grad_norm": 488.1215515136719,
      "learning_rate": 6.444444444444445e-06,
      "loss": 51.5342,
      "step": 4200
    },
    {
      "epoch": 0.4756111049662648,
      "grad_norm": 132.82635498046875,
      "learning_rate": 6.333333333333333e-06,
      "loss": 27.757,
      "step": 4300
    },
    {
      "epoch": 0.4866718283375733,
      "grad_norm": 564.8312377929688,
      "learning_rate": 6.222222222222223e-06,
      "loss": 31.4658,
      "step": 4400
    },
    {
      "epoch": 0.49773255170888175,
      "grad_norm": 306.7361755371094,
      "learning_rate": 6.111111111111112e-06,
      "loss": 43.3153,
      "step": 4500
    },
    {
      "epoch": 0.5087932750801902,
      "grad_norm": 127.41687774658203,
      "learning_rate": 6e-06,
      "loss": 52.1603,
      "step": 4600
    },
    {
      "epoch": 0.5198539984514987,
      "grad_norm": 5578.05712890625,
      "learning_rate": 5.88888888888889e-06,
      "loss": 80.404,
      "step": 4700
    },
    {
      "epoch": 0.5309147218228072,
      "grad_norm": 163.3619384765625,
      "learning_rate": 5.777777777777778e-06,
      "loss": 51.359,
      "step": 4800
    },
    {
      "epoch": 0.5419754451941157,
      "grad_norm": 309.4814453125,
      "learning_rate": 5.666666666666667e-06,
      "loss": 32.5008,
      "step": 4900
    },
    {
      "epoch": 0.5530361685654241,
      "grad_norm": 1415.1243896484375,
      "learning_rate": 5.555555555555557e-06,
      "loss": 34.8322,
      "step": 5000
    },
    {
      "epoch": 0.5640968919367326,
      "grad_norm": 229.50904846191406,
      "learning_rate": 5.444444444444445e-06,
      "loss": 50.1107,
      "step": 5100
    },
    {
      "epoch": 0.5751576153080411,
      "grad_norm": 123.28858184814453,
      "learning_rate": 5.333333333333334e-06,
      "loss": 39.4679,
      "step": 5200
    },
    {
      "epoch": 0.5862183386793496,
      "grad_norm": 48702.5703125,
      "learning_rate": 5.2222222222222226e-06,
      "loss": 65.5247,
      "step": 5300
    },
    {
      "epoch": 0.5972790620506581,
      "grad_norm": 1075.4512939453125,
      "learning_rate": 5.1111111111111115e-06,
      "loss": 41.7882,
      "step": 5400
    },
    {
      "epoch": 0.6083397854219667,
      "grad_norm": 150.42401123046875,
      "learning_rate": 5e-06,
      "loss": 67.5473,
      "step": 5500
    },
    {
      "epoch": 0.6194005087932751,
      "grad_norm": 466.3522644042969,
      "learning_rate": 4.888888888888889e-06,
      "loss": 51.2565,
      "step": 5600
    },
    {
      "epoch": 0.6304612321645836,
      "grad_norm": 11005.3369140625,
      "learning_rate": 4.777777777777778e-06,
      "loss": 53.788,
      "step": 5700
    },
    {
      "epoch": 0.6415219555358921,
      "grad_norm": 1204.18798828125,
      "learning_rate": 4.666666666666667e-06,
      "loss": 91.1635,
      "step": 5800
    },
    {
      "epoch": 0.6525826789072006,
      "grad_norm": 224.9308624267578,
      "learning_rate": 4.555555555555556e-06,
      "loss": 31.2719,
      "step": 5900
    },
    {
      "epoch": 0.663643402278509,
      "grad_norm": 232.47068786621094,
      "learning_rate": 4.444444444444444e-06,
      "loss": 28.8651,
      "step": 6000
    },
    {
      "epoch": 0.6747041256498175,
      "grad_norm": 159.796630859375,
      "learning_rate": 4.333333333333334e-06,
      "loss": 37.8374,
      "step": 6100
    },
    {
      "epoch": 0.685764849021126,
      "grad_norm": 763.4359130859375,
      "learning_rate": 4.222222222222223e-06,
      "loss": 46.3411,
      "step": 6200
    },
    {
      "epoch": 0.6968255723924345,
      "grad_norm": 339.2966003417969,
      "learning_rate": 4.111111111111111e-06,
      "loss": 48.4202,
      "step": 6300
    },
    {
      "epoch": 0.707886295763743,
      "grad_norm": 748.3837280273438,
      "learning_rate": 4.000000000000001e-06,
      "loss": 50.4229,
      "step": 6400
    },
    {
      "epoch": 0.7189470191350514,
      "grad_norm": 231.0708770751953,
      "learning_rate": 3.88888888888889e-06,
      "loss": 51.902,
      "step": 6500
    },
    {
      "epoch": 0.7300077425063599,
      "grad_norm": 53.15177536010742,
      "learning_rate": 3.777777777777778e-06,
      "loss": 53.8197,
      "step": 6600
    },
    {
      "epoch": 0.7410684658776684,
      "grad_norm": 444.1663818359375,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 27.7717,
      "step": 6700
    },
    {
      "epoch": 0.7521291892489769,
      "grad_norm": 1551.6669921875,
      "learning_rate": 3.555555555555556e-06,
      "loss": 45.1505,
      "step": 6800
    },
    {
      "epoch": 0.7631899126202853,
      "grad_norm": 339.9322509765625,
      "learning_rate": 3.444444444444445e-06,
      "loss": 40.7306,
      "step": 6900
    },
    {
      "epoch": 0.7742506359915938,
      "grad_norm": 152.95518493652344,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 41.1691,
      "step": 7000
    },
    {
      "epoch": 0.7853113593629023,
      "grad_norm": 565.0484619140625,
      "learning_rate": 3.2222222222222227e-06,
      "loss": 40.6706,
      "step": 7100
    },
    {
      "epoch": 0.7963720827342108,
      "grad_norm": 254.9434814453125,
      "learning_rate": 3.1111111111111116e-06,
      "loss": 37.7834,
      "step": 7200
    },
    {
      "epoch": 0.8074328061055193,
      "grad_norm": 426.6705627441406,
      "learning_rate": 3e-06,
      "loss": 51.4484,
      "step": 7300
    },
    {
      "epoch": 0.8184935294768277,
      "grad_norm": 319.9366149902344,
      "learning_rate": 2.888888888888889e-06,
      "loss": 26.9082,
      "step": 7400
    },
    {
      "epoch": 0.8295542528481362,
      "grad_norm": 518.8477172851562,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 31.8693,
      "step": 7500
    },
    {
      "epoch": 0.8406149762194447,
      "grad_norm": 428.4947814941406,
      "learning_rate": 2.666666666666667e-06,
      "loss": 53.7889,
      "step": 7600
    },
    {
      "epoch": 0.8516756995907533,
      "grad_norm": 216.45521545410156,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 35.5407,
      "step": 7700
    },
    {
      "epoch": 0.8627364229620618,
      "grad_norm": 180.7081298828125,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 28.9896,
      "step": 7800
    },
    {
      "epoch": 0.8737971463333702,
      "grad_norm": 341.2665100097656,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 48.7084,
      "step": 7900
    },
    {
      "epoch": 0.8848578697046787,
      "grad_norm": 24.655475616455078,
      "learning_rate": 2.222222222222222e-06,
      "loss": 47.937,
      "step": 8000
    },
    {
      "epoch": 0.8959185930759872,
      "grad_norm": 244.29147338867188,
      "learning_rate": 2.1111111111111114e-06,
      "loss": 27.4093,
      "step": 8100
    },
    {
      "epoch": 0.9069793164472957,
      "grad_norm": 308.5469665527344,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 62.8975,
      "step": 8200
    },
    {
      "epoch": 0.9180400398186042,
      "grad_norm": 137.78431701660156,
      "learning_rate": 1.888888888888889e-06,
      "loss": 71.2175,
      "step": 8300
    },
    {
      "epoch": 0.9291007631899126,
      "grad_norm": 346.1696472167969,
      "learning_rate": 1.777777777777778e-06,
      "loss": 74.5859,
      "step": 8400
    },
    {
      "epoch": 0.9401614865612211,
      "grad_norm": 605.5797729492188,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 31.0377,
      "step": 8500
    },
    {
      "epoch": 0.9512222099325296,
      "grad_norm": 82.16192626953125,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 45.4671,
      "step": 8600
    },
    {
      "epoch": 0.9622829333038381,
      "grad_norm": 1627.314453125,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 60.7454,
      "step": 8700
    },
    {
      "epoch": 0.9733436566751466,
      "grad_norm": 141.41053771972656,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 43.7195,
      "step": 8800
    },
    {
      "epoch": 0.984404380046455,
      "grad_norm": 60.2154541015625,
      "learning_rate": 1.2222222222222223e-06,
      "loss": 37.6804,
      "step": 8900
    },
    {
      "epoch": 0.9954651034177635,
      "grad_norm": 164.14305114746094,
      "learning_rate": 1.111111111111111e-06,
      "loss": 30.995,
      "step": 9000
    },
    {
      "epoch": 1.0,
      "eval_loss": 120.65478515625,
      "eval_runtime": 20.9653,
      "eval_samples_per_second": 95.873,
      "eval_steps_per_second": 47.936,
      "step": 9041
    },
    {
      "epoch": 1.006525826789072,
      "grad_norm": 866.4227294921875,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 62.0281,
      "step": 9100
    },
    {
      "epoch": 1.0175865501603805,
      "grad_norm": 186.7490234375,
      "learning_rate": 8.88888888888889e-07,
      "loss": 61.999,
      "step": 9200
    },
    {
      "epoch": 1.028647273531689,
      "grad_norm": 120.41979217529297,
      "learning_rate": 7.777777777777779e-07,
      "loss": 39.2587,
      "step": 9300
    },
    {
      "epoch": 1.0397079969029974,
      "grad_norm": 188.38967895507812,
      "learning_rate": 6.666666666666667e-07,
      "loss": 34.3708,
      "step": 9400
    },
    {
      "epoch": 1.050768720274306,
      "grad_norm": 429.96307373046875,
      "learning_rate": 5.555555555555555e-07,
      "loss": 27.8105,
      "step": 9500
    },
    {
      "epoch": 1.0618294436456144,
      "grad_norm": 321.49444580078125,
      "learning_rate": 4.444444444444445e-07,
      "loss": 27.4738,
      "step": 9600
    },
    {
      "epoch": 1.0728901670169229,
      "grad_norm": 232.31663513183594,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 60.7941,
      "step": 9700
    },
    {
      "epoch": 1.0839508903882313,
      "grad_norm": 315.6164855957031,
      "learning_rate": 2.2222222222222224e-07,
      "loss": 27.5082,
      "step": 9800
    },
    {
      "epoch": 1.0950116137595398,
      "grad_norm": 841.2481689453125,
      "learning_rate": 1.1111111111111112e-07,
      "loss": 37.086,
      "step": 9900
    },
    {
      "epoch": 1.1060723371308483,
      "grad_norm": 104.0994873046875,
      "learning_rate": 0.0,
      "loss": 33.8564,
      "step": 10000
    }
  ],
  "logging_steps": 100,
  "max_steps": 10000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}