|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.861111111111112e-05, |
|
"loss": 2.0531, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.13333334028720856, |
|
"eval_loss": 2.159055471420288, |
|
"eval_runtime": 309.1055, |
|
"eval_samples_per_second": 4.659, |
|
"eval_steps_per_second": 1.165, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.722222222222223e-05, |
|
"loss": 2.1794, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.21111111342906952, |
|
"eval_loss": 2.074418783187866, |
|
"eval_runtime": 308.7598, |
|
"eval_samples_per_second": 4.664, |
|
"eval_steps_per_second": 1.166, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.583333333333334e-05, |
|
"loss": 2.0726, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.20277777314186096, |
|
"eval_loss": 2.005201578140259, |
|
"eval_runtime": 309.0551, |
|
"eval_samples_per_second": 4.659, |
|
"eval_steps_per_second": 1.165, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.444444444444444e-05, |
|
"loss": 1.9514, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.22430555522441864, |
|
"eval_loss": 1.9686204195022583, |
|
"eval_runtime": 310.3523, |
|
"eval_samples_per_second": 4.64, |
|
"eval_steps_per_second": 1.16, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.305555555555556e-05, |
|
"loss": 1.8919, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.22708334028720856, |
|
"eval_loss": 2.052751064300537, |
|
"eval_runtime": 307.799, |
|
"eval_samples_per_second": 4.678, |
|
"eval_steps_per_second": 1.17, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 1.9575, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.17916665971279144, |
|
"eval_loss": 2.038191080093384, |
|
"eval_runtime": 307.6143, |
|
"eval_samples_per_second": 4.681, |
|
"eval_steps_per_second": 1.17, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.027777777777779e-05, |
|
"loss": 1.9836, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.2701388895511627, |
|
"eval_loss": 1.7942837476730347, |
|
"eval_runtime": 307.309, |
|
"eval_samples_per_second": 4.686, |
|
"eval_steps_per_second": 1.171, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 1.9651, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.2819444537162781, |
|
"eval_loss": 1.7969180345535278, |
|
"eval_runtime": 310.9128, |
|
"eval_samples_per_second": 4.632, |
|
"eval_steps_per_second": 1.158, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.9382, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.27986112236976624, |
|
"eval_loss": 1.8196858167648315, |
|
"eval_runtime": 307.5957, |
|
"eval_samples_per_second": 4.681, |
|
"eval_steps_per_second": 1.17, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.611111111111112e-05, |
|
"loss": 1.8197, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.18958333134651184, |
|
"eval_loss": 1.9207282066345215, |
|
"eval_runtime": 308.4662, |
|
"eval_samples_per_second": 4.668, |
|
"eval_steps_per_second": 1.167, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.472222222222222e-05, |
|
"loss": 1.7721, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.2881944477558136, |
|
"eval_loss": 1.7433712482452393, |
|
"eval_runtime": 309.686, |
|
"eval_samples_per_second": 4.65, |
|
"eval_steps_per_second": 1.162, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 1.8501, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.24861110746860504, |
|
"eval_loss": 1.7375705242156982, |
|
"eval_runtime": 305.6275, |
|
"eval_samples_per_second": 4.712, |
|
"eval_steps_per_second": 1.178, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.194444444444445e-05, |
|
"loss": 1.8136, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.3291666805744171, |
|
"eval_loss": 1.6771501302719116, |
|
"eval_runtime": 311.0209, |
|
"eval_samples_per_second": 4.63, |
|
"eval_steps_per_second": 1.157, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.055555555555556e-05, |
|
"loss": 1.672, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.29027777910232544, |
|
"eval_loss": 1.6961910724639893, |
|
"eval_runtime": 308.399, |
|
"eval_samples_per_second": 4.669, |
|
"eval_steps_per_second": 1.167, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.916666666666666e-05, |
|
"loss": 1.7787, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.3499999940395355, |
|
"eval_loss": 1.654746174812317, |
|
"eval_runtime": 306.8392, |
|
"eval_samples_per_second": 4.693, |
|
"eval_steps_per_second": 1.173, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.777777777777778e-05, |
|
"loss": 1.7144, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.36597222089767456, |
|
"eval_loss": 1.6828863620758057, |
|
"eval_runtime": 306.6648, |
|
"eval_samples_per_second": 4.696, |
|
"eval_steps_per_second": 1.174, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.638888888888889e-05, |
|
"loss": 1.558, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.3430555462837219, |
|
"eval_loss": 1.6398437023162842, |
|
"eval_runtime": 312.3754, |
|
"eval_samples_per_second": 4.61, |
|
"eval_steps_per_second": 1.152, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.5197, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.3444444537162781, |
|
"eval_loss": 1.6192543506622314, |
|
"eval_runtime": 308.0294, |
|
"eval_samples_per_second": 4.675, |
|
"eval_steps_per_second": 1.169, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.361111111111111e-05, |
|
"loss": 1.5088, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.37708333134651184, |
|
"eval_loss": 1.5085088014602661, |
|
"eval_runtime": 305.6283, |
|
"eval_samples_per_second": 4.712, |
|
"eval_steps_per_second": 1.178, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.222222222222222e-05, |
|
"loss": 1.7442, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.43263888359069824, |
|
"eval_loss": 1.4439728260040283, |
|
"eval_runtime": 305.4203, |
|
"eval_samples_per_second": 4.715, |
|
"eval_steps_per_second": 1.179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.083333333333334e-05, |
|
"loss": 1.6021, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5138888955116272, |
|
"eval_loss": 1.3366789817810059, |
|
"eval_runtime": 307.6606, |
|
"eval_samples_per_second": 4.68, |
|
"eval_steps_per_second": 1.17, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.944444444444444e-05, |
|
"loss": 1.607, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.4756944477558136, |
|
"eval_loss": 1.400604248046875, |
|
"eval_runtime": 305.6563, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 1.178, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.805555555555556e-05, |
|
"loss": 1.4012, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5361111164093018, |
|
"eval_loss": 1.2899019718170166, |
|
"eval_runtime": 305.6072, |
|
"eval_samples_per_second": 4.712, |
|
"eval_steps_per_second": 1.178, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.4094, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5868055820465088, |
|
"eval_loss": 1.1848715543746948, |
|
"eval_runtime": 310.5577, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 1.159, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.527777777777778e-05, |
|
"loss": 1.3252, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5326389074325562, |
|
"eval_loss": 1.2207249402999878, |
|
"eval_runtime": 305.3735, |
|
"eval_samples_per_second": 4.716, |
|
"eval_steps_per_second": 1.179, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.388888888888888e-05, |
|
"loss": 1.2939, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5722222328186035, |
|
"eval_loss": 1.127318263053894, |
|
"eval_runtime": 306.1632, |
|
"eval_samples_per_second": 4.703, |
|
"eval_steps_per_second": 1.176, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.25e-05, |
|
"loss": 1.1591, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.6090278029441833, |
|
"eval_loss": 1.0614306926727295, |
|
"eval_runtime": 306.437, |
|
"eval_samples_per_second": 4.699, |
|
"eval_steps_per_second": 1.175, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.111111111111112e-05, |
|
"loss": 1.421, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5416666865348816, |
|
"eval_loss": 1.2863346338272095, |
|
"eval_runtime": 306.6206, |
|
"eval_samples_per_second": 4.696, |
|
"eval_steps_per_second": 1.174, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.972222222222223e-05, |
|
"loss": 1.3071, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5666666626930237, |
|
"eval_loss": 1.1455507278442383, |
|
"eval_runtime": 308.2962, |
|
"eval_samples_per_second": 4.671, |
|
"eval_steps_per_second": 1.168, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 1.1463, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.6270833611488342, |
|
"eval_loss": 1.0083465576171875, |
|
"eval_runtime": 306.6088, |
|
"eval_samples_per_second": 4.697, |
|
"eval_steps_per_second": 1.174, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.6944444444444445e-05, |
|
"loss": 1.149, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.6000000238418579, |
|
"eval_loss": 1.039494276046753, |
|
"eval_runtime": 307.8298, |
|
"eval_samples_per_second": 4.678, |
|
"eval_steps_per_second": 1.169, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 1.0853, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.6319444179534912, |
|
"eval_loss": 0.9475428462028503, |
|
"eval_runtime": 310.5495, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 1.159, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.4166666666666664e-05, |
|
"loss": 1.0467, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.6618055701255798, |
|
"eval_loss": 0.938763439655304, |
|
"eval_runtime": 305.3429, |
|
"eval_samples_per_second": 4.716, |
|
"eval_steps_per_second": 1.179, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.2777777777777784e-05, |
|
"loss": 1.2562, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5874999761581421, |
|
"eval_loss": 1.058403491973877, |
|
"eval_runtime": 309.582, |
|
"eval_samples_per_second": 4.651, |
|
"eval_steps_per_second": 1.163, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.138888888888889e-05, |
|
"loss": 1.0267, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.6819444298744202, |
|
"eval_loss": 0.9059543013572693, |
|
"eval_runtime": 308.1955, |
|
"eval_samples_per_second": 4.672, |
|
"eval_steps_per_second": 1.168, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7126, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.6243055462837219, |
|
"eval_loss": 0.9829866290092468, |
|
"eval_runtime": 309.2257, |
|
"eval_samples_per_second": 4.657, |
|
"eval_steps_per_second": 1.164, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.0441, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.675000011920929, |
|
"eval_loss": 0.909841001033783, |
|
"eval_runtime": 305.6628, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 1.178, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 1.0524, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.6458333134651184, |
|
"eval_loss": 0.954737663269043, |
|
"eval_runtime": 308.967, |
|
"eval_samples_per_second": 4.661, |
|
"eval_steps_per_second": 1.165, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.9355, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.6958333253860474, |
|
"eval_loss": 0.7970058917999268, |
|
"eval_runtime": 306.7636, |
|
"eval_samples_per_second": 4.694, |
|
"eval_steps_per_second": 1.174, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.0505, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.6777777671813965, |
|
"eval_loss": 0.8817840814590454, |
|
"eval_runtime": 306.413, |
|
"eval_samples_per_second": 4.7, |
|
"eval_steps_per_second": 1.175, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.7672, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.7041666507720947, |
|
"eval_loss": 0.8178666830062866, |
|
"eval_runtime": 306.6048, |
|
"eval_samples_per_second": 4.697, |
|
"eval_steps_per_second": 1.174, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.9586, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.761805534362793, |
|
"eval_loss": 0.7114083766937256, |
|
"eval_runtime": 307.286, |
|
"eval_samples_per_second": 4.686, |
|
"eval_steps_per_second": 1.172, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 1.0177, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.7590277791023254, |
|
"eval_loss": 0.7004179954528809, |
|
"eval_runtime": 305.701, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 1.178, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.7575, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7888888716697693, |
|
"eval_loss": 0.6390997171401978, |
|
"eval_runtime": 305.5003, |
|
"eval_samples_per_second": 4.714, |
|
"eval_steps_per_second": 1.178, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.7834, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.7604166865348816, |
|
"eval_loss": 0.6542982459068298, |
|
"eval_runtime": 306.1149, |
|
"eval_samples_per_second": 4.704, |
|
"eval_steps_per_second": 1.176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.644, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.793055534362793, |
|
"eval_loss": 0.6248801946640015, |
|
"eval_runtime": 306.8175, |
|
"eval_samples_per_second": 4.693, |
|
"eval_steps_per_second": 1.173, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.7928, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7715277671813965, |
|
"eval_loss": 0.6328664422035217, |
|
"eval_runtime": 308.2672, |
|
"eval_samples_per_second": 4.671, |
|
"eval_steps_per_second": 1.168, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7773, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.8125, |
|
"eval_loss": 0.5492955446243286, |
|
"eval_runtime": 305.9298, |
|
"eval_samples_per_second": 4.707, |
|
"eval_steps_per_second": 1.177, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.6163, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7958333492279053, |
|
"eval_loss": 0.5877541303634644, |
|
"eval_runtime": 306.2121, |
|
"eval_samples_per_second": 4.703, |
|
"eval_steps_per_second": 1.176, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.6773, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.8222222328186035, |
|
"eval_loss": 0.5475648641586304, |
|
"eval_runtime": 306.3418, |
|
"eval_samples_per_second": 4.701, |
|
"eval_steps_per_second": 1.175, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.7042, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.8159722089767456, |
|
"eval_loss": 0.5389293432235718, |
|
"eval_runtime": 312.5784, |
|
"eval_samples_per_second": 4.607, |
|
"eval_steps_per_second": 1.152, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6488, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.8180555701255798, |
|
"eval_loss": 0.5231082439422607, |
|
"eval_runtime": 307.67, |
|
"eval_samples_per_second": 4.68, |
|
"eval_steps_per_second": 1.17, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.7165, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.800000011920929, |
|
"eval_loss": 0.567889928817749, |
|
"eval_runtime": 307.7956, |
|
"eval_samples_per_second": 4.678, |
|
"eval_steps_per_second": 1.17, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.716, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.8340277671813965, |
|
"eval_loss": 0.49685245752334595, |
|
"eval_runtime": 308.7124, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 1.166, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.8907, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.856249988079071, |
|
"eval_loss": 0.45114219188690186, |
|
"eval_runtime": 308.861, |
|
"eval_samples_per_second": 4.662, |
|
"eval_steps_per_second": 1.166, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.7081, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.8423610925674438, |
|
"eval_loss": 0.4767354726791382, |
|
"eval_runtime": 307.9876, |
|
"eval_samples_per_second": 4.676, |
|
"eval_steps_per_second": 1.169, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.6063, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.8458333611488342, |
|
"eval_loss": 0.4531818926334381, |
|
"eval_runtime": 307.4247, |
|
"eval_samples_per_second": 4.684, |
|
"eval_steps_per_second": 1.171, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.5105, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.831944465637207, |
|
"eval_loss": 0.45398369431495667, |
|
"eval_runtime": 306.6509, |
|
"eval_samples_per_second": 4.696, |
|
"eval_steps_per_second": 1.174, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.6397, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.8180555701255798, |
|
"eval_loss": 0.5045942664146423, |
|
"eval_runtime": 306.9645, |
|
"eval_samples_per_second": 4.691, |
|
"eval_steps_per_second": 1.173, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.5338, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.8534722328186035, |
|
"eval_loss": 0.42865610122680664, |
|
"eval_runtime": 308.1509, |
|
"eval_samples_per_second": 4.673, |
|
"eval_steps_per_second": 1.168, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.559, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.8569444417953491, |
|
"eval_loss": 0.4333796501159668, |
|
"eval_runtime": 308.3459, |
|
"eval_samples_per_second": 4.67, |
|
"eval_steps_per_second": 1.168, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.6952, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.863194465637207, |
|
"eval_loss": 0.4117491543292999, |
|
"eval_runtime": 306.5802, |
|
"eval_samples_per_second": 4.697, |
|
"eval_steps_per_second": 1.174, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.5286, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.8645833134651184, |
|
"eval_loss": 0.42295506596565247, |
|
"eval_runtime": 309.6096, |
|
"eval_samples_per_second": 4.651, |
|
"eval_steps_per_second": 1.163, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.6243, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.8458333611488342, |
|
"eval_loss": 0.4611164927482605, |
|
"eval_runtime": 308.2976, |
|
"eval_samples_per_second": 4.671, |
|
"eval_steps_per_second": 1.168, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.6353, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.8368055820465088, |
|
"eval_loss": 0.4841282367706299, |
|
"eval_runtime": 308.4373, |
|
"eval_samples_per_second": 4.669, |
|
"eval_steps_per_second": 1.167, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.4522, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.8548611402511597, |
|
"eval_loss": 0.4255611300468445, |
|
"eval_runtime": 310.2944, |
|
"eval_samples_per_second": 4.641, |
|
"eval_steps_per_second": 1.16, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.4701, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.8736110925674438, |
|
"eval_loss": 0.36990857124328613, |
|
"eval_runtime": 314.0357, |
|
"eval_samples_per_second": 4.585, |
|
"eval_steps_per_second": 1.146, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.482, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.8659722208976746, |
|
"eval_loss": 0.38237661123275757, |
|
"eval_runtime": 310.2911, |
|
"eval_samples_per_second": 4.641, |
|
"eval_steps_per_second": 1.16, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.305555555555556e-06, |
|
"loss": 0.5023, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.862500011920929, |
|
"eval_loss": 0.40548327565193176, |
|
"eval_runtime": 308.5687, |
|
"eval_samples_per_second": 4.667, |
|
"eval_steps_per_second": 1.167, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 0.3688, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.8673611283302307, |
|
"eval_loss": 0.391024112701416, |
|
"eval_runtime": 308.4404, |
|
"eval_samples_per_second": 4.669, |
|
"eval_steps_per_second": 1.167, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5277777777777778e-06, |
|
"loss": 0.4829, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8708333373069763, |
|
"eval_loss": 0.37891247868537903, |
|
"eval_runtime": 309.3769, |
|
"eval_samples_per_second": 4.655, |
|
"eval_steps_per_second": 1.164, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.388888888888889e-07, |
|
"loss": 0.4577, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8743055462837219, |
|
"eval_loss": 0.3733634948730469, |
|
"eval_runtime": 307.6988, |
|
"eval_samples_per_second": 4.68, |
|
"eval_steps_per_second": 1.17, |
|
"step": 720 |
|
} |
|
], |
|
"max_steps": 720, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.1336795856108278e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|