{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.861111111111112e-05, "loss": 2.0531, "step": 10 }, { "epoch": 0.01, "eval_accuracy": 0.13333334028720856, "eval_loss": 2.159055471420288, "eval_runtime": 309.1055, "eval_samples_per_second": 4.659, "eval_steps_per_second": 1.165, "step": 10 }, { "epoch": 0.03, "learning_rate": 9.722222222222223e-05, "loss": 2.1794, "step": 20 }, { "epoch": 0.03, "eval_accuracy": 0.21111111342906952, "eval_loss": 2.074418783187866, "eval_runtime": 308.7598, "eval_samples_per_second": 4.664, "eval_steps_per_second": 1.166, "step": 20 }, { "epoch": 0.04, "learning_rate": 9.583333333333334e-05, "loss": 2.0726, "step": 30 }, { "epoch": 0.04, "eval_accuracy": 0.20277777314186096, "eval_loss": 2.005201578140259, "eval_runtime": 309.0551, "eval_samples_per_second": 4.659, "eval_steps_per_second": 1.165, "step": 30 }, { "epoch": 0.06, "learning_rate": 9.444444444444444e-05, "loss": 1.9514, "step": 40 }, { "epoch": 0.06, "eval_accuracy": 0.22430555522441864, "eval_loss": 1.9686204195022583, "eval_runtime": 310.3523, "eval_samples_per_second": 4.64, "eval_steps_per_second": 1.16, "step": 40 }, { "epoch": 0.07, "learning_rate": 9.305555555555556e-05, "loss": 1.8919, "step": 50 }, { "epoch": 0.07, "eval_accuracy": 0.22708334028720856, "eval_loss": 2.052751064300537, "eval_runtime": 307.799, "eval_samples_per_second": 4.678, "eval_steps_per_second": 1.17, "step": 50 }, { "epoch": 0.08, "learning_rate": 9.166666666666667e-05, "loss": 1.9575, "step": 60 }, { "epoch": 0.08, "eval_accuracy": 0.17916665971279144, "eval_loss": 2.038191080093384, "eval_runtime": 307.6143, "eval_samples_per_second": 4.681, "eval_steps_per_second": 1.17, "step": 60 }, { "epoch": 0.1, "learning_rate": 9.027777777777779e-05, "loss": 1.9836, "step": 70 }, { "epoch": 0.1, "eval_accuracy": 0.2701388895511627, "eval_loss": 1.7942837476730347, "eval_runtime": 307.309, "eval_samples_per_second": 4.686, "eval_steps_per_second": 1.171, "step": 70 }, { "epoch": 0.11, "learning_rate": 8.888888888888889e-05, "loss": 1.9651, "step": 80 }, { "epoch": 0.11, "eval_accuracy": 0.2819444537162781, "eval_loss": 1.7969180345535278, "eval_runtime": 310.9128, "eval_samples_per_second": 4.632, "eval_steps_per_second": 1.158, "step": 80 }, { "epoch": 0.12, "learning_rate": 8.75e-05, "loss": 1.9382, "step": 90 }, { "epoch": 0.12, "eval_accuracy": 0.27986112236976624, "eval_loss": 1.8196858167648315, "eval_runtime": 307.5957, "eval_samples_per_second": 4.681, "eval_steps_per_second": 1.17, "step": 90 }, { "epoch": 0.14, "learning_rate": 8.611111111111112e-05, "loss": 1.8197, "step": 100 }, { "epoch": 0.14, "eval_accuracy": 0.18958333134651184, "eval_loss": 1.9207282066345215, "eval_runtime": 308.4662, "eval_samples_per_second": 4.668, "eval_steps_per_second": 1.167, "step": 100 }, { "epoch": 0.15, "learning_rate": 8.472222222222222e-05, "loss": 1.7721, "step": 110 }, { "epoch": 0.15, "eval_accuracy": 0.2881944477558136, "eval_loss": 1.7433712482452393, "eval_runtime": 309.686, "eval_samples_per_second": 4.65, "eval_steps_per_second": 1.162, "step": 110 }, { "epoch": 0.17, "learning_rate": 8.333333333333334e-05, "loss": 1.8501, "step": 120 }, { "epoch": 0.17, "eval_accuracy": 0.24861110746860504, "eval_loss": 1.7375705242156982, "eval_runtime": 305.6275, "eval_samples_per_second": 4.712, "eval_steps_per_second": 1.178, "step": 120 }, { "epoch": 0.18, "learning_rate": 8.194444444444445e-05, "loss": 1.8136, "step": 130 }, { "epoch": 0.18, "eval_accuracy": 0.3291666805744171, "eval_loss": 1.6771501302719116, "eval_runtime": 311.0209, "eval_samples_per_second": 4.63, "eval_steps_per_second": 1.157, "step": 130 }, { "epoch": 0.19, "learning_rate": 8.055555555555556e-05, "loss": 1.672, "step": 140 }, { "epoch": 0.19, "eval_accuracy": 0.29027777910232544, "eval_loss": 1.6961910724639893, "eval_runtime": 308.399, "eval_samples_per_second": 4.669, "eval_steps_per_second": 1.167, "step": 140 }, { "epoch": 0.21, "learning_rate": 7.916666666666666e-05, "loss": 1.7787, "step": 150 }, { "epoch": 0.21, "eval_accuracy": 0.3499999940395355, "eval_loss": 1.654746174812317, "eval_runtime": 306.8392, "eval_samples_per_second": 4.693, "eval_steps_per_second": 1.173, "step": 150 }, { "epoch": 0.22, "learning_rate": 7.777777777777778e-05, "loss": 1.7144, "step": 160 }, { "epoch": 0.22, "eval_accuracy": 0.36597222089767456, "eval_loss": 1.6828863620758057, "eval_runtime": 306.6648, "eval_samples_per_second": 4.696, "eval_steps_per_second": 1.174, "step": 160 }, { "epoch": 0.24, "learning_rate": 7.638888888888889e-05, "loss": 1.558, "step": 170 }, { "epoch": 0.24, "eval_accuracy": 0.3430555462837219, "eval_loss": 1.6398437023162842, "eval_runtime": 312.3754, "eval_samples_per_second": 4.61, "eval_steps_per_second": 1.152, "step": 170 }, { "epoch": 0.25, "learning_rate": 7.500000000000001e-05, "loss": 1.5197, "step": 180 }, { "epoch": 0.25, "eval_accuracy": 0.3444444537162781, "eval_loss": 1.6192543506622314, "eval_runtime": 308.0294, "eval_samples_per_second": 4.675, "eval_steps_per_second": 1.169, "step": 180 }, { "epoch": 0.26, "learning_rate": 7.361111111111111e-05, "loss": 1.5088, "step": 190 }, { "epoch": 0.26, "eval_accuracy": 0.37708333134651184, "eval_loss": 1.5085088014602661, "eval_runtime": 305.6283, "eval_samples_per_second": 4.712, "eval_steps_per_second": 1.178, "step": 190 }, { "epoch": 0.28, "learning_rate": 7.222222222222222e-05, "loss": 1.7442, "step": 200 }, { "epoch": 0.28, "eval_accuracy": 0.43263888359069824, "eval_loss": 1.4439728260040283, "eval_runtime": 305.4203, "eval_samples_per_second": 4.715, "eval_steps_per_second": 1.179, "step": 200 }, { "epoch": 0.29, "learning_rate": 7.083333333333334e-05, "loss": 1.6021, "step": 210 }, { "epoch": 0.29, "eval_accuracy": 0.5138888955116272, "eval_loss": 1.3366789817810059, "eval_runtime": 307.6606, "eval_samples_per_second": 4.68, "eval_steps_per_second": 1.17, "step": 210 }, { "epoch": 0.31, "learning_rate": 6.944444444444444e-05, "loss": 1.607, "step": 220 }, { "epoch": 0.31, "eval_accuracy": 0.4756944477558136, "eval_loss": 1.400604248046875, "eval_runtime": 305.6563, "eval_samples_per_second": 4.711, "eval_steps_per_second": 1.178, "step": 220 }, { "epoch": 0.32, "learning_rate": 6.805555555555556e-05, "loss": 1.4012, "step": 230 }, { "epoch": 0.32, "eval_accuracy": 0.5361111164093018, "eval_loss": 1.2899019718170166, "eval_runtime": 305.6072, "eval_samples_per_second": 4.712, "eval_steps_per_second": 1.178, "step": 230 }, { "epoch": 0.33, "learning_rate": 6.666666666666667e-05, "loss": 1.4094, "step": 240 }, { "epoch": 0.33, "eval_accuracy": 0.5868055820465088, "eval_loss": 1.1848715543746948, "eval_runtime": 310.5577, "eval_samples_per_second": 4.637, "eval_steps_per_second": 1.159, "step": 240 }, { "epoch": 0.35, "learning_rate": 6.527777777777778e-05, "loss": 1.3252, "step": 250 }, { "epoch": 0.35, "eval_accuracy": 0.5326389074325562, "eval_loss": 1.2207249402999878, "eval_runtime": 305.3735, "eval_samples_per_second": 4.716, "eval_steps_per_second": 1.179, "step": 250 }, { "epoch": 0.36, "learning_rate": 6.388888888888888e-05, "loss": 1.2939, "step": 260 }, { "epoch": 0.36, "eval_accuracy": 0.5722222328186035, "eval_loss": 1.127318263053894, "eval_runtime": 306.1632, "eval_samples_per_second": 4.703, "eval_steps_per_second": 1.176, "step": 260 }, { "epoch": 0.38, "learning_rate": 6.25e-05, "loss": 1.1591, "step": 270 }, { "epoch": 0.38, "eval_accuracy": 0.6090278029441833, "eval_loss": 1.0614306926727295, "eval_runtime": 306.437, "eval_samples_per_second": 4.699, "eval_steps_per_second": 1.175, "step": 270 }, { "epoch": 0.39, "learning_rate": 6.111111111111112e-05, "loss": 1.421, "step": 280 }, { "epoch": 0.39, "eval_accuracy": 0.5416666865348816, "eval_loss": 1.2863346338272095, "eval_runtime": 306.6206, "eval_samples_per_second": 4.696, "eval_steps_per_second": 1.174, "step": 280 }, { "epoch": 0.4, "learning_rate": 5.972222222222223e-05, "loss": 1.3071, "step": 290 }, { "epoch": 0.4, "eval_accuracy": 0.5666666626930237, "eval_loss": 1.1455507278442383, "eval_runtime": 308.2962, "eval_samples_per_second": 4.671, "eval_steps_per_second": 1.168, "step": 290 }, { "epoch": 0.42, "learning_rate": 5.833333333333334e-05, "loss": 1.1463, "step": 300 }, { "epoch": 0.42, "eval_accuracy": 0.6270833611488342, "eval_loss": 1.0083465576171875, "eval_runtime": 306.6088, "eval_samples_per_second": 4.697, "eval_steps_per_second": 1.174, "step": 300 }, { "epoch": 0.43, "learning_rate": 5.6944444444444445e-05, "loss": 1.149, "step": 310 }, { "epoch": 0.43, "eval_accuracy": 0.6000000238418579, "eval_loss": 1.039494276046753, "eval_runtime": 307.8298, "eval_samples_per_second": 4.678, "eval_steps_per_second": 1.169, "step": 310 }, { "epoch": 0.44, "learning_rate": 5.555555555555556e-05, "loss": 1.0853, "step": 320 }, { "epoch": 0.44, "eval_accuracy": 0.6319444179534912, "eval_loss": 0.9475428462028503, "eval_runtime": 310.5495, "eval_samples_per_second": 4.637, "eval_steps_per_second": 1.159, "step": 320 }, { "epoch": 0.46, "learning_rate": 5.4166666666666664e-05, "loss": 1.0467, "step": 330 }, { "epoch": 0.46, "eval_accuracy": 0.6618055701255798, "eval_loss": 0.938763439655304, "eval_runtime": 305.3429, "eval_samples_per_second": 4.716, "eval_steps_per_second": 1.179, "step": 330 }, { "epoch": 0.47, "learning_rate": 5.2777777777777784e-05, "loss": 1.2562, "step": 340 }, { "epoch": 0.47, "eval_accuracy": 0.5874999761581421, "eval_loss": 1.058403491973877, "eval_runtime": 309.582, "eval_samples_per_second": 4.651, "eval_steps_per_second": 1.163, "step": 340 }, { "epoch": 0.49, "learning_rate": 5.138888888888889e-05, "loss": 1.0267, "step": 350 }, { "epoch": 0.49, "eval_accuracy": 0.6819444298744202, "eval_loss": 0.9059543013572693, "eval_runtime": 308.1955, "eval_samples_per_second": 4.672, "eval_steps_per_second": 1.168, "step": 350 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 0.7126, "step": 360 }, { "epoch": 0.5, "eval_accuracy": 0.6243055462837219, "eval_loss": 0.9829866290092468, "eval_runtime": 309.2257, "eval_samples_per_second": 4.657, "eval_steps_per_second": 1.164, "step": 360 }, { "epoch": 0.51, "learning_rate": 4.8611111111111115e-05, "loss": 1.0441, "step": 370 }, { "epoch": 0.51, "eval_accuracy": 0.675000011920929, "eval_loss": 0.909841001033783, "eval_runtime": 305.6628, "eval_samples_per_second": 4.711, "eval_steps_per_second": 1.178, "step": 370 }, { "epoch": 0.53, "learning_rate": 4.722222222222222e-05, "loss": 1.0524, "step": 380 }, { "epoch": 0.53, "eval_accuracy": 0.6458333134651184, "eval_loss": 0.954737663269043, "eval_runtime": 308.967, "eval_samples_per_second": 4.661, "eval_steps_per_second": 1.165, "step": 380 }, { "epoch": 0.54, "learning_rate": 4.5833333333333334e-05, "loss": 0.9355, "step": 390 }, { "epoch": 0.54, "eval_accuracy": 0.6958333253860474, "eval_loss": 0.7970058917999268, "eval_runtime": 306.7636, "eval_samples_per_second": 4.694, "eval_steps_per_second": 1.174, "step": 390 }, { "epoch": 0.56, "learning_rate": 4.4444444444444447e-05, "loss": 1.0505, "step": 400 }, { "epoch": 0.56, "eval_accuracy": 0.6777777671813965, "eval_loss": 0.8817840814590454, "eval_runtime": 306.413, "eval_samples_per_second": 4.7, "eval_steps_per_second": 1.175, "step": 400 }, { "epoch": 0.57, "learning_rate": 4.305555555555556e-05, "loss": 0.7672, "step": 410 }, { "epoch": 0.57, "eval_accuracy": 0.7041666507720947, "eval_loss": 0.8178666830062866, "eval_runtime": 306.6048, "eval_samples_per_second": 4.697, "eval_steps_per_second": 1.174, "step": 410 }, { "epoch": 0.58, "learning_rate": 4.166666666666667e-05, "loss": 0.9586, "step": 420 }, { "epoch": 0.58, "eval_accuracy": 0.761805534362793, "eval_loss": 0.7114083766937256, "eval_runtime": 307.286, "eval_samples_per_second": 4.686, "eval_steps_per_second": 1.172, "step": 420 }, { "epoch": 0.6, "learning_rate": 4.027777777777778e-05, "loss": 1.0177, "step": 430 }, { "epoch": 0.6, "eval_accuracy": 0.7590277791023254, "eval_loss": 0.7004179954528809, "eval_runtime": 305.701, "eval_samples_per_second": 4.71, "eval_steps_per_second": 1.178, "step": 430 }, { "epoch": 0.61, "learning_rate": 3.888888888888889e-05, "loss": 0.7575, "step": 440 }, { "epoch": 0.61, "eval_accuracy": 0.7888888716697693, "eval_loss": 0.6390997171401978, "eval_runtime": 305.5003, "eval_samples_per_second": 4.714, "eval_steps_per_second": 1.178, "step": 440 }, { "epoch": 0.62, "learning_rate": 3.7500000000000003e-05, "loss": 0.7834, "step": 450 }, { "epoch": 0.62, "eval_accuracy": 0.7604166865348816, "eval_loss": 0.6542982459068298, "eval_runtime": 306.1149, "eval_samples_per_second": 4.704, "eval_steps_per_second": 1.176, "step": 450 }, { "epoch": 0.64, "learning_rate": 3.611111111111111e-05, "loss": 0.644, "step": 460 }, { "epoch": 0.64, "eval_accuracy": 0.793055534362793, "eval_loss": 0.6248801946640015, "eval_runtime": 306.8175, "eval_samples_per_second": 4.693, "eval_steps_per_second": 1.173, "step": 460 }, { "epoch": 0.65, "learning_rate": 3.472222222222222e-05, "loss": 0.7928, "step": 470 }, { "epoch": 0.65, "eval_accuracy": 0.7715277671813965, "eval_loss": 0.6328664422035217, "eval_runtime": 308.2672, "eval_samples_per_second": 4.671, "eval_steps_per_second": 1.168, "step": 470 }, { "epoch": 0.67, "learning_rate": 3.3333333333333335e-05, "loss": 0.7773, "step": 480 }, { "epoch": 0.67, "eval_accuracy": 0.8125, "eval_loss": 0.5492955446243286, "eval_runtime": 305.9298, "eval_samples_per_second": 4.707, "eval_steps_per_second": 1.177, "step": 480 }, { "epoch": 0.68, "learning_rate": 3.194444444444444e-05, "loss": 0.6163, "step": 490 }, { "epoch": 0.68, "eval_accuracy": 0.7958333492279053, "eval_loss": 0.5877541303634644, "eval_runtime": 306.2121, "eval_samples_per_second": 4.703, "eval_steps_per_second": 1.176, "step": 490 }, { "epoch": 0.69, "learning_rate": 3.055555555555556e-05, "loss": 0.6773, "step": 500 }, { "epoch": 0.69, "eval_accuracy": 0.8222222328186035, "eval_loss": 0.5475648641586304, "eval_runtime": 306.3418, "eval_samples_per_second": 4.701, "eval_steps_per_second": 1.175, "step": 500 }, { "epoch": 0.71, "learning_rate": 2.916666666666667e-05, "loss": 0.7042, "step": 510 }, { "epoch": 0.71, "eval_accuracy": 0.8159722089767456, "eval_loss": 0.5389293432235718, "eval_runtime": 312.5784, "eval_samples_per_second": 4.607, "eval_steps_per_second": 1.152, "step": 510 }, { "epoch": 0.72, "learning_rate": 2.777777777777778e-05, "loss": 0.6488, "step": 520 }, { "epoch": 0.72, "eval_accuracy": 0.8180555701255798, "eval_loss": 0.5231082439422607, "eval_runtime": 307.67, "eval_samples_per_second": 4.68, "eval_steps_per_second": 1.17, "step": 520 }, { "epoch": 0.74, "learning_rate": 2.6388888888888892e-05, "loss": 0.7165, "step": 530 }, { "epoch": 0.74, "eval_accuracy": 0.800000011920929, "eval_loss": 0.567889928817749, "eval_runtime": 307.7956, "eval_samples_per_second": 4.678, "eval_steps_per_second": 1.17, "step": 530 }, { "epoch": 0.75, "learning_rate": 2.5e-05, "loss": 0.716, "step": 540 }, { "epoch": 0.75, "eval_accuracy": 0.8340277671813965, "eval_loss": 0.49685245752334595, "eval_runtime": 308.7124, "eval_samples_per_second": 4.665, "eval_steps_per_second": 1.166, "step": 540 }, { "epoch": 0.76, "learning_rate": 2.361111111111111e-05, "loss": 0.8907, "step": 550 }, { "epoch": 0.76, "eval_accuracy": 0.856249988079071, "eval_loss": 0.45114219188690186, "eval_runtime": 308.861, "eval_samples_per_second": 4.662, "eval_steps_per_second": 1.166, "step": 550 }, { "epoch": 0.78, "learning_rate": 2.2222222222222223e-05, "loss": 0.7081, "step": 560 }, { "epoch": 0.78, "eval_accuracy": 0.8423610925674438, "eval_loss": 0.4767354726791382, "eval_runtime": 307.9876, "eval_samples_per_second": 4.676, "eval_steps_per_second": 1.169, "step": 560 }, { "epoch": 0.79, "learning_rate": 2.0833333333333336e-05, "loss": 0.6063, "step": 570 }, { "epoch": 0.79, "eval_accuracy": 0.8458333611488342, "eval_loss": 0.4531818926334381, "eval_runtime": 307.4247, "eval_samples_per_second": 4.684, "eval_steps_per_second": 1.171, "step": 570 }, { "epoch": 0.81, "learning_rate": 1.9444444444444445e-05, "loss": 0.5105, "step": 580 }, { "epoch": 0.81, "eval_accuracy": 0.831944465637207, "eval_loss": 0.45398369431495667, "eval_runtime": 306.6509, "eval_samples_per_second": 4.696, "eval_steps_per_second": 1.174, "step": 580 }, { "epoch": 0.82, "learning_rate": 1.8055555555555555e-05, "loss": 0.6397, "step": 590 }, { "epoch": 0.82, "eval_accuracy": 0.8180555701255798, "eval_loss": 0.5045942664146423, "eval_runtime": 306.9645, "eval_samples_per_second": 4.691, "eval_steps_per_second": 1.173, "step": 590 }, { "epoch": 0.83, "learning_rate": 1.6666666666666667e-05, "loss": 0.5338, "step": 600 }, { "epoch": 0.83, "eval_accuracy": 0.8534722328186035, "eval_loss": 0.42865610122680664, "eval_runtime": 308.1509, "eval_samples_per_second": 4.673, "eval_steps_per_second": 1.168, "step": 600 }, { "epoch": 0.85, "learning_rate": 1.527777777777778e-05, "loss": 0.559, "step": 610 }, { "epoch": 0.85, "eval_accuracy": 0.8569444417953491, "eval_loss": 0.4333796501159668, "eval_runtime": 308.3459, "eval_samples_per_second": 4.67, "eval_steps_per_second": 1.168, "step": 610 }, { "epoch": 0.86, "learning_rate": 1.388888888888889e-05, "loss": 0.6952, "step": 620 }, { "epoch": 0.86, "eval_accuracy": 0.863194465637207, "eval_loss": 0.4117491543292999, "eval_runtime": 306.5802, "eval_samples_per_second": 4.697, "eval_steps_per_second": 1.174, "step": 620 }, { "epoch": 0.88, "learning_rate": 1.25e-05, "loss": 0.5286, "step": 630 }, { "epoch": 0.88, "eval_accuracy": 0.8645833134651184, "eval_loss": 0.42295506596565247, "eval_runtime": 309.6096, "eval_samples_per_second": 4.651, "eval_steps_per_second": 1.163, "step": 630 }, { "epoch": 0.89, "learning_rate": 1.1111111111111112e-05, "loss": 0.6243, "step": 640 }, { "epoch": 0.89, "eval_accuracy": 0.8458333611488342, "eval_loss": 0.4611164927482605, "eval_runtime": 308.2976, "eval_samples_per_second": 4.671, "eval_steps_per_second": 1.168, "step": 640 }, { "epoch": 0.9, "learning_rate": 9.722222222222223e-06, "loss": 0.6353, "step": 650 }, { "epoch": 0.9, "eval_accuracy": 0.8368055820465088, "eval_loss": 0.4841282367706299, "eval_runtime": 308.4373, "eval_samples_per_second": 4.669, "eval_steps_per_second": 1.167, "step": 650 }, { "epoch": 0.92, "learning_rate": 8.333333333333334e-06, "loss": 0.4522, "step": 660 }, { "epoch": 0.92, "eval_accuracy": 0.8548611402511597, "eval_loss": 0.4255611300468445, "eval_runtime": 310.2944, "eval_samples_per_second": 4.641, "eval_steps_per_second": 1.16, "step": 660 }, { "epoch": 0.93, "learning_rate": 6.944444444444445e-06, "loss": 0.4701, "step": 670 }, { "epoch": 0.93, "eval_accuracy": 0.8736110925674438, "eval_loss": 0.36990857124328613, "eval_runtime": 314.0357, "eval_samples_per_second": 4.585, "eval_steps_per_second": 1.146, "step": 670 }, { "epoch": 0.94, "learning_rate": 5.555555555555556e-06, "loss": 0.482, "step": 680 }, { "epoch": 0.94, "eval_accuracy": 0.8659722208976746, "eval_loss": 0.38237661123275757, "eval_runtime": 310.2911, "eval_samples_per_second": 4.641, "eval_steps_per_second": 1.16, "step": 680 }, { "epoch": 0.96, "learning_rate": 4.305555555555556e-06, "loss": 0.5023, "step": 690 }, { "epoch": 0.96, "eval_accuracy": 0.862500011920929, "eval_loss": 0.40548327565193176, "eval_runtime": 308.5687, "eval_samples_per_second": 4.667, "eval_steps_per_second": 1.167, "step": 690 }, { "epoch": 0.97, "learning_rate": 2.916666666666667e-06, "loss": 0.3688, "step": 700 }, { "epoch": 0.97, "eval_accuracy": 0.8673611283302307, "eval_loss": 0.391024112701416, "eval_runtime": 308.4404, "eval_samples_per_second": 4.669, "eval_steps_per_second": 1.167, "step": 700 }, { "epoch": 0.99, "learning_rate": 1.5277777777777778e-06, "loss": 0.4829, "step": 710 }, { "epoch": 0.99, "eval_accuracy": 0.8708333373069763, "eval_loss": 0.37891247868537903, "eval_runtime": 309.3769, "eval_samples_per_second": 4.655, "eval_steps_per_second": 1.164, "step": 710 }, { "epoch": 1.0, "learning_rate": 1.388888888888889e-07, "loss": 0.4577, "step": 720 }, { "epoch": 1.0, "eval_accuracy": 0.8743055462837219, "eval_loss": 0.3733634948730469, "eval_runtime": 307.6988, "eval_samples_per_second": 4.68, "eval_steps_per_second": 1.17, "step": 720 } ], "max_steps": 720, "num_train_epochs": 1, "total_flos": 2.1336795856108278e+18, "trial_name": null, "trial_params": null }