{ "best_metric": 2.2304611206054688, "best_model_checkpoint": "dogs_70_breeds_image_detection/checkpoint-29100", "epoch": 100.0, "eval_steps": 500, "global_step": 29100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.03811369509043928, "eval_loss": 4.224255561828613, "eval_model_preparation_time": 0.005, "eval_runtime": 53.4638, "eval_samples_per_second": 115.817, "eval_steps_per_second": 14.477, "step": 291 }, { "epoch": 1.718213058419244, "grad_norm": 1.1522711515426636, "learning_rate": 9.845094664371773e-07, "loss": 4.2286, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.08301033591731266, "eval_loss": 4.186123371124268, "eval_model_preparation_time": 0.005, "eval_runtime": 53.282, "eval_samples_per_second": 116.212, "eval_steps_per_second": 14.526, "step": 582 }, { "epoch": 3.0, "eval_accuracy": 0.15536175710594316, "eval_loss": 4.142723560333252, "eval_model_preparation_time": 0.005, "eval_runtime": 53.6144, "eval_samples_per_second": 115.491, "eval_steps_per_second": 14.436, "step": 873 }, { "epoch": 3.436426116838488, "grad_norm": 1.341554880142212, "learning_rate": 9.672977624784853e-07, "loss": 4.1533, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.24563953488372092, "eval_loss": 4.09642219543457, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2746, "eval_samples_per_second": 116.228, "eval_steps_per_second": 14.529, "step": 1164 }, { "epoch": 5.0, "eval_accuracy": 0.35852713178294576, "eval_loss": 4.050450801849365, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2922, "eval_samples_per_second": 116.19, "eval_steps_per_second": 14.524, "step": 1455 }, { "epoch": 5.154639175257732, "grad_norm": 1.4653651714324951, "learning_rate": 9.500860585197934e-07, "loss": 4.0657, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.44993540051679587, "eval_loss": 4.003639221191406, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2227, "eval_samples_per_second": 116.341, "eval_steps_per_second": 14.543, "step": 1746 }, { "epoch": 6.872852233676976, "grad_norm": 1.3512729406356812, "learning_rate": 9.328743545611016e-07, "loss": 3.9797, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.5489341085271318, "eval_loss": 3.956012725830078, "eval_model_preparation_time": 0.005, "eval_runtime": 53.824, "eval_samples_per_second": 115.042, "eval_steps_per_second": 14.38, "step": 2037 }, { "epoch": 8.0, "eval_accuracy": 0.6153100775193798, "eval_loss": 3.908579111099243, "eval_model_preparation_time": 0.005, "eval_runtime": 53.6148, "eval_samples_per_second": 115.491, "eval_steps_per_second": 14.436, "step": 2328 }, { "epoch": 8.59106529209622, "grad_norm": 1.6852034330368042, "learning_rate": 9.156626506024095e-07, "loss": 3.8945, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.6690891472868217, "eval_loss": 3.861515522003174, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3317, "eval_samples_per_second": 116.104, "eval_steps_per_second": 14.513, "step": 2619 }, { "epoch": 10.0, "eval_accuracy": 0.7041343669250646, "eval_loss": 3.8151276111602783, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7261, "eval_samples_per_second": 115.251, "eval_steps_per_second": 14.406, "step": 2910 }, { "epoch": 10.309278350515465, "grad_norm": 1.4977772235870361, "learning_rate": 8.984509466437177e-07, "loss": 3.8073, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.7333656330749354, "eval_loss": 3.767709732055664, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2216, "eval_samples_per_second": 116.344, "eval_steps_per_second": 14.543, "step": 3201 }, { "epoch": 12.0, "eval_accuracy": 0.7543604651162791, "eval_loss": 3.7216570377349854, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8673, "eval_samples_per_second": 117.123, "eval_steps_per_second": 14.64, "step": 3492 }, { "epoch": 12.027491408934708, "grad_norm": 1.4954266548156738, "learning_rate": 8.812392426850258e-07, "loss": 3.7217, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.7698643410852714, "eval_loss": 3.6769490242004395, "eval_model_preparation_time": 0.005, "eval_runtime": 53.1587, "eval_samples_per_second": 116.481, "eval_steps_per_second": 14.56, "step": 3783 }, { "epoch": 13.745704467353953, "grad_norm": 1.6733068227767944, "learning_rate": 8.640275387263338e-07, "loss": 3.6412, "step": 4000 }, { "epoch": 14.0, "eval_accuracy": 0.7805232558139535, "eval_loss": 3.6332757472991943, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6862, "eval_samples_per_second": 117.526, "eval_steps_per_second": 14.691, "step": 4074 }, { "epoch": 15.0, "eval_accuracy": 0.7876291989664083, "eval_loss": 3.58959698677063, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5851, "eval_samples_per_second": 117.752, "eval_steps_per_second": 14.719, "step": 4365 }, { "epoch": 15.463917525773196, "grad_norm": 1.5803037881851196, "learning_rate": 8.46815834767642e-07, "loss": 3.56, "step": 4500 }, { "epoch": 16.0, "eval_accuracy": 0.7963501291989664, "eval_loss": 3.547574520111084, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7736, "eval_samples_per_second": 117.331, "eval_steps_per_second": 14.666, "step": 4656 }, { "epoch": 17.0, "eval_accuracy": 0.8026485788113695, "eval_loss": 3.507601737976074, "eval_model_preparation_time": 0.005, "eval_runtime": 53.0827, "eval_samples_per_second": 116.648, "eval_steps_per_second": 14.581, "step": 4947 }, { "epoch": 17.18213058419244, "grad_norm": 1.741551399230957, "learning_rate": 8.2960413080895e-07, "loss": 3.4839, "step": 5000 }, { "epoch": 18.0, "eval_accuracy": 0.8086240310077519, "eval_loss": 3.467883825302124, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7555, "eval_samples_per_second": 117.372, "eval_steps_per_second": 14.671, "step": 5238 }, { "epoch": 18.900343642611684, "grad_norm": 1.6814472675323486, "learning_rate": 8.123924268502581e-07, "loss": 3.4135, "step": 5500 }, { "epoch": 19.0, "eval_accuracy": 0.8162144702842378, "eval_loss": 3.429760694503784, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6582, "eval_samples_per_second": 117.589, "eval_steps_per_second": 14.699, "step": 5529 }, { "epoch": 20.0, "eval_accuracy": 0.8207364341085271, "eval_loss": 3.3927395343780518, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6007, "eval_samples_per_second": 117.717, "eval_steps_per_second": 14.715, "step": 5820 }, { "epoch": 20.61855670103093, "grad_norm": 1.8037664890289307, "learning_rate": 7.951807228915662e-07, "loss": 3.3452, "step": 6000 }, { "epoch": 21.0, "eval_accuracy": 0.8254198966408268, "eval_loss": 3.3561041355133057, "eval_model_preparation_time": 0.005, "eval_runtime": 52.766, "eval_samples_per_second": 117.348, "eval_steps_per_second": 14.669, "step": 6111 }, { "epoch": 22.0, "eval_accuracy": 0.8283268733850129, "eval_loss": 3.3211123943328857, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9233, "eval_samples_per_second": 117.0, "eval_steps_per_second": 14.625, "step": 6402 }, { "epoch": 22.33676975945017, "grad_norm": 1.7638081312179565, "learning_rate": 7.779690189328744e-07, "loss": 3.2809, "step": 6500 }, { "epoch": 23.0, "eval_accuracy": 0.8330103359173127, "eval_loss": 3.2866406440734863, "eval_model_preparation_time": 0.005, "eval_runtime": 53.5163, "eval_samples_per_second": 115.703, "eval_steps_per_second": 14.463, "step": 6693 }, { "epoch": 24.0, "eval_accuracy": 0.838501291989664, "eval_loss": 3.2538506984710693, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3194, "eval_samples_per_second": 116.13, "eval_steps_per_second": 14.516, "step": 6984 }, { "epoch": 24.054982817869416, "grad_norm": 2.052138328552246, "learning_rate": 7.607573149741824e-07, "loss": 3.2174, "step": 7000 }, { "epoch": 25.0, "eval_accuracy": 0.8410852713178295, "eval_loss": 3.221360445022583, "eval_model_preparation_time": 0.005, "eval_runtime": 53.5019, "eval_samples_per_second": 115.734, "eval_steps_per_second": 14.467, "step": 7275 }, { "epoch": 25.77319587628866, "grad_norm": 1.6119282245635986, "learning_rate": 7.435456110154906e-07, "loss": 3.1592, "step": 7500 }, { "epoch": 26.0, "eval_accuracy": 0.8443152454780362, "eval_loss": 3.189776659011841, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7691, "eval_samples_per_second": 115.159, "eval_steps_per_second": 14.395, "step": 7566 }, { "epoch": 27.0, "eval_accuracy": 0.8464147286821705, "eval_loss": 3.1592888832092285, "eval_model_preparation_time": 0.005, "eval_runtime": 53.6596, "eval_samples_per_second": 115.394, "eval_steps_per_second": 14.424, "step": 7857 }, { "epoch": 27.491408934707902, "grad_norm": 1.8645163774490356, "learning_rate": 7.263339070567986e-07, "loss": 3.1041, "step": 8000 }, { "epoch": 28.0, "eval_accuracy": 0.8481912144702842, "eval_loss": 3.1283702850341797, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3822, "eval_samples_per_second": 115.994, "eval_steps_per_second": 14.499, "step": 8148 }, { "epoch": 29.0, "eval_accuracy": 0.8510981912144703, "eval_loss": 3.1002068519592285, "eval_model_preparation_time": 0.005, "eval_runtime": 53.0972, "eval_samples_per_second": 116.616, "eval_steps_per_second": 14.577, "step": 8439 }, { "epoch": 29.209621993127147, "grad_norm": 2.0290517807006836, "learning_rate": 7.091222030981066e-07, "loss": 3.047, "step": 8500 }, { "epoch": 30.0, "eval_accuracy": 0.853843669250646, "eval_loss": 3.071702480316162, "eval_model_preparation_time": 0.005, "eval_runtime": 53.8219, "eval_samples_per_second": 115.046, "eval_steps_per_second": 14.381, "step": 8730 }, { "epoch": 30.927835051546392, "grad_norm": 1.9484608173370361, "learning_rate": 6.919104991394148e-07, "loss": 2.9999, "step": 9000 }, { "epoch": 31.0, "eval_accuracy": 0.8552971576227391, "eval_loss": 3.044823408126831, "eval_model_preparation_time": 0.005, "eval_runtime": 54.0725, "eval_samples_per_second": 114.513, "eval_steps_per_second": 14.314, "step": 9021 }, { "epoch": 32.0, "eval_accuracy": 0.8562661498708011, "eval_loss": 3.0170109272003174, "eval_model_preparation_time": 0.005, "eval_runtime": 54.2696, "eval_samples_per_second": 114.097, "eval_steps_per_second": 14.262, "step": 9312 }, { "epoch": 32.64604810996563, "grad_norm": 1.9190058708190918, "learning_rate": 6.746987951807228e-07, "loss": 2.951, "step": 9500 }, { "epoch": 33.0, "eval_accuracy": 0.8580426356589147, "eval_loss": 2.9910173416137695, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2749, "eval_samples_per_second": 116.227, "eval_steps_per_second": 14.528, "step": 9603 }, { "epoch": 34.0, "eval_accuracy": 0.8594961240310077, "eval_loss": 2.965754747390747, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3652, "eval_samples_per_second": 116.031, "eval_steps_per_second": 14.504, "step": 9894 }, { "epoch": 34.36426116838488, "grad_norm": 2.104431390762329, "learning_rate": 6.57487091222031e-07, "loss": 2.8999, "step": 10000 }, { "epoch": 35.0, "eval_accuracy": 0.8609496124031008, "eval_loss": 2.9400594234466553, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9247, "eval_samples_per_second": 116.996, "eval_steps_per_second": 14.625, "step": 10185 }, { "epoch": 36.0, "eval_accuracy": 0.8627260981912145, "eval_loss": 2.9151716232299805, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9313, "eval_samples_per_second": 116.982, "eval_steps_per_second": 14.623, "step": 10476 }, { "epoch": 36.08247422680412, "grad_norm": 1.8955918550491333, "learning_rate": 6.402753872633391e-07, "loss": 2.8589, "step": 10500 }, { "epoch": 37.0, "eval_accuracy": 0.8643410852713178, "eval_loss": 2.8920605182647705, "eval_model_preparation_time": 0.005, "eval_runtime": 53.1759, "eval_samples_per_second": 116.444, "eval_steps_per_second": 14.555, "step": 10767 }, { "epoch": 37.80068728522337, "grad_norm": 2.0740840435028076, "learning_rate": 6.230636833046471e-07, "loss": 2.8145, "step": 11000 }, { "epoch": 38.0, "eval_accuracy": 0.8646640826873385, "eval_loss": 2.8690123558044434, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8932, "eval_samples_per_second": 117.066, "eval_steps_per_second": 14.633, "step": 11058 }, { "epoch": 39.0, "eval_accuracy": 0.8659560723514211, "eval_loss": 2.845964193344116, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8287, "eval_samples_per_second": 117.209, "eval_steps_per_second": 14.651, "step": 11349 }, { "epoch": 39.51890034364261, "grad_norm": 1.9286776781082153, "learning_rate": 6.058519793459552e-07, "loss": 2.7734, "step": 11500 }, { "epoch": 40.0, "eval_accuracy": 0.8674095607235142, "eval_loss": 2.8238561153411865, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9947, "eval_samples_per_second": 116.842, "eval_steps_per_second": 14.605, "step": 11640 }, { "epoch": 41.0, "eval_accuracy": 0.8680555555555556, "eval_loss": 2.8031866550445557, "eval_model_preparation_time": 0.005, "eval_runtime": 53.11, "eval_samples_per_second": 116.588, "eval_steps_per_second": 14.574, "step": 11931 }, { "epoch": 41.23711340206186, "grad_norm": 1.892899751663208, "learning_rate": 5.886402753872633e-07, "loss": 2.7352, "step": 12000 }, { "epoch": 42.0, "eval_accuracy": 0.8704780361757106, "eval_loss": 2.782097816467285, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2125, "eval_samples_per_second": 116.364, "eval_steps_per_second": 14.545, "step": 12222 }, { "epoch": 42.955326460481096, "grad_norm": 1.8656749725341797, "learning_rate": 5.714285714285714e-07, "loss": 2.695, "step": 12500 }, { "epoch": 43.0, "eval_accuracy": 0.8716085271317829, "eval_loss": 2.7616491317749023, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6755, "eval_samples_per_second": 117.55, "eval_steps_per_second": 14.694, "step": 12513 }, { "epoch": 44.0, "eval_accuracy": 0.873062015503876, "eval_loss": 2.7412848472595215, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2571, "eval_samples_per_second": 116.266, "eval_steps_per_second": 14.533, "step": 12804 }, { "epoch": 44.67353951890034, "grad_norm": 1.9270859956741333, "learning_rate": 5.542168674698795e-07, "loss": 2.6587, "step": 13000 }, { "epoch": 45.0, "eval_accuracy": 0.8717700258397932, "eval_loss": 2.721040725708008, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9963, "eval_samples_per_second": 116.838, "eval_steps_per_second": 14.605, "step": 13095 }, { "epoch": 46.0, "eval_accuracy": 0.8741925064599483, "eval_loss": 2.702544689178467, "eval_model_preparation_time": 0.005, "eval_runtime": 53.1374, "eval_samples_per_second": 116.528, "eval_steps_per_second": 14.566, "step": 13386 }, { "epoch": 46.391752577319586, "grad_norm": 2.173807382583618, "learning_rate": 5.370051635111877e-07, "loss": 2.6245, "step": 13500 }, { "epoch": 47.0, "eval_accuracy": 0.8743540051679587, "eval_loss": 2.6839137077331543, "eval_model_preparation_time": 0.005, "eval_runtime": 53.0364, "eval_samples_per_second": 116.75, "eval_steps_per_second": 14.594, "step": 13677 }, { "epoch": 48.0, "eval_accuracy": 0.8761304909560723, "eval_loss": 2.666257619857788, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9704, "eval_samples_per_second": 116.895, "eval_steps_per_second": 14.612, "step": 13968 }, { "epoch": 48.10996563573883, "grad_norm": 2.061178207397461, "learning_rate": 5.197934595524956e-07, "loss": 2.5929, "step": 14000 }, { "epoch": 49.0, "eval_accuracy": 0.875968992248062, "eval_loss": 2.648406505584717, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8971, "eval_samples_per_second": 117.057, "eval_steps_per_second": 14.632, "step": 14259 }, { "epoch": 49.828178694158076, "grad_norm": 2.2463059425354004, "learning_rate": 5.025817555938038e-07, "loss": 2.5577, "step": 14500 }, { "epoch": 50.0, "eval_accuracy": 0.8780684754521964, "eval_loss": 2.630814790725708, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7435, "eval_samples_per_second": 117.398, "eval_steps_per_second": 14.675, "step": 14550 }, { "epoch": 51.0, "eval_accuracy": 0.8793604651162791, "eval_loss": 2.613746404647827, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3318, "eval_samples_per_second": 116.103, "eval_steps_per_second": 14.513, "step": 14841 }, { "epoch": 51.54639175257732, "grad_norm": 2.0589890480041504, "learning_rate": 4.853700516351119e-07, "loss": 2.5317, "step": 15000 }, { "epoch": 52.0, "eval_accuracy": 0.8791989664082688, "eval_loss": 2.5976715087890625, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6721, "eval_samples_per_second": 117.558, "eval_steps_per_second": 14.695, "step": 15132 }, { "epoch": 53.0, "eval_accuracy": 0.8804909560723514, "eval_loss": 2.5813040733337402, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2754, "eval_samples_per_second": 116.226, "eval_steps_per_second": 14.528, "step": 15423 }, { "epoch": 53.264604810996566, "grad_norm": 2.004828929901123, "learning_rate": 4.6815834767641994e-07, "loss": 2.4971, "step": 15500 }, { "epoch": 54.0, "eval_accuracy": 0.8808139534883721, "eval_loss": 2.566027879714966, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3637, "eval_samples_per_second": 116.034, "eval_steps_per_second": 14.504, "step": 15714 }, { "epoch": 54.982817869415804, "grad_norm": 1.9317859411239624, "learning_rate": 4.5094664371772807e-07, "loss": 2.4695, "step": 16000 }, { "epoch": 55.0, "eval_accuracy": 0.8812984496124031, "eval_loss": 2.551055431365967, "eval_model_preparation_time": 0.005, "eval_runtime": 52.4399, "eval_samples_per_second": 118.078, "eval_steps_per_second": 14.76, "step": 16005 }, { "epoch": 56.0, "eval_accuracy": 0.8806524547803618, "eval_loss": 2.536409854888916, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5325, "eval_samples_per_second": 117.87, "eval_steps_per_second": 14.734, "step": 16296 }, { "epoch": 56.70103092783505, "grad_norm": 2.0537452697753906, "learning_rate": 4.3373493975903615e-07, "loss": 2.444, "step": 16500 }, { "epoch": 57.0, "eval_accuracy": 0.8827519379844961, "eval_loss": 2.521925449371338, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5122, "eval_samples_per_second": 117.915, "eval_steps_per_second": 14.739, "step": 16587 }, { "epoch": 58.0, "eval_accuracy": 0.8824289405684754, "eval_loss": 2.508209228515625, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9146, "eval_samples_per_second": 117.019, "eval_steps_per_second": 14.627, "step": 16878 }, { "epoch": 58.419243986254294, "grad_norm": 2.472633123397827, "learning_rate": 4.165232358003442e-07, "loss": 2.4143, "step": 17000 }, { "epoch": 59.0, "eval_accuracy": 0.8845284237726099, "eval_loss": 2.4946951866149902, "eval_model_preparation_time": 0.005, "eval_runtime": 53.263, "eval_samples_per_second": 116.253, "eval_steps_per_second": 14.532, "step": 17169 }, { "epoch": 60.0, "eval_accuracy": 0.8837209302325582, "eval_loss": 2.4811995029449463, "eval_model_preparation_time": 0.005, "eval_runtime": 53.3521, "eval_samples_per_second": 116.059, "eval_steps_per_second": 14.507, "step": 17460 }, { "epoch": 60.13745704467354, "grad_norm": 2.3744590282440186, "learning_rate": 3.9931153184165226e-07, "loss": 2.3935, "step": 17500 }, { "epoch": 61.0, "eval_accuracy": 0.8838824289405685, "eval_loss": 2.468451499938965, "eval_model_preparation_time": 0.005, "eval_runtime": 53.0799, "eval_samples_per_second": 116.654, "eval_steps_per_second": 14.582, "step": 17751 }, { "epoch": 61.855670103092784, "grad_norm": 2.0248947143554688, "learning_rate": 3.820998278829604e-07, "loss": 2.3717, "step": 18000 }, { "epoch": 62.0, "eval_accuracy": 0.8858204134366925, "eval_loss": 2.456092119216919, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7271, "eval_samples_per_second": 117.435, "eval_steps_per_second": 14.679, "step": 18042 }, { "epoch": 63.0, "eval_accuracy": 0.8866279069767442, "eval_loss": 2.444223403930664, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2783, "eval_samples_per_second": 116.22, "eval_steps_per_second": 14.528, "step": 18333 }, { "epoch": 63.57388316151203, "grad_norm": 2.6887121200561523, "learning_rate": 3.648881239242685e-07, "loss": 2.3475, "step": 18500 }, { "epoch": 64.0, "eval_accuracy": 0.8875968992248062, "eval_loss": 2.432462215423584, "eval_model_preparation_time": 0.005, "eval_runtime": 53.2565, "eval_samples_per_second": 116.267, "eval_steps_per_second": 14.533, "step": 18624 }, { "epoch": 65.0, "eval_accuracy": 0.8875968992248062, "eval_loss": 2.42094087600708, "eval_model_preparation_time": 0.005, "eval_runtime": 53.453, "eval_samples_per_second": 115.84, "eval_steps_per_second": 14.48, "step": 18915 }, { "epoch": 65.29209621993127, "grad_norm": 1.8884248733520508, "learning_rate": 3.4767641996557657e-07, "loss": 2.3262, "step": 19000 }, { "epoch": 66.0, "eval_accuracy": 0.8884043927648578, "eval_loss": 2.410001754760742, "eval_model_preparation_time": 0.005, "eval_runtime": 53.9318, "eval_samples_per_second": 114.812, "eval_steps_per_second": 14.351, "step": 19206 }, { "epoch": 67.0, "eval_accuracy": 0.8887273901808785, "eval_loss": 2.3990447521209717, "eval_model_preparation_time": 0.005, "eval_runtime": 53.9445, "eval_samples_per_second": 114.785, "eval_steps_per_second": 14.348, "step": 19497 }, { "epoch": 67.01030927835052, "grad_norm": 2.0446999073028564, "learning_rate": 3.304647160068847e-07, "loss": 2.3043, "step": 19500 }, { "epoch": 68.0, "eval_accuracy": 0.8885658914728682, "eval_loss": 2.3888654708862305, "eval_model_preparation_time": 0.005, "eval_runtime": 53.8449, "eval_samples_per_second": 114.997, "eval_steps_per_second": 14.375, "step": 19788 }, { "epoch": 68.72852233676976, "grad_norm": 2.0572509765625, "learning_rate": 3.132530120481928e-07, "loss": 2.2841, "step": 20000 }, { "epoch": 69.0, "eval_accuracy": 0.8906653746770026, "eval_loss": 2.3787026405334473, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7045, "eval_samples_per_second": 115.298, "eval_steps_per_second": 14.412, "step": 20079 }, { "epoch": 70.0, "eval_accuracy": 0.8903423772609819, "eval_loss": 2.369292736053467, "eval_model_preparation_time": 0.005, "eval_runtime": 53.8822, "eval_samples_per_second": 114.917, "eval_steps_per_second": 14.365, "step": 20370 }, { "epoch": 70.44673539518901, "grad_norm": 1.8821637630462646, "learning_rate": 2.9604130808950087e-07, "loss": 2.2718, "step": 20500 }, { "epoch": 71.0, "eval_accuracy": 0.8906653746770026, "eval_loss": 2.3600070476531982, "eval_model_preparation_time": 0.005, "eval_runtime": 53.9421, "eval_samples_per_second": 114.79, "eval_steps_per_second": 14.349, "step": 20661 }, { "epoch": 72.0, "eval_accuracy": 0.8919573643410853, "eval_loss": 2.350867509841919, "eval_model_preparation_time": 0.005, "eval_runtime": 54.6506, "eval_samples_per_second": 113.302, "eval_steps_per_second": 14.163, "step": 20952 }, { "epoch": 72.16494845360825, "grad_norm": 2.260333299636841, "learning_rate": 2.788296041308089e-07, "loss": 2.2487, "step": 21000 }, { "epoch": 73.0, "eval_accuracy": 0.8924418604651163, "eval_loss": 2.3422977924346924, "eval_model_preparation_time": 0.005, "eval_runtime": 53.8113, "eval_samples_per_second": 115.069, "eval_steps_per_second": 14.384, "step": 21243 }, { "epoch": 73.88316151202748, "grad_norm": 2.037961006164551, "learning_rate": 2.6161790017211703e-07, "loss": 2.2375, "step": 21500 }, { "epoch": 74.0, "eval_accuracy": 0.8916343669250646, "eval_loss": 2.3344151973724365, "eval_model_preparation_time": 0.005, "eval_runtime": 53.589, "eval_samples_per_second": 115.546, "eval_steps_per_second": 14.443, "step": 21534 }, { "epoch": 75.0, "eval_accuracy": 0.8930878552971576, "eval_loss": 2.326270580291748, "eval_model_preparation_time": 0.005, "eval_runtime": 54.0166, "eval_samples_per_second": 114.631, "eval_steps_per_second": 14.329, "step": 21825 }, { "epoch": 75.60137457044674, "grad_norm": 2.1131231784820557, "learning_rate": 2.444061962134251e-07, "loss": 2.2226, "step": 22000 }, { "epoch": 76.0, "eval_accuracy": 0.8922803617571059, "eval_loss": 2.318610668182373, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7404, "eval_samples_per_second": 115.221, "eval_steps_per_second": 14.403, "step": 22116 }, { "epoch": 77.0, "eval_accuracy": 0.8932493540051679, "eval_loss": 2.3115651607513428, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7352, "eval_samples_per_second": 115.232, "eval_steps_per_second": 14.404, "step": 22407 }, { "epoch": 77.31958762886597, "grad_norm": 2.1695611476898193, "learning_rate": 2.2719449225473322e-07, "loss": 2.206, "step": 22500 }, { "epoch": 78.0, "eval_accuracy": 0.8935723514211886, "eval_loss": 2.3046042919158936, "eval_model_preparation_time": 0.005, "eval_runtime": 54.1529, "eval_samples_per_second": 114.343, "eval_steps_per_second": 14.293, "step": 22698 }, { "epoch": 79.0, "eval_accuracy": 0.8943798449612403, "eval_loss": 2.297482490539551, "eval_model_preparation_time": 0.005, "eval_runtime": 52.2319, "eval_samples_per_second": 118.548, "eval_steps_per_second": 14.819, "step": 22989 }, { "epoch": 79.03780068728523, "grad_norm": 2.3586909770965576, "learning_rate": 2.099827882960413e-07, "loss": 2.1947, "step": 23000 }, { "epoch": 80.0, "eval_accuracy": 0.8938953488372093, "eval_loss": 2.291203260421753, "eval_model_preparation_time": 0.005, "eval_runtime": 52.1733, "eval_samples_per_second": 118.681, "eval_steps_per_second": 14.835, "step": 23280 }, { "epoch": 80.75601374570446, "grad_norm": 2.4226436614990234, "learning_rate": 1.9277108433734939e-07, "loss": 2.1804, "step": 23500 }, { "epoch": 81.0, "eval_accuracy": 0.8937338501291989, "eval_loss": 2.285174608230591, "eval_model_preparation_time": 0.005, "eval_runtime": 52.1705, "eval_samples_per_second": 118.688, "eval_steps_per_second": 14.836, "step": 23571 }, { "epoch": 82.0, "eval_accuracy": 0.8940568475452196, "eval_loss": 2.2793898582458496, "eval_model_preparation_time": 0.005, "eval_runtime": 53.532, "eval_samples_per_second": 115.669, "eval_steps_per_second": 14.459, "step": 23862 }, { "epoch": 82.47422680412372, "grad_norm": 2.2000808715820312, "learning_rate": 1.7555938037865747e-07, "loss": 2.1725, "step": 24000 }, { "epoch": 83.0, "eval_accuracy": 0.895187338501292, "eval_loss": 2.2743537425994873, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7704, "eval_samples_per_second": 117.338, "eval_steps_per_second": 14.667, "step": 24153 }, { "epoch": 84.0, "eval_accuracy": 0.8953488372093024, "eval_loss": 2.2692511081695557, "eval_model_preparation_time": 0.005, "eval_runtime": 52.6929, "eval_samples_per_second": 117.511, "eval_steps_per_second": 14.689, "step": 24444 }, { "epoch": 84.19243986254295, "grad_norm": 1.9906361103057861, "learning_rate": 1.5834767641996558e-07, "loss": 2.165, "step": 24500 }, { "epoch": 85.0, "eval_accuracy": 0.895187338501292, "eval_loss": 2.264594078063965, "eval_model_preparation_time": 0.005, "eval_runtime": 53.0382, "eval_samples_per_second": 116.746, "eval_steps_per_second": 14.593, "step": 24735 }, { "epoch": 85.91065292096219, "grad_norm": 2.219193935394287, "learning_rate": 1.4113597246127366e-07, "loss": 2.1539, "step": 25000 }, { "epoch": 86.0, "eval_accuracy": 0.8955103359173127, "eval_loss": 2.2601823806762695, "eval_model_preparation_time": 0.005, "eval_runtime": 53.4023, "eval_samples_per_second": 115.95, "eval_steps_per_second": 14.494, "step": 25026 }, { "epoch": 87.0, "eval_accuracy": 0.8955103359173127, "eval_loss": 2.2560276985168457, "eval_model_preparation_time": 0.005, "eval_runtime": 52.4937, "eval_samples_per_second": 117.957, "eval_steps_per_second": 14.745, "step": 25317 }, { "epoch": 87.62886597938144, "grad_norm": 1.8535250425338745, "learning_rate": 1.2392426850258174e-07, "loss": 2.1479, "step": 25500 }, { "epoch": 88.0, "eval_accuracy": 0.895671834625323, "eval_loss": 2.2524540424346924, "eval_model_preparation_time": 0.005, "eval_runtime": 52.1601, "eval_samples_per_second": 118.712, "eval_steps_per_second": 14.839, "step": 25608 }, { "epoch": 89.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2489233016967773, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5825, "eval_samples_per_second": 117.758, "eval_steps_per_second": 14.72, "step": 25899 }, { "epoch": 89.34707903780068, "grad_norm": 1.8873833417892456, "learning_rate": 1.0671256454388984e-07, "loss": 2.1392, "step": 26000 }, { "epoch": 90.0, "eval_accuracy": 0.896640826873385, "eval_loss": 2.2456018924713135, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5499, "eval_samples_per_second": 117.831, "eval_steps_per_second": 14.729, "step": 26190 }, { "epoch": 91.0, "eval_accuracy": 0.896640826873385, "eval_loss": 2.2428722381591797, "eval_model_preparation_time": 0.005, "eval_runtime": 52.35, "eval_samples_per_second": 118.281, "eval_steps_per_second": 14.785, "step": 26481 }, { "epoch": 91.06529209621993, "grad_norm": 1.9566396474838257, "learning_rate": 8.950086058519793e-08, "loss": 2.1339, "step": 26500 }, { "epoch": 92.0, "eval_accuracy": 0.8968023255813954, "eval_loss": 2.240267515182495, "eval_model_preparation_time": 0.005, "eval_runtime": 53.7343, "eval_samples_per_second": 115.234, "eval_steps_per_second": 14.404, "step": 26772 }, { "epoch": 92.78350515463917, "grad_norm": 2.347038745880127, "learning_rate": 7.228915662650602e-08, "loss": 2.1307, "step": 27000 }, { "epoch": 93.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2379775047302246, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7112, "eval_samples_per_second": 117.47, "eval_steps_per_second": 14.684, "step": 27063 }, { "epoch": 94.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2358651161193848, "eval_model_preparation_time": 0.005, "eval_runtime": 52.7355, "eval_samples_per_second": 117.416, "eval_steps_per_second": 14.677, "step": 27354 }, { "epoch": 94.50171821305842, "grad_norm": 2.1997811794281006, "learning_rate": 5.507745266781411e-08, "loss": 2.1234, "step": 27500 }, { "epoch": 95.0, "eval_accuracy": 0.8963178294573644, "eval_loss": 2.2341957092285156, "eval_model_preparation_time": 0.005, "eval_runtime": 52.5471, "eval_samples_per_second": 117.837, "eval_steps_per_second": 14.73, "step": 27645 }, { "epoch": 96.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2328357696533203, "eval_model_preparation_time": 0.005, "eval_runtime": 53.6532, "eval_samples_per_second": 115.408, "eval_steps_per_second": 14.426, "step": 27936 }, { "epoch": 96.21993127147766, "grad_norm": 1.939498782157898, "learning_rate": 3.78657487091222e-08, "loss": 2.1212, "step": 28000 }, { "epoch": 97.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2317166328430176, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8569, "eval_samples_per_second": 117.146, "eval_steps_per_second": 14.643, "step": 28227 }, { "epoch": 97.9381443298969, "grad_norm": 2.966059684753418, "learning_rate": 2.0654044750430294e-08, "loss": 2.1197, "step": 28500 }, { "epoch": 98.0, "eval_accuracy": 0.8963178294573644, "eval_loss": 2.231013059616089, "eval_model_preparation_time": 0.005, "eval_runtime": 52.9339, "eval_samples_per_second": 116.976, "eval_steps_per_second": 14.622, "step": 28518 }, { "epoch": 99.0, "eval_accuracy": 0.8964793281653747, "eval_loss": 2.2305853366851807, "eval_model_preparation_time": 0.005, "eval_runtime": 53.021, "eval_samples_per_second": 116.784, "eval_steps_per_second": 14.598, "step": 28809 }, { "epoch": 99.65635738831615, "grad_norm": 2.3651483058929443, "learning_rate": 3.442340791738382e-09, "loss": 2.1161, "step": 29000 }, { "epoch": 100.0, "eval_accuracy": 0.8963178294573644, "eval_loss": 2.2304611206054688, "eval_model_preparation_time": 0.005, "eval_runtime": 52.8568, "eval_samples_per_second": 117.147, "eval_steps_per_second": 14.643, "step": 29100 } ], "logging_steps": 500, "max_steps": 29100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.200356285135443e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }