|
{ |
|
"best_metric": 0.33774590492248535, |
|
"best_model_checkpoint": "deepfake_vs_real_image_detection/checkpoint-1392", |
|
"epoch": 87.0, |
|
"eval_steps": 500, |
|
"global_step": 1392, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6765578635014837, |
|
"eval_loss": 1.4959243535995483, |
|
"eval_runtime": 4.5936, |
|
"eval_samples_per_second": 73.362, |
|
"eval_steps_per_second": 9.361, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6913946587537092, |
|
"eval_loss": 1.4148070812225342, |
|
"eval_runtime": 5.1723, |
|
"eval_samples_per_second": 65.154, |
|
"eval_steps_per_second": 8.313, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7091988130563798, |
|
"eval_loss": 1.309860348701477, |
|
"eval_runtime": 4.8265, |
|
"eval_samples_per_second": 69.823, |
|
"eval_steps_per_second": 8.909, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7240356083086054, |
|
"eval_loss": 1.2197798490524292, |
|
"eval_runtime": 4.7427, |
|
"eval_samples_per_second": 71.057, |
|
"eval_steps_per_second": 9.067, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7477744807121661, |
|
"eval_loss": 1.1465003490447998, |
|
"eval_runtime": 4.8571, |
|
"eval_samples_per_second": 69.383, |
|
"eval_steps_per_second": 8.853, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7655786350148368, |
|
"eval_loss": 1.0904771089553833, |
|
"eval_runtime": 4.7162, |
|
"eval_samples_per_second": 71.456, |
|
"eval_steps_per_second": 9.118, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 1.0423768758773804, |
|
"eval_runtime": 4.7414, |
|
"eval_samples_per_second": 71.077, |
|
"eval_steps_per_second": 9.069, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 0.9944449067115784, |
|
"eval_runtime": 4.7288, |
|
"eval_samples_per_second": 71.265, |
|
"eval_steps_per_second": 9.093, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 0.9456853270530701, |
|
"eval_runtime": 4.6848, |
|
"eval_samples_per_second": 71.935, |
|
"eval_steps_per_second": 9.179, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 0.8957338333129883, |
|
"eval_runtime": 4.7194, |
|
"eval_samples_per_second": 71.407, |
|
"eval_steps_per_second": 9.111, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 0.852699875831604, |
|
"eval_runtime": 4.7114, |
|
"eval_samples_per_second": 71.529, |
|
"eval_steps_per_second": 9.127, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7626112759643917, |
|
"eval_loss": 0.815643310546875, |
|
"eval_runtime": 4.7006, |
|
"eval_samples_per_second": 71.694, |
|
"eval_steps_per_second": 9.148, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.771513353115727, |
|
"eval_loss": 0.7831457853317261, |
|
"eval_runtime": 4.7983, |
|
"eval_samples_per_second": 70.233, |
|
"eval_steps_per_second": 8.961, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7744807121661721, |
|
"eval_loss": 0.7558016180992126, |
|
"eval_runtime": 4.7359, |
|
"eval_samples_per_second": 71.159, |
|
"eval_steps_per_second": 9.08, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7744807121661721, |
|
"eval_loss": 0.7284858226776123, |
|
"eval_runtime": 4.6713, |
|
"eval_samples_per_second": 72.143, |
|
"eval_steps_per_second": 9.205, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7744807121661721, |
|
"eval_loss": 0.6914460062980652, |
|
"eval_runtime": 4.7809, |
|
"eval_samples_per_second": 70.489, |
|
"eval_steps_per_second": 8.994, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7774480712166172, |
|
"eval_loss": 0.6581370830535889, |
|
"eval_runtime": 4.7253, |
|
"eval_samples_per_second": 71.318, |
|
"eval_steps_per_second": 9.1, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7804154302670623, |
|
"eval_loss": 0.6323521733283997, |
|
"eval_runtime": 4.7945, |
|
"eval_samples_per_second": 70.289, |
|
"eval_steps_per_second": 8.969, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7774480712166172, |
|
"eval_loss": 0.6072251796722412, |
|
"eval_runtime": 4.8798, |
|
"eval_samples_per_second": 69.06, |
|
"eval_steps_per_second": 8.812, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7804154302670623, |
|
"eval_loss": 0.5835662484169006, |
|
"eval_runtime": 4.6039, |
|
"eval_samples_per_second": 73.199, |
|
"eval_steps_per_second": 9.34, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7922848664688428, |
|
"eval_loss": 0.5593723654747009, |
|
"eval_runtime": 4.7955, |
|
"eval_samples_per_second": 70.274, |
|
"eval_steps_per_second": 8.967, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7922848664688428, |
|
"eval_loss": 0.5364292860031128, |
|
"eval_runtime": 4.8089, |
|
"eval_samples_per_second": 70.078, |
|
"eval_steps_per_second": 8.942, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7952522255192879, |
|
"eval_loss": 0.5182026028633118, |
|
"eval_runtime": 4.6149, |
|
"eval_samples_per_second": 73.024, |
|
"eval_steps_per_second": 9.318, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8041543026706232, |
|
"eval_loss": 0.504047691822052, |
|
"eval_runtime": 4.5605, |
|
"eval_samples_per_second": 73.895, |
|
"eval_steps_per_second": 9.429, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8041543026706232, |
|
"eval_loss": 0.4920850694179535, |
|
"eval_runtime": 4.6821, |
|
"eval_samples_per_second": 71.976, |
|
"eval_steps_per_second": 9.184, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8160237388724035, |
|
"eval_loss": 0.48037412762641907, |
|
"eval_runtime": 4.7178, |
|
"eval_samples_per_second": 71.432, |
|
"eval_steps_per_second": 9.114, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8189910979228486, |
|
"eval_loss": 0.4688655138015747, |
|
"eval_runtime": 4.6086, |
|
"eval_samples_per_second": 73.124, |
|
"eval_steps_per_second": 9.33, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8249258160237388, |
|
"eval_loss": 0.4574899971485138, |
|
"eval_runtime": 4.6369, |
|
"eval_samples_per_second": 72.678, |
|
"eval_steps_per_second": 9.273, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.827893175074184, |
|
"eval_loss": 0.448953777551651, |
|
"eval_runtime": 4.7138, |
|
"eval_samples_per_second": 71.491, |
|
"eval_steps_per_second": 9.122, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.827893175074184, |
|
"eval_loss": 0.4414035379886627, |
|
"eval_runtime": 4.7192, |
|
"eval_samples_per_second": 71.41, |
|
"eval_steps_per_second": 9.112, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.827893175074184, |
|
"eval_loss": 0.4335722029209137, |
|
"eval_runtime": 4.6675, |
|
"eval_samples_per_second": 72.202, |
|
"eval_steps_per_second": 9.213, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"grad_norm": 2.672912120819092, |
|
"learning_rate": 7.096774193548387e-07, |
|
"loss": 0.6719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8219584569732937, |
|
"eval_loss": 0.43041178584098816, |
|
"eval_runtime": 4.7762, |
|
"eval_samples_per_second": 70.559, |
|
"eval_steps_per_second": 9.003, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8249258160237388, |
|
"eval_loss": 0.4242512881755829, |
|
"eval_runtime": 4.6367, |
|
"eval_samples_per_second": 72.681, |
|
"eval_steps_per_second": 9.274, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.827893175074184, |
|
"eval_loss": 0.421082079410553, |
|
"eval_runtime": 4.733, |
|
"eval_samples_per_second": 71.201, |
|
"eval_steps_per_second": 9.085, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8338278931750742, |
|
"eval_loss": 0.41326144337654114, |
|
"eval_runtime": 4.6282, |
|
"eval_samples_per_second": 72.815, |
|
"eval_steps_per_second": 9.291, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8338278931750742, |
|
"eval_loss": 0.41096124053001404, |
|
"eval_runtime": 4.6107, |
|
"eval_samples_per_second": 73.092, |
|
"eval_steps_per_second": 9.326, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8367952522255193, |
|
"eval_loss": 0.404769629240036, |
|
"eval_runtime": 4.6205, |
|
"eval_samples_per_second": 72.936, |
|
"eval_steps_per_second": 9.306, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8427299703264095, |
|
"eval_loss": 0.40141698718070984, |
|
"eval_runtime": 4.6612, |
|
"eval_samples_per_second": 72.298, |
|
"eval_steps_per_second": 9.225, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.3952626883983612, |
|
"eval_runtime": 4.6328, |
|
"eval_samples_per_second": 72.742, |
|
"eval_steps_per_second": 9.282, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.390024334192276, |
|
"eval_runtime": 4.5638, |
|
"eval_samples_per_second": 73.843, |
|
"eval_steps_per_second": 9.422, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.3834027349948883, |
|
"eval_runtime": 4.6261, |
|
"eval_samples_per_second": 72.848, |
|
"eval_steps_per_second": 9.295, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.3803389072418213, |
|
"eval_runtime": 4.6158, |
|
"eval_samples_per_second": 73.009, |
|
"eval_steps_per_second": 9.316, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.37990784645080566, |
|
"eval_runtime": 4.6288, |
|
"eval_samples_per_second": 72.805, |
|
"eval_steps_per_second": 9.29, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.3741393983364105, |
|
"eval_runtime": 4.7575, |
|
"eval_samples_per_second": 70.836, |
|
"eval_steps_per_second": 9.038, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8545994065281899, |
|
"eval_loss": 0.36607253551483154, |
|
"eval_runtime": 4.6476, |
|
"eval_samples_per_second": 72.511, |
|
"eval_steps_per_second": 9.252, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.3655509054660797, |
|
"eval_runtime": 4.6664, |
|
"eval_samples_per_second": 72.218, |
|
"eval_steps_per_second": 9.215, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.36614400148391724, |
|
"eval_runtime": 4.6792, |
|
"eval_samples_per_second": 72.021, |
|
"eval_steps_per_second": 9.19, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.3684944808483124, |
|
"eval_runtime": 4.6175, |
|
"eval_samples_per_second": 72.983, |
|
"eval_steps_per_second": 9.312, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.36265119910240173, |
|
"eval_runtime": 4.6573, |
|
"eval_samples_per_second": 72.36, |
|
"eval_steps_per_second": 9.233, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.359169065952301, |
|
"eval_runtime": 4.7664, |
|
"eval_samples_per_second": 70.703, |
|
"eval_steps_per_second": 9.021, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.35570138692855835, |
|
"eval_runtime": 4.7355, |
|
"eval_samples_per_second": 71.165, |
|
"eval_steps_per_second": 9.08, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.35541436076164246, |
|
"eval_runtime": 4.7442, |
|
"eval_samples_per_second": 71.035, |
|
"eval_steps_per_second": 9.064, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.3521236479282379, |
|
"eval_runtime": 4.6755, |
|
"eval_samples_per_second": 72.078, |
|
"eval_steps_per_second": 9.197, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.3523922264575958, |
|
"eval_runtime": 4.6657, |
|
"eval_samples_per_second": 72.23, |
|
"eval_steps_per_second": 9.216, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.8516320474777448, |
|
"eval_loss": 0.34878119826316833, |
|
"eval_runtime": 4.6488, |
|
"eval_samples_per_second": 72.491, |
|
"eval_steps_per_second": 9.25, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8516320474777448, |
|
"eval_loss": 0.3482997715473175, |
|
"eval_runtime": 4.683, |
|
"eval_samples_per_second": 71.963, |
|
"eval_steps_per_second": 9.182, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.348295122385025, |
|
"eval_runtime": 4.8638, |
|
"eval_samples_per_second": 69.287, |
|
"eval_steps_per_second": 8.841, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8456973293768546, |
|
"eval_loss": 0.3468620479106903, |
|
"eval_runtime": 4.8133, |
|
"eval_samples_per_second": 70.014, |
|
"eval_steps_per_second": 8.934, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8486646884272997, |
|
"eval_loss": 0.3429933488368988, |
|
"eval_runtime": 4.7367, |
|
"eval_samples_per_second": 71.146, |
|
"eval_steps_per_second": 9.078, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8516320474777448, |
|
"eval_loss": 0.347034752368927, |
|
"eval_runtime": 4.7735, |
|
"eval_samples_per_second": 70.598, |
|
"eval_steps_per_second": 9.008, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8545994065281899, |
|
"eval_loss": 0.34952038526535034, |
|
"eval_runtime": 4.6627, |
|
"eval_samples_per_second": 72.276, |
|
"eval_steps_per_second": 9.222, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.857566765578635, |
|
"eval_loss": 0.34523066878318787, |
|
"eval_runtime": 4.761, |
|
"eval_samples_per_second": 70.783, |
|
"eval_steps_per_second": 9.032, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"grad_norm": 4.032630920410156, |
|
"learning_rate": 3.8709677419354837e-07, |
|
"loss": 0.2134, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.8605341246290801, |
|
"eval_loss": 0.3458469808101654, |
|
"eval_runtime": 4.7825, |
|
"eval_samples_per_second": 70.465, |
|
"eval_steps_per_second": 8.991, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8605341246290801, |
|
"eval_loss": 0.3463592529296875, |
|
"eval_runtime": 4.7675, |
|
"eval_samples_per_second": 70.687, |
|
"eval_steps_per_second": 9.019, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8605341246290801, |
|
"eval_loss": 0.3484276831150055, |
|
"eval_runtime": 4.7586, |
|
"eval_samples_per_second": 70.82, |
|
"eval_steps_per_second": 9.036, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.34886136651039124, |
|
"eval_runtime": 4.7061, |
|
"eval_samples_per_second": 71.609, |
|
"eval_steps_per_second": 9.137, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.3485889434814453, |
|
"eval_runtime": 4.6029, |
|
"eval_samples_per_second": 73.215, |
|
"eval_steps_per_second": 9.342, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.3503687381744385, |
|
"eval_runtime": 4.7013, |
|
"eval_samples_per_second": 71.682, |
|
"eval_steps_per_second": 9.146, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.3495609164237976, |
|
"eval_runtime": 4.7225, |
|
"eval_samples_per_second": 71.36, |
|
"eval_steps_per_second": 9.105, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8605341246290801, |
|
"eval_loss": 0.3446400761604309, |
|
"eval_runtime": 4.7445, |
|
"eval_samples_per_second": 71.029, |
|
"eval_steps_per_second": 9.063, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.3452640175819397, |
|
"eval_runtime": 4.6434, |
|
"eval_samples_per_second": 72.577, |
|
"eval_steps_per_second": 9.261, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.34035128355026245, |
|
"eval_runtime": 4.7053, |
|
"eval_samples_per_second": 71.622, |
|
"eval_steps_per_second": 9.139, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.33860069513320923, |
|
"eval_runtime": 4.6174, |
|
"eval_samples_per_second": 72.985, |
|
"eval_steps_per_second": 9.313, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.3384004831314087, |
|
"eval_runtime": 4.663, |
|
"eval_samples_per_second": 72.271, |
|
"eval_steps_per_second": 9.221, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.33985474705696106, |
|
"eval_runtime": 4.7211, |
|
"eval_samples_per_second": 71.382, |
|
"eval_steps_per_second": 9.108, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3414258062839508, |
|
"eval_runtime": 4.7314, |
|
"eval_samples_per_second": 71.226, |
|
"eval_steps_per_second": 9.088, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3427448272705078, |
|
"eval_runtime": 4.7038, |
|
"eval_samples_per_second": 71.644, |
|
"eval_steps_per_second": 9.141, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8635014836795252, |
|
"eval_loss": 0.34361016750335693, |
|
"eval_runtime": 4.8723, |
|
"eval_samples_per_second": 69.167, |
|
"eval_steps_per_second": 8.825, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.34205198287963867, |
|
"eval_runtime": 4.7564, |
|
"eval_samples_per_second": 70.851, |
|
"eval_steps_per_second": 9.04, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3414119780063629, |
|
"eval_runtime": 4.7639, |
|
"eval_samples_per_second": 70.74, |
|
"eval_steps_per_second": 9.026, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3397296071052551, |
|
"eval_runtime": 4.7085, |
|
"eval_samples_per_second": 71.572, |
|
"eval_steps_per_second": 9.132, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3387517035007477, |
|
"eval_runtime": 4.764, |
|
"eval_samples_per_second": 70.738, |
|
"eval_steps_per_second": 9.026, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.33925285935401917, |
|
"eval_runtime": 4.8225, |
|
"eval_samples_per_second": 69.88, |
|
"eval_steps_per_second": 8.916, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3406101167201996, |
|
"eval_runtime": 4.6734, |
|
"eval_samples_per_second": 72.11, |
|
"eval_steps_per_second": 9.201, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.33851388096809387, |
|
"eval_runtime": 4.7413, |
|
"eval_samples_per_second": 71.077, |
|
"eval_steps_per_second": 9.069, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.3385758697986603, |
|
"eval_runtime": 4.707, |
|
"eval_samples_per_second": 71.596, |
|
"eval_steps_per_second": 9.135, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8664688427299704, |
|
"eval_loss": 0.33774590492248535, |
|
"eval_runtime": 4.6381, |
|
"eval_samples_per_second": 72.658, |
|
"eval_steps_per_second": 9.271, |
|
"step": 1392 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 3.4046105637170995e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|