|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 7400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2912011474885726, |
|
"eval_loss": 5.149496078491211, |
|
"eval_runtime": 19.7373, |
|
"eval_samples_per_second": 59.228, |
|
"eval_steps_per_second": 1.875, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.33825854783938614, |
|
"eval_loss": 4.6761064529418945, |
|
"eval_runtime": 19.7373, |
|
"eval_samples_per_second": 59.228, |
|
"eval_steps_per_second": 1.875, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.37122292212112573, |
|
"eval_loss": 4.382660388946533, |
|
"eval_runtime": 19.7317, |
|
"eval_samples_per_second": 59.245, |
|
"eval_steps_per_second": 1.875, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.662162162162162e-05, |
|
"loss": 4.9816, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3959613241050367, |
|
"eval_loss": 4.176214694976807, |
|
"eval_runtime": 19.7386, |
|
"eval_samples_per_second": 59.224, |
|
"eval_steps_per_second": 1.874, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.41364025196360527, |
|
"eval_loss": 4.029497146606445, |
|
"eval_runtime": 19.7378, |
|
"eval_samples_per_second": 59.226, |
|
"eval_steps_per_second": 1.875, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4276382300334396, |
|
"eval_loss": 3.915071725845337, |
|
"eval_runtime": 19.7391, |
|
"eval_samples_per_second": 59.223, |
|
"eval_steps_per_second": 1.874, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 4.324324324324325e-05, |
|
"loss": 3.9068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.43997718848018247, |
|
"eval_loss": 3.829800844192505, |
|
"eval_runtime": 19.73, |
|
"eval_samples_per_second": 59.25, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.450026354217971, |
|
"eval_loss": 3.7551472187042236, |
|
"eval_runtime": 19.7353, |
|
"eval_samples_per_second": 59.234, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.45856339269512925, |
|
"eval_loss": 3.6967544555664062, |
|
"eval_runtime": 19.735, |
|
"eval_samples_per_second": 59.235, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4668152871745686, |
|
"eval_loss": 3.6389007568359375, |
|
"eval_runtime": 19.7369, |
|
"eval_samples_per_second": 59.229, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 3.986486486486487e-05, |
|
"loss": 3.3777, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4742895162056839, |
|
"eval_loss": 3.5959553718566895, |
|
"eval_runtime": 19.7345, |
|
"eval_samples_per_second": 59.236, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4803034623393905, |
|
"eval_loss": 3.5559630393981934, |
|
"eval_runtime": 19.7359, |
|
"eval_samples_per_second": 59.232, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.48679264846929515, |
|
"eval_loss": 3.537980794906616, |
|
"eval_runtime": 19.7353, |
|
"eval_samples_per_second": 59.234, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.648648648648649e-05, |
|
"loss": 3.0075, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.49136359316000033, |
|
"eval_loss": 3.484649181365967, |
|
"eval_runtime": 20.0887, |
|
"eval_samples_per_second": 58.192, |
|
"eval_steps_per_second": 1.842, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.49689365857030526, |
|
"eval_loss": 3.4657890796661377, |
|
"eval_runtime": 19.7322, |
|
"eval_samples_per_second": 59.243, |
|
"eval_steps_per_second": 1.875, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.501041207628034, |
|
"eval_loss": 3.4555246829986572, |
|
"eval_runtime": 19.7332, |
|
"eval_samples_per_second": 59.24, |
|
"eval_steps_per_second": 1.875, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 3.310810810810811e-05, |
|
"loss": 2.7329, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5052838046850023, |
|
"eval_loss": 3.4299707412719727, |
|
"eval_runtime": 19.7326, |
|
"eval_samples_per_second": 59.242, |
|
"eval_steps_per_second": 1.875, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5079710708453223, |
|
"eval_loss": 3.420835494995117, |
|
"eval_runtime": 19.737, |
|
"eval_samples_per_second": 59.229, |
|
"eval_steps_per_second": 1.875, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5120667755398294, |
|
"eval_loss": 3.425023078918457, |
|
"eval_runtime": 19.7397, |
|
"eval_samples_per_second": 59.221, |
|
"eval_steps_per_second": 1.874, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5146849158825206, |
|
"eval_loss": 3.3964016437530518, |
|
"eval_runtime": 19.7401, |
|
"eval_samples_per_second": 59.22, |
|
"eval_steps_per_second": 1.874, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 2.9729729729729733e-05, |
|
"loss": 2.5153, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5181498474911649, |
|
"eval_loss": 3.3892605304718018, |
|
"eval_runtime": 19.7415, |
|
"eval_samples_per_second": 59.215, |
|
"eval_steps_per_second": 1.874, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5204396402001192, |
|
"eval_loss": 3.3914339542388916, |
|
"eval_runtime": 19.7345, |
|
"eval_samples_per_second": 59.236, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5228590438171277, |
|
"eval_loss": 3.3818881511688232, |
|
"eval_runtime": 19.7369, |
|
"eval_samples_per_second": 59.229, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 23.65, |
|
"learning_rate": 2.635135135135135e-05, |
|
"loss": 2.336, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5246649558026804, |
|
"eval_loss": 3.3786017894744873, |
|
"eval_runtime": 19.7359, |
|
"eval_samples_per_second": 59.232, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5267128081499339, |
|
"eval_loss": 3.374890089035034, |
|
"eval_runtime": 19.7329, |
|
"eval_samples_per_second": 59.241, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5287001754067623, |
|
"eval_loss": 3.377443552017212, |
|
"eval_runtime": 19.7328, |
|
"eval_samples_per_second": 59.241, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5302555063034105, |
|
"eval_loss": 3.3700437545776367, |
|
"eval_runtime": 19.7375, |
|
"eval_samples_per_second": 59.227, |
|
"eval_steps_per_second": 1.875, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 2.2972972972972976e-05, |
|
"loss": 2.1918, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5320614182889633, |
|
"eval_loss": 3.372175693511963, |
|
"eval_runtime": 19.7333, |
|
"eval_samples_per_second": 59.24, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5339623782737555, |
|
"eval_loss": 3.3728740215301514, |
|
"eval_runtime": 19.7294, |
|
"eval_samples_per_second": 59.252, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5349819840837805, |
|
"eval_loss": 3.3896090984344482, |
|
"eval_runtime": 19.7317, |
|
"eval_samples_per_second": 59.245, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"learning_rate": 1.9594594594594595e-05, |
|
"loss": 2.0717, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5367446924333152, |
|
"eval_loss": 3.37764048576355, |
|
"eval_runtime": 19.7293, |
|
"eval_samples_per_second": 59.252, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5384901193284427, |
|
"eval_loss": 3.384190797805786, |
|
"eval_runtime": 19.7297, |
|
"eval_samples_per_second": 59.251, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5399244800442405, |
|
"eval_loss": 3.38201642036438, |
|
"eval_runtime": 19.7316, |
|
"eval_samples_per_second": 59.245, |
|
"eval_steps_per_second": 1.875, |
|
"step": 4884 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"learning_rate": 1.6216216216216218e-05, |
|
"loss": 1.9814, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.540391079313235, |
|
"eval_loss": 3.393296241760254, |
|
"eval_runtime": 19.7393, |
|
"eval_samples_per_second": 59.222, |
|
"eval_steps_per_second": 1.874, |
|
"step": 5032 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5410564153079123, |
|
"eval_loss": 3.386063814163208, |
|
"eval_runtime": 19.7351, |
|
"eval_samples_per_second": 59.234, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5425253389325245, |
|
"eval_loss": 3.3877809047698975, |
|
"eval_runtime": 19.7289, |
|
"eval_samples_per_second": 59.253, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5328 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5431129083823695, |
|
"eval_loss": 3.3903427124023438, |
|
"eval_runtime": 19.7301, |
|
"eval_samples_per_second": 59.25, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5476 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 1.2837837837837838e-05, |
|
"loss": 1.9049, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5439856218299332, |
|
"eval_loss": 3.3847618103027344, |
|
"eval_runtime": 19.731, |
|
"eval_samples_per_second": 59.247, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5446855207334249, |
|
"eval_loss": 3.396543502807617, |
|
"eval_runtime": 19.7337, |
|
"eval_samples_per_second": 59.239, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5453249345464914, |
|
"eval_loss": 3.403301477432251, |
|
"eval_runtime": 19.7292, |
|
"eval_samples_per_second": 59.252, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 1.8441, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5456187192714139, |
|
"eval_loss": 3.4074459075927734, |
|
"eval_runtime": 19.7354, |
|
"eval_samples_per_second": 59.234, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6068 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5462322109028696, |
|
"eval_loss": 3.4045963287353516, |
|
"eval_runtime": 19.7291, |
|
"eval_samples_per_second": 59.253, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6216 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5467333730806785, |
|
"eval_loss": 3.4119956493377686, |
|
"eval_runtime": 19.7304, |
|
"eval_samples_per_second": 59.249, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6364 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 6.081081081081082e-06, |
|
"loss": 1.804, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5466901694446604, |
|
"eval_loss": 3.4043545722961426, |
|
"eval_runtime": 19.7301, |
|
"eval_samples_per_second": 59.25, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5471913316224694, |
|
"eval_loss": 3.4124903678894043, |
|
"eval_runtime": 19.7367, |
|
"eval_samples_per_second": 59.23, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5476665716186674, |
|
"eval_loss": 3.4115209579467773, |
|
"eval_runtime": 19.7299, |
|
"eval_samples_per_second": 59.25, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6808 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.547675212345871, |
|
"eval_loss": 3.4069876670837402, |
|
"eval_runtime": 19.7339, |
|
"eval_samples_per_second": 59.238, |
|
"eval_steps_per_second": 1.875, |
|
"step": 6956 |
|
}, |
|
{ |
|
"epoch": 47.3, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 1.7744, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5477875417995178, |
|
"eval_loss": 3.420271396636963, |
|
"eval_runtime": 19.7291, |
|
"eval_samples_per_second": 59.253, |
|
"eval_steps_per_second": 1.875, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5479430748891827, |
|
"eval_loss": 3.4173614978790283, |
|
"eval_runtime": 19.7304, |
|
"eval_samples_per_second": 59.249, |
|
"eval_steps_per_second": 1.875, |
|
"step": 7252 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5480813265244403, |
|
"eval_loss": 3.4181294441223145, |
|
"eval_runtime": 19.7276, |
|
"eval_samples_per_second": 59.257, |
|
"eval_steps_per_second": 1.876, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 7400, |
|
"total_flos": 1.00265577216e+17, |
|
"train_loss": 2.556491814690667, |
|
"train_runtime": 9037.2432, |
|
"train_samples_per_second": 26.103, |
|
"train_steps_per_second": 0.819 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.00265577216e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|