|
{ |
|
"best_metric": 0.951527777777778, |
|
"best_model_checkpoint": "/n/holyscratch01/economics/esilcock/syria_models/8_5e-06/checkpoint-740", |
|
"epoch": 17.61904761904762, |
|
"global_step": 740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.18055555555555555, |
|
"eval_f1": 0.05162907268170425, |
|
"eval_loss": 2.1331899166107178, |
|
"eval_precision": 0.05357142857142857, |
|
"eval_recall": 0.08588235294117648, |
|
"eval_runtime": 2.9184, |
|
"eval_samples_per_second": 24.671, |
|
"eval_steps_per_second": 3.084, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.25, |
|
"eval_f1": 0.10818399044205496, |
|
"eval_loss": 2.088547468185425, |
|
"eval_precision": 0.09869674185463659, |
|
"eval_recall": 0.12676470588235295, |
|
"eval_runtime": 2.6064, |
|
"eval_samples_per_second": 27.625, |
|
"eval_steps_per_second": 3.453, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.2777777777777778, |
|
"eval_f1": 0.09715938834550213, |
|
"eval_loss": 1.9733208417892456, |
|
"eval_precision": 0.08442028985507247, |
|
"eval_recall": 0.13751633986928105, |
|
"eval_runtime": 2.5925, |
|
"eval_samples_per_second": 27.773, |
|
"eval_steps_per_second": 3.472, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.5555555555555556, |
|
"eval_f1": 0.22415689149560114, |
|
"eval_loss": 1.8020447492599487, |
|
"eval_precision": 0.21013071895424837, |
|
"eval_recall": 0.26833333333333337, |
|
"eval_runtime": 2.5848, |
|
"eval_samples_per_second": 27.855, |
|
"eval_steps_per_second": 3.482, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_f1": 0.2961904761904762, |
|
"eval_loss": 1.4616996049880981, |
|
"eval_precision": 0.3290064102564102, |
|
"eval_recall": 0.335686274509804, |
|
"eval_runtime": 2.9293, |
|
"eval_samples_per_second": 24.579, |
|
"eval_steps_per_second": 3.072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy": 0.6527777777777778, |
|
"eval_f1": 0.43249639249639255, |
|
"eval_loss": 1.2280499935150146, |
|
"eval_precision": 0.4880341880341881, |
|
"eval_recall": 0.4904901960784313, |
|
"eval_runtime": 2.6424, |
|
"eval_samples_per_second": 27.248, |
|
"eval_steps_per_second": 3.406, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.75, |
|
"eval_f1": 0.6376634768740032, |
|
"eval_loss": 0.9447543025016785, |
|
"eval_precision": 0.7244047619047619, |
|
"eval_recall": 0.6264705882352941, |
|
"eval_runtime": 2.6104, |
|
"eval_samples_per_second": 27.582, |
|
"eval_steps_per_second": 3.448, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_accuracy": 0.8055555555555556, |
|
"eval_f1": 0.6916894699503395, |
|
"eval_loss": 0.7263244986534119, |
|
"eval_precision": 0.7515151515151516, |
|
"eval_recall": 0.6826797385620915, |
|
"eval_runtime": 2.6192, |
|
"eval_samples_per_second": 27.489, |
|
"eval_steps_per_second": 3.436, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_accuracy": 0.8611111111111112, |
|
"eval_f1": 0.780800727050727, |
|
"eval_loss": 0.6345593333244324, |
|
"eval_precision": 0.7710130718954249, |
|
"eval_recall": 0.826797385620915, |
|
"eval_runtime": 2.6126, |
|
"eval_samples_per_second": 27.559, |
|
"eval_steps_per_second": 3.445, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_accuracy": 0.875, |
|
"eval_f1": 0.8041340603840604, |
|
"eval_loss": 0.5365733504295349, |
|
"eval_precision": 0.8055159958720332, |
|
"eval_recall": 0.8326797385620915, |
|
"eval_runtime": 2.7985, |
|
"eval_samples_per_second": 25.728, |
|
"eval_steps_per_second": 3.216, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.875, |
|
"eval_f1": 0.8001352813852813, |
|
"eval_loss": 0.41519981622695923, |
|
"eval_precision": 0.800515995872033, |
|
"eval_recall": 0.8187908496732026, |
|
"eval_runtime": 2.6402, |
|
"eval_samples_per_second": 27.271, |
|
"eval_steps_per_second": 3.409, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_accuracy": 0.8472222222222222, |
|
"eval_f1": 0.774096712111418, |
|
"eval_loss": 0.41259926557540894, |
|
"eval_precision": 0.7647058823529411, |
|
"eval_recall": 0.8209150326797385, |
|
"eval_runtime": 2.6387, |
|
"eval_samples_per_second": 27.286, |
|
"eval_steps_per_second": 3.411, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_accuracy": 0.875, |
|
"eval_f1": 0.7898691749427044, |
|
"eval_loss": 0.34084904193878174, |
|
"eval_precision": 0.7705882352941177, |
|
"eval_recall": 0.8379084967320262, |
|
"eval_runtime": 2.7068, |
|
"eval_samples_per_second": 26.6, |
|
"eval_steps_per_second": 3.325, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_accuracy": 0.8611111111111112, |
|
"eval_f1": 0.7678326775480475, |
|
"eval_loss": 0.39358699321746826, |
|
"eval_precision": 0.7325162337662338, |
|
"eval_recall": 0.8477124183006536, |
|
"eval_runtime": 2.6003, |
|
"eval_samples_per_second": 27.689, |
|
"eval_steps_per_second": 3.461, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_f1": 0.8779316203509753, |
|
"eval_loss": 0.2763499915599823, |
|
"eval_precision": 0.8507142857142858, |
|
"eval_recall": 0.9372549019607843, |
|
"eval_runtime": 2.6589, |
|
"eval_samples_per_second": 27.078, |
|
"eval_steps_per_second": 3.385, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"eval_accuracy": 0.9305555555555556, |
|
"eval_f1": 0.9120108649520414, |
|
"eval_loss": 0.217352032661438, |
|
"eval_precision": 0.8800000000000001, |
|
"eval_recall": 0.9653594771241831, |
|
"eval_runtime": 2.6648, |
|
"eval_samples_per_second": 27.019, |
|
"eval_steps_per_second": 3.377, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_f1": 0.8755410011444494, |
|
"eval_loss": 0.22454272210597992, |
|
"eval_precision": 0.8466666666666667, |
|
"eval_recall": 0.9424836601307189, |
|
"eval_runtime": 2.6406, |
|
"eval_samples_per_second": 27.267, |
|
"eval_steps_per_second": 3.408, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9410636043273615, |
|
"eval_loss": 0.19036133587360382, |
|
"eval_precision": 0.925, |
|
"eval_recall": 0.9712418300653596, |
|
"eval_runtime": 2.6164, |
|
"eval_samples_per_second": 27.519, |
|
"eval_steps_per_second": 3.44, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9299524932162504, |
|
"eval_loss": 0.20558035373687744, |
|
"eval_precision": 0.905, |
|
"eval_recall": 0.9712418300653596, |
|
"eval_runtime": 2.6164, |
|
"eval_samples_per_second": 27.519, |
|
"eval_steps_per_second": 3.44, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9299524932162504, |
|
"eval_loss": 0.20008066296577454, |
|
"eval_precision": 0.905, |
|
"eval_recall": 0.9712418300653596, |
|
"eval_runtime": 2.5909, |
|
"eval_samples_per_second": 27.79, |
|
"eval_steps_per_second": 3.474, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9410636043273615, |
|
"eval_loss": 0.17470227181911469, |
|
"eval_precision": 0.925, |
|
"eval_recall": 0.9712418300653596, |
|
"eval_runtime": 2.8957, |
|
"eval_samples_per_second": 24.864, |
|
"eval_steps_per_second": 3.108, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.11614257842302322, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.6154, |
|
"eval_samples_per_second": 27.53, |
|
"eval_steps_per_second": 3.441, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_accuracy": 0.9305555555555556, |
|
"eval_f1": 0.9205122655122654, |
|
"eval_loss": 0.2461088001728058, |
|
"eval_precision": 0.8983333333333334, |
|
"eval_recall": 0.9601307189542485, |
|
"eval_runtime": 2.6479, |
|
"eval_samples_per_second": 27.191, |
|
"eval_steps_per_second": 3.399, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_accuracy": 0.9305555555555556, |
|
"eval_f1": 0.9205122655122654, |
|
"eval_loss": 0.20940905809402466, |
|
"eval_precision": 0.8983333333333334, |
|
"eval_recall": 0.9601307189542485, |
|
"eval_runtime": 2.5922, |
|
"eval_samples_per_second": 27.776, |
|
"eval_steps_per_second": 3.472, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 3.015873015873016e-06, |
|
"loss": 0.6157, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9299524932162504, |
|
"eval_loss": 0.19723846018314362, |
|
"eval_precision": 0.905, |
|
"eval_recall": 0.9712418300653596, |
|
"eval_runtime": 2.6516, |
|
"eval_samples_per_second": 27.154, |
|
"eval_steps_per_second": 3.394, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"eval_accuracy": 0.9305555555555556, |
|
"eval_f1": 0.9120108649520414, |
|
"eval_loss": 0.20041564106941223, |
|
"eval_precision": 0.8800000000000001, |
|
"eval_recall": 0.9653594771241831, |
|
"eval_runtime": 2.6152, |
|
"eval_samples_per_second": 27.531, |
|
"eval_steps_per_second": 3.441, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_accuracy": 0.9305555555555556, |
|
"eval_f1": 0.9120108649520414, |
|
"eval_loss": 0.1812739223241806, |
|
"eval_precision": 0.8800000000000001, |
|
"eval_recall": 0.9653594771241831, |
|
"eval_runtime": 2.6055, |
|
"eval_samples_per_second": 27.634, |
|
"eval_steps_per_second": 3.454, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.16734400391578674, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.6389, |
|
"eval_samples_per_second": 27.284, |
|
"eval_steps_per_second": 3.411, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9382070707070707, |
|
"eval_loss": 0.17178066074848175, |
|
"eval_precision": 0.9237500000000001, |
|
"eval_recall": 0.966013071895425, |
|
"eval_runtime": 2.7026, |
|
"eval_samples_per_second": 26.641, |
|
"eval_steps_per_second": 3.33, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9382070707070707, |
|
"eval_loss": 0.15890704095363617, |
|
"eval_precision": 0.9237500000000001, |
|
"eval_recall": 0.966013071895425, |
|
"eval_runtime": 2.605, |
|
"eval_samples_per_second": 27.639, |
|
"eval_steps_per_second": 3.455, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.131246879696846, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.6046, |
|
"eval_samples_per_second": 27.643, |
|
"eval_steps_per_second": 3.455, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.1490662544965744, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.5887, |
|
"eval_samples_per_second": 27.813, |
|
"eval_steps_per_second": 3.477, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.9382070707070707, |
|
"eval_loss": 0.22067798674106598, |
|
"eval_precision": 0.9237500000000001, |
|
"eval_recall": 0.966013071895425, |
|
"eval_runtime": 2.6374, |
|
"eval_samples_per_second": 27.299, |
|
"eval_steps_per_second": 3.412, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.22119665145874023, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.6042, |
|
"eval_samples_per_second": 27.647, |
|
"eval_steps_per_second": 3.456, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.2090146392583847, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.6202, |
|
"eval_samples_per_second": 27.478, |
|
"eval_steps_per_second": 3.435, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9476656268568033, |
|
"eval_loss": 0.18439681828022003, |
|
"eval_precision": 0.93, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.5985, |
|
"eval_samples_per_second": 27.708, |
|
"eval_steps_per_second": 3.464, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.951527777777778, |
|
"eval_loss": 0.17655248939990997, |
|
"eval_precision": 0.9355555555555556, |
|
"eval_recall": 0.977124183006536, |
|
"eval_runtime": 2.893, |
|
"eval_samples_per_second": 24.888, |
|
"eval_steps_per_second": 3.111, |
|
"step": 740 |
|
} |
|
], |
|
"max_steps": 1260, |
|
"num_train_epochs": 30, |
|
"total_flos": 5517182798069760.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|