|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.2754160165786743, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.7665, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8710623074629832, |
|
"eval_f1": 0.1887287024901704, |
|
"eval_loss": 0.42895129323005676, |
|
"eval_precision": 0.38028169014084506, |
|
"eval_recall": 0.12550842533410808, |
|
"eval_runtime": 4.4417, |
|
"eval_samples_per_second": 210.505, |
|
"eval_steps_per_second": 3.377, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9667028784751892, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.336, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9335188313624168, |
|
"eval_f1": 0.6881609491875161, |
|
"eval_loss": 0.21774514019489288, |
|
"eval_precision": 0.6187384044526901, |
|
"eval_recall": 0.7751307379430563, |
|
"eval_runtime": 4.5594, |
|
"eval_samples_per_second": 205.071, |
|
"eval_steps_per_second": 3.29, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.0499272346496582, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.2067, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9410215641458809, |
|
"eval_f1": 0.7261015202267457, |
|
"eval_loss": 0.17432348430156708, |
|
"eval_precision": 0.6523148148148148, |
|
"eval_recall": 0.8187100522951772, |
|
"eval_runtime": 4.8137, |
|
"eval_samples_per_second": 194.236, |
|
"eval_steps_per_second": 3.116, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.841440200805664, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1734, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9499652191195469, |
|
"eval_f1": 0.7669569807337029, |
|
"eval_loss": 0.15249943733215332, |
|
"eval_precision": 0.7026112185686654, |
|
"eval_recall": 0.8442765833817548, |
|
"eval_runtime": 4.8347, |
|
"eval_samples_per_second": 193.394, |
|
"eval_steps_per_second": 3.103, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.0053447484970093, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1557, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9523501937791911, |
|
"eval_f1": 0.7757479481069631, |
|
"eval_loss": 0.14416351914405823, |
|
"eval_precision": 0.7125486381322957, |
|
"eval_recall": 0.8512492736780941, |
|
"eval_runtime": 4.5552, |
|
"eval_samples_per_second": 205.259, |
|
"eval_steps_per_second": 3.293, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.2348506450653076, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.146, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9519526980025838, |
|
"eval_f1": 0.7780979827089337, |
|
"eval_loss": 0.14452077448368073, |
|
"eval_precision": 0.7084923664122137, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.5529, |
|
"eval_samples_per_second": 205.363, |
|
"eval_steps_per_second": 3.295, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.4902212619781494, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.1397, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9525489416674948, |
|
"eval_f1": 0.7873728150273936, |
|
"eval_loss": 0.14437128603458405, |
|
"eval_precision": 0.7144886363636364, |
|
"eval_recall": 0.8768158047646717, |
|
"eval_runtime": 4.5644, |
|
"eval_samples_per_second": 204.845, |
|
"eval_steps_per_second": 3.286, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.4951108992099762, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1338, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9545364205505317, |
|
"eval_f1": 0.7905745300503045, |
|
"eval_loss": 0.1385926455259323, |
|
"eval_precision": 0.7261673151750972, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5189, |
|
"eval_samples_per_second": 206.907, |
|
"eval_steps_per_second": 3.319, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.3395795822143555, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1277, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9561264036569611, |
|
"eval_f1": 0.7964601769911505, |
|
"eval_loss": 0.13648679852485657, |
|
"eval_precision": 0.7395418326693227, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.4933, |
|
"eval_samples_per_second": 208.086, |
|
"eval_steps_per_second": 3.338, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.1327812671661377, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1255, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9562754645731889, |
|
"eval_f1": 0.7936932121859969, |
|
"eval_loss": 0.1332310438156128, |
|
"eval_precision": 0.7347847600197922, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.519, |
|
"eval_samples_per_second": 206.904, |
|
"eval_steps_per_second": 3.319, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 11.329936027526855, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1215, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9557289078803538, |
|
"eval_f1": 0.7884564469155414, |
|
"eval_loss": 0.1330229640007019, |
|
"eval_precision": 0.7242217898832685, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5033, |
|
"eval_samples_per_second": 207.627, |
|
"eval_steps_per_second": 3.331, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.500403642654419, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1189, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9561264036569611, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.13398447632789612, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5341, |
|
"eval_samples_per_second": 206.215, |
|
"eval_steps_per_second": 3.308, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.2570128440856934, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1179, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9571201430984796, |
|
"eval_f1": 0.7973009446693656, |
|
"eval_loss": 0.12948381900787354, |
|
"eval_precision": 0.7444556451612904, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.5379, |
|
"eval_samples_per_second": 206.041, |
|
"eval_steps_per_second": 3.305, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.9663475751876831, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.114, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9578654476796185, |
|
"eval_f1": 0.8013955984970478, |
|
"eval_loss": 0.1294805407524109, |
|
"eval_precision": 0.7446384039900249, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5196, |
|
"eval_samples_per_second": 206.877, |
|
"eval_steps_per_second": 3.319, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1781103610992432, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1128, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9571201430984796, |
|
"eval_f1": 0.7960438385458433, |
|
"eval_loss": 0.13167551159858704, |
|
"eval_precision": 0.7371287128712871, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.523, |
|
"eval_samples_per_second": 206.722, |
|
"eval_steps_per_second": 3.316, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.9357908964157104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1115, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.957517638875087, |
|
"eval_f1": 0.7973297730307075, |
|
"eval_loss": 0.12998828291893005, |
|
"eval_precision": 0.7376482213438735, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5035, |
|
"eval_samples_per_second": 207.616, |
|
"eval_steps_per_second": 3.331, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.123679280281067, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1109, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9577163867633907, |
|
"eval_f1": 0.7951935914552738, |
|
"eval_loss": 0.13070179522037506, |
|
"eval_precision": 0.7356719367588933, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5394, |
|
"eval_samples_per_second": 205.975, |
|
"eval_steps_per_second": 3.304, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.571700096130371, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1097, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.957517638875087, |
|
"eval_f1": 0.7968959058067969, |
|
"eval_loss": 0.1318700611591339, |
|
"eval_precision": 0.7385912698412699, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5083, |
|
"eval_samples_per_second": 207.394, |
|
"eval_steps_per_second": 3.327, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.028625726699829, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1086, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9573188909867832, |
|
"eval_f1": 0.7943148297130599, |
|
"eval_loss": 0.12963609397411346, |
|
"eval_precision": 0.7375498007968128, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.5495, |
|
"eval_samples_per_second": 205.519, |
|
"eval_steps_per_second": 3.297, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.7686336040496826, |
|
"learning_rate": 0.0, |
|
"loss": 0.1094, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9573188909867832, |
|
"eval_f1": 0.7943148297130599, |
|
"eval_loss": 0.13019020855426788, |
|
"eval_precision": 0.7375498007968128, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.5125, |
|
"eval_samples_per_second": 207.2, |
|
"eval_steps_per_second": 3.324, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 4545096690358590.0, |
|
"train_loss": 0.17230618686387034, |
|
"train_runtime": 1231.892, |
|
"train_samples_per_second": 136.976, |
|
"train_steps_per_second": 8.572 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4545096690358590.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|