|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.21040940284729, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.79, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8595349299413694, |
|
"eval_f1": 0.13053613053613053, |
|
"eval_loss": 0.4638139605522156, |
|
"eval_precision": 0.330188679245283, |
|
"eval_recall": 0.08134805345729228, |
|
"eval_runtime": 4.5047, |
|
"eval_samples_per_second": 207.56, |
|
"eval_steps_per_second": 3.33, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8728626370429993, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3919, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9275067077412302, |
|
"eval_f1": 0.6317512274959083, |
|
"eval_loss": 0.2518700361251831, |
|
"eval_precision": 0.5953727506426735, |
|
"eval_recall": 0.6728646135967461, |
|
"eval_runtime": 4.62, |
|
"eval_samples_per_second": 202.381, |
|
"eval_steps_per_second": 3.247, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2418427467346191, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.2386, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9381894067375535, |
|
"eval_f1": 0.7159389794844818, |
|
"eval_loss": 0.1926584243774414, |
|
"eval_precision": 0.6540124939932724, |
|
"eval_recall": 0.7908192911098199, |
|
"eval_runtime": 4.522, |
|
"eval_samples_per_second": 206.768, |
|
"eval_steps_per_second": 3.317, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.416585087776184, |
|
"learning_rate": 4e-05, |
|
"loss": 0.193, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.944797774023651, |
|
"eval_f1": 0.7463787200421385, |
|
"eval_loss": 0.16769319772720337, |
|
"eval_precision": 0.6825626204238922, |
|
"eval_recall": 0.8233585124927367, |
|
"eval_runtime": 4.5524, |
|
"eval_samples_per_second": 205.386, |
|
"eval_steps_per_second": 3.295, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.0125393867492676, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1712, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9475802444599026, |
|
"eval_f1": 0.7629299028616435, |
|
"eval_loss": 0.1593998372554779, |
|
"eval_precision": 0.6958812260536399, |
|
"eval_recall": 0.8442765833817548, |
|
"eval_runtime": 4.594, |
|
"eval_samples_per_second": 203.525, |
|
"eval_steps_per_second": 3.265, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.202476143836975, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1596, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9497664712312431, |
|
"eval_f1": 0.7750591949486978, |
|
"eval_loss": 0.1543859839439392, |
|
"eval_precision": 0.708173076923077, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.6035, |
|
"eval_samples_per_second": 203.107, |
|
"eval_steps_per_second": 3.258, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.2051475048065186, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.1524, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.950561462784458, |
|
"eval_f1": 0.7727628489433862, |
|
"eval_loss": 0.15189574658870697, |
|
"eval_precision": 0.7012310606060606, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.7094, |
|
"eval_samples_per_second": 198.539, |
|
"eval_steps_per_second": 3.185, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5321469306945801, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1452, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9521514458908874, |
|
"eval_f1": 0.7842202806460153, |
|
"eval_loss": 0.14606598019599915, |
|
"eval_precision": 0.7203307392996109, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.9388, |
|
"eval_samples_per_second": 189.317, |
|
"eval_steps_per_second": 3.037, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.277453064918518, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1397, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9535426811090132, |
|
"eval_f1": 0.7858095492131236, |
|
"eval_loss": 0.14319637417793274, |
|
"eval_precision": 0.7263313609467456, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.6458, |
|
"eval_samples_per_second": 201.257, |
|
"eval_steps_per_second": 3.229, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.3887606859207153, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1369, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9539401768856206, |
|
"eval_f1": 0.784512683578104, |
|
"eval_loss": 0.13940192759037018, |
|
"eval_precision": 0.7257905138339921, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.5522, |
|
"eval_samples_per_second": 205.396, |
|
"eval_steps_per_second": 3.295, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.879906415939331, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1336, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.954287985690152, |
|
"eval_f1": 0.7872111767866737, |
|
"eval_loss": 0.13752727210521698, |
|
"eval_precision": 0.7321339330334833, |
|
"eval_recall": 0.8512492736780941, |
|
"eval_runtime": 4.5366, |
|
"eval_samples_per_second": 206.102, |
|
"eval_steps_per_second": 3.306, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.3187131881713867, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1305, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9547351684388353, |
|
"eval_f1": 0.7895726955119592, |
|
"eval_loss": 0.13747519254684448, |
|
"eval_precision": 0.7345, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.526, |
|
"eval_samples_per_second": 206.585, |
|
"eval_steps_per_second": 3.314, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.600463628768921, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1281, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9547351684388353, |
|
"eval_f1": 0.7887248322147652, |
|
"eval_loss": 0.13505811989307404, |
|
"eval_precision": 0.7330339321357285, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.5517, |
|
"eval_samples_per_second": 205.416, |
|
"eval_steps_per_second": 3.295, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.121996283531189, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1252, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9553314121037464, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.13602255284786224, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.6009, |
|
"eval_samples_per_second": 203.22, |
|
"eval_steps_per_second": 3.26, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.0593712329864502, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.124, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9541389247739243, |
|
"eval_f1": 0.7874899759422614, |
|
"eval_loss": 0.13636602461338043, |
|
"eval_precision": 0.7292079207920792, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.6126, |
|
"eval_samples_per_second": 202.707, |
|
"eval_steps_per_second": 3.252, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.4150090217590332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1234, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.954933916327139, |
|
"eval_f1": 0.7875565009306035, |
|
"eval_loss": 0.13505925238132477, |
|
"eval_precision": 0.7259803921568627, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.6455, |
|
"eval_samples_per_second": 201.272, |
|
"eval_steps_per_second": 3.229, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.8472805619239807, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1224, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.954933916327139, |
|
"eval_f1": 0.7918106886466365, |
|
"eval_loss": 0.13572688400745392, |
|
"eval_precision": 0.7299019607843137, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5281, |
|
"eval_samples_per_second": 206.491, |
|
"eval_steps_per_second": 3.313, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.521100401878357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1208, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9553314121037464, |
|
"eval_f1": 0.7947830715996806, |
|
"eval_loss": 0.1359829157590866, |
|
"eval_precision": 0.7333005893909627, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5366, |
|
"eval_samples_per_second": 206.104, |
|
"eval_steps_per_second": 3.306, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.9698516726493835, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1201, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.7956301625366373, |
|
"eval_loss": 0.13499899208545685, |
|
"eval_precision": 0.734744094488189, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5393, |
|
"eval_samples_per_second": 205.979, |
|
"eval_steps_per_second": 3.304, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.61683189868927, |
|
"learning_rate": 0.0, |
|
"loss": 0.1205, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.1346217691898346, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.9424, |
|
"eval_samples_per_second": 189.179, |
|
"eval_steps_per_second": 3.035, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 4541164131293502.0, |
|
"train_loss": 0.18834613236514006, |
|
"train_runtime": 1259.6794, |
|
"train_samples_per_second": 133.955, |
|
"train_steps_per_second": 8.383 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4541164131293502.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|