|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.091732025146484, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5658, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7268170426065163, |
|
"eval_f1": 0.6550441396924102, |
|
"eval_loss": 0.519511878490448, |
|
"eval_precision": 0.6646488294314381, |
|
"eval_recall": 0.6492089470812875, |
|
"eval_runtime": 5.1503, |
|
"eval_samples_per_second": 77.471, |
|
"eval_steps_per_second": 9.708, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.045337200164795, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5125, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.6854598540145985, |
|
"eval_loss": 0.5060133337974548, |
|
"eval_precision": 0.6804511278195489, |
|
"eval_recall": 0.6934897254046191, |
|
"eval_runtime": 5.0488, |
|
"eval_samples_per_second": 79.029, |
|
"eval_steps_per_second": 9.903, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.02074670791626, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4809, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.7166907166907166, |
|
"eval_loss": 0.46860969066619873, |
|
"eval_precision": 0.718421052631579, |
|
"eval_recall": 0.7150845608292418, |
|
"eval_runtime": 5.0676, |
|
"eval_samples_per_second": 78.735, |
|
"eval_steps_per_second": 9.867, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.456829786300659, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4353, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.7417274322480016, |
|
"eval_loss": 0.4295312464237213, |
|
"eval_precision": 0.7499839010882864, |
|
"eval_recall": 0.7353155119112567, |
|
"eval_runtime": 5.071, |
|
"eval_samples_per_second": 78.683, |
|
"eval_steps_per_second": 9.86, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.0160815715789795, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4116, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8020050125313283, |
|
"eval_f1": 0.7713964535663778, |
|
"eval_loss": 0.4171212613582611, |
|
"eval_precision": 0.7628187206441512, |
|
"eval_recall": 0.7849154391707583, |
|
"eval_runtime": 5.0771, |
|
"eval_samples_per_second": 78.588, |
|
"eval_steps_per_second": 9.848, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.68072509765625, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3809, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8096491228070175, |
|
"eval_loss": 0.38647741079330444, |
|
"eval_precision": 0.8148460960960962, |
|
"eval_recall": 0.8050554646299327, |
|
"eval_runtime": 5.0568, |
|
"eval_samples_per_second": 78.904, |
|
"eval_steps_per_second": 9.888, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.0169405937194824, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3681, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8176861216035092, |
|
"eval_loss": 0.36971357464790344, |
|
"eval_precision": 0.8193355786895284, |
|
"eval_recall": 0.8161029278050556, |
|
"eval_runtime": 5.0528, |
|
"eval_samples_per_second": 78.966, |
|
"eval_steps_per_second": 9.895, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.219416618347168, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3469, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8101852212906999, |
|
"eval_loss": 0.35537657141685486, |
|
"eval_precision": 0.820642024599137, |
|
"eval_recall": 0.8018276050190944, |
|
"eval_runtime": 5.0531, |
|
"eval_samples_per_second": 78.961, |
|
"eval_steps_per_second": 9.895, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.871381759643555, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3455, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8157894736842105, |
|
"eval_loss": 0.3493554890155792, |
|
"eval_precision": 0.8210867117117118, |
|
"eval_recall": 0.8111020185488271, |
|
"eval_runtime": 5.0737, |
|
"eval_samples_per_second": 78.641, |
|
"eval_steps_per_second": 9.855, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.4938273429870605, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3284, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8095647193585338, |
|
"eval_loss": 0.34365448355674744, |
|
"eval_precision": 0.8289393939393939, |
|
"eval_recall": 0.7960992907801419, |
|
"eval_runtime": 5.0561, |
|
"eval_samples_per_second": 78.915, |
|
"eval_steps_per_second": 9.889, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.7792277336120605, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3132, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8242843661528783, |
|
"eval_loss": 0.3370875418186188, |
|
"eval_precision": 0.8389366308055628, |
|
"eval_recall": 0.8131933078741589, |
|
"eval_runtime": 5.0782, |
|
"eval_samples_per_second": 78.572, |
|
"eval_steps_per_second": 9.846, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.339967727661133, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3042, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8237632508833923, |
|
"eval_loss": 0.3370998501777649, |
|
"eval_precision": 0.8254439681567667, |
|
"eval_recall": 0.8221494817239499, |
|
"eval_runtime": 5.0538, |
|
"eval_samples_per_second": 78.95, |
|
"eval_steps_per_second": 9.893, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.7291290760040283, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3063, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8232837709585574, |
|
"eval_loss": 0.331680566072464, |
|
"eval_precision": 0.8405874144461426, |
|
"eval_recall": 0.8106928532460447, |
|
"eval_runtime": 5.0832, |
|
"eval_samples_per_second": 78.494, |
|
"eval_steps_per_second": 9.836, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 9.840410232543945, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3013, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8306935047100303, |
|
"eval_loss": 0.33042553067207336, |
|
"eval_precision": 0.8372758729160114, |
|
"eval_recall": 0.8249681760320058, |
|
"eval_runtime": 5.0615, |
|
"eval_samples_per_second": 78.83, |
|
"eval_steps_per_second": 9.878, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.2072865962982178, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2928, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8289650949173301, |
|
"eval_loss": 0.3295370638370514, |
|
"eval_precision": 0.8325081997648369, |
|
"eval_recall": 0.82569558101473, |
|
"eval_runtime": 5.0623, |
|
"eval_samples_per_second": 78.818, |
|
"eval_steps_per_second": 9.877, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.374602317810059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2864, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8324514991181657, |
|
"eval_loss": 0.32842549681663513, |
|
"eval_precision": 0.8350877192982455, |
|
"eval_recall": 0.8299690852882342, |
|
"eval_runtime": 5.0572, |
|
"eval_samples_per_second": 78.897, |
|
"eval_steps_per_second": 9.887, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 5.933995723724365, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2819, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8271551457392166, |
|
"eval_loss": 0.3254449963569641, |
|
"eval_precision": 0.8347358430876305, |
|
"eval_recall": 0.8206946717585015, |
|
"eval_runtime": 5.0559, |
|
"eval_samples_per_second": 78.917, |
|
"eval_steps_per_second": 9.889, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.023169994354248, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2877, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8280701754385965, |
|
"eval_loss": 0.32487839460372925, |
|
"eval_precision": 0.833567942942943, |
|
"eval_recall": 0.8231951263866157, |
|
"eval_runtime": 5.0643, |
|
"eval_samples_per_second": 78.787, |
|
"eval_steps_per_second": 9.873, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.3332839012146, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2819, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8333281762485303, |
|
"eval_loss": 0.3241012394428253, |
|
"eval_precision": 0.8410471369819678, |
|
"eval_recall": 0.8267412256773959, |
|
"eval_runtime": 5.0908, |
|
"eval_samples_per_second": 78.377, |
|
"eval_steps_per_second": 9.822, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.439182758331299, |
|
"learning_rate": 0.0, |
|
"loss": 0.2803, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8306935047100303, |
|
"eval_loss": 0.3238792419433594, |
|
"eval_precision": 0.8372758729160114, |
|
"eval_recall": 0.8249681760320058, |
|
"eval_runtime": 5.0792, |
|
"eval_samples_per_second": 78.556, |
|
"eval_steps_per_second": 9.844, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.35560139046340694, |
|
"train_runtime": 1953.7377, |
|
"train_samples_per_second": 37.241, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|