{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.20833333333333334,
      "grad_norm": 0.03092513605952263,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.4044,
      "step": 10
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 0.03055603615939617,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.3836,
      "step": 20
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.023527173325419426,
      "learning_rate": 3e-06,
      "loss": 0.0017,
      "step": 30
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.02501656673848629,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0008,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.968421052631579,
      "eval_f1": 0.9770992366412214,
      "eval_loss": 0.23919111490249634,
      "eval_precision": 0.9696969696969697,
      "eval_recall": 0.9846153846153847,
      "eval_runtime": 2.2913,
      "eval_samples_per_second": 41.461,
      "eval_steps_per_second": 2.619,
      "step": 48
    },
    {
      "epoch": 1.0416666666666667,
      "grad_norm": 0.027175450697541237,
      "learning_rate": 5e-06,
      "loss": 0.0007,
      "step": 50
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.017659997567534447,
      "learning_rate": 6e-06,
      "loss": 0.0006,
      "step": 60
    },
    {
      "epoch": 1.4583333333333333,
      "grad_norm": 0.01459481567144394,
      "learning_rate": 7.000000000000001e-06,
      "loss": 0.0007,
      "step": 70
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.011688518337905407,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0028,
      "step": 80
    },
    {
      "epoch": 1.875,
      "grad_norm": 0.08688002079725266,
      "learning_rate": 9e-06,
      "loss": 0.0006,
      "step": 90
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9052631578947369,
      "eval_f1": 0.9312977099236641,
      "eval_loss": 0.7143413424491882,
      "eval_precision": 0.9242424242424242,
      "eval_recall": 0.9384615384615385,
      "eval_runtime": 2.2819,
      "eval_samples_per_second": 41.632,
      "eval_steps_per_second": 2.629,
      "step": 96
    },
    {
      "epoch": 2.0833333333333335,
      "grad_norm": 0.029978642240166664,
      "learning_rate": 1e-05,
      "loss": 0.1005,
      "step": 100
    },
    {
      "epoch": 2.2916666666666665,
      "grad_norm": 0.010506043210625648,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.0003,
      "step": 110
    },
    {
      "epoch": 2.5,
      "grad_norm": 413.86444091796875,
      "learning_rate": 1.2e-05,
      "loss": 0.1257,
      "step": 120
    },
    {
      "epoch": 2.7083333333333335,
      "grad_norm": 0.022496210411190987,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.0018,
      "step": 130
    },
    {
      "epoch": 2.9166666666666665,
      "grad_norm": 4.833926677703857,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.0823,
      "step": 140
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9473684210526315,
      "eval_f1": 0.962406015037594,
      "eval_loss": 0.2848361134529114,
      "eval_precision": 0.9411764705882353,
      "eval_recall": 0.9846153846153847,
      "eval_runtime": 2.3284,
      "eval_samples_per_second": 40.8,
      "eval_steps_per_second": 2.577,
      "step": 144
    },
    {
      "epoch": 3.125,
      "grad_norm": 133.06631469726562,
      "learning_rate": 1.5e-05,
      "loss": 0.0298,
      "step": 150
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 5.606410503387451,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.1977,
      "step": 160
    },
    {
      "epoch": 3.5416666666666665,
      "grad_norm": 1.3438758850097656,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.0345,
      "step": 170
    },
    {
      "epoch": 3.75,
      "grad_norm": 219.62388610839844,
      "learning_rate": 1.8e-05,
      "loss": 0.2788,
      "step": 180
    },
    {
      "epoch": 3.9583333333333335,
      "grad_norm": 0.02319713868200779,
      "learning_rate": 1.9e-05,
      "loss": 0.0778,
      "step": 190
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8947368421052632,
      "eval_f1": 0.9285714285714286,
      "eval_loss": 0.73874431848526,
      "eval_precision": 0.8666666666666667,
      "eval_recall": 1.0,
      "eval_runtime": 2.3292,
      "eval_samples_per_second": 40.786,
      "eval_steps_per_second": 2.576,
      "step": 192
    },
    {
      "epoch": 4.166666666666667,
      "grad_norm": 0.07177528738975525,
      "learning_rate": 2e-05,
      "loss": 0.2188,
      "step": 200
    },
    {
      "epoch": 4.375,
      "grad_norm": 0.025927864015102386,
      "learning_rate": 2.1e-05,
      "loss": 0.0018,
      "step": 210
    },
    {
      "epoch": 4.583333333333333,
      "grad_norm": 0.25771522521972656,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.2547,
      "step": 220
    },
    {
      "epoch": 4.791666666666667,
      "grad_norm": 0.1835576444864273,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.1684,
      "step": 230
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.051910556852817535,
      "learning_rate": 2.4e-05,
      "loss": 0.1683,
      "step": 240
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9263157894736842,
      "eval_f1": 0.9465648854961831,
      "eval_loss": 0.482939213514328,
      "eval_precision": 0.9393939393939394,
      "eval_recall": 0.9538461538461539,
      "eval_runtime": 2.321,
      "eval_samples_per_second": 40.931,
      "eval_steps_per_second": 2.585,
      "step": 240
    }
  ],
  "logging_steps": 10,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 442667397580800.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}