|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 234, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.1764, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2288, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2324, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.714285714285714e-05, |
|
"loss": 0.2094, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 0.2219, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 0.2048, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.857142857142858e-05, |
|
"loss": 0.2337, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_Macro F1": 0.9058241520350262, |
|
"eval_Macro Precision": 0.9220297823287216, |
|
"eval_Macro Recall": 0.9040021601099654, |
|
"eval_Micro F1": 0.9087221095334685, |
|
"eval_Micro Precision": 0.9087221095334685, |
|
"eval_Micro Recall": 0.9087221095334685, |
|
"eval_Weighted F1": 0.9097572645217704, |
|
"eval_Weighted Precision": 0.9228816407309685, |
|
"eval_Weighted Recall": 0.9087221095334685, |
|
"eval_accuracy": 0.9087221095334685, |
|
"eval_loss": 0.26681017875671387, |
|
"eval_runtime": 509.7519, |
|
"eval_samples_per_second": 3.869, |
|
"eval_steps_per_second": 0.122, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.1943, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.285714285714286e-05, |
|
"loss": 0.2022, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1662, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.714285714285714e-05, |
|
"loss": 0.1906, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4285714285714288e-05, |
|
"loss": 0.1614, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.16, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8571428571428572e-05, |
|
"loss": 0.1711, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Macro F1": 0.9330778801188252, |
|
"eval_Macro Precision": 0.9291584729568021, |
|
"eval_Macro Recall": 0.9402683529435533, |
|
"eval_Micro F1": 0.9376267748478702, |
|
"eval_Micro Precision": 0.9376267748478702, |
|
"eval_Micro Recall": 0.9376267748478702, |
|
"eval_Weighted F1": 0.9380245851334839, |
|
"eval_Weighted Precision": 0.9415569462618286, |
|
"eval_Weighted Recall": 0.9376267748478702, |
|
"eval_accuracy": 0.9376267748478702, |
|
"eval_loss": 0.1819760948419571, |
|
"eval_runtime": 52.791, |
|
"eval_samples_per_second": 37.355, |
|
"eval_steps_per_second": 1.174, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 0.1743, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2857142857142857e-05, |
|
"loss": 0.1607, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1675, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 0.1393, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.285714285714286e-06, |
|
"loss": 0.1308, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.1297, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Macro F1": 0.9504520666030198, |
|
"eval_Macro Precision": 0.9506141996378501, |
|
"eval_Macro Recall": 0.9523141162334021, |
|
"eval_Micro F1": 0.9523326572008114, |
|
"eval_Micro Precision": 0.9523326572008114, |
|
"eval_Micro Recall": 0.9523326572008114, |
|
"eval_Weighted F1": 0.9523782212526937, |
|
"eval_Weighted Precision": 0.9543848061355097, |
|
"eval_Weighted Recall": 0.9523326572008114, |
|
"eval_accuracy": 0.9523326572008114, |
|
"eval_loss": 0.1481797993183136, |
|
"eval_runtime": 51.55, |
|
"eval_samples_per_second": 38.254, |
|
"eval_steps_per_second": 1.203, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 234, |
|
"total_flos": 2.320189226545914e+18, |
|
"train_loss": 0.18204877430047745, |
|
"train_runtime": 4369.7043, |
|
"train_samples_per_second": 6.851, |
|
"train_steps_per_second": 0.054 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 234, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.320189226545914e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|