|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.47084125631106155, |
|
"eval_steps": 187, |
|
"global_step": 2244, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 2.1276595744680852e-07, |
|
"loss": 1.5104, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1784, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_validation_loss": 0.956233024597168, |
|
"eval_validation_runtime": 190.0844, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_validation_privacy_sources_loss": 0.956233024597168, |
|
"eval_validation_privacy_sources_runtime": 190.1247, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_validation_agenda_digitale_loss": 1.5249053239822388, |
|
"eval_validation_agenda_digitale_runtime": 170.8208, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_validation_legal_articles_loss": 1.4138059616088867, |
|
"eval_validation_legal_articles_runtime": 274.0975, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_validation_leggepertutti_loss": 1.2507734298706055, |
|
"eval_validation_leggepertutti_runtime": 26.1774, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.5625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0602, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_validation_loss": 0.9068162441253662, |
|
"eval_validation_runtime": 190.0623, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_validation_privacy_sources_loss": 0.9068162441253662, |
|
"eval_validation_privacy_sources_runtime": 190.1749, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_validation_agenda_digitale_loss": 1.5025277137756348, |
|
"eval_validation_agenda_digitale_runtime": 170.8116, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_validation_legal_articles_loss": 1.3901658058166504, |
|
"eval_validation_legal_articles_runtime": 274.2187, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_validation_leggepertutti_loss": 1.2389607429504395, |
|
"eval_validation_leggepertutti_runtime": 26.1823, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9973, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_validation_loss": 0.8726317286491394, |
|
"eval_validation_runtime": 190.0376, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_validation_privacy_sources_loss": 0.8726317286491394, |
|
"eval_validation_privacy_sources_runtime": 189.9977, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_validation_agenda_digitale_loss": 1.4876974821090698, |
|
"eval_validation_agenda_digitale_runtime": 170.7148, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_validation_legal_articles_loss": 1.3721040487289429, |
|
"eval_validation_legal_articles_runtime": 273.9951, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_validation_leggepertutti_loss": 1.213807463645935, |
|
"eval_validation_leggepertutti_runtime": 26.1576, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9936, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_validation_loss": 0.8519095778465271, |
|
"eval_validation_runtime": 190.0914, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_validation_privacy_sources_loss": 0.8519095778465271, |
|
"eval_validation_privacy_sources_runtime": 190.1871, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_validation_agenda_digitale_loss": 1.4739274978637695, |
|
"eval_validation_agenda_digitale_runtime": 170.8961, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_validation_legal_articles_loss": 1.3598031997680664, |
|
"eval_validation_legal_articles_runtime": 274.1856, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_validation_leggepertutti_loss": 1.2074390649795532, |
|
"eval_validation_leggepertutti_runtime": 26.177, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9671, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_validation_loss": 0.8343605399131775, |
|
"eval_validation_runtime": 190.0578, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_validation_privacy_sources_loss": 0.8343605399131775, |
|
"eval_validation_privacy_sources_runtime": 190.0981, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_validation_agenda_digitale_loss": 1.4663511514663696, |
|
"eval_validation_agenda_digitale_runtime": 170.7988, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_validation_legal_articles_loss": 1.3504709005355835, |
|
"eval_validation_legal_articles_runtime": 274.1096, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_validation_leggepertutti_loss": 1.19623863697052, |
|
"eval_validation_leggepertutti_runtime": 26.1751, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 18.625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9582, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_validation_loss": 0.8237400650978088, |
|
"eval_validation_runtime": 190.0819, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_validation_privacy_sources_loss": 0.8237400650978088, |
|
"eval_validation_privacy_sources_runtime": 190.0975, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_validation_agenda_digitale_loss": 1.4592102766036987, |
|
"eval_validation_agenda_digitale_runtime": 170.7961, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_validation_legal_articles_loss": 1.341143250465393, |
|
"eval_validation_legal_articles_runtime": 274.155, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_validation_leggepertutti_loss": 1.1890021562576294, |
|
"eval_validation_leggepertutti_runtime": 26.1699, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9456, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_loss": 0.8131315112113953, |
|
"eval_validation_runtime": 189.7603, |
|
"eval_validation_samples_per_second": 0.727, |
|
"eval_validation_steps_per_second": 0.727, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_privacy_sources_loss": 0.8131315112113953, |
|
"eval_validation_privacy_sources_runtime": 189.8097, |
|
"eval_validation_privacy_sources_samples_per_second": 0.727, |
|
"eval_validation_privacy_sources_steps_per_second": 0.727, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_agenda_digitale_loss": 1.4516360759735107, |
|
"eval_validation_agenda_digitale_runtime": 170.4955, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.727, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.727, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_legal_articles_loss": 1.3339463472366333, |
|
"eval_validation_legal_articles_runtime": 273.5945, |
|
"eval_validation_legal_articles_samples_per_second": 0.727, |
|
"eval_validation_legal_articles_steps_per_second": 0.727, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_leggepertutti_loss": 1.1844114065170288, |
|
"eval_validation_leggepertutti_runtime": 26.1031, |
|
"eval_validation_leggepertutti_samples_per_second": 0.728, |
|
"eval_validation_leggepertutti_steps_per_second": 0.728, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9118, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_validation_loss": 0.8056552410125732, |
|
"eval_validation_runtime": 189.1065, |
|
"eval_validation_samples_per_second": 0.73, |
|
"eval_validation_steps_per_second": 0.73, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_validation_privacy_sources_loss": 0.8056552410125732, |
|
"eval_validation_privacy_sources_runtime": 189.1418, |
|
"eval_validation_privacy_sources_samples_per_second": 0.73, |
|
"eval_validation_privacy_sources_steps_per_second": 0.73, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_validation_agenda_digitale_loss": 1.4482883214950562, |
|
"eval_validation_agenda_digitale_runtime": 169.9314, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.73, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.73, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_validation_legal_articles_loss": 1.3298695087432861, |
|
"eval_validation_legal_articles_runtime": 272.7633, |
|
"eval_validation_legal_articles_samples_per_second": 0.73, |
|
"eval_validation_legal_articles_steps_per_second": 0.73, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_validation_leggepertutti_loss": 1.178873896598816, |
|
"eval_validation_leggepertutti_runtime": 26.0447, |
|
"eval_validation_leggepertutti_samples_per_second": 0.73, |
|
"eval_validation_leggepertutti_steps_per_second": 0.73, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9018, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_validation_loss": 0.7955409288406372, |
|
"eval_validation_runtime": 190.0308, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_validation_privacy_sources_loss": 0.7955409288406372, |
|
"eval_validation_privacy_sources_runtime": 190.1015, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_validation_agenda_digitale_loss": 1.4420207738876343, |
|
"eval_validation_agenda_digitale_runtime": 170.7677, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_validation_legal_articles_loss": 1.3251116275787354, |
|
"eval_validation_legal_articles_runtime": 274.0824, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_validation_leggepertutti_loss": 1.1761751174926758, |
|
"eval_validation_leggepertutti_runtime": 26.1786, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8911, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_validation_loss": 0.7886275053024292, |
|
"eval_validation_runtime": 190.0169, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_validation_privacy_sources_loss": 0.7886275053024292, |
|
"eval_validation_privacy_sources_runtime": 189.9755, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_validation_agenda_digitale_loss": 1.437984585762024, |
|
"eval_validation_agenda_digitale_runtime": 170.7137, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_validation_legal_articles_loss": 1.3213441371917725, |
|
"eval_validation_legal_articles_runtime": 273.9792, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_validation_leggepertutti_loss": 1.1724990606307983, |
|
"eval_validation_leggepertutti_runtime": 26.1471, |
|
"eval_validation_leggepertutti_samples_per_second": 0.727, |
|
"eval_validation_leggepertutti_steps_per_second": 0.727, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8824, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_validation_loss": 0.7812964916229248, |
|
"eval_validation_runtime": 189.9852, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_validation_privacy_sources_loss": 0.7812964916229248, |
|
"eval_validation_privacy_sources_runtime": 190.0648, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_validation_agenda_digitale_loss": 1.4352736473083496, |
|
"eval_validation_agenda_digitale_runtime": 170.7674, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_validation_legal_articles_loss": 1.3146668672561646, |
|
"eval_validation_legal_articles_runtime": 274.0088, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_validation_leggepertutti_loss": 1.1654175519943237, |
|
"eval_validation_leggepertutti_runtime": 26.1671, |
|
"eval_validation_leggepertutti_samples_per_second": 0.726, |
|
"eval_validation_leggepertutti_steps_per_second": 0.726, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8871, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_validation_loss": 0.7756069898605347, |
|
"eval_validation_runtime": 190.0643, |
|
"eval_validation_samples_per_second": 0.726, |
|
"eval_validation_steps_per_second": 0.726, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_validation_privacy_sources_loss": 0.7756069898605347, |
|
"eval_validation_privacy_sources_runtime": 190.0095, |
|
"eval_validation_privacy_sources_samples_per_second": 0.726, |
|
"eval_validation_privacy_sources_steps_per_second": 0.726, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_validation_agenda_digitale_loss": 1.431518316268921, |
|
"eval_validation_agenda_digitale_runtime": 170.7517, |
|
"eval_validation_agenda_digitale_samples_per_second": 0.726, |
|
"eval_validation_agenda_digitale_steps_per_second": 0.726, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_validation_legal_articles_loss": 1.3110154867172241, |
|
"eval_validation_legal_articles_runtime": 274.0835, |
|
"eval_validation_legal_articles_samples_per_second": 0.726, |
|
"eval_validation_legal_articles_steps_per_second": 0.726, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_validation_leggepertutti_loss": 1.161647915840149, |
|
"eval_validation_leggepertutti_runtime": 26.1417, |
|
"eval_validation_leggepertutti_samples_per_second": 0.727, |
|
"eval_validation_leggepertutti_steps_per_second": 0.727, |
|
"step": 2244 |
|
} |
|
], |
|
"logging_steps": 187, |
|
"max_steps": 4765, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 187, |
|
"total_flos": 2.5097123275091214e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|