{ "best_metric": 0.03419998660683632, "best_model_checkpoint": "logs/FacebookAI/roberta-large_accuracy-coverage/checkpoint-772", "epoch": 7.0, "eval_steps": 500, "global_step": 1351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.81976318359375, "learning_rate": 1.901554404145078e-05, "loss": 0.0819, "step": 193 }, { "epoch": 1.0, "eval_MAE": 0.1725175678730011, "eval_R2": -0.1497003860798276, "eval_RMSE": 0.2353871762752533, "eval_loss": 0.05540712550282478, "eval_runtime": 14.0688, "eval_samples_per_second": 204.779, "eval_steps_per_second": 3.483, "step": 193 }, { "epoch": 2.0, "grad_norm": 13.916872024536133, "learning_rate": 1.8015544041450778e-05, "loss": 0.0552, "step": 386 }, { "epoch": 2.0, "eval_MAE": 0.14744172990322113, "eval_R2": 0.2686269017946187, "eval_RMSE": 0.18774136900901794, "eval_loss": 0.035246819257736206, "eval_runtime": 14.01, "eval_samples_per_second": 205.64, "eval_steps_per_second": 3.498, "step": 386 }, { "epoch": 3.0, "grad_norm": 1.4010157585144043, "learning_rate": 1.7015544041450777e-05, "loss": 0.044, "step": 579 }, { "epoch": 3.0, "eval_MAE": 0.1637636125087738, "eval_R2": -0.005046362468473786, "eval_RMSE": 0.22008149325847626, "eval_loss": 0.048435866832733154, "eval_runtime": 13.9579, "eval_samples_per_second": 206.406, "eval_steps_per_second": 3.511, "step": 579 }, { "epoch": 4.0, "grad_norm": 5.166985034942627, "learning_rate": 1.601554404145078e-05, "loss": 0.0377, "step": 772 }, { "epoch": 4.0, "eval_MAE": 0.13679075241088867, "eval_R2": 0.2903487127030966, "eval_RMSE": 0.1849323809146881, "eval_loss": 0.03419998660683632, "eval_runtime": 13.9498, "eval_samples_per_second": 206.527, "eval_steps_per_second": 3.513, "step": 772 }, { "epoch": 5.0, "grad_norm": 5.7769083976745605, "learning_rate": 1.5015544041450778e-05, "loss": 0.0309, "step": 965 }, { "epoch": 5.0, "eval_MAE": 0.1612362116575241, "eval_R2": 0.01970298411235538, "eval_RMSE": 0.21735484898090363, "eval_loss": 0.04724312946200371, "eval_runtime": 13.9679, "eval_samples_per_second": 206.259, "eval_steps_per_second": 3.508, "step": 965 }, { "epoch": 6.0, "grad_norm": 4.735123157501221, "learning_rate": 1.4015544041450779e-05, "loss": 0.0269, "step": 1158 }, { "epoch": 6.0, "eval_MAE": 0.1395827978849411, "eval_R2": 0.2431203329693593, "eval_RMSE": 0.19098703563213348, "eval_loss": 0.036476049572229385, "eval_runtime": 14.0051, "eval_samples_per_second": 205.711, "eval_steps_per_second": 3.499, "step": 1158 }, { "epoch": 7.0, "grad_norm": 11.766636848449707, "learning_rate": 1.3015544041450778e-05, "loss": 0.0232, "step": 1351 }, { "epoch": 7.0, "eval_MAE": 0.1361502856016159, "eval_R2": 0.28310775750843686, "eval_RMSE": 0.18587347865104675, "eval_loss": 0.03454894945025444, "eval_runtime": 13.9169, "eval_samples_per_second": 207.014, "eval_steps_per_second": 3.521, "step": 1351 } ], "logging_steps": 500, "max_steps": 3860, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7.515640838071338e+16, "train_batch_size": 60, "trial_name": null, "trial_params": null }