{ "best_metric": 0.6847230792045593, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/FacebookAI/roberta_base_amazon/checkpoint-550", "epoch": 3.0, "eval_steps": 50, "global_step": 570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 1.8449782133102417, "learning_rate": 1.9649122807017544e-05, "loss": 3.1004, "step": 10 }, { "epoch": 0.11, "grad_norm": 3.084895133972168, "learning_rate": 1.929824561403509e-05, "loss": 3.0595, "step": 20 }, { "epoch": 0.16, "grad_norm": 4.537363529205322, "learning_rate": 1.894736842105263e-05, "loss": 2.802, "step": 30 }, { "epoch": 0.21, "grad_norm": 5.116600036621094, "learning_rate": 1.8596491228070176e-05, "loss": 2.3885, "step": 40 }, { "epoch": 0.26, "grad_norm": 4.867352485656738, "learning_rate": 1.824561403508772e-05, "loss": 2.0556, "step": 50 }, { "epoch": 0.26, "eval_accuracy": 0.616600790513834, "eval_f1_macro": 0.46316309278720524, "eval_f1_micro": 0.616600790513834, "eval_loss": 1.7788082361221313, "eval_runtime": 1.4418, "eval_samples_per_second": 1052.82, "eval_steps_per_second": 16.645, "step": 50 }, { "epoch": 0.32, "grad_norm": 6.37776517868042, "learning_rate": 1.7894736842105264e-05, "loss": 1.7967, "step": 60 }, { "epoch": 0.37, "grad_norm": 8.01982593536377, "learning_rate": 1.754385964912281e-05, "loss": 1.5339, "step": 70 }, { "epoch": 0.42, "grad_norm": 6.54938268661499, "learning_rate": 1.719298245614035e-05, "loss": 1.398, "step": 80 }, { "epoch": 0.47, "grad_norm": 5.9745659828186035, "learning_rate": 1.6842105263157896e-05, "loss": 1.337, "step": 90 }, { "epoch": 0.53, "grad_norm": 7.999369144439697, "learning_rate": 1.649122807017544e-05, "loss": 1.2395, "step": 100 }, { "epoch": 0.53, "eval_accuracy": 0.7140974967061924, "eval_f1_macro": 0.6033304658823342, "eval_f1_micro": 0.7140974967061924, "eval_loss": 1.1504093408584595, "eval_runtime": 1.4443, "eval_samples_per_second": 1051.003, "eval_steps_per_second": 16.617, "step": 100 }, { "epoch": 0.58, "grad_norm": 5.7676262855529785, "learning_rate": 1.6140350877192984e-05, "loss": 1.1179, "step": 110 }, { "epoch": 0.63, "grad_norm": 7.363275051116943, "learning_rate": 1.578947368421053e-05, "loss": 1.1031, "step": 120 }, { "epoch": 0.68, "grad_norm": 9.764667510986328, "learning_rate": 1.543859649122807e-05, "loss": 1.1, "step": 130 }, { "epoch": 0.74, "grad_norm": 7.389791965484619, "learning_rate": 1.5087719298245615e-05, "loss": 1.0124, "step": 140 }, { "epoch": 0.79, "grad_norm": 9.022140502929688, "learning_rate": 1.4736842105263159e-05, "loss": 0.9724, "step": 150 }, { "epoch": 0.79, "eval_accuracy": 0.738471673254282, "eval_f1_macro": 0.6449968622620541, "eval_f1_micro": 0.738471673254282, "eval_loss": 0.964474618434906, "eval_runtime": 1.4518, "eval_samples_per_second": 1045.572, "eval_steps_per_second": 16.531, "step": 150 }, { "epoch": 0.84, "grad_norm": 7.70412540435791, "learning_rate": 1.4385964912280704e-05, "loss": 0.9852, "step": 160 }, { "epoch": 0.89, "grad_norm": 9.079597473144531, "learning_rate": 1.4035087719298246e-05, "loss": 0.9693, "step": 170 }, { "epoch": 0.95, "grad_norm": 10.334077835083008, "learning_rate": 1.3684210526315791e-05, "loss": 0.9577, "step": 180 }, { "epoch": 1.0, "grad_norm": 9.137624740600586, "learning_rate": 1.3333333333333333e-05, "loss": 0.9565, "step": 190 }, { "epoch": 1.05, "grad_norm": 8.054055213928223, "learning_rate": 1.2982456140350879e-05, "loss": 0.8878, "step": 200 }, { "epoch": 1.05, "eval_accuracy": 0.7733860342555995, "eval_f1_macro": 0.7001366407404722, "eval_f1_micro": 0.7733860342555995, "eval_loss": 0.870373010635376, "eval_runtime": 1.4548, "eval_samples_per_second": 1043.435, "eval_steps_per_second": 16.497, "step": 200 }, { "epoch": 1.11, "grad_norm": 8.359734535217285, "learning_rate": 1.263157894736842e-05, "loss": 0.8157, "step": 210 }, { "epoch": 1.16, "grad_norm": 11.497844696044922, "learning_rate": 1.2280701754385966e-05, "loss": 0.8643, "step": 220 }, { "epoch": 1.21, "grad_norm": 10.738656997680664, "learning_rate": 1.192982456140351e-05, "loss": 0.8478, "step": 230 }, { "epoch": 1.26, "grad_norm": 7.716694355010986, "learning_rate": 1.1578947368421053e-05, "loss": 0.7587, "step": 240 }, { "epoch": 1.32, "grad_norm": 10.482646942138672, "learning_rate": 1.1228070175438597e-05, "loss": 0.7261, "step": 250 }, { "epoch": 1.32, "eval_accuracy": 0.7766798418972332, "eval_f1_macro": 0.7188389846095605, "eval_f1_micro": 0.7766798418972332, "eval_loss": 0.815912127494812, "eval_runtime": 1.4582, "eval_samples_per_second": 1041.022, "eval_steps_per_second": 16.459, "step": 250 }, { "epoch": 1.37, "grad_norm": 8.698047637939453, "learning_rate": 1.0877192982456142e-05, "loss": 0.7495, "step": 260 }, { "epoch": 1.42, "grad_norm": 7.04860782623291, "learning_rate": 1.0526315789473684e-05, "loss": 0.7562, "step": 270 }, { "epoch": 1.47, "grad_norm": 16.800617218017578, "learning_rate": 1.017543859649123e-05, "loss": 0.7407, "step": 280 }, { "epoch": 1.53, "grad_norm": 11.83535385131836, "learning_rate": 9.824561403508772e-06, "loss": 0.7224, "step": 290 }, { "epoch": 1.58, "grad_norm": 8.78000545501709, "learning_rate": 9.473684210526315e-06, "loss": 0.8083, "step": 300 }, { "epoch": 1.58, "eval_accuracy": 0.7878787878787878, "eval_f1_macro": 0.7240527586663765, "eval_f1_micro": 0.7878787878787878, "eval_loss": 0.7717685699462891, "eval_runtime": 1.4583, "eval_samples_per_second": 1040.974, "eval_steps_per_second": 16.458, "step": 300 }, { "epoch": 1.63, "grad_norm": 8.961960792541504, "learning_rate": 9.12280701754386e-06, "loss": 0.7576, "step": 310 }, { "epoch": 1.68, "grad_norm": 9.152416229248047, "learning_rate": 8.771929824561405e-06, "loss": 0.7021, "step": 320 }, { "epoch": 1.74, "grad_norm": 12.08969783782959, "learning_rate": 8.421052631578948e-06, "loss": 0.7218, "step": 330 }, { "epoch": 1.79, "grad_norm": 11.890509605407715, "learning_rate": 8.070175438596492e-06, "loss": 0.806, "step": 340 }, { "epoch": 1.84, "grad_norm": 13.19563102722168, "learning_rate": 7.719298245614036e-06, "loss": 0.7209, "step": 350 }, { "epoch": 1.84, "eval_accuracy": 0.7997364953886693, "eval_f1_macro": 0.7414355218281033, "eval_f1_micro": 0.7997364953886693, "eval_loss": 0.7306675910949707, "eval_runtime": 1.4597, "eval_samples_per_second": 1039.954, "eval_steps_per_second": 16.442, "step": 350 }, { "epoch": 1.89, "grad_norm": 10.341647148132324, "learning_rate": 7.368421052631579e-06, "loss": 0.6538, "step": 360 }, { "epoch": 1.95, "grad_norm": 10.095640182495117, "learning_rate": 7.017543859649123e-06, "loss": 0.7268, "step": 370 }, { "epoch": 2.0, "grad_norm": 14.569330215454102, "learning_rate": 6.666666666666667e-06, "loss": 0.7441, "step": 380 }, { "epoch": 2.05, "grad_norm": 8.13227653503418, "learning_rate": 6.31578947368421e-06, "loss": 0.6634, "step": 390 }, { "epoch": 2.11, "grad_norm": 8.250567436218262, "learning_rate": 5.964912280701755e-06, "loss": 0.6535, "step": 400 }, { "epoch": 2.11, "eval_accuracy": 0.8043478260869565, "eval_f1_macro": 0.7452177798047408, "eval_f1_micro": 0.8043478260869565, "eval_loss": 0.7204875349998474, "eval_runtime": 1.4623, "eval_samples_per_second": 1038.057, "eval_steps_per_second": 16.412, "step": 400 }, { "epoch": 2.16, "grad_norm": 10.287467002868652, "learning_rate": 5.6140350877192985e-06, "loss": 0.569, "step": 410 }, { "epoch": 2.21, "grad_norm": 8.486634254455566, "learning_rate": 5.263157894736842e-06, "loss": 0.6313, "step": 420 }, { "epoch": 2.26, "grad_norm": 8.879149436950684, "learning_rate": 4.912280701754386e-06, "loss": 0.5844, "step": 430 }, { "epoch": 2.32, "grad_norm": 10.877876281738281, "learning_rate": 4.56140350877193e-06, "loss": 0.64, "step": 440 }, { "epoch": 2.37, "grad_norm": 7.0523834228515625, "learning_rate": 4.210526315789474e-06, "loss": 0.6283, "step": 450 }, { "epoch": 2.37, "eval_accuracy": 0.8089591567852438, "eval_f1_macro": 0.7498355070996475, "eval_f1_micro": 0.8089591567852438, "eval_loss": 0.7047255635261536, "eval_runtime": 1.4632, "eval_samples_per_second": 1037.454, "eval_steps_per_second": 16.402, "step": 450 }, { "epoch": 2.42, "grad_norm": 8.621397972106934, "learning_rate": 3.859649122807018e-06, "loss": 0.5349, "step": 460 }, { "epoch": 2.47, "grad_norm": 7.53057861328125, "learning_rate": 3.5087719298245615e-06, "loss": 0.6345, "step": 470 }, { "epoch": 2.53, "grad_norm": 12.083460807800293, "learning_rate": 3.157894736842105e-06, "loss": 0.6337, "step": 480 }, { "epoch": 2.58, "grad_norm": 10.678044319152832, "learning_rate": 2.8070175438596493e-06, "loss": 0.5657, "step": 490 }, { "epoch": 2.63, "grad_norm": 8.628580093383789, "learning_rate": 2.456140350877193e-06, "loss": 0.5214, "step": 500 }, { "epoch": 2.63, "eval_accuracy": 0.810935441370224, "eval_f1_macro": 0.7541066085678262, "eval_f1_micro": 0.810935441370224, "eval_loss": 0.687921404838562, "eval_runtime": 1.4612, "eval_samples_per_second": 1038.859, "eval_steps_per_second": 16.425, "step": 500 }, { "epoch": 2.68, "grad_norm": 11.350371360778809, "learning_rate": 2.105263157894737e-06, "loss": 0.5632, "step": 510 }, { "epoch": 2.74, "grad_norm": 10.923992156982422, "learning_rate": 1.7543859649122807e-06, "loss": 0.602, "step": 520 }, { "epoch": 2.79, "grad_norm": 11.192131996154785, "learning_rate": 1.4035087719298246e-06, "loss": 0.6038, "step": 530 }, { "epoch": 2.84, "grad_norm": 9.871031761169434, "learning_rate": 1.0526315789473685e-06, "loss": 0.5776, "step": 540 }, { "epoch": 2.89, "grad_norm": 9.84094524383545, "learning_rate": 7.017543859649123e-07, "loss": 0.5808, "step": 550 }, { "epoch": 2.89, "eval_accuracy": 0.80566534914361, "eval_f1_macro": 0.7451626163175262, "eval_f1_micro": 0.80566534914361, "eval_loss": 0.6847230792045593, "eval_runtime": 1.4607, "eval_samples_per_second": 1039.224, "eval_steps_per_second": 16.43, "step": 550 }, { "epoch": 2.95, "grad_norm": 8.289957046508789, "learning_rate": 3.5087719298245616e-07, "loss": 0.6075, "step": 560 }, { "epoch": 3.0, "grad_norm": 13.524730682373047, "learning_rate": 0.0, "loss": 0.596, "step": 570 }, { "epoch": 3.0, "step": 570, "total_flos": 2400025338445824.0, "train_loss": 0.9787228534096166, "train_runtime": 143.678, "train_samples_per_second": 253.567, "train_steps_per_second": 3.967 } ], "logging_steps": 10, "max_steps": 570, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 2400025338445824.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }