{ "best_metric": 1.698286533355713, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/distilbert/distilroberta_base_scotus/checkpoint-200", "epoch": 2.5316455696202533, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "grad_norm": 4.000258445739746, "learning_rate": 1.9156118143459917e-05, "loss": 2.4305, "step": 10 }, { "epoch": 0.25, "grad_norm": 2.501706838607788, "learning_rate": 1.8312236286919833e-05, "loss": 2.2057, "step": 20 }, { "epoch": 0.38, "grad_norm": 1.6157236099243164, "learning_rate": 1.746835443037975e-05, "loss": 2.1717, "step": 30 }, { "epoch": 0.51, "grad_norm": 2.867032527923584, "learning_rate": 1.662447257383966e-05, "loss": 2.1116, "step": 40 }, { "epoch": 0.63, "grad_norm": 3.3913872241973877, "learning_rate": 1.578059071729958e-05, "loss": 2.0217, "step": 50 }, { "epoch": 0.63, "eval_accuracy": 0.38785714285714284, "eval_f1_macro": 0.0997314847642978, "eval_f1_micro": 0.38785714285714284, "eval_loss": 2.0066702365875244, "eval_runtime": 0.7437, "eval_samples_per_second": 1882.416, "eval_steps_per_second": 29.581, "step": 50 }, { "epoch": 0.76, "grad_norm": 3.9374263286590576, "learning_rate": 1.4936708860759495e-05, "loss": 1.9817, "step": 60 }, { "epoch": 0.89, "grad_norm": 4.026462078094482, "learning_rate": 1.4092827004219412e-05, "loss": 1.8422, "step": 70 }, { "epoch": 1.01, "grad_norm": 4.174111843109131, "learning_rate": 1.3248945147679326e-05, "loss": 1.8743, "step": 80 }, { "epoch": 1.14, "grad_norm": 4.04383659362793, "learning_rate": 1.240506329113924e-05, "loss": 1.7323, "step": 90 }, { "epoch": 1.27, "grad_norm": 8.888038635253906, "learning_rate": 1.1561181434599158e-05, "loss": 1.6626, "step": 100 }, { "epoch": 1.27, "eval_accuracy": 0.4421428571428571, "eval_f1_macro": 0.12879339040210455, "eval_f1_micro": 0.4421428571428571, "eval_loss": 1.8173670768737793, "eval_runtime": 0.7419, "eval_samples_per_second": 1887.139, "eval_steps_per_second": 29.655, "step": 100 }, { "epoch": 1.39, "grad_norm": 4.270557403564453, "learning_rate": 1.0717299578059072e-05, "loss": 1.7792, "step": 110 }, { "epoch": 1.52, "grad_norm": 5.571478366851807, "learning_rate": 9.87341772151899e-06, "loss": 1.7678, "step": 120 }, { "epoch": 1.65, "grad_norm": 3.9349613189697266, "learning_rate": 9.029535864978903e-06, "loss": 1.7242, "step": 130 }, { "epoch": 1.77, "grad_norm": 9.635564804077148, "learning_rate": 8.18565400843882e-06, "loss": 1.6204, "step": 140 }, { "epoch": 1.9, "grad_norm": 9.567301750183105, "learning_rate": 7.341772151898735e-06, "loss": 1.7473, "step": 150 }, { "epoch": 1.9, "eval_accuracy": 0.44857142857142857, "eval_f1_macro": 0.1535216694972622, "eval_f1_micro": 0.44857142857142857, "eval_loss": 1.7734556198120117, "eval_runtime": 0.7466, "eval_samples_per_second": 1875.068, "eval_steps_per_second": 29.465, "step": 150 }, { "epoch": 2.03, "grad_norm": 6.150979042053223, "learning_rate": 6.49789029535865e-06, "loss": 1.6948, "step": 160 }, { "epoch": 2.15, "grad_norm": 6.748580455780029, "learning_rate": 5.654008438818566e-06, "loss": 1.606, "step": 170 }, { "epoch": 2.28, "grad_norm": 6.995398998260498, "learning_rate": 4.8101265822784815e-06, "loss": 1.6395, "step": 180 }, { "epoch": 2.41, "grad_norm": 5.082231521606445, "learning_rate": 3.9662447257383965e-06, "loss": 1.5691, "step": 190 }, { "epoch": 2.53, "grad_norm": 3.8263602256774902, "learning_rate": 3.1223628691983127e-06, "loss": 1.5993, "step": 200 }, { "epoch": 2.53, "eval_accuracy": 0.49142857142857144, "eval_f1_macro": 0.18881283313886577, "eval_f1_micro": 0.49142857142857144, "eval_loss": 1.698286533355713, "eval_runtime": 0.747, "eval_samples_per_second": 1874.133, "eval_steps_per_second": 29.451, "step": 200 } ], "logging_steps": 10, "max_steps": 237, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 423978829086720.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }