{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1883, "global_step": 18824, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10003187420314492, "grad_norm": 39.029380798339844, "learning_rate": 9.976625584360391e-07, "loss": 3.6326, "step": 1883 }, { "epoch": 0.10003187420314492, "eval_nli-pairs_loss": 2.6952593326568604, "eval_nli-pairs_runtime": 25.731, "eval_nli-pairs_samples_per_second": 264.584, "eval_nli-pairs_steps_per_second": 16.556, "step": 1883 }, { "epoch": 0.10003187420314492, "eval_scitail-pairs-pos_loss": 2.172569990158081, "eval_scitail-pairs-pos_runtime": 6.2772, "eval_scitail-pairs-pos_samples_per_second": 207.736, "eval_scitail-pairs-pos_steps_per_second": 13.063, "step": 1883 }, { "epoch": 0.10003187420314492, "eval_qnli-contrastive_loss": 2.702913999557495, "eval_qnli-contrastive_runtime": 16.475, "eval_qnli-contrastive_samples_per_second": 331.593, "eval_qnli-contrastive_steps_per_second": 20.759, "step": 1883 }, { "epoch": 0.20006374840628985, "grad_norm": 25.459535598754883, "learning_rate": 1.9974500637484067e-06, "loss": 1.7665, "step": 3766 }, { "epoch": 0.20006374840628985, "eval_nli-pairs_loss": 1.2885302305221558, "eval_nli-pairs_runtime": 25.4564, "eval_nli-pairs_samples_per_second": 267.438, "eval_nli-pairs_steps_per_second": 16.734, "step": 3766 }, { "epoch": 0.20006374840628985, "eval_scitail-pairs-pos_loss": 0.9637606143951416, "eval_scitail-pairs-pos_runtime": 6.1565, "eval_scitail-pairs-pos_samples_per_second": 211.809, "eval_scitail-pairs-pos_steps_per_second": 13.319, "step": 3766 }, { "epoch": 0.20006374840628985, "eval_qnli-contrastive_loss": 1.713547945022583, "eval_qnli-contrastive_runtime": 16.4307, "eval_qnli-contrastive_samples_per_second": 332.487, "eval_qnli-contrastive_steps_per_second": 20.815, "step": 3766 }, { "epoch": 0.3000956226094348, "grad_norm": 0.8201059103012085, "learning_rate": 2.9977688057798558e-06, "loss": 1.1522, "step": 5649 }, { "epoch": 0.3000956226094348, "eval_nli-pairs_loss": 0.9093547463417053, "eval_nli-pairs_runtime": 25.1271, "eval_nli-pairs_samples_per_second": 270.943, "eval_nli-pairs_steps_per_second": 16.954, "step": 5649 }, { "epoch": 0.3000956226094348, "eval_scitail-pairs-pos_loss": 0.7571232914924622, "eval_scitail-pairs-pos_runtime": 5.9021, "eval_scitail-pairs-pos_samples_per_second": 220.937, "eval_scitail-pairs-pos_steps_per_second": 13.893, "step": 5649 }, { "epoch": 0.3000956226094348, "eval_qnli-contrastive_loss": 0.91651451587677, "eval_qnli-contrastive_runtime": 16.2309, "eval_qnli-contrastive_samples_per_second": 336.579, "eval_qnli-contrastive_steps_per_second": 21.071, "step": 5649 }, { "epoch": 0.4001274968125797, "grad_norm": 12.970890045166016, "learning_rate": 3.9975563110922225e-06, "loss": 0.9533, "step": 7532 }, { "epoch": 0.4001274968125797, "eval_nli-pairs_loss": 0.7290090322494507, "eval_nli-pairs_runtime": 25.3154, "eval_nli-pairs_samples_per_second": 268.928, "eval_nli-pairs_steps_per_second": 16.828, "step": 7532 }, { "epoch": 0.4001274968125797, "eval_scitail-pairs-pos_loss": 0.6498324275016785, "eval_scitail-pairs-pos_runtime": 6.0764, "eval_scitail-pairs-pos_samples_per_second": 214.6, "eval_scitail-pairs-pos_steps_per_second": 13.495, "step": 7532 }, { "epoch": 0.4001274968125797, "eval_qnli-contrastive_loss": 0.4303818643093109, "eval_qnli-contrastive_runtime": 16.4463, "eval_qnli-contrastive_samples_per_second": 332.172, "eval_qnli-contrastive_steps_per_second": 20.795, "step": 7532 }, { "epoch": 0.5001593710157246, "grad_norm": 10.865135192871094, "learning_rate": 4.9973438164045905e-06, "loss": 0.8013, "step": 9415 }, { "epoch": 0.5001593710157246, "eval_nli-pairs_loss": 0.6431913375854492, "eval_nli-pairs_runtime": 25.4337, "eval_nli-pairs_samples_per_second": 267.676, "eval_nli-pairs_steps_per_second": 16.749, "step": 9415 }, { "epoch": 0.5001593710157246, "eval_scitail-pairs-pos_loss": 0.6006649732589722, "eval_scitail-pairs-pos_runtime": 6.199, "eval_scitail-pairs-pos_samples_per_second": 210.355, "eval_scitail-pairs-pos_steps_per_second": 13.228, "step": 9415 }, { "epoch": 0.5001593710157246, "eval_qnli-contrastive_loss": 0.25907495617866516, "eval_qnli-contrastive_runtime": 16.4896, "eval_qnli-contrastive_samples_per_second": 331.299, "eval_qnli-contrastive_steps_per_second": 20.74, "step": 9415 }, { "epoch": 0.6001912452188696, "grad_norm": 2.3549954891204834, "learning_rate": 5.997662558436039e-06, "loss": 0.6568, "step": 11298 }, { "epoch": 0.6001912452188696, "eval_nli-pairs_loss": 0.5626155734062195, "eval_nli-pairs_runtime": 25.1226, "eval_nli-pairs_samples_per_second": 270.991, "eval_nli-pairs_steps_per_second": 16.957, "step": 11298 }, { "epoch": 0.6001912452188696, "eval_scitail-pairs-pos_loss": 0.5481033325195312, "eval_scitail-pairs-pos_runtime": 6.0513, "eval_scitail-pairs-pos_samples_per_second": 215.492, "eval_scitail-pairs-pos_steps_per_second": 13.551, "step": 11298 }, { "epoch": 0.6001912452188696, "eval_qnli-contrastive_loss": 0.13647136092185974, "eval_qnli-contrastive_runtime": 16.3856, "eval_qnli-contrastive_samples_per_second": 333.402, "eval_qnli-contrastive_steps_per_second": 20.872, "step": 11298 }, { "epoch": 0.7002231194220144, "grad_norm": 10.994942665100098, "learning_rate": 6.997450063748406e-06, "loss": 0.6095, "step": 13181 }, { "epoch": 0.7002231194220144, "eval_nli-pairs_loss": 0.5226004719734192, "eval_nli-pairs_runtime": 25.203, "eval_nli-pairs_samples_per_second": 270.127, "eval_nli-pairs_steps_per_second": 16.903, "step": 13181 }, { "epoch": 0.7002231194220144, "eval_scitail-pairs-pos_loss": 0.5108869075775146, "eval_scitail-pairs-pos_runtime": 6.1126, "eval_scitail-pairs-pos_samples_per_second": 213.331, "eval_scitail-pairs-pos_steps_per_second": 13.415, "step": 13181 }, { "epoch": 0.7002231194220144, "eval_qnli-contrastive_loss": 0.16431590914726257, "eval_qnli-contrastive_runtime": 16.4372, "eval_qnli-contrastive_samples_per_second": 332.355, "eval_qnli-contrastive_steps_per_second": 20.806, "step": 13181 }, { "epoch": 0.8002549936251594, "grad_norm": 8.826902389526367, "learning_rate": 7.997768805779857e-06, "loss": 0.5694, "step": 15064 }, { "epoch": 0.8002549936251594, "eval_nli-pairs_loss": 0.49213743209838867, "eval_nli-pairs_runtime": 25.0892, "eval_nli-pairs_samples_per_second": 271.352, "eval_nli-pairs_steps_per_second": 16.979, "step": 15064 }, { "epoch": 0.8002549936251594, "eval_scitail-pairs-pos_loss": 0.5194270610809326, "eval_scitail-pairs-pos_runtime": 6.261, "eval_scitail-pairs-pos_samples_per_second": 208.273, "eval_scitail-pairs-pos_steps_per_second": 13.097, "step": 15064 }, { "epoch": 0.8002549936251594, "eval_qnli-contrastive_loss": 0.05173656344413757, "eval_qnli-contrastive_runtime": 16.3578, "eval_qnli-contrastive_samples_per_second": 333.97, "eval_qnli-contrastive_steps_per_second": 20.908, "step": 15064 }, { "epoch": 0.9002868678283042, "grad_norm": 0.4369502067565918, "learning_rate": 8.997556311092223e-06, "loss": 0.5375, "step": 16947 }, { "epoch": 0.9002868678283042, "eval_nli-pairs_loss": 0.5060996413230896, "eval_nli-pairs_runtime": 25.3561, "eval_nli-pairs_samples_per_second": 268.496, "eval_nli-pairs_steps_per_second": 16.801, "step": 16947 }, { "epoch": 0.9002868678283042, "eval_scitail-pairs-pos_loss": 0.5642966628074646, "eval_scitail-pairs-pos_runtime": 6.1557, "eval_scitail-pairs-pos_samples_per_second": 211.837, "eval_scitail-pairs-pos_steps_per_second": 13.321, "step": 16947 }, { "epoch": 0.9002868678283042, "eval_qnli-contrastive_loss": 0.046243228018283844, "eval_qnli-contrastive_runtime": 16.4399, "eval_qnli-contrastive_samples_per_second": 332.302, "eval_qnli-contrastive_steps_per_second": 20.803, "step": 16947 } ], "logging_steps": 1883, "max_steps": 37648, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 18824, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 28, "trial_name": null, "trial_params": null }