{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 471, "global_step": 4710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 17.846229553222656, "learning_rate": 3.004181408813123e-06, "loss": 4.4848, "step": 471 }, { "epoch": 0.1, "eval_nli-pairs_loss": 3.227689504623413, "eval_nli-pairs_runtime": 23.5758, "eval_nli-pairs_samples_per_second": 288.77, "eval_nli-pairs_steps_per_second": 18.069, "step": 471 }, { "epoch": 0.1, "eval_scitail-pairs-pos_loss": 2.469686508178711, "eval_scitail-pairs-pos_runtime": 5.4679, "eval_scitail-pairs-pos_samples_per_second": 238.485, "eval_scitail-pairs-pos_steps_per_second": 14.997, "step": 471 }, { "epoch": 0.1, "eval_qnli-contrastive_loss": 3.3142430782318115, "eval_qnli-contrastive_runtime": 15.7426, "eval_qnli-contrastive_samples_per_second": 347.019, "eval_qnli-contrastive_steps_per_second": 21.724, "step": 471 }, { "epoch": 0.2, "grad_norm": 29.59261703491211, "learning_rate": 6.027661627532969e-06, "loss": 2.6358, "step": 942 }, { "epoch": 0.2, "eval_nli-pairs_loss": 1.5920209884643555, "eval_nli-pairs_runtime": 23.3765, "eval_nli-pairs_samples_per_second": 291.232, "eval_nli-pairs_steps_per_second": 18.223, "step": 942 }, { "epoch": 0.2, "eval_scitail-pairs-pos_loss": 0.9157330989837646, "eval_scitail-pairs-pos_runtime": 5.4478, "eval_scitail-pairs-pos_samples_per_second": 239.363, "eval_scitail-pairs-pos_steps_per_second": 15.052, "step": 942 }, { "epoch": 0.2, "eval_qnli-contrastive_loss": 2.663238763809204, "eval_qnli-contrastive_runtime": 15.751, "eval_qnli-contrastive_samples_per_second": 346.836, "eval_qnli-contrastive_steps_per_second": 21.713, "step": 942 }, { "epoch": 0.3, "grad_norm": 24.539047241210938, "learning_rate": 9.057574782888389e-06, "loss": 1.7183, "step": 1413 }, { "epoch": 0.3, "eval_nli-pairs_loss": 1.1536647081375122, "eval_nli-pairs_runtime": 23.6115, "eval_nli-pairs_samples_per_second": 288.335, "eval_nli-pairs_steps_per_second": 18.042, "step": 1413 }, { "epoch": 0.3, "eval_scitail-pairs-pos_loss": 0.7445429563522339, "eval_scitail-pairs-pos_runtime": 5.3966, "eval_scitail-pairs-pos_samples_per_second": 241.635, "eval_scitail-pairs-pos_steps_per_second": 15.195, "step": 1413 }, { "epoch": 0.3, "eval_qnli-contrastive_loss": 2.130812406539917, "eval_qnli-contrastive_runtime": 15.7293, "eval_qnli-contrastive_samples_per_second": 347.313, "eval_qnli-contrastive_steps_per_second": 21.743, "step": 1413 }, { "epoch": 0.4, "grad_norm": 139.8046875, "learning_rate": 1.208748793824381e-05, "loss": 1.6114, "step": 1884 }, { "epoch": 0.4, "eval_nli-pairs_loss": 0.8992123007774353, "eval_nli-pairs_runtime": 23.6196, "eval_nli-pairs_samples_per_second": 288.236, "eval_nli-pairs_steps_per_second": 18.036, "step": 1884 }, { "epoch": 0.4, "eval_scitail-pairs-pos_loss": 0.6193641424179077, "eval_scitail-pairs-pos_runtime": 5.4024, "eval_scitail-pairs-pos_samples_per_second": 241.376, "eval_scitail-pairs-pos_steps_per_second": 15.179, "step": 1884 }, { "epoch": 0.4, "eval_qnli-contrastive_loss": 1.6952241659164429, "eval_qnli-contrastive_runtime": 15.7392, "eval_qnli-contrastive_samples_per_second": 347.095, "eval_qnli-contrastive_steps_per_second": 21.729, "step": 1884 }, { "epoch": 0.5, "grad_norm": 2.1193487644195557, "learning_rate": 1.511740109359923e-05, "loss": 1.5367, "step": 2355 }, { "epoch": 0.5, "eval_nli-pairs_loss": 0.8112400770187378, "eval_nli-pairs_runtime": 23.4573, "eval_nli-pairs_samples_per_second": 290.23, "eval_nli-pairs_steps_per_second": 18.161, "step": 2355 }, { "epoch": 0.5, "eval_scitail-pairs-pos_loss": 0.6661093831062317, "eval_scitail-pairs-pos_runtime": 5.3621, "eval_scitail-pairs-pos_samples_per_second": 243.189, "eval_scitail-pairs-pos_steps_per_second": 15.293, "step": 2355 }, { "epoch": 0.5, "eval_qnli-contrastive_loss": 0.8697724938392639, "eval_qnli-contrastive_runtime": 15.7092, "eval_qnli-contrastive_samples_per_second": 347.759, "eval_qnli-contrastive_steps_per_second": 21.771, "step": 2355 }, { "epoch": 0.6, "grad_norm": 8.693464279174805, "learning_rate": 1.814731424895465e-05, "loss": 1.1657, "step": 2826 }, { "epoch": 0.6, "eval_nli-pairs_loss": 0.7330080270767212, "eval_nli-pairs_runtime": 23.359, "eval_nli-pairs_samples_per_second": 291.451, "eval_nli-pairs_steps_per_second": 18.237, "step": 2826 }, { "epoch": 0.6, "eval_scitail-pairs-pos_loss": 0.558278501033783, "eval_scitail-pairs-pos_runtime": 5.3162, "eval_scitail-pairs-pos_samples_per_second": 245.289, "eval_scitail-pairs-pos_steps_per_second": 15.425, "step": 2826 }, { "epoch": 0.6, "eval_qnli-contrastive_loss": 0.8414629101753235, "eval_qnli-contrastive_runtime": 15.5773, "eval_qnli-contrastive_samples_per_second": 350.703, "eval_qnli-contrastive_steps_per_second": 21.955, "step": 2826 }, { "epoch": 0.7, "grad_norm": 20.00510025024414, "learning_rate": 1.995853561663268e-05, "loss": 1.2926, "step": 3297 }, { "epoch": 0.7, "eval_nli-pairs_loss": 0.688292384147644, "eval_nli-pairs_runtime": 23.1585, "eval_nli-pairs_samples_per_second": 293.974, "eval_nli-pairs_steps_per_second": 18.395, "step": 3297 }, { "epoch": 0.7, "eval_scitail-pairs-pos_loss": 0.5283708572387695, "eval_scitail-pairs-pos_runtime": 5.3322, "eval_scitail-pairs-pos_samples_per_second": 244.552, "eval_scitail-pairs-pos_steps_per_second": 15.378, "step": 3297 }, { "epoch": 0.7, "eval_qnli-contrastive_loss": 0.5239661335945129, "eval_qnli-contrastive_runtime": 15.5222, "eval_qnli-contrastive_samples_per_second": 351.947, "eval_qnli-contrastive_steps_per_second": 22.033, "step": 3297 }, { "epoch": 0.8, "grad_norm": 20.681690216064453, "learning_rate": 1.9476312452068522e-05, "loss": 1.1523, "step": 3768 }, { "epoch": 0.8, "eval_nli-pairs_loss": 0.6775749325752258, "eval_nli-pairs_runtime": 23.2425, "eval_nli-pairs_samples_per_second": 292.912, "eval_nli-pairs_steps_per_second": 18.328, "step": 3768 }, { "epoch": 0.8, "eval_scitail-pairs-pos_loss": 0.4816366732120514, "eval_scitail-pairs-pos_runtime": 5.2694, "eval_scitail-pairs-pos_samples_per_second": 247.467, "eval_scitail-pairs-pos_steps_per_second": 15.562, "step": 3768 }, { "epoch": 0.8, "eval_qnli-contrastive_loss": 0.4342482388019562, "eval_qnli-contrastive_runtime": 15.5335, "eval_qnli-contrastive_samples_per_second": 351.691, "eval_qnli-contrastive_steps_per_second": 22.017, "step": 3768 }, { "epoch": 0.9, "grad_norm": 12.640650749206543, "learning_rate": 1.8475083492522773e-05, "loss": 1.0387, "step": 4239 }, { "epoch": 0.9, "eval_nli-pairs_loss": 0.6213383674621582, "eval_nli-pairs_runtime": 23.1579, "eval_nli-pairs_samples_per_second": 293.981, "eval_nli-pairs_steps_per_second": 18.395, "step": 4239 }, { "epoch": 0.9, "eval_scitail-pairs-pos_loss": 0.4603377878665924, "eval_scitail-pairs-pos_runtime": 5.3009, "eval_scitail-pairs-pos_samples_per_second": 245.997, "eval_scitail-pairs-pos_steps_per_second": 15.469, "step": 4239 }, { "epoch": 0.9, "eval_qnli-contrastive_loss": 0.3022189736366272, "eval_qnli-contrastive_runtime": 15.5459, "eval_qnli-contrastive_samples_per_second": 351.411, "eval_qnli-contrastive_steps_per_second": 21.999, "step": 4239 }, { "epoch": 1.0, "grad_norm": 20.227073669433594, "learning_rate": 1.701008869684049e-05, "loss": 1.0356, "step": 4710 }, { "epoch": 1.0, "eval_nli-pairs_loss": 0.6488831043243408, "eval_nli-pairs_runtime": 23.1759, "eval_nli-pairs_samples_per_second": 293.753, "eval_nli-pairs_steps_per_second": 18.381, "step": 4710 }, { "epoch": 1.0, "eval_scitail-pairs-pos_loss": 0.5449082255363464, "eval_scitail-pairs-pos_runtime": 5.3602, "eval_scitail-pairs-pos_samples_per_second": 243.276, "eval_scitail-pairs-pos_steps_per_second": 15.298, "step": 4710 }, { "epoch": 1.0, "eval_qnli-contrastive_loss": 0.1294127106666565, "eval_qnli-contrastive_runtime": 15.5044, "eval_qnli-contrastive_samples_per_second": 352.352, "eval_qnli-contrastive_steps_per_second": 22.058, "step": 4710 } ], "logging_steps": 471, "max_steps": 9420, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4710, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 28, "trial_name": null, "trial_params": null }