|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 471, |
|
"global_step": 4710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 23.178159713745117, |
|
"learning_rate": 3.004181408813123e-06, |
|
"loss": 3.3296, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_nli-pairs_loss": 1.8879033327102661, |
|
"eval_nli-pairs_runtime": 14.5841, |
|
"eval_nli-pairs_samples_per_second": 466.81, |
|
"eval_nli-pairs_steps_per_second": 29.21, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_scitail-pairs-pos_loss": 1.3438984155654907, |
|
"eval_scitail-pairs-pos_runtime": 3.3055, |
|
"eval_scitail-pairs-pos_samples_per_second": 394.499, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.807, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_qnli-contrastive_loss": 2.2597947120666504, |
|
"eval_qnli-contrastive_runtime": 15.4075, |
|
"eval_qnli-contrastive_samples_per_second": 354.567, |
|
"eval_qnli-contrastive_steps_per_second": 22.197, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 20.00649070739746, |
|
"learning_rate": 6.021228690897395e-06, |
|
"loss": 1.8704, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_nli-pairs_loss": 0.9545981884002686, |
|
"eval_nli-pairs_runtime": 14.5243, |
|
"eval_nli-pairs_samples_per_second": 468.731, |
|
"eval_nli-pairs_steps_per_second": 29.33, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_scitail-pairs-pos_loss": 0.5628724098205566, |
|
"eval_scitail-pairs-pos_runtime": 3.3029, |
|
"eval_scitail-pairs-pos_samples_per_second": 394.801, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.826, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_qnli-contrastive_loss": 1.840173602104187, |
|
"eval_qnli-contrastive_runtime": 15.4134, |
|
"eval_qnli-contrastive_samples_per_second": 354.433, |
|
"eval_qnli-contrastive_steps_per_second": 22.189, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 14.465508460998535, |
|
"learning_rate": 9.051141846252816e-06, |
|
"loss": 1.2621, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_nli-pairs_loss": 0.715168297290802, |
|
"eval_nli-pairs_runtime": 14.4626, |
|
"eval_nli-pairs_samples_per_second": 470.731, |
|
"eval_nli-pairs_steps_per_second": 29.455, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_scitail-pairs-pos_loss": 0.45529162883758545, |
|
"eval_scitail-pairs-pos_runtime": 3.3513, |
|
"eval_scitail-pairs-pos_samples_per_second": 389.098, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.468, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_qnli-contrastive_loss": 1.388743281364441, |
|
"eval_qnli-contrastive_runtime": 15.4261, |
|
"eval_qnli-contrastive_samples_per_second": 354.139, |
|
"eval_qnli-contrastive_steps_per_second": 22.17, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 164.2409210205078, |
|
"learning_rate": 1.2081055001608235e-05, |
|
"loss": 1.2512, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_nli-pairs_loss": 0.5274420976638794, |
|
"eval_nli-pairs_runtime": 14.4658, |
|
"eval_nli-pairs_samples_per_second": 470.628, |
|
"eval_nli-pairs_steps_per_second": 29.449, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_scitail-pairs-pos_loss": 0.3621281683444977, |
|
"eval_scitail-pairs-pos_runtime": 3.3054, |
|
"eval_scitail-pairs-pos_samples_per_second": 394.502, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.808, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_qnli-contrastive_loss": 0.8418154120445251, |
|
"eval_qnli-contrastive_runtime": 15.4336, |
|
"eval_qnli-contrastive_samples_per_second": 353.967, |
|
"eval_qnli-contrastive_steps_per_second": 22.159, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1174694299697876, |
|
"learning_rate": 1.5110968156963654e-05, |
|
"loss": 1.1724, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_nli-pairs_loss": 0.49269717931747437, |
|
"eval_nli-pairs_runtime": 14.6969, |
|
"eval_nli-pairs_samples_per_second": 463.228, |
|
"eval_nli-pairs_steps_per_second": 28.986, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_scitail-pairs-pos_loss": 0.39243820309638977, |
|
"eval_scitail-pairs-pos_runtime": 3.3462, |
|
"eval_scitail-pairs-pos_samples_per_second": 389.699, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.506, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_qnli-contrastive_loss": 0.14236953854560852, |
|
"eval_qnli-contrastive_runtime": 15.7375, |
|
"eval_qnli-contrastive_samples_per_second": 347.133, |
|
"eval_qnli-contrastive_steps_per_second": 21.732, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 8.20367431640625, |
|
"learning_rate": 1.8140881312319075e-05, |
|
"loss": 0.9036, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_nli-pairs_loss": 0.46205422282218933, |
|
"eval_nli-pairs_runtime": 14.6645, |
|
"eval_nli-pairs_samples_per_second": 464.249, |
|
"eval_nli-pairs_steps_per_second": 29.05, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_scitail-pairs-pos_loss": 0.37769660353660583, |
|
"eval_scitail-pairs-pos_runtime": 3.3324, |
|
"eval_scitail-pairs-pos_samples_per_second": 391.314, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.607, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_qnli-contrastive_loss": 0.3408704996109009, |
|
"eval_qnli-contrastive_runtime": 15.4886, |
|
"eval_qnli-contrastive_samples_per_second": 352.711, |
|
"eval_qnli-contrastive_steps_per_second": 22.081, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.231554985046387, |
|
"learning_rate": 1.995898723197675e-05, |
|
"loss": 1.0374, |
|
"step": 3297 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_nli-pairs_loss": 0.41105732321739197, |
|
"eval_nli-pairs_runtime": 14.6153, |
|
"eval_nli-pairs_samples_per_second": 465.813, |
|
"eval_nli-pairs_steps_per_second": 29.148, |
|
"step": 3297 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_scitail-pairs-pos_loss": 0.3417491614818573, |
|
"eval_scitail-pairs-pos_runtime": 3.3206, |
|
"eval_scitail-pairs-pos_samples_per_second": 392.697, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.694, |
|
"step": 3297 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_qnli-contrastive_loss": 0.21254216134548187, |
|
"eval_qnli-contrastive_runtime": 15.5347, |
|
"eval_qnli-contrastive_samples_per_second": 351.664, |
|
"eval_qnli-contrastive_steps_per_second": 22.015, |
|
"step": 3297 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 23.010765075683594, |
|
"learning_rate": 1.9476312452068522e-05, |
|
"loss": 0.9259, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_nli-pairs_loss": 0.3852880597114563, |
|
"eval_nli-pairs_runtime": 14.5431, |
|
"eval_nli-pairs_samples_per_second": 468.125, |
|
"eval_nli-pairs_steps_per_second": 29.292, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_scitail-pairs-pos_loss": 0.2818955183029175, |
|
"eval_scitail-pairs-pos_runtime": 3.3663, |
|
"eval_scitail-pairs-pos_samples_per_second": 387.364, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.359, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_qnli-contrastive_loss": 0.16461187601089478, |
|
"eval_qnli-contrastive_runtime": 15.6023, |
|
"eval_qnli-contrastive_samples_per_second": 350.141, |
|
"eval_qnli-contrastive_steps_per_second": 21.92, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.332469940185547, |
|
"learning_rate": 1.8475083492522773e-05, |
|
"loss": 0.8709, |
|
"step": 4239 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_nli-pairs_loss": 0.37486234307289124, |
|
"eval_nli-pairs_runtime": 14.7406, |
|
"eval_nli-pairs_samples_per_second": 461.852, |
|
"eval_nli-pairs_steps_per_second": 28.9, |
|
"step": 4239 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_scitail-pairs-pos_loss": 0.29122474789619446, |
|
"eval_scitail-pairs-pos_runtime": 3.5504, |
|
"eval_scitail-pairs-pos_samples_per_second": 367.283, |
|
"eval_scitail-pairs-pos_steps_per_second": 23.096, |
|
"step": 4239 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_qnli-contrastive_loss": 0.11566311866044998, |
|
"eval_qnli-contrastive_runtime": 15.6925, |
|
"eval_qnli-contrastive_samples_per_second": 348.129, |
|
"eval_qnli-contrastive_steps_per_second": 21.794, |
|
"step": 4239 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 26.054088592529297, |
|
"learning_rate": 1.701008869684049e-05, |
|
"loss": 0.8686, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_nli-pairs_loss": 0.36355406045913696, |
|
"eval_nli-pairs_runtime": 14.5214, |
|
"eval_nli-pairs_samples_per_second": 468.824, |
|
"eval_nli-pairs_steps_per_second": 29.336, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_scitail-pairs-pos_loss": 0.3108903765678406, |
|
"eval_scitail-pairs-pos_runtime": 3.3842, |
|
"eval_scitail-pairs-pos_samples_per_second": 385.319, |
|
"eval_scitail-pairs-pos_steps_per_second": 24.23, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_qnli-contrastive_loss": 0.09614822268486023, |
|
"eval_qnli-contrastive_runtime": 15.7192, |
|
"eval_qnli-contrastive_samples_per_second": 347.537, |
|
"eval_qnli-contrastive_steps_per_second": 21.757, |
|
"step": 4710 |
|
} |
|
], |
|
"logging_steps": 471, |
|
"max_steps": 9420, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 4710, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|