bobox's picture
Training in progress, epoch 1, checkpoint
0929227 verified
raw
history blame
10.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 471,
"global_step": 4710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 23.178159713745117,
"learning_rate": 3.004181408813123e-06,
"loss": 3.3296,
"step": 471
},
{
"epoch": 0.1,
"eval_nli-pairs_loss": 1.8879033327102661,
"eval_nli-pairs_runtime": 14.5841,
"eval_nli-pairs_samples_per_second": 466.81,
"eval_nli-pairs_steps_per_second": 29.21,
"step": 471
},
{
"epoch": 0.1,
"eval_scitail-pairs-pos_loss": 1.3438984155654907,
"eval_scitail-pairs-pos_runtime": 3.3055,
"eval_scitail-pairs-pos_samples_per_second": 394.499,
"eval_scitail-pairs-pos_steps_per_second": 24.807,
"step": 471
},
{
"epoch": 0.1,
"eval_qnli-contrastive_loss": 2.2597947120666504,
"eval_qnli-contrastive_runtime": 15.4075,
"eval_qnli-contrastive_samples_per_second": 354.567,
"eval_qnli-contrastive_steps_per_second": 22.197,
"step": 471
},
{
"epoch": 0.2,
"grad_norm": 20.00649070739746,
"learning_rate": 6.021228690897395e-06,
"loss": 1.8704,
"step": 942
},
{
"epoch": 0.2,
"eval_nli-pairs_loss": 0.9545981884002686,
"eval_nli-pairs_runtime": 14.5243,
"eval_nli-pairs_samples_per_second": 468.731,
"eval_nli-pairs_steps_per_second": 29.33,
"step": 942
},
{
"epoch": 0.2,
"eval_scitail-pairs-pos_loss": 0.5628724098205566,
"eval_scitail-pairs-pos_runtime": 3.3029,
"eval_scitail-pairs-pos_samples_per_second": 394.801,
"eval_scitail-pairs-pos_steps_per_second": 24.826,
"step": 942
},
{
"epoch": 0.2,
"eval_qnli-contrastive_loss": 1.840173602104187,
"eval_qnli-contrastive_runtime": 15.4134,
"eval_qnli-contrastive_samples_per_second": 354.433,
"eval_qnli-contrastive_steps_per_second": 22.189,
"step": 942
},
{
"epoch": 0.3,
"grad_norm": 14.465508460998535,
"learning_rate": 9.051141846252816e-06,
"loss": 1.2621,
"step": 1413
},
{
"epoch": 0.3,
"eval_nli-pairs_loss": 0.715168297290802,
"eval_nli-pairs_runtime": 14.4626,
"eval_nli-pairs_samples_per_second": 470.731,
"eval_nli-pairs_steps_per_second": 29.455,
"step": 1413
},
{
"epoch": 0.3,
"eval_scitail-pairs-pos_loss": 0.45529162883758545,
"eval_scitail-pairs-pos_runtime": 3.3513,
"eval_scitail-pairs-pos_samples_per_second": 389.098,
"eval_scitail-pairs-pos_steps_per_second": 24.468,
"step": 1413
},
{
"epoch": 0.3,
"eval_qnli-contrastive_loss": 1.388743281364441,
"eval_qnli-contrastive_runtime": 15.4261,
"eval_qnli-contrastive_samples_per_second": 354.139,
"eval_qnli-contrastive_steps_per_second": 22.17,
"step": 1413
},
{
"epoch": 0.4,
"grad_norm": 164.2409210205078,
"learning_rate": 1.2081055001608235e-05,
"loss": 1.2512,
"step": 1884
},
{
"epoch": 0.4,
"eval_nli-pairs_loss": 0.5274420976638794,
"eval_nli-pairs_runtime": 14.4658,
"eval_nli-pairs_samples_per_second": 470.628,
"eval_nli-pairs_steps_per_second": 29.449,
"step": 1884
},
{
"epoch": 0.4,
"eval_scitail-pairs-pos_loss": 0.3621281683444977,
"eval_scitail-pairs-pos_runtime": 3.3054,
"eval_scitail-pairs-pos_samples_per_second": 394.502,
"eval_scitail-pairs-pos_steps_per_second": 24.808,
"step": 1884
},
{
"epoch": 0.4,
"eval_qnli-contrastive_loss": 0.8418154120445251,
"eval_qnli-contrastive_runtime": 15.4336,
"eval_qnli-contrastive_samples_per_second": 353.967,
"eval_qnli-contrastive_steps_per_second": 22.159,
"step": 1884
},
{
"epoch": 0.5,
"grad_norm": 1.1174694299697876,
"learning_rate": 1.5110968156963654e-05,
"loss": 1.1724,
"step": 2355
},
{
"epoch": 0.5,
"eval_nli-pairs_loss": 0.49269717931747437,
"eval_nli-pairs_runtime": 14.6969,
"eval_nli-pairs_samples_per_second": 463.228,
"eval_nli-pairs_steps_per_second": 28.986,
"step": 2355
},
{
"epoch": 0.5,
"eval_scitail-pairs-pos_loss": 0.39243820309638977,
"eval_scitail-pairs-pos_runtime": 3.3462,
"eval_scitail-pairs-pos_samples_per_second": 389.699,
"eval_scitail-pairs-pos_steps_per_second": 24.506,
"step": 2355
},
{
"epoch": 0.5,
"eval_qnli-contrastive_loss": 0.14236953854560852,
"eval_qnli-contrastive_runtime": 15.7375,
"eval_qnli-contrastive_samples_per_second": 347.133,
"eval_qnli-contrastive_steps_per_second": 21.732,
"step": 2355
},
{
"epoch": 0.6,
"grad_norm": 8.20367431640625,
"learning_rate": 1.8140881312319075e-05,
"loss": 0.9036,
"step": 2826
},
{
"epoch": 0.6,
"eval_nli-pairs_loss": 0.46205422282218933,
"eval_nli-pairs_runtime": 14.6645,
"eval_nli-pairs_samples_per_second": 464.249,
"eval_nli-pairs_steps_per_second": 29.05,
"step": 2826
},
{
"epoch": 0.6,
"eval_scitail-pairs-pos_loss": 0.37769660353660583,
"eval_scitail-pairs-pos_runtime": 3.3324,
"eval_scitail-pairs-pos_samples_per_second": 391.314,
"eval_scitail-pairs-pos_steps_per_second": 24.607,
"step": 2826
},
{
"epoch": 0.6,
"eval_qnli-contrastive_loss": 0.3408704996109009,
"eval_qnli-contrastive_runtime": 15.4886,
"eval_qnli-contrastive_samples_per_second": 352.711,
"eval_qnli-contrastive_steps_per_second": 22.081,
"step": 2826
},
{
"epoch": 0.7,
"grad_norm": 13.231554985046387,
"learning_rate": 1.995898723197675e-05,
"loss": 1.0374,
"step": 3297
},
{
"epoch": 0.7,
"eval_nli-pairs_loss": 0.41105732321739197,
"eval_nli-pairs_runtime": 14.6153,
"eval_nli-pairs_samples_per_second": 465.813,
"eval_nli-pairs_steps_per_second": 29.148,
"step": 3297
},
{
"epoch": 0.7,
"eval_scitail-pairs-pos_loss": 0.3417491614818573,
"eval_scitail-pairs-pos_runtime": 3.3206,
"eval_scitail-pairs-pos_samples_per_second": 392.697,
"eval_scitail-pairs-pos_steps_per_second": 24.694,
"step": 3297
},
{
"epoch": 0.7,
"eval_qnli-contrastive_loss": 0.21254216134548187,
"eval_qnli-contrastive_runtime": 15.5347,
"eval_qnli-contrastive_samples_per_second": 351.664,
"eval_qnli-contrastive_steps_per_second": 22.015,
"step": 3297
},
{
"epoch": 0.8,
"grad_norm": 23.010765075683594,
"learning_rate": 1.9476312452068522e-05,
"loss": 0.9259,
"step": 3768
},
{
"epoch": 0.8,
"eval_nli-pairs_loss": 0.3852880597114563,
"eval_nli-pairs_runtime": 14.5431,
"eval_nli-pairs_samples_per_second": 468.125,
"eval_nli-pairs_steps_per_second": 29.292,
"step": 3768
},
{
"epoch": 0.8,
"eval_scitail-pairs-pos_loss": 0.2818955183029175,
"eval_scitail-pairs-pos_runtime": 3.3663,
"eval_scitail-pairs-pos_samples_per_second": 387.364,
"eval_scitail-pairs-pos_steps_per_second": 24.359,
"step": 3768
},
{
"epoch": 0.8,
"eval_qnli-contrastive_loss": 0.16461187601089478,
"eval_qnli-contrastive_runtime": 15.6023,
"eval_qnli-contrastive_samples_per_second": 350.141,
"eval_qnli-contrastive_steps_per_second": 21.92,
"step": 3768
},
{
"epoch": 0.9,
"grad_norm": 4.332469940185547,
"learning_rate": 1.8475083492522773e-05,
"loss": 0.8709,
"step": 4239
},
{
"epoch": 0.9,
"eval_nli-pairs_loss": 0.37486234307289124,
"eval_nli-pairs_runtime": 14.7406,
"eval_nli-pairs_samples_per_second": 461.852,
"eval_nli-pairs_steps_per_second": 28.9,
"step": 4239
},
{
"epoch": 0.9,
"eval_scitail-pairs-pos_loss": 0.29122474789619446,
"eval_scitail-pairs-pos_runtime": 3.5504,
"eval_scitail-pairs-pos_samples_per_second": 367.283,
"eval_scitail-pairs-pos_steps_per_second": 23.096,
"step": 4239
},
{
"epoch": 0.9,
"eval_qnli-contrastive_loss": 0.11566311866044998,
"eval_qnli-contrastive_runtime": 15.6925,
"eval_qnli-contrastive_samples_per_second": 348.129,
"eval_qnli-contrastive_steps_per_second": 21.794,
"step": 4239
},
{
"epoch": 1.0,
"grad_norm": 26.054088592529297,
"learning_rate": 1.701008869684049e-05,
"loss": 0.8686,
"step": 4710
},
{
"epoch": 1.0,
"eval_nli-pairs_loss": 0.36355406045913696,
"eval_nli-pairs_runtime": 14.5214,
"eval_nli-pairs_samples_per_second": 468.824,
"eval_nli-pairs_steps_per_second": 29.336,
"step": 4710
},
{
"epoch": 1.0,
"eval_scitail-pairs-pos_loss": 0.3108903765678406,
"eval_scitail-pairs-pos_runtime": 3.3842,
"eval_scitail-pairs-pos_samples_per_second": 385.319,
"eval_scitail-pairs-pos_steps_per_second": 24.23,
"step": 4710
},
{
"epoch": 1.0,
"eval_qnli-contrastive_loss": 0.09614822268486023,
"eval_qnli-contrastive_runtime": 15.7192,
"eval_qnli-contrastive_samples_per_second": 347.537,
"eval_qnli-contrastive_steps_per_second": 21.757,
"step": 4710
}
],
"logging_steps": 471,
"max_steps": 9420,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 4710,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}