BERT2BERT_finetuned / trainer_state.json
JulienRPA's picture
End of training
b074a01
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 300.0,
"global_step": 11700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 12.82,
"learning_rate": 1.2375000000000001e-05,
"loss": 3.4354,
"step": 500
},
{
"epoch": 12.82,
"eval_bleu": 56.6427,
"eval_em": 0.0,
"eval_gen_len": 70.5947,
"eval_loss": 1.5065408945083618,
"eval_rm": 0.0,
"eval_runtime": 175.1978,
"eval_samples_per_second": 2.38,
"eval_steps_per_second": 0.303,
"step": 500
},
{
"epoch": 25.64,
"learning_rate": 2.4875e-05,
"loss": 0.8473,
"step": 1000
},
{
"epoch": 25.64,
"eval_bleu": 90.5419,
"eval_em": 0.0192,
"eval_gen_len": 76.9736,
"eval_loss": 0.3859139084815979,
"eval_rm": 0.0216,
"eval_runtime": 165.5619,
"eval_samples_per_second": 2.519,
"eval_steps_per_second": 0.32,
"step": 1000
},
{
"epoch": 38.46,
"learning_rate": 3.737500000000001e-05,
"loss": 0.2049,
"step": 1500
},
{
"epoch": 38.46,
"eval_bleu": 93.6495,
"eval_em": 0.0504,
"eval_gen_len": 75.1655,
"eval_loss": 0.24716846644878387,
"eval_rm": 0.0671,
"eval_runtime": 167.8405,
"eval_samples_per_second": 2.485,
"eval_steps_per_second": 0.316,
"step": 1500
},
{
"epoch": 51.28,
"learning_rate": 4.9875000000000006e-05,
"loss": 0.1222,
"step": 2000
},
{
"epoch": 51.28,
"eval_bleu": 93.8388,
"eval_em": 0.0959,
"eval_gen_len": 75.6403,
"eval_loss": 0.23381924629211426,
"eval_rm": 0.1487,
"eval_runtime": 164.0184,
"eval_samples_per_second": 2.542,
"eval_steps_per_second": 0.323,
"step": 2000
},
{
"epoch": 64.1,
"learning_rate": 4.7448453608247423e-05,
"loss": 0.0923,
"step": 2500
},
{
"epoch": 64.1,
"eval_bleu": 94.71,
"eval_em": 0.2158,
"eval_gen_len": 75.8177,
"eval_loss": 0.19438204169273376,
"eval_rm": 0.2662,
"eval_runtime": 166.6507,
"eval_samples_per_second": 2.502,
"eval_steps_per_second": 0.318,
"step": 2500
},
{
"epoch": 76.92,
"learning_rate": 4.487113402061856e-05,
"loss": 0.0752,
"step": 3000
},
{
"epoch": 76.92,
"eval_bleu": 95.0458,
"eval_em": 0.2662,
"eval_gen_len": 75.2638,
"eval_loss": 0.19899217784404755,
"eval_rm": 0.3022,
"eval_runtime": 165.9288,
"eval_samples_per_second": 2.513,
"eval_steps_per_second": 0.319,
"step": 3000
},
{
"epoch": 89.74,
"learning_rate": 4.229381443298969e-05,
"loss": 0.0627,
"step": 3500
},
{
"epoch": 89.74,
"eval_bleu": 95.3518,
"eval_em": 0.3429,
"eval_gen_len": 76.9928,
"eval_loss": 0.195655956864357,
"eval_rm": 0.3957,
"eval_runtime": 164.1213,
"eval_samples_per_second": 2.541,
"eval_steps_per_second": 0.323,
"step": 3500
},
{
"epoch": 102.56,
"learning_rate": 3.9716494845360825e-05,
"loss": 0.052,
"step": 4000
},
{
"epoch": 102.56,
"eval_bleu": 95.5392,
"eval_em": 0.3837,
"eval_gen_len": 76.1007,
"eval_loss": 0.18605293333530426,
"eval_rm": 0.4508,
"eval_runtime": 163.8256,
"eval_samples_per_second": 2.545,
"eval_steps_per_second": 0.324,
"step": 4000
},
{
"epoch": 115.38,
"learning_rate": 3.713917525773196e-05,
"loss": 0.0457,
"step": 4500
},
{
"epoch": 115.38,
"eval_bleu": 95.6692,
"eval_em": 0.4173,
"eval_gen_len": 76.1727,
"eval_loss": 0.187970370054245,
"eval_rm": 0.4892,
"eval_runtime": 165.0086,
"eval_samples_per_second": 2.527,
"eval_steps_per_second": 0.321,
"step": 4500
},
{
"epoch": 128.21,
"learning_rate": 3.4561855670103095e-05,
"loss": 0.0386,
"step": 5000
},
{
"epoch": 128.21,
"eval_bleu": 95.9215,
"eval_em": 0.446,
"eval_gen_len": 76.0168,
"eval_loss": 0.18496404588222504,
"eval_rm": 0.5276,
"eval_runtime": 160.7718,
"eval_samples_per_second": 2.594,
"eval_steps_per_second": 0.33,
"step": 5000
},
{
"epoch": 141.03,
"learning_rate": 3.1984536082474226e-05,
"loss": 0.0321,
"step": 5500
},
{
"epoch": 141.03,
"eval_bleu": 95.931,
"eval_em": 0.4964,
"eval_gen_len": 75.2566,
"eval_loss": 0.17244744300842285,
"eval_rm": 0.5875,
"eval_runtime": 162.2245,
"eval_samples_per_second": 2.571,
"eval_steps_per_second": 0.327,
"step": 5500
},
{
"epoch": 153.85,
"learning_rate": 2.9407216494845364e-05,
"loss": 0.026,
"step": 6000
},
{
"epoch": 153.85,
"eval_bleu": 96.4317,
"eval_em": 0.5348,
"eval_gen_len": 75.741,
"eval_loss": 0.16870950162410736,
"eval_rm": 0.6499,
"eval_runtime": 165.0932,
"eval_samples_per_second": 2.526,
"eval_steps_per_second": 0.321,
"step": 6000
},
{
"epoch": 166.67,
"learning_rate": 2.6829896907216496e-05,
"loss": 0.0242,
"step": 6500
},
{
"epoch": 166.67,
"eval_bleu": 96.197,
"eval_em": 0.5372,
"eval_gen_len": 76.1127,
"eval_loss": 0.17071698606014252,
"eval_rm": 0.6403,
"eval_runtime": 162.7041,
"eval_samples_per_second": 2.563,
"eval_steps_per_second": 0.326,
"step": 6500
},
{
"epoch": 179.49,
"learning_rate": 2.425257731958763e-05,
"loss": 0.0193,
"step": 7000
},
{
"epoch": 179.49,
"eval_bleu": 96.3422,
"eval_em": 0.5564,
"eval_gen_len": 75.3933,
"eval_loss": 0.1643209457397461,
"eval_rm": 0.6691,
"eval_runtime": 163.0211,
"eval_samples_per_second": 2.558,
"eval_steps_per_second": 0.325,
"step": 7000
},
{
"epoch": 192.31,
"learning_rate": 2.1675257731958766e-05,
"loss": 0.0164,
"step": 7500
},
{
"epoch": 192.31,
"eval_bleu": 96.5278,
"eval_em": 0.5779,
"eval_gen_len": 75.4508,
"eval_loss": 0.16497784852981567,
"eval_rm": 0.693,
"eval_runtime": 161.7709,
"eval_samples_per_second": 2.578,
"eval_steps_per_second": 0.328,
"step": 7500
},
{
"epoch": 205.13,
"learning_rate": 1.9097938144329897e-05,
"loss": 0.0139,
"step": 8000
},
{
"epoch": 205.13,
"eval_bleu": 96.6382,
"eval_em": 0.6091,
"eval_gen_len": 75.9592,
"eval_loss": 0.16682015359401703,
"eval_rm": 0.7314,
"eval_runtime": 160.3701,
"eval_samples_per_second": 2.6,
"eval_steps_per_second": 0.33,
"step": 8000
},
{
"epoch": 217.95,
"learning_rate": 1.6520618556701032e-05,
"loss": 0.012,
"step": 8500
},
{
"epoch": 217.95,
"eval_bleu": 96.5488,
"eval_em": 0.6163,
"eval_gen_len": 76.0024,
"eval_loss": 0.16442929208278656,
"eval_rm": 0.729,
"eval_runtime": 161.7705,
"eval_samples_per_second": 2.578,
"eval_steps_per_second": 0.328,
"step": 8500
},
{
"epoch": 230.77,
"learning_rate": 1.3943298969072165e-05,
"loss": 0.0106,
"step": 9000
},
{
"epoch": 230.77,
"eval_bleu": 96.6353,
"eval_em": 0.6091,
"eval_gen_len": 75.5468,
"eval_loss": 0.16534733772277832,
"eval_rm": 0.7266,
"eval_runtime": 158.2739,
"eval_samples_per_second": 2.635,
"eval_steps_per_second": 0.335,
"step": 9000
},
{
"epoch": 243.59,
"learning_rate": 1.1365979381443299e-05,
"loss": 0.0093,
"step": 9500
},
{
"epoch": 243.59,
"eval_bleu": 96.8984,
"eval_em": 0.6331,
"eval_gen_len": 75.7242,
"eval_loss": 0.16627563536167145,
"eval_rm": 0.7482,
"eval_runtime": 159.472,
"eval_samples_per_second": 2.615,
"eval_steps_per_second": 0.332,
"step": 9500
},
{
"epoch": 256.41,
"learning_rate": 8.788659793814432e-06,
"loss": 0.0084,
"step": 10000
},
{
"epoch": 256.41,
"eval_bleu": 96.6199,
"eval_em": 0.6331,
"eval_gen_len": 75.3885,
"eval_loss": 0.1675705760717392,
"eval_rm": 0.7482,
"eval_runtime": 157.5771,
"eval_samples_per_second": 2.646,
"eval_steps_per_second": 0.336,
"step": 10000
},
{
"epoch": 269.23,
"learning_rate": 6.211340206185568e-06,
"loss": 0.0076,
"step": 10500
},
{
"epoch": 269.23,
"eval_bleu": 96.5038,
"eval_em": 0.6283,
"eval_gen_len": 75.3453,
"eval_loss": 0.16782505810260773,
"eval_rm": 0.7482,
"eval_runtime": 181.3202,
"eval_samples_per_second": 2.3,
"eval_steps_per_second": 0.292,
"step": 10500
},
{
"epoch": 282.05,
"learning_rate": 3.6340206185567013e-06,
"loss": 0.007,
"step": 11000
},
{
"epoch": 282.05,
"eval_bleu": 96.7187,
"eval_em": 0.6355,
"eval_gen_len": 75.9281,
"eval_loss": 0.16688644886016846,
"eval_rm": 0.7458,
"eval_runtime": 181.4265,
"eval_samples_per_second": 2.298,
"eval_steps_per_second": 0.292,
"step": 11000
},
{
"epoch": 294.87,
"learning_rate": 1.0567010309278351e-06,
"loss": 0.0065,
"step": 11500
},
{
"epoch": 294.87,
"eval_bleu": 96.7679,
"eval_em": 0.6307,
"eval_gen_len": 75.6355,
"eval_loss": 0.16715963184833527,
"eval_rm": 0.7482,
"eval_runtime": 181.7054,
"eval_samples_per_second": 2.295,
"eval_steps_per_second": 0.292,
"step": 11500
},
{
"epoch": 300.0,
"step": 11700,
"total_flos": 9455707744902144.0,
"train_loss": 0.001008551752465403,
"train_runtime": 1876.4514,
"train_samples_per_second": 196.008,
"train_steps_per_second": 6.235
}
],
"max_steps": 11700,
"num_train_epochs": 300,
"total_flos": 9455707744902144.0,
"trial_name": null,
"trial_params": null
}