|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 300.0, |
|
"global_step": 11700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.2375000000000001e-05, |
|
"loss": 3.4354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_bleu": 56.6427, |
|
"eval_em": 0.0, |
|
"eval_gen_len": 70.5947, |
|
"eval_loss": 1.5065408945083618, |
|
"eval_rm": 0.0, |
|
"eval_runtime": 175.1978, |
|
"eval_samples_per_second": 2.38, |
|
"eval_steps_per_second": 0.303, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 2.4875e-05, |
|
"loss": 0.8473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"eval_bleu": 90.5419, |
|
"eval_em": 0.0192, |
|
"eval_gen_len": 76.9736, |
|
"eval_loss": 0.3859139084815979, |
|
"eval_rm": 0.0216, |
|
"eval_runtime": 165.5619, |
|
"eval_samples_per_second": 2.519, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 3.737500000000001e-05, |
|
"loss": 0.2049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_bleu": 93.6495, |
|
"eval_em": 0.0504, |
|
"eval_gen_len": 75.1655, |
|
"eval_loss": 0.24716846644878387, |
|
"eval_rm": 0.0671, |
|
"eval_runtime": 167.8405, |
|
"eval_samples_per_second": 2.485, |
|
"eval_steps_per_second": 0.316, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 4.9875000000000006e-05, |
|
"loss": 0.1222, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"eval_bleu": 93.8388, |
|
"eval_em": 0.0959, |
|
"eval_gen_len": 75.6403, |
|
"eval_loss": 0.23381924629211426, |
|
"eval_rm": 0.1487, |
|
"eval_runtime": 164.0184, |
|
"eval_samples_per_second": 2.542, |
|
"eval_steps_per_second": 0.323, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"learning_rate": 4.7448453608247423e-05, |
|
"loss": 0.0923, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"eval_bleu": 94.71, |
|
"eval_em": 0.2158, |
|
"eval_gen_len": 75.8177, |
|
"eval_loss": 0.19438204169273376, |
|
"eval_rm": 0.2662, |
|
"eval_runtime": 166.6507, |
|
"eval_samples_per_second": 2.502, |
|
"eval_steps_per_second": 0.318, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 4.487113402061856e-05, |
|
"loss": 0.0752, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_bleu": 95.0458, |
|
"eval_em": 0.2662, |
|
"eval_gen_len": 75.2638, |
|
"eval_loss": 0.19899217784404755, |
|
"eval_rm": 0.3022, |
|
"eval_runtime": 165.9288, |
|
"eval_samples_per_second": 2.513, |
|
"eval_steps_per_second": 0.319, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"learning_rate": 4.229381443298969e-05, |
|
"loss": 0.0627, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"eval_bleu": 95.3518, |
|
"eval_em": 0.3429, |
|
"eval_gen_len": 76.9928, |
|
"eval_loss": 0.195655956864357, |
|
"eval_rm": 0.3957, |
|
"eval_runtime": 164.1213, |
|
"eval_samples_per_second": 2.541, |
|
"eval_steps_per_second": 0.323, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"learning_rate": 3.9716494845360825e-05, |
|
"loss": 0.052, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"eval_bleu": 95.5392, |
|
"eval_em": 0.3837, |
|
"eval_gen_len": 76.1007, |
|
"eval_loss": 0.18605293333530426, |
|
"eval_rm": 0.4508, |
|
"eval_runtime": 163.8256, |
|
"eval_samples_per_second": 2.545, |
|
"eval_steps_per_second": 0.324, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"learning_rate": 3.713917525773196e-05, |
|
"loss": 0.0457, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"eval_bleu": 95.6692, |
|
"eval_em": 0.4173, |
|
"eval_gen_len": 76.1727, |
|
"eval_loss": 0.187970370054245, |
|
"eval_rm": 0.4892, |
|
"eval_runtime": 165.0086, |
|
"eval_samples_per_second": 2.527, |
|
"eval_steps_per_second": 0.321, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"learning_rate": 3.4561855670103095e-05, |
|
"loss": 0.0386, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"eval_bleu": 95.9215, |
|
"eval_em": 0.446, |
|
"eval_gen_len": 76.0168, |
|
"eval_loss": 0.18496404588222504, |
|
"eval_rm": 0.5276, |
|
"eval_runtime": 160.7718, |
|
"eval_samples_per_second": 2.594, |
|
"eval_steps_per_second": 0.33, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"learning_rate": 3.1984536082474226e-05, |
|
"loss": 0.0321, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"eval_bleu": 95.931, |
|
"eval_em": 0.4964, |
|
"eval_gen_len": 75.2566, |
|
"eval_loss": 0.17244744300842285, |
|
"eval_rm": 0.5875, |
|
"eval_runtime": 162.2245, |
|
"eval_samples_per_second": 2.571, |
|
"eval_steps_per_second": 0.327, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"learning_rate": 2.9407216494845364e-05, |
|
"loss": 0.026, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_bleu": 96.4317, |
|
"eval_em": 0.5348, |
|
"eval_gen_len": 75.741, |
|
"eval_loss": 0.16870950162410736, |
|
"eval_rm": 0.6499, |
|
"eval_runtime": 165.0932, |
|
"eval_samples_per_second": 2.526, |
|
"eval_steps_per_second": 0.321, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 2.6829896907216496e-05, |
|
"loss": 0.0242, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"eval_bleu": 96.197, |
|
"eval_em": 0.5372, |
|
"eval_gen_len": 76.1127, |
|
"eval_loss": 0.17071698606014252, |
|
"eval_rm": 0.6403, |
|
"eval_runtime": 162.7041, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.326, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"learning_rate": 2.425257731958763e-05, |
|
"loss": 0.0193, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"eval_bleu": 96.3422, |
|
"eval_em": 0.5564, |
|
"eval_gen_len": 75.3933, |
|
"eval_loss": 0.1643209457397461, |
|
"eval_rm": 0.6691, |
|
"eval_runtime": 163.0211, |
|
"eval_samples_per_second": 2.558, |
|
"eval_steps_per_second": 0.325, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"learning_rate": 2.1675257731958766e-05, |
|
"loss": 0.0164, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"eval_bleu": 96.5278, |
|
"eval_em": 0.5779, |
|
"eval_gen_len": 75.4508, |
|
"eval_loss": 0.16497784852981567, |
|
"eval_rm": 0.693, |
|
"eval_runtime": 161.7709, |
|
"eval_samples_per_second": 2.578, |
|
"eval_steps_per_second": 0.328, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"learning_rate": 1.9097938144329897e-05, |
|
"loss": 0.0139, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"eval_bleu": 96.6382, |
|
"eval_em": 0.6091, |
|
"eval_gen_len": 75.9592, |
|
"eval_loss": 0.16682015359401703, |
|
"eval_rm": 0.7314, |
|
"eval_runtime": 160.3701, |
|
"eval_samples_per_second": 2.6, |
|
"eval_steps_per_second": 0.33, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"learning_rate": 1.6520618556701032e-05, |
|
"loss": 0.012, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"eval_bleu": 96.5488, |
|
"eval_em": 0.6163, |
|
"eval_gen_len": 76.0024, |
|
"eval_loss": 0.16442929208278656, |
|
"eval_rm": 0.729, |
|
"eval_runtime": 161.7705, |
|
"eval_samples_per_second": 2.578, |
|
"eval_steps_per_second": 0.328, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"learning_rate": 1.3943298969072165e-05, |
|
"loss": 0.0106, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_bleu": 96.6353, |
|
"eval_em": 0.6091, |
|
"eval_gen_len": 75.5468, |
|
"eval_loss": 0.16534733772277832, |
|
"eval_rm": 0.7266, |
|
"eval_runtime": 158.2739, |
|
"eval_samples_per_second": 2.635, |
|
"eval_steps_per_second": 0.335, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"learning_rate": 1.1365979381443299e-05, |
|
"loss": 0.0093, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"eval_bleu": 96.8984, |
|
"eval_em": 0.6331, |
|
"eval_gen_len": 75.7242, |
|
"eval_loss": 0.16627563536167145, |
|
"eval_rm": 0.7482, |
|
"eval_runtime": 159.472, |
|
"eval_samples_per_second": 2.615, |
|
"eval_steps_per_second": 0.332, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"learning_rate": 8.788659793814432e-06, |
|
"loss": 0.0084, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"eval_bleu": 96.6199, |
|
"eval_em": 0.6331, |
|
"eval_gen_len": 75.3885, |
|
"eval_loss": 0.1675705760717392, |
|
"eval_rm": 0.7482, |
|
"eval_runtime": 157.5771, |
|
"eval_samples_per_second": 2.646, |
|
"eval_steps_per_second": 0.336, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 269.23, |
|
"learning_rate": 6.211340206185568e-06, |
|
"loss": 0.0076, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 269.23, |
|
"eval_bleu": 96.5038, |
|
"eval_em": 0.6283, |
|
"eval_gen_len": 75.3453, |
|
"eval_loss": 0.16782505810260773, |
|
"eval_rm": 0.7482, |
|
"eval_runtime": 181.3202, |
|
"eval_samples_per_second": 2.3, |
|
"eval_steps_per_second": 0.292, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 282.05, |
|
"learning_rate": 3.6340206185567013e-06, |
|
"loss": 0.007, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 282.05, |
|
"eval_bleu": 96.7187, |
|
"eval_em": 0.6355, |
|
"eval_gen_len": 75.9281, |
|
"eval_loss": 0.16688644886016846, |
|
"eval_rm": 0.7458, |
|
"eval_runtime": 181.4265, |
|
"eval_samples_per_second": 2.298, |
|
"eval_steps_per_second": 0.292, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 294.87, |
|
"learning_rate": 1.0567010309278351e-06, |
|
"loss": 0.0065, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 294.87, |
|
"eval_bleu": 96.7679, |
|
"eval_em": 0.6307, |
|
"eval_gen_len": 75.6355, |
|
"eval_loss": 0.16715963184833527, |
|
"eval_rm": 0.7482, |
|
"eval_runtime": 181.7054, |
|
"eval_samples_per_second": 2.295, |
|
"eval_steps_per_second": 0.292, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"step": 11700, |
|
"total_flos": 9455707744902144.0, |
|
"train_loss": 0.001008551752465403, |
|
"train_runtime": 1876.4514, |
|
"train_samples_per_second": 196.008, |
|
"train_steps_per_second": 6.235 |
|
} |
|
], |
|
"max_steps": 11700, |
|
"num_train_epochs": 300, |
|
"total_flos": 9455707744902144.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|