{ "best_metric": null, "best_model_checkpoint": null, "epoch": 300.0, "global_step": 11700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 12.82, "learning_rate": 1.2375000000000001e-05, "loss": 3.4354, "step": 500 }, { "epoch": 12.82, "eval_bleu": 56.6427, "eval_em": 0.0, "eval_gen_len": 70.5947, "eval_loss": 1.5065408945083618, "eval_rm": 0.0, "eval_runtime": 175.1978, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.303, "step": 500 }, { "epoch": 25.64, "learning_rate": 2.4875e-05, "loss": 0.8473, "step": 1000 }, { "epoch": 25.64, "eval_bleu": 90.5419, "eval_em": 0.0192, "eval_gen_len": 76.9736, "eval_loss": 0.3859139084815979, "eval_rm": 0.0216, "eval_runtime": 165.5619, "eval_samples_per_second": 2.519, "eval_steps_per_second": 0.32, "step": 1000 }, { "epoch": 38.46, "learning_rate": 3.737500000000001e-05, "loss": 0.2049, "step": 1500 }, { "epoch": 38.46, "eval_bleu": 93.6495, "eval_em": 0.0504, "eval_gen_len": 75.1655, "eval_loss": 0.24716846644878387, "eval_rm": 0.0671, "eval_runtime": 167.8405, "eval_samples_per_second": 2.485, "eval_steps_per_second": 0.316, "step": 1500 }, { "epoch": 51.28, "learning_rate": 4.9875000000000006e-05, "loss": 0.1222, "step": 2000 }, { "epoch": 51.28, "eval_bleu": 93.8388, "eval_em": 0.0959, "eval_gen_len": 75.6403, "eval_loss": 0.23381924629211426, "eval_rm": 0.1487, "eval_runtime": 164.0184, "eval_samples_per_second": 2.542, "eval_steps_per_second": 0.323, "step": 2000 }, { "epoch": 64.1, "learning_rate": 4.7448453608247423e-05, "loss": 0.0923, "step": 2500 }, { "epoch": 64.1, "eval_bleu": 94.71, "eval_em": 0.2158, "eval_gen_len": 75.8177, "eval_loss": 0.19438204169273376, "eval_rm": 0.2662, "eval_runtime": 166.6507, "eval_samples_per_second": 2.502, "eval_steps_per_second": 0.318, "step": 2500 }, { "epoch": 76.92, "learning_rate": 4.487113402061856e-05, "loss": 0.0752, "step": 3000 }, { "epoch": 76.92, "eval_bleu": 95.0458, "eval_em": 0.2662, "eval_gen_len": 75.2638, "eval_loss": 0.19899217784404755, "eval_rm": 0.3022, "eval_runtime": 165.9288, "eval_samples_per_second": 2.513, "eval_steps_per_second": 0.319, "step": 3000 }, { "epoch": 89.74, "learning_rate": 4.229381443298969e-05, "loss": 0.0627, "step": 3500 }, { "epoch": 89.74, "eval_bleu": 95.3518, "eval_em": 0.3429, "eval_gen_len": 76.9928, "eval_loss": 0.195655956864357, "eval_rm": 0.3957, "eval_runtime": 164.1213, "eval_samples_per_second": 2.541, "eval_steps_per_second": 0.323, "step": 3500 }, { "epoch": 102.56, "learning_rate": 3.9716494845360825e-05, "loss": 0.052, "step": 4000 }, { "epoch": 102.56, "eval_bleu": 95.5392, "eval_em": 0.3837, "eval_gen_len": 76.1007, "eval_loss": 0.18605293333530426, "eval_rm": 0.4508, "eval_runtime": 163.8256, "eval_samples_per_second": 2.545, "eval_steps_per_second": 0.324, "step": 4000 }, { "epoch": 115.38, "learning_rate": 3.713917525773196e-05, "loss": 0.0457, "step": 4500 }, { "epoch": 115.38, "eval_bleu": 95.6692, "eval_em": 0.4173, "eval_gen_len": 76.1727, "eval_loss": 0.187970370054245, "eval_rm": 0.4892, "eval_runtime": 165.0086, "eval_samples_per_second": 2.527, "eval_steps_per_second": 0.321, "step": 4500 }, { "epoch": 128.21, "learning_rate": 3.4561855670103095e-05, "loss": 0.0386, "step": 5000 }, { "epoch": 128.21, "eval_bleu": 95.9215, "eval_em": 0.446, "eval_gen_len": 76.0168, "eval_loss": 0.18496404588222504, "eval_rm": 0.5276, "eval_runtime": 160.7718, "eval_samples_per_second": 2.594, "eval_steps_per_second": 0.33, "step": 5000 }, { "epoch": 141.03, "learning_rate": 3.1984536082474226e-05, "loss": 0.0321, "step": 5500 }, { "epoch": 141.03, "eval_bleu": 95.931, "eval_em": 0.4964, "eval_gen_len": 75.2566, "eval_loss": 0.17244744300842285, "eval_rm": 0.5875, "eval_runtime": 162.2245, "eval_samples_per_second": 2.571, "eval_steps_per_second": 0.327, "step": 5500 }, { "epoch": 153.85, "learning_rate": 2.9407216494845364e-05, "loss": 0.026, "step": 6000 }, { "epoch": 153.85, "eval_bleu": 96.4317, "eval_em": 0.5348, "eval_gen_len": 75.741, "eval_loss": 0.16870950162410736, "eval_rm": 0.6499, "eval_runtime": 165.0932, "eval_samples_per_second": 2.526, "eval_steps_per_second": 0.321, "step": 6000 }, { "epoch": 166.67, "learning_rate": 2.6829896907216496e-05, "loss": 0.0242, "step": 6500 }, { "epoch": 166.67, "eval_bleu": 96.197, "eval_em": 0.5372, "eval_gen_len": 76.1127, "eval_loss": 0.17071698606014252, "eval_rm": 0.6403, "eval_runtime": 162.7041, "eval_samples_per_second": 2.563, "eval_steps_per_second": 0.326, "step": 6500 }, { "epoch": 179.49, "learning_rate": 2.425257731958763e-05, "loss": 0.0193, "step": 7000 }, { "epoch": 179.49, "eval_bleu": 96.3422, "eval_em": 0.5564, "eval_gen_len": 75.3933, "eval_loss": 0.1643209457397461, "eval_rm": 0.6691, "eval_runtime": 163.0211, "eval_samples_per_second": 2.558, "eval_steps_per_second": 0.325, "step": 7000 }, { "epoch": 192.31, "learning_rate": 2.1675257731958766e-05, "loss": 0.0164, "step": 7500 }, { "epoch": 192.31, "eval_bleu": 96.5278, "eval_em": 0.5779, "eval_gen_len": 75.4508, "eval_loss": 0.16497784852981567, "eval_rm": 0.693, "eval_runtime": 161.7709, "eval_samples_per_second": 2.578, "eval_steps_per_second": 0.328, "step": 7500 }, { "epoch": 205.13, "learning_rate": 1.9097938144329897e-05, "loss": 0.0139, "step": 8000 }, { "epoch": 205.13, "eval_bleu": 96.6382, "eval_em": 0.6091, "eval_gen_len": 75.9592, "eval_loss": 0.16682015359401703, "eval_rm": 0.7314, "eval_runtime": 160.3701, "eval_samples_per_second": 2.6, "eval_steps_per_second": 0.33, "step": 8000 }, { "epoch": 217.95, "learning_rate": 1.6520618556701032e-05, "loss": 0.012, "step": 8500 }, { "epoch": 217.95, "eval_bleu": 96.5488, "eval_em": 0.6163, "eval_gen_len": 76.0024, "eval_loss": 0.16442929208278656, "eval_rm": 0.729, "eval_runtime": 161.7705, "eval_samples_per_second": 2.578, "eval_steps_per_second": 0.328, "step": 8500 }, { "epoch": 230.77, "learning_rate": 1.3943298969072165e-05, "loss": 0.0106, "step": 9000 }, { "epoch": 230.77, "eval_bleu": 96.6353, "eval_em": 0.6091, "eval_gen_len": 75.5468, "eval_loss": 0.16534733772277832, "eval_rm": 0.7266, "eval_runtime": 158.2739, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.335, "step": 9000 }, { "epoch": 243.59, "learning_rate": 1.1365979381443299e-05, "loss": 0.0093, "step": 9500 }, { "epoch": 243.59, "eval_bleu": 96.8984, "eval_em": 0.6331, "eval_gen_len": 75.7242, "eval_loss": 0.16627563536167145, "eval_rm": 0.7482, "eval_runtime": 159.472, "eval_samples_per_second": 2.615, "eval_steps_per_second": 0.332, "step": 9500 }, { "epoch": 256.41, "learning_rate": 8.788659793814432e-06, "loss": 0.0084, "step": 10000 }, { "epoch": 256.41, "eval_bleu": 96.6199, "eval_em": 0.6331, "eval_gen_len": 75.3885, "eval_loss": 0.1675705760717392, "eval_rm": 0.7482, "eval_runtime": 157.5771, "eval_samples_per_second": 2.646, "eval_steps_per_second": 0.336, "step": 10000 }, { "epoch": 269.23, "learning_rate": 6.211340206185568e-06, "loss": 0.0076, "step": 10500 }, { "epoch": 269.23, "eval_bleu": 96.5038, "eval_em": 0.6283, "eval_gen_len": 75.3453, "eval_loss": 0.16782505810260773, "eval_rm": 0.7482, "eval_runtime": 181.3202, "eval_samples_per_second": 2.3, "eval_steps_per_second": 0.292, "step": 10500 }, { "epoch": 282.05, "learning_rate": 3.6340206185567013e-06, "loss": 0.007, "step": 11000 }, { "epoch": 282.05, "eval_bleu": 96.7187, "eval_em": 0.6355, "eval_gen_len": 75.9281, "eval_loss": 0.16688644886016846, "eval_rm": 0.7458, "eval_runtime": 181.4265, "eval_samples_per_second": 2.298, "eval_steps_per_second": 0.292, "step": 11000 }, { "epoch": 294.87, "learning_rate": 1.0567010309278351e-06, "loss": 0.0065, "step": 11500 }, { "epoch": 294.87, "eval_bleu": 96.7679, "eval_em": 0.6307, "eval_gen_len": 75.6355, "eval_loss": 0.16715963184833527, "eval_rm": 0.7482, "eval_runtime": 181.7054, "eval_samples_per_second": 2.295, "eval_steps_per_second": 0.292, "step": 11500 }, { "epoch": 300.0, "step": 11700, "total_flos": 9455707744902144.0, "train_loss": 0.001008551752465403, "train_runtime": 1876.4514, "train_samples_per_second": 196.008, "train_steps_per_second": 6.235 } ], "max_steps": 11700, "num_train_epochs": 300, "total_flos": 9455707744902144.0, "trial_name": null, "trial_params": null }