{ "best_metric": null, "best_model_checkpoint": null, "epoch": 23.25581395348837, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 59.1033, "eval_gen_len": 17.5513, "eval_loss": 0.1559952348470688, "eval_meteor": 0.7539, "eval_runtime": 19.7643, "eval_samples_per_second": 26.614, "eval_steps_per_second": 0.86, "step": 129 }, { "epoch": 2.0, "eval_bleu": 65.6424, "eval_gen_len": 17.6027, "eval_loss": 0.0991397500038147, "eval_meteor": 0.8044, "eval_runtime": 14.846, "eval_samples_per_second": 35.43, "eval_steps_per_second": 1.145, "step": 258 }, { "epoch": 3.0, "eval_bleu": 70.6577, "eval_gen_len": 17.5779, "eval_loss": 0.06296151131391525, "eval_meteor": 0.8488, "eval_runtime": 14.7963, "eval_samples_per_second": 35.549, "eval_steps_per_second": 1.149, "step": 387 }, { "epoch": 3.88, "learning_rate": 1.689922480620155e-05, "loss": 0.3038, "step": 500 }, { "epoch": 4.0, "eval_bleu": 71.6744, "eval_gen_len": 17.5989, "eval_loss": 0.04667546600103378, "eval_meteor": 0.8522, "eval_runtime": 14.7696, "eval_samples_per_second": 35.614, "eval_steps_per_second": 1.151, "step": 516 }, { "epoch": 5.0, "eval_bleu": 72.4991, "eval_gen_len": 17.6749, "eval_loss": 0.038296110928058624, "eval_meteor": 0.8509, "eval_runtime": 14.8269, "eval_samples_per_second": 35.476, "eval_steps_per_second": 1.147, "step": 645 }, { "epoch": 6.0, "eval_bleu": 72.5858, "eval_gen_len": 17.6464, "eval_loss": 0.03319519758224487, "eval_meteor": 0.8548, "eval_runtime": 14.6593, "eval_samples_per_second": 35.882, "eval_steps_per_second": 1.16, "step": 774 }, { "epoch": 7.0, "eval_bleu": 74.3526, "eval_gen_len": 17.6217, "eval_loss": 0.023467697203159332, "eval_meteor": 0.8734, "eval_runtime": 14.705, "eval_samples_per_second": 35.77, "eval_steps_per_second": 1.156, "step": 903 }, { "epoch": 7.75, "learning_rate": 1.3798449612403102e-05, "loss": 0.0643, "step": 1000 }, { "epoch": 8.0, "eval_bleu": 74.9962, "eval_gen_len": 17.6141, "eval_loss": 0.01849055290222168, "eval_meteor": 0.8793, "eval_runtime": 15.0305, "eval_samples_per_second": 34.995, "eval_steps_per_second": 1.131, "step": 1032 }, { "epoch": 9.0, "eval_bleu": 75.5462, "eval_gen_len": 17.6027, "eval_loss": 0.014913694001734257, "eval_meteor": 0.8862, "eval_runtime": 14.5903, "eval_samples_per_second": 36.051, "eval_steps_per_second": 1.165, "step": 1161 }, { "epoch": 10.0, "eval_bleu": 76.3236, "eval_gen_len": 17.5798, "eval_loss": 0.014180008322000504, "eval_meteor": 0.8954, "eval_runtime": 14.6398, "eval_samples_per_second": 35.929, "eval_steps_per_second": 1.161, "step": 1290 }, { "epoch": 11.0, "eval_bleu": 75.8326, "eval_gen_len": 17.5951, "eval_loss": 0.010033702477812767, "eval_meteor": 0.8888, "eval_runtime": 14.6128, "eval_samples_per_second": 35.996, "eval_steps_per_second": 1.163, "step": 1419 }, { "epoch": 11.63, "learning_rate": 1.0697674418604651e-05, "loss": 0.0341, "step": 1500 }, { "epoch": 12.0, "eval_bleu": 75.9138, "eval_gen_len": 17.5951, "eval_loss": 0.009980925358831882, "eval_meteor": 0.8891, "eval_runtime": 14.7328, "eval_samples_per_second": 35.703, "eval_steps_per_second": 1.154, "step": 1548 }, { "epoch": 13.0, "eval_bleu": 76.0534, "eval_gen_len": 17.5913, "eval_loss": 0.0070331464521586895, "eval_meteor": 0.8901, "eval_runtime": 14.643, "eval_samples_per_second": 35.922, "eval_steps_per_second": 1.161, "step": 1677 }, { "epoch": 14.0, "eval_bleu": 76.3943, "eval_gen_len": 17.5798, "eval_loss": 0.006607058458030224, "eval_meteor": 0.8952, "eval_runtime": 14.5594, "eval_samples_per_second": 36.128, "eval_steps_per_second": 1.168, "step": 1806 }, { "epoch": 15.0, "eval_bleu": 76.9833, "eval_gen_len": 17.5608, "eval_loss": 0.003804780077189207, "eval_meteor": 0.9027, "eval_runtime": 14.9867, "eval_samples_per_second": 35.098, "eval_steps_per_second": 1.134, "step": 1935 }, { "epoch": 15.5, "learning_rate": 7.596899224806202e-06, "loss": 0.0191, "step": 2000 }, { "epoch": 16.0, "eval_bleu": 76.9399, "eval_gen_len": 17.5608, "eval_loss": 0.0028171560261398554, "eval_meteor": 0.9025, "eval_runtime": 14.5931, "eval_samples_per_second": 36.044, "eval_steps_per_second": 1.165, "step": 2064 }, { "epoch": 17.0, "eval_bleu": 76.5796, "eval_gen_len": 17.5722, "eval_loss": 0.005369492340832949, "eval_meteor": 0.8979, "eval_runtime": 14.6939, "eval_samples_per_second": 35.797, "eval_steps_per_second": 1.157, "step": 2193 }, { "epoch": 18.0, "eval_bleu": 77.0507, "eval_gen_len": 17.557, "eval_loss": 0.002158859744668007, "eval_meteor": 0.904, "eval_runtime": 14.6859, "eval_samples_per_second": 35.817, "eval_steps_per_second": 1.158, "step": 2322 }, { "epoch": 19.0, "eval_bleu": 76.3097, "eval_gen_len": 17.5837, "eval_loss": 0.0028479481115937233, "eval_meteor": 0.8933, "eval_runtime": 14.699, "eval_samples_per_second": 35.785, "eval_steps_per_second": 1.157, "step": 2451 }, { "epoch": 19.38, "learning_rate": 4.4961240310077525e-06, "loss": 0.0121, "step": 2500 }, { "epoch": 20.0, "eval_bleu": 77.0507, "eval_gen_len": 17.557, "eval_loss": 0.0012633432634174824, "eval_meteor": 0.904, "eval_runtime": 14.9177, "eval_samples_per_second": 35.26, "eval_steps_per_second": 1.14, "step": 2580 }, { "epoch": 21.0, "eval_bleu": 76.5168, "eval_gen_len": 17.576, "eval_loss": 0.001905079698190093, "eval_meteor": 0.8965, "eval_runtime": 14.6207, "eval_samples_per_second": 35.976, "eval_steps_per_second": 1.163, "step": 2709 }, { "epoch": 22.0, "eval_bleu": 77.2739, "eval_gen_len": 17.5494, "eval_loss": 0.0008121016435325146, "eval_meteor": 0.9072, "eval_runtime": 14.6135, "eval_samples_per_second": 35.994, "eval_steps_per_second": 1.163, "step": 2838 }, { "epoch": 23.0, "eval_bleu": 77.1609, "eval_gen_len": 17.5532, "eval_loss": 0.0007495949394069612, "eval_meteor": 0.9056, "eval_runtime": 14.5508, "eval_samples_per_second": 36.149, "eval_steps_per_second": 1.168, "step": 2967 }, { "epoch": 23.26, "learning_rate": 1.3953488372093025e-06, "loss": 0.0083, "step": 3000 } ], "logging_steps": 500, "max_steps": 3225, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 1.172703512236032e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }