{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.09120173383691, "eval_steps": 50000, "global_step": 1700000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 4.9920355054102726e-05, "loss": 1.1412, "step": 50000 }, { "epoch": 0.8, "eval_loss": 1.011365294456482, "eval_runtime": 2006.5534, "eval_samples_per_second": 111.193, "eval_steps_per_second": 1.738, "step": 50000 }, { "epoch": 1.59, "learning_rate": 4.992035346050262e-05, "loss": 1.0123, "step": 100000 }, { "epoch": 1.59, "eval_loss": 0.9693423509597778, "eval_runtime": 1995.4615, "eval_samples_per_second": 111.811, "eval_steps_per_second": 1.747, "step": 100000 }, { "epoch": 2.39, "learning_rate": 4.992034708610221e-05, "loss": 0.9754, "step": 150000 }, { "epoch": 2.39, "eval_loss": 0.9472731351852417, "eval_runtime": 1997.2157, "eval_samples_per_second": 111.713, "eval_steps_per_second": 1.746, "step": 150000 }, { "epoch": 3.19, "learning_rate": 4.992034549250211e-05, "loss": 0.9539, "step": 200000 }, { "epoch": 3.19, "eval_loss": 0.9325647354125977, "eval_runtime": 2015.1208, "eval_samples_per_second": 110.72, "eval_steps_per_second": 1.73, "step": 200000 }, { "epoch": 3.98, "learning_rate": 4.992035027330242e-05, "loss": 0.9387, "step": 250000 }, { "epoch": 3.98, "eval_loss": 0.9212433099746704, "eval_runtime": 2011.4269, "eval_samples_per_second": 110.924, "eval_steps_per_second": 1.734, "step": 250000 }, { "epoch": 4.78, "learning_rate": 4.992035027330242e-05, "loss": 0.9243, "step": 300000 }, { "epoch": 4.78, "eval_loss": 0.9138051271438599, "eval_runtime": 2011.3973, "eval_samples_per_second": 110.925, "eval_steps_per_second": 1.734, "step": 300000 }, { "epoch": 5.58, "learning_rate": 4.992035186690252e-05, "loss": 0.9144, "step": 350000 }, { "epoch": 5.58, "eval_loss": 0.9093130826950073, "eval_runtime": 1998.4573, "eval_samples_per_second": 111.644, "eval_steps_per_second": 1.745, "step": 350000 }, { "epoch": 6.37, "learning_rate": 4.9920355054102726e-05, "loss": 0.906, "step": 400000 }, { "epoch": 6.37, "eval_loss": 0.9041373133659363, "eval_runtime": 1998.7351, "eval_samples_per_second": 111.628, "eval_steps_per_second": 1.745, "step": 400000 }, { "epoch": 7.17, "learning_rate": 4.9920355054102726e-05, "loss": 0.8994, "step": 450000 }, { "epoch": 7.17, "eval_loss": 0.9003444910049438, "eval_runtime": 1982.6092, "eval_samples_per_second": 112.536, "eval_steps_per_second": 1.759, "step": 450000 }, { "epoch": 7.97, "learning_rate": 4.992035186690252e-05, "loss": 0.8933, "step": 500000 }, { "epoch": 7.97, "eval_loss": 0.8956149220466614, "eval_runtime": 2002.7479, "eval_samples_per_second": 111.404, "eval_steps_per_second": 1.741, "step": 500000 }, { "epoch": 8.76, "learning_rate": 4.9920355054102726e-05, "loss": 0.8856, "step": 550000 }, { "epoch": 8.76, "eval_loss": 0.8930546045303345, "eval_runtime": 1996.5839, "eval_samples_per_second": 111.748, "eval_steps_per_second": 1.746, "step": 550000 }, { "epoch": 9.56, "learning_rate": 4.992035346050262e-05, "loss": 0.8802, "step": 600000 }, { "epoch": 9.56, "eval_loss": 0.89084392786026, "eval_runtime": 1991.4984, "eval_samples_per_second": 112.034, "eval_steps_per_second": 1.751, "step": 600000 }, { "epoch": 10.36, "learning_rate": 4.992035346050262e-05, "loss": 0.8763, "step": 650000 }, { "epoch": 10.36, "eval_loss": 0.8895950317382812, "eval_runtime": 2006.0064, "eval_samples_per_second": 111.223, "eval_steps_per_second": 1.738, "step": 650000 }, { "epoch": 11.16, "learning_rate": 4.992035186690252e-05, "loss": 0.8725, "step": 700000 }, { "epoch": 11.16, "eval_loss": 0.8886296153068542, "eval_runtime": 2027.9335, "eval_samples_per_second": 110.021, "eval_steps_per_second": 1.719, "step": 700000 }, { "epoch": 11.95, "learning_rate": 4.992035186690252e-05, "loss": 0.8688, "step": 750000 }, { "epoch": 11.95, "eval_loss": 0.885003924369812, "eval_runtime": 1989.8507, "eval_samples_per_second": 112.127, "eval_steps_per_second": 1.752, "step": 750000 }, { "epoch": 12.75, "learning_rate": 4.992035186690252e-05, "loss": 0.8628, "step": 800000 }, { "epoch": 12.75, "eval_loss": 0.8833887577056885, "eval_runtime": 2010.1701, "eval_samples_per_second": 110.993, "eval_steps_per_second": 1.735, "step": 800000 }, { "epoch": 13.55, "learning_rate": 4.9920348679702315e-05, "loss": 0.8599, "step": 850000 }, { "epoch": 13.55, "eval_loss": 0.883805513381958, "eval_runtime": 2005.9725, "eval_samples_per_second": 111.225, "eval_steps_per_second": 1.738, "step": 850000 }, { "epoch": 14.34, "learning_rate": 4.9920348679702315e-05, "loss": 0.8572, "step": 900000 }, { "epoch": 14.34, "eval_loss": 0.8837567567825317, "eval_runtime": 2005.535, "eval_samples_per_second": 111.25, "eval_steps_per_second": 1.739, "step": 900000 }, { "epoch": 15.14, "learning_rate": 4.9920348679702315e-05, "loss": 0.8548, "step": 950000 }, { "epoch": 15.14, "eval_loss": 0.8825677037239075, "eval_runtime": 1984.894, "eval_samples_per_second": 112.407, "eval_steps_per_second": 1.757, "step": 950000 }, { "epoch": 15.94, "learning_rate": 4.992034549250211e-05, "loss": 0.8502, "step": 1000000 }, { "epoch": 15.94, "eval_loss": 0.8808427453041077, "eval_runtime": 2006.4913, "eval_samples_per_second": 111.197, "eval_steps_per_second": 1.738, "step": 1000000 }, { "epoch": 16.73, "learning_rate": 4.992034708610221e-05, "loss": 0.8471, "step": 1050000 }, { "epoch": 16.73, "eval_loss": 0.8812766075134277, "eval_runtime": 1998.6292, "eval_samples_per_second": 111.634, "eval_steps_per_second": 1.745, "step": 1050000 }, { "epoch": 17.53, "learning_rate": 4.992034708610221e-05, "loss": 0.8427, "step": 1100000 }, { "epoch": 17.53, "eval_loss": 0.8817498683929443, "eval_runtime": 1994.6872, "eval_samples_per_second": 111.855, "eval_steps_per_second": 1.748, "step": 1100000 }, { "epoch": 18.33, "learning_rate": 4.992034549250211e-05, "loss": 0.841, "step": 1150000 }, { "epoch": 18.33, "eval_loss": 0.8802331686019897, "eval_runtime": 1993.2844, "eval_samples_per_second": 111.933, "eval_steps_per_second": 1.749, "step": 1150000 }, { "epoch": 19.12, "learning_rate": 4.992034549250211e-05, "loss": 0.8399, "step": 1200000 }, { "epoch": 19.12, "eval_loss": 0.8813353180885315, "eval_runtime": 2003.661, "eval_samples_per_second": 111.354, "eval_steps_per_second": 1.74, "step": 1200000 }, { "epoch": 19.92, "learning_rate": 4.9920348679702315e-05, "loss": 0.8382, "step": 1250000 }, { "epoch": 19.92, "eval_loss": 0.8779821991920471, "eval_runtime": 1999.9414, "eval_samples_per_second": 111.561, "eval_steps_per_second": 1.744, "step": 1250000 }, { "epoch": 20.72, "learning_rate": 4.992035186690252e-05, "loss": 0.8356, "step": 1300000 }, { "epoch": 20.72, "eval_loss": 0.878333330154419, "eval_runtime": 1991.7989, "eval_samples_per_second": 112.017, "eval_steps_per_second": 1.751, "step": 1300000 }, { "epoch": 21.51, "learning_rate": 4.992034708610221e-05, "loss": 0.8311, "step": 1350000 }, { "epoch": 21.51, "eval_loss": 0.8799993991851807, "eval_runtime": 2009.116, "eval_samples_per_second": 111.051, "eval_steps_per_second": 1.736, "step": 1350000 }, { "epoch": 22.31, "learning_rate": 4.992035027330242e-05, "loss": 0.8297, "step": 1400000 }, { "epoch": 22.31, "eval_loss": 0.8792157769203186, "eval_runtime": 1985.6931, "eval_samples_per_second": 112.361, "eval_steps_per_second": 1.756, "step": 1400000 }, { "epoch": 23.11, "learning_rate": 4.9920348679702315e-05, "loss": 0.8294, "step": 1450000 }, { "epoch": 23.11, "eval_loss": 0.8799900412559509, "eval_runtime": 2021.1167, "eval_samples_per_second": 110.392, "eval_steps_per_second": 1.725, "step": 1450000 }, { "epoch": 23.9, "learning_rate": 4.9920348679702315e-05, "loss": 0.8277, "step": 1500000 }, { "epoch": 23.9, "eval_loss": 0.8771235346794128, "eval_runtime": 1995.2079, "eval_samples_per_second": 111.825, "eval_steps_per_second": 1.748, "step": 1500000 }, { "epoch": 24.7, "learning_rate": 4.9920348679702315e-05, "loss": 0.8254, "step": 1550000 }, { "epoch": 24.7, "eval_loss": 0.8784825205802917, "eval_runtime": 1994.3392, "eval_samples_per_second": 111.874, "eval_steps_per_second": 1.748, "step": 1550000 }, { "epoch": 25.5, "learning_rate": 4.992034708610221e-05, "loss": 0.821, "step": 1600000 }, { "epoch": 25.5, "eval_loss": 0.8786540627479553, "eval_runtime": 1998.0285, "eval_samples_per_second": 111.668, "eval_steps_per_second": 1.745, "step": 1600000 }, { "epoch": 26.29, "learning_rate": 4.9920348679702315e-05, "loss": 0.8201, "step": 1650000 }, { "epoch": 26.29, "eval_loss": 0.8791316151618958, "eval_runtime": 2008.615, "eval_samples_per_second": 111.079, "eval_steps_per_second": 1.736, "step": 1650000 }, { "epoch": 27.09, "learning_rate": 4.9920348679702315e-05, "loss": 0.8204, "step": 1700000 }, { "epoch": 27.09, "eval_loss": 0.8804346323013306, "eval_runtime": 2001.1642, "eval_samples_per_second": 111.493, "eval_steps_per_second": 1.742, "step": 1700000 } ], "logging_steps": 50000, "max_steps": 31375500, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 50000, "total_flos": 3.55356281659392e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }