{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.7267080745341614, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.998766400914329e-05, "loss": 0.6951, "step": 2 }, { "epoch": 0.1, "learning_rate": 4.995066821070679e-05, "loss": 0.5589, "step": 4 }, { "epoch": 0.15, "learning_rate": 4.9889049115077005e-05, "loss": 0.5485, "step": 6 }, { "epoch": 0.2, "learning_rate": 4.980286753286195e-05, "loss": 0.5392, "step": 8 }, { "epoch": 0.25, "learning_rate": 4.9692208514878444e-05, "loss": 0.5347, "step": 10 }, { "epoch": 0.3, "learning_rate": 4.9557181268217227e-05, "loss": 0.5261, "step": 12 }, { "epoch": 0.35, "learning_rate": 4.939791904846869e-05, "loss": 0.5298, "step": 14 }, { "epoch": 0.4, "learning_rate": 4.9214579028215776e-05, "loss": 0.536, "step": 16 }, { "epoch": 0.45, "learning_rate": 4.900734214192358e-05, "loss": 0.5012, "step": 18 }, { "epoch": 0.5, "learning_rate": 4.877641290737884e-05, "loss": 0.5342, "step": 20 }, { "epoch": 0.55, "learning_rate": 4.852201922385564e-05, "loss": 0.5324, "step": 22 }, { "epoch": 0.6, "learning_rate": 4.8244412147206284e-05, "loss": 0.4863, "step": 24 }, { "epoch": 0.65, "learning_rate": 4.794386564209953e-05, "loss": 0.4774, "step": 26 }, { "epoch": 0.7, "learning_rate": 4.762067631165049e-05, "loss": 0.4957, "step": 28 }, { "epoch": 0.75, "learning_rate": 4.72751631047092e-05, "loss": 0.4531, "step": 30 }, { "epoch": 0.8, "learning_rate": 4.72751631047092e-05, "loss": 0.5678, "step": 32 }, { "epoch": 0.84, "learning_rate": 4.72751631047092e-05, "loss": 0.5456, "step": 34 }, { "epoch": 0.89, "learning_rate": 4.690766700109659e-05, "loss": 0.541, "step": 36 }, { "epoch": 0.94, "learning_rate": 4.65185506750986e-05, "loss": 0.5035, "step": 38 }, { "epoch": 0.99, "learning_rate": 4.610819813755038e-05, "loss": 0.4901, "step": 40 }, { "epoch": 1.04, "learning_rate": 4.567701435686404e-05, "loss": 0.3193, "step": 42 }, { "epoch": 1.09, "learning_rate": 4.522542485937369e-05, "loss": 0.3019, "step": 44 }, { "epoch": 1.14, "learning_rate": 4.4753875309392266e-05, "loss": 0.2869, "step": 46 }, { "epoch": 1.19, "learning_rate": 4.426283106939474e-05, "loss": 0.2998, "step": 48 }, { "epoch": 1.24, "learning_rate": 4.375277674076149e-05, "loss": 0.2784, "step": 50 }, { "epoch": 1.29, "learning_rate": 4.3224215685535294e-05, "loss": 0.3551, "step": 52 }, { "epoch": 1.34, "learning_rate": 4.267766952966369e-05, "loss": 0.3224, "step": 54 }, { "epoch": 1.39, "learning_rate": 4.211367764821722e-05, "loss": 0.2901, "step": 56 }, { "epoch": 1.44, "learning_rate": 4.1532796633091296e-05, "loss": 0.2776, "step": 58 }, { "epoch": 1.49, "learning_rate": 4.093559974371725e-05, "loss": 0.2993, "step": 60 }, { "epoch": 1.54, "learning_rate": 4.0322676341324415e-05, "loss": 0.2978, "step": 62 }, { "epoch": 1.59, "learning_rate": 3.969463130731183e-05, "loss": 0.2895, "step": 64 }, { "epoch": 1.64, "learning_rate": 3.905208444630327e-05, "loss": 0.3016, "step": 66 }, { "epoch": 1.69, "learning_rate": 3.8395669874474915e-05, "loss": 0.2613, "step": 68 }, { "epoch": 1.74, "learning_rate": 3.7726035393759285e-05, "loss": 0.2993, "step": 70 }, { "epoch": 1.79, "learning_rate": 3.704384185254288e-05, "loss": 0.2853, "step": 72 }, { "epoch": 1.84, "learning_rate": 3.634976249348867e-05, "loss": 0.2578, "step": 74 }, { "epoch": 1.89, "learning_rate": 3.564448228912682e-05, "loss": 0.2845, "step": 76 }, { "epoch": 1.94, "learning_rate": 3.4928697265869515e-05, "loss": 0.2931, "step": 78 }, { "epoch": 1.99, "learning_rate": 3.4203113817116957e-05, "loss": 0.3093, "step": 80 }, { "epoch": 2.04, "learning_rate": 3.346844800613229e-05, "loss": 0.1741, "step": 82 }, { "epoch": 2.09, "learning_rate": 3.272542485937369e-05, "loss": 0.1583, "step": 84 }, { "epoch": 2.14, "learning_rate": 3.1974777650980735e-05, "loss": 0.135, "step": 86 }, { "epoch": 2.19, "learning_rate": 3.121724717912138e-05, "loss": 0.1423, "step": 88 }, { "epoch": 2.24, "learning_rate": 3.045358103491357e-05, "loss": 0.1372, "step": 90 }, { "epoch": 2.29, "learning_rate": 2.9684532864643122e-05, "loss": 0.1249, "step": 92 }, { "epoch": 2.34, "learning_rate": 2.8910861626005776e-05, "loss": 0.1222, "step": 94 }, { "epoch": 2.39, "learning_rate": 2.8133330839107608e-05, "loss": 0.1607, "step": 96 }, { "epoch": 2.43, "learning_rate": 2.7352707832962865e-05, "loss": 0.1427, "step": 98 }, { "epoch": 2.48, "learning_rate": 2.656976298823284e-05, "loss": 0.12, "step": 100 }, { "epoch": 2.53, "learning_rate": 2.578526897695321e-05, "loss": 0.1309, "step": 102 }, { "epoch": 2.58, "learning_rate": 2.5e-05, "loss": 0.1332, "step": 104 }, { "epoch": 2.63, "learning_rate": 2.4214731023046793e-05, "loss": 0.1256, "step": 106 }, { "epoch": 2.68, "learning_rate": 2.3430237011767167e-05, "loss": 0.1258, "step": 108 }, { "epoch": 2.73, "learning_rate": 2.2647292167037144e-05, "loss": 0.1498, "step": 110 }, { "epoch": 2.78, "learning_rate": 2.186666916089239e-05, "loss": 0.1417, "step": 112 }, { "epoch": 2.83, "learning_rate": 2.1089138373994223e-05, "loss": 0.1287, "step": 114 }, { "epoch": 2.88, "learning_rate": 2.031546713535688e-05, "loss": 0.1152, "step": 116 }, { "epoch": 2.93, "learning_rate": 1.9546418965086442e-05, "loss": 0.1352, "step": 118 }, { "epoch": 2.98, "learning_rate": 1.8782752820878634e-05, "loss": 0.1149, "step": 120 }, { "epoch": 3.03, "learning_rate": 1.802522234901927e-05, "loss": 0.0871, "step": 122 }, { "epoch": 3.08, "learning_rate": 1.7274575140626318e-05, "loss": 0.0679, "step": 124 }, { "epoch": 3.13, "learning_rate": 1.6531551993867717e-05, "loss": 0.0616, "step": 126 }, { "epoch": 3.18, "learning_rate": 1.5796886182883053e-05, "loss": 0.0677, "step": 128 }, { "epoch": 3.23, "learning_rate": 1.5071302734130489e-05, "loss": 0.0636, "step": 130 }, { "epoch": 3.28, "learning_rate": 1.4355517710873184e-05, "loss": 0.0612, "step": 132 }, { "epoch": 3.33, "learning_rate": 1.3650237506511331e-05, "loss": 0.0574, "step": 134 }, { "epoch": 3.38, "learning_rate": 1.2956158147457115e-05, "loss": 0.0467, "step": 136 }, { "epoch": 3.43, "learning_rate": 1.2273964606240718e-05, "loss": 0.0611, "step": 138 }, { "epoch": 3.48, "learning_rate": 1.1604330125525079e-05, "loss": 0.0655, "step": 140 }, { "epoch": 3.53, "learning_rate": 1.0947915553696742e-05, "loss": 0.069, "step": 142 }, { "epoch": 3.58, "learning_rate": 1.0305368692688174e-05, "loss": 0.0573, "step": 144 }, { "epoch": 3.63, "learning_rate": 9.677323658675594e-06, "loss": 0.047, "step": 146 }, { "epoch": 3.68, "learning_rate": 9.064400256282757e-06, "loss": 0.0443, "step": 148 }, { "epoch": 3.73, "learning_rate": 8.467203366908707e-06, "loss": 0.0595, "step": 150 } ], "logging_steps": 2, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 25, "total_flos": 2.021958544038822e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }