{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9058960565963444, "global_step": 301500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.99749613653868e-05, "loss": 4.5704, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9949922730773584e-05, "loss": 3.9367, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.992488409616038e-05, "loss": 4.3169, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9899845461547165e-05, "loss": 4.4226, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.987480682693396e-05, "loss": 4.3777, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.984976819232076e-05, "loss": 4.3272, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.982472955770754e-05, "loss": 4.2964, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.979969092309434e-05, "loss": 4.2587, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.9774652288481124e-05, "loss": 4.2217, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.974961365386792e-05, "loss": 4.2056, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.972457501925471e-05, "loss": 4.2108, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.96995363846415e-05, "loss": 4.135, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.96744977500283e-05, "loss": 4.1552, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.9649459115415084e-05, "loss": 4.0666, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.962442048080188e-05, "loss": 4.1164, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.959938184618867e-05, "loss": 4.0798, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.957434321157546e-05, "loss": 4.0284, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.954930457696226e-05, "loss": 4.046, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.9524265942349043e-05, "loss": 4.0595, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.949922730773584e-05, "loss": 4.0243, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.947418867312263e-05, "loss": 4.0064, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.944915003850942e-05, "loss": 3.9907, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.942411140389621e-05, "loss": 3.9488, "step": 11500 }, { "epoch": 0.04, "learning_rate": 4.9399072769283e-05, "loss": 3.95, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.93740341346698e-05, "loss": 3.9148, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.934899550005659e-05, "loss": 3.917, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.932395686544338e-05, "loss": 3.8958, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.929891823083017e-05, "loss": 3.9248, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.927387959621696e-05, "loss": 3.8752, "step": 14500 }, { "epoch": 0.05, "learning_rate": 4.924884096160376e-05, "loss": 3.8548, "step": 15000 }, { "epoch": 0.05, "learning_rate": 4.922380232699055e-05, "loss": 3.8882, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.919876369237734e-05, "loss": 3.8457, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.917372505776413e-05, "loss": 3.8472, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.914868642315092e-05, "loss": 3.8743, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.912364778853771e-05, "loss": 3.8541, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.909860915392451e-05, "loss": 3.823, "step": 18000 }, { "epoch": 0.06, "learning_rate": 4.90735705193113e-05, "loss": 3.841, "step": 18500 }, { "epoch": 0.06, "learning_rate": 4.904853188469809e-05, "loss": 3.8262, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.902349325008488e-05, "loss": 3.7779, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.899845461547167e-05, "loss": 3.7952, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.897341598085847e-05, "loss": 3.8115, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.894837734624526e-05, "loss": 3.7978, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.892333871163205e-05, "loss": 3.7757, "step": 21500 }, { "epoch": 0.07, "learning_rate": 4.889830007701884e-05, "loss": 3.7953, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.887326144240563e-05, "loss": 3.7988, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.884822280779243e-05, "loss": 3.779, "step": 23000 }, { "epoch": 0.07, "learning_rate": 4.882318417317922e-05, "loss": 3.7545, "step": 23500 }, { "epoch": 0.07, "learning_rate": 4.879814553856601e-05, "loss": 3.7502, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.87731069039528e-05, "loss": 3.762, "step": 24500 }, { "epoch": 0.08, "learning_rate": 4.874806826933959e-05, "loss": 3.7711, "step": 25000 }, { "epoch": 0.08, "learning_rate": 4.872302963472639e-05, "loss": 3.7314, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.869799100011317e-05, "loss": 3.7427, "step": 26000 }, { "epoch": 0.08, "learning_rate": 4.867295236549997e-05, "loss": 3.7024, "step": 26500 }, { "epoch": 0.08, "learning_rate": 4.864791373088676e-05, "loss": 3.7356, "step": 27000 }, { "epoch": 0.08, "learning_rate": 4.862287509627355e-05, "loss": 3.7117, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.859783646166035e-05, "loss": 3.7235, "step": 28000 }, { "epoch": 0.09, "learning_rate": 4.857279782704713e-05, "loss": 3.6791, "step": 28500 }, { "epoch": 0.09, "learning_rate": 4.854775919243393e-05, "loss": 3.696, "step": 29000 }, { "epoch": 0.09, "learning_rate": 4.852272055782072e-05, "loss": 3.6852, "step": 29500 }, { "epoch": 0.09, "learning_rate": 4.849768192320751e-05, "loss": 3.6896, "step": 30000 }, { "epoch": 0.09, "learning_rate": 4.847264328859431e-05, "loss": 3.6759, "step": 30500 }, { "epoch": 0.09, "learning_rate": 4.844760465398109e-05, "loss": 3.7012, "step": 31000 }, { "epoch": 0.09, "learning_rate": 4.842256601936789e-05, "loss": 3.6378, "step": 31500 }, { "epoch": 0.1, "learning_rate": 4.839752738475467e-05, "loss": 3.6746, "step": 32000 }, { "epoch": 0.1, "learning_rate": 4.837248875014147e-05, "loss": 3.68, "step": 32500 }, { "epoch": 0.1, "learning_rate": 4.834745011552827e-05, "loss": 3.6775, "step": 33000 }, { "epoch": 0.1, "learning_rate": 4.832241148091505e-05, "loss": 3.6471, "step": 33500 }, { "epoch": 0.1, "learning_rate": 4.829737284630185e-05, "loss": 3.662, "step": 34000 }, { "epoch": 0.1, "learning_rate": 4.827233421168863e-05, "loss": 3.6359, "step": 34500 }, { "epoch": 0.11, "learning_rate": 4.824729557707543e-05, "loss": 3.6494, "step": 35000 }, { "epoch": 0.11, "learning_rate": 4.822225694246222e-05, "loss": 3.6187, "step": 35500 }, { "epoch": 0.11, "learning_rate": 4.819721830784901e-05, "loss": 3.6115, "step": 36000 }, { "epoch": 0.11, "learning_rate": 4.817217967323581e-05, "loss": 3.6062, "step": 36500 }, { "epoch": 0.11, "learning_rate": 4.814714103862259e-05, "loss": 3.6109, "step": 37000 }, { "epoch": 0.11, "learning_rate": 4.812210240400939e-05, "loss": 3.6239, "step": 37500 }, { "epoch": 0.11, "learning_rate": 4.809706376939618e-05, "loss": 3.646, "step": 38000 }, { "epoch": 0.12, "learning_rate": 4.807202513478297e-05, "loss": 3.5898, "step": 38500 }, { "epoch": 0.12, "learning_rate": 4.804698650016977e-05, "loss": 3.6008, "step": 39000 }, { "epoch": 0.12, "learning_rate": 4.802194786555655e-05, "loss": 3.6265, "step": 39500 }, { "epoch": 0.12, "learning_rate": 4.799690923094335e-05, "loss": 3.6111, "step": 40000 }, { "epoch": 0.12, "learning_rate": 4.797187059633014e-05, "loss": 3.5766, "step": 40500 }, { "epoch": 0.12, "learning_rate": 4.794683196171693e-05, "loss": 3.5916, "step": 41000 }, { "epoch": 0.12, "learning_rate": 4.792179332710373e-05, "loss": 3.641, "step": 41500 }, { "epoch": 0.13, "learning_rate": 4.789675469249051e-05, "loss": 3.5743, "step": 42000 }, { "epoch": 0.13, "learning_rate": 4.787171605787731e-05, "loss": 3.6153, "step": 42500 }, { "epoch": 0.13, "learning_rate": 4.78466774232641e-05, "loss": 3.6073, "step": 43000 }, { "epoch": 0.13, "learning_rate": 4.782163878865089e-05, "loss": 3.5881, "step": 43500 }, { "epoch": 0.13, "learning_rate": 4.779660015403769e-05, "loss": 3.5649, "step": 44000 }, { "epoch": 0.13, "learning_rate": 4.777156151942447e-05, "loss": 3.5602, "step": 44500 }, { "epoch": 0.14, "learning_rate": 4.774652288481127e-05, "loss": 3.5707, "step": 45000 }, { "epoch": 0.14, "learning_rate": 4.772148425019806e-05, "loss": 3.5546, "step": 45500 }, { "epoch": 0.14, "learning_rate": 4.769644561558485e-05, "loss": 3.5327, "step": 46000 }, { "epoch": 0.14, "learning_rate": 4.767140698097164e-05, "loss": 3.5489, "step": 46500 }, { "epoch": 0.14, "learning_rate": 4.764636834635843e-05, "loss": 3.5559, "step": 47000 }, { "epoch": 0.14, "learning_rate": 4.762132971174523e-05, "loss": 3.5514, "step": 47500 }, { "epoch": 0.14, "learning_rate": 4.759629107713202e-05, "loss": 3.5295, "step": 48000 }, { "epoch": 0.15, "learning_rate": 4.757125244251881e-05, "loss": 3.5198, "step": 48500 }, { "epoch": 0.15, "learning_rate": 4.75462138079056e-05, "loss": 3.5231, "step": 49000 }, { "epoch": 0.15, "learning_rate": 4.752117517329239e-05, "loss": 3.5695, "step": 49500 }, { "epoch": 0.15, "learning_rate": 4.749613653867919e-05, "loss": 3.581, "step": 50000 }, { "epoch": 0.15, "learning_rate": 4.747109790406598e-05, "loss": 3.5228, "step": 50500 }, { "epoch": 0.15, "learning_rate": 4.744605926945277e-05, "loss": 3.5382, "step": 51000 }, { "epoch": 0.15, "learning_rate": 4.742102063483956e-05, "loss": 3.5384, "step": 51500 }, { "epoch": 0.16, "learning_rate": 4.739598200022635e-05, "loss": 3.4945, "step": 52000 }, { "epoch": 0.16, "learning_rate": 4.737094336561314e-05, "loss": 3.503, "step": 52500 }, { "epoch": 0.16, "learning_rate": 4.734590473099994e-05, "loss": 3.523, "step": 53000 }, { "epoch": 0.16, "learning_rate": 4.732086609638673e-05, "loss": 3.5115, "step": 53500 }, { "epoch": 0.16, "learning_rate": 4.729582746177352e-05, "loss": 3.5014, "step": 54000 }, { "epoch": 0.16, "learning_rate": 4.727078882716031e-05, "loss": 3.5253, "step": 54500 }, { "epoch": 0.17, "learning_rate": 4.72457501925471e-05, "loss": 3.4913, "step": 55000 }, { "epoch": 0.17, "learning_rate": 4.72207115579339e-05, "loss": 3.5152, "step": 55500 }, { "epoch": 0.17, "learning_rate": 4.719567292332069e-05, "loss": 3.4847, "step": 56000 }, { "epoch": 0.17, "learning_rate": 4.717063428870748e-05, "loss": 3.5086, "step": 56500 }, { "epoch": 0.17, "learning_rate": 4.714559565409427e-05, "loss": 3.5071, "step": 57000 }, { "epoch": 0.17, "learning_rate": 4.712055701948106e-05, "loss": 3.4918, "step": 57500 }, { "epoch": 0.17, "learning_rate": 4.709551838486786e-05, "loss": 3.4677, "step": 58000 }, { "epoch": 0.18, "learning_rate": 4.707047975025464e-05, "loss": 3.4594, "step": 58500 }, { "epoch": 0.18, "learning_rate": 4.704544111564144e-05, "loss": 3.5236, "step": 59000 }, { "epoch": 0.18, "learning_rate": 4.702040248102823e-05, "loss": 3.4743, "step": 59500 }, { "epoch": 0.18, "learning_rate": 4.699536384641502e-05, "loss": 3.478, "step": 60000 }, { "epoch": 0.18, "learning_rate": 4.6970325211801817e-05, "loss": 3.458, "step": 60500 }, { "epoch": 0.18, "learning_rate": 4.69452865771886e-05, "loss": 3.464, "step": 61000 }, { "epoch": 0.18, "learning_rate": 4.69202479425754e-05, "loss": 3.4501, "step": 61500 }, { "epoch": 0.19, "learning_rate": 4.689520930796219e-05, "loss": 3.4772, "step": 62000 }, { "epoch": 0.19, "learning_rate": 4.687017067334898e-05, "loss": 3.4522, "step": 62500 }, { "epoch": 0.19, "learning_rate": 4.6845132038735776e-05, "loss": 3.4415, "step": 63000 }, { "epoch": 0.19, "learning_rate": 4.682009340412256e-05, "loss": 3.4953, "step": 63500 }, { "epoch": 0.19, "learning_rate": 4.679505476950936e-05, "loss": 3.4713, "step": 64000 }, { "epoch": 0.19, "learning_rate": 4.677001613489615e-05, "loss": 3.4588, "step": 64500 }, { "epoch": 0.2, "learning_rate": 4.674497750028294e-05, "loss": 3.4765, "step": 65000 }, { "epoch": 0.2, "learning_rate": 4.671993886566973e-05, "loss": 3.4408, "step": 65500 }, { "epoch": 0.2, "learning_rate": 4.669490023105652e-05, "loss": 3.4454, "step": 66000 }, { "epoch": 0.2, "learning_rate": 4.666986159644332e-05, "loss": 3.4629, "step": 66500 }, { "epoch": 0.2, "learning_rate": 4.66448229618301e-05, "loss": 3.4751, "step": 67000 }, { "epoch": 0.2, "learning_rate": 4.66197843272169e-05, "loss": 3.446, "step": 67500 }, { "epoch": 0.2, "learning_rate": 4.659474569260369e-05, "loss": 3.4132, "step": 68000 }, { "epoch": 0.21, "learning_rate": 4.656970705799048e-05, "loss": 3.4266, "step": 68500 }, { "epoch": 0.21, "learning_rate": 4.6544668423377277e-05, "loss": 3.4197, "step": 69000 }, { "epoch": 0.21, "learning_rate": 4.651962978876406e-05, "loss": 3.444, "step": 69500 }, { "epoch": 0.21, "learning_rate": 4.649459115415086e-05, "loss": 3.4453, "step": 70000 }, { "epoch": 0.21, "learning_rate": 4.646955251953765e-05, "loss": 3.4215, "step": 70500 }, { "epoch": 0.21, "learning_rate": 4.644451388492444e-05, "loss": 3.4524, "step": 71000 }, { "epoch": 0.21, "learning_rate": 4.6419475250311236e-05, "loss": 3.4432, "step": 71500 }, { "epoch": 0.22, "learning_rate": 4.639443661569802e-05, "loss": 3.4105, "step": 72000 }, { "epoch": 0.22, "learning_rate": 4.636939798108482e-05, "loss": 3.415, "step": 72500 }, { "epoch": 0.22, "learning_rate": 4.634435934647161e-05, "loss": 3.4441, "step": 73000 }, { "epoch": 0.22, "learning_rate": 4.63193207118584e-05, "loss": 3.4117, "step": 73500 }, { "epoch": 0.22, "learning_rate": 4.6294282077245196e-05, "loss": 3.4101, "step": 74000 }, { "epoch": 0.22, "learning_rate": 4.626924344263198e-05, "loss": 3.3981, "step": 74500 }, { "epoch": 0.23, "learning_rate": 4.624420480801878e-05, "loss": 3.4246, "step": 75000 }, { "epoch": 0.23, "learning_rate": 4.621916617340557e-05, "loss": 3.423, "step": 75500 }, { "epoch": 0.23, "learning_rate": 4.619412753879236e-05, "loss": 3.4062, "step": 76000 }, { "epoch": 0.23, "learning_rate": 4.6169088904179155e-05, "loss": 3.4059, "step": 76500 }, { "epoch": 0.23, "learning_rate": 4.614405026956594e-05, "loss": 3.3988, "step": 77000 }, { "epoch": 0.23, "learning_rate": 4.6119011634952736e-05, "loss": 3.4308, "step": 77500 }, { "epoch": 0.23, "learning_rate": 4.609397300033953e-05, "loss": 3.3904, "step": 78000 }, { "epoch": 0.24, "learning_rate": 4.606893436572632e-05, "loss": 3.3847, "step": 78500 }, { "epoch": 0.24, "learning_rate": 4.6043895731113115e-05, "loss": 3.3941, "step": 79000 }, { "epoch": 0.24, "learning_rate": 4.60188570964999e-05, "loss": 3.3834, "step": 79500 }, { "epoch": 0.24, "learning_rate": 4.5993818461886696e-05, "loss": 3.4271, "step": 80000 }, { "epoch": 0.24, "learning_rate": 4.596877982727349e-05, "loss": 3.3714, "step": 80500 }, { "epoch": 0.24, "learning_rate": 4.594374119266028e-05, "loss": 3.3709, "step": 81000 }, { "epoch": 0.24, "learning_rate": 4.591870255804707e-05, "loss": 3.4005, "step": 81500 }, { "epoch": 0.25, "learning_rate": 4.589366392343386e-05, "loss": 3.3783, "step": 82000 }, { "epoch": 0.25, "learning_rate": 4.5868625288820656e-05, "loss": 3.385, "step": 82500 }, { "epoch": 0.25, "learning_rate": 4.5843586654207446e-05, "loss": 3.3812, "step": 83000 }, { "epoch": 0.25, "learning_rate": 4.581854801959424e-05, "loss": 3.362, "step": 83500 }, { "epoch": 0.25, "learning_rate": 4.579350938498103e-05, "loss": 3.3715, "step": 84000 }, { "epoch": 0.25, "learning_rate": 4.576847075036782e-05, "loss": 3.387, "step": 84500 }, { "epoch": 0.26, "learning_rate": 4.5743432115754615e-05, "loss": 3.3741, "step": 85000 }, { "epoch": 0.26, "learning_rate": 4.5718393481141406e-05, "loss": 3.3461, "step": 85500 }, { "epoch": 0.26, "learning_rate": 4.5693354846528196e-05, "loss": 3.3659, "step": 86000 }, { "epoch": 0.26, "learning_rate": 4.566831621191499e-05, "loss": 3.3693, "step": 86500 }, { "epoch": 0.26, "learning_rate": 4.564327757730178e-05, "loss": 3.3776, "step": 87000 }, { "epoch": 0.26, "learning_rate": 4.561823894268857e-05, "loss": 3.3391, "step": 87500 }, { "epoch": 0.26, "learning_rate": 4.5593200308075365e-05, "loss": 3.3516, "step": 88000 }, { "epoch": 0.27, "learning_rate": 4.5568161673462156e-05, "loss": 3.3808, "step": 88500 }, { "epoch": 0.27, "learning_rate": 4.5543123038848947e-05, "loss": 3.3933, "step": 89000 }, { "epoch": 0.27, "learning_rate": 4.551808440423574e-05, "loss": 3.3717, "step": 89500 }, { "epoch": 0.27, "learning_rate": 4.549304576962253e-05, "loss": 3.3625, "step": 90000 }, { "epoch": 0.27, "learning_rate": 4.5468007135009325e-05, "loss": 3.3391, "step": 90500 }, { "epoch": 0.27, "learning_rate": 4.5442968500396116e-05, "loss": 3.3361, "step": 91000 }, { "epoch": 0.27, "learning_rate": 4.5417929865782906e-05, "loss": 3.3201, "step": 91500 }, { "epoch": 0.28, "learning_rate": 4.53928912311697e-05, "loss": 3.3444, "step": 92000 }, { "epoch": 0.28, "learning_rate": 4.536785259655649e-05, "loss": 3.3624, "step": 92500 }, { "epoch": 0.28, "learning_rate": 4.5342813961943285e-05, "loss": 3.3289, "step": 93000 }, { "epoch": 0.28, "learning_rate": 4.531777532733007e-05, "loss": 3.342, "step": 93500 }, { "epoch": 0.28, "learning_rate": 4.5292736692716866e-05, "loss": 3.3372, "step": 94000 }, { "epoch": 0.28, "learning_rate": 4.5267698058103656e-05, "loss": 3.3458, "step": 94500 }, { "epoch": 0.29, "learning_rate": 4.524265942349045e-05, "loss": 3.386, "step": 95000 }, { "epoch": 0.29, "learning_rate": 4.521762078887724e-05, "loss": 3.336, "step": 95500 }, { "epoch": 0.29, "learning_rate": 4.519258215426403e-05, "loss": 3.3324, "step": 96000 }, { "epoch": 0.29, "learning_rate": 4.5167543519650825e-05, "loss": 3.3054, "step": 96500 }, { "epoch": 0.29, "learning_rate": 4.5142504885037616e-05, "loss": 3.3409, "step": 97000 }, { "epoch": 0.29, "learning_rate": 4.5117466250424406e-05, "loss": 3.3233, "step": 97500 }, { "epoch": 0.29, "learning_rate": 4.50924276158112e-05, "loss": 3.3296, "step": 98000 }, { "epoch": 0.3, "learning_rate": 4.506738898119799e-05, "loss": 3.3135, "step": 98500 }, { "epoch": 0.3, "learning_rate": 4.5042350346584785e-05, "loss": 3.3307, "step": 99000 }, { "epoch": 0.3, "learning_rate": 4.5017311711971576e-05, "loss": 3.3117, "step": 99500 }, { "epoch": 0.3, "learning_rate": 4.4992273077358366e-05, "loss": 3.3407, "step": 100000 }, { "epoch": 0.3, "learning_rate": 4.496723444274516e-05, "loss": 3.3376, "step": 100500 }, { "epoch": 0.3, "learning_rate": 4.494219580813195e-05, "loss": 3.3178, "step": 101000 }, { "epoch": 0.3, "learning_rate": 4.4917157173518745e-05, "loss": 3.3416, "step": 101500 }, { "epoch": 0.31, "learning_rate": 4.489211853890553e-05, "loss": 3.2907, "step": 102000 }, { "epoch": 0.31, "learning_rate": 4.4867079904292326e-05, "loss": 3.3315, "step": 102500 }, { "epoch": 0.31, "learning_rate": 4.4842041269679116e-05, "loss": 3.3199, "step": 103000 }, { "epoch": 0.31, "learning_rate": 4.481700263506591e-05, "loss": 3.3082, "step": 103500 }, { "epoch": 0.31, "learning_rate": 4.4791964000452704e-05, "loss": 3.3224, "step": 104000 }, { "epoch": 0.31, "learning_rate": 4.476692536583949e-05, "loss": 3.3023, "step": 104500 }, { "epoch": 0.32, "learning_rate": 4.4741886731226285e-05, "loss": 3.3068, "step": 105000 }, { "epoch": 0.32, "learning_rate": 4.4716848096613076e-05, "loss": 3.3125, "step": 105500 }, { "epoch": 0.32, "learning_rate": 4.4691809461999866e-05, "loss": 3.3026, "step": 106000 }, { "epoch": 0.32, "learning_rate": 4.4666770827386664e-05, "loss": 3.302, "step": 106500 }, { "epoch": 0.32, "learning_rate": 4.464173219277345e-05, "loss": 3.299, "step": 107000 }, { "epoch": 0.32, "learning_rate": 4.4616693558160245e-05, "loss": 3.2802, "step": 107500 }, { "epoch": 0.32, "learning_rate": 4.4591654923547035e-05, "loss": 3.2993, "step": 108000 }, { "epoch": 0.33, "learning_rate": 4.4566616288933826e-05, "loss": 3.2822, "step": 108500 }, { "epoch": 0.33, "learning_rate": 4.454157765432062e-05, "loss": 3.2963, "step": 109000 }, { "epoch": 0.33, "learning_rate": 4.451653901970741e-05, "loss": 3.3129, "step": 109500 }, { "epoch": 0.33, "learning_rate": 4.4491500385094204e-05, "loss": 3.3025, "step": 110000 }, { "epoch": 0.33, "learning_rate": 4.4466461750480995e-05, "loss": 3.3245, "step": 110500 }, { "epoch": 0.33, "learning_rate": 4.4441423115867786e-05, "loss": 3.2676, "step": 111000 }, { "epoch": 0.34, "learning_rate": 4.441638448125458e-05, "loss": 3.317, "step": 111500 }, { "epoch": 0.34, "learning_rate": 4.439134584664137e-05, "loss": 3.3008, "step": 112000 }, { "epoch": 0.34, "learning_rate": 4.4366307212028164e-05, "loss": 3.3093, "step": 112500 }, { "epoch": 0.34, "learning_rate": 4.4341268577414955e-05, "loss": 3.3066, "step": 113000 }, { "epoch": 0.34, "learning_rate": 4.4316229942801745e-05, "loss": 3.2671, "step": 113500 }, { "epoch": 0.34, "learning_rate": 4.4291191308188536e-05, "loss": 3.2873, "step": 114000 }, { "epoch": 0.34, "learning_rate": 4.4266152673575326e-05, "loss": 3.2857, "step": 114500 }, { "epoch": 0.35, "learning_rate": 4.4241114038962124e-05, "loss": 3.2992, "step": 115000 }, { "epoch": 0.35, "learning_rate": 4.4216075404348914e-05, "loss": 3.2959, "step": 115500 }, { "epoch": 0.35, "learning_rate": 4.4191036769735705e-05, "loss": 3.2688, "step": 116000 }, { "epoch": 0.35, "learning_rate": 4.4165998135122495e-05, "loss": 3.2958, "step": 116500 }, { "epoch": 0.35, "learning_rate": 4.4140959500509286e-05, "loss": 3.2933, "step": 117000 }, { "epoch": 0.35, "learning_rate": 4.411592086589608e-05, "loss": 3.2796, "step": 117500 }, { "epoch": 0.35, "learning_rate": 4.4090882231282874e-05, "loss": 3.2577, "step": 118000 }, { "epoch": 0.36, "learning_rate": 4.4065843596669664e-05, "loss": 3.2797, "step": 118500 }, { "epoch": 0.36, "learning_rate": 4.4040804962056455e-05, "loss": 3.3004, "step": 119000 }, { "epoch": 0.36, "learning_rate": 4.4015766327443246e-05, "loss": 3.2539, "step": 119500 }, { "epoch": 0.36, "learning_rate": 4.399072769283004e-05, "loss": 3.2525, "step": 120000 }, { "epoch": 0.36, "learning_rate": 4.3965689058216833e-05, "loss": 3.2668, "step": 120500 }, { "epoch": 0.36, "learning_rate": 4.3940650423603624e-05, "loss": 3.2693, "step": 121000 }, { "epoch": 0.37, "learning_rate": 4.3915611788990415e-05, "loss": 3.2792, "step": 121500 }, { "epoch": 0.37, "learning_rate": 4.3890573154377205e-05, "loss": 3.2649, "step": 122000 }, { "epoch": 0.37, "learning_rate": 4.3865534519763996e-05, "loss": 3.2556, "step": 122500 }, { "epoch": 0.37, "learning_rate": 4.3840495885150786e-05, "loss": 3.2509, "step": 123000 }, { "epoch": 0.37, "learning_rate": 4.3815457250537584e-05, "loss": 3.2616, "step": 123500 }, { "epoch": 0.37, "learning_rate": 4.3790418615924374e-05, "loss": 3.2654, "step": 124000 }, { "epoch": 0.37, "learning_rate": 4.3765379981311165e-05, "loss": 3.2628, "step": 124500 }, { "epoch": 0.38, "learning_rate": 4.3740341346697955e-05, "loss": 3.2462, "step": 125000 }, { "epoch": 0.38, "learning_rate": 4.3715302712084746e-05, "loss": 3.2517, "step": 125500 }, { "epoch": 0.38, "learning_rate": 4.369026407747154e-05, "loss": 3.2897, "step": 126000 }, { "epoch": 0.38, "learning_rate": 4.3665225442858334e-05, "loss": 3.2707, "step": 126500 }, { "epoch": 0.38, "learning_rate": 4.3640186808245124e-05, "loss": 3.269, "step": 127000 }, { "epoch": 0.38, "learning_rate": 4.3615148173631915e-05, "loss": 3.2331, "step": 127500 }, { "epoch": 0.38, "learning_rate": 4.3590109539018705e-05, "loss": 3.2644, "step": 128000 }, { "epoch": 0.39, "learning_rate": 4.3565070904405496e-05, "loss": 3.2406, "step": 128500 }, { "epoch": 0.39, "learning_rate": 4.354003226979229e-05, "loss": 3.2499, "step": 129000 }, { "epoch": 0.39, "learning_rate": 4.3514993635179084e-05, "loss": 3.2325, "step": 129500 }, { "epoch": 0.39, "learning_rate": 4.3489955000565875e-05, "loss": 3.2631, "step": 130000 }, { "epoch": 0.39, "learning_rate": 4.3464916365952665e-05, "loss": 3.2447, "step": 130500 }, { "epoch": 0.39, "learning_rate": 4.3439877731339456e-05, "loss": 3.2349, "step": 131000 }, { "epoch": 0.4, "learning_rate": 4.341483909672625e-05, "loss": 3.2372, "step": 131500 }, { "epoch": 0.4, "learning_rate": 4.3389800462113044e-05, "loss": 3.2456, "step": 132000 }, { "epoch": 0.4, "learning_rate": 4.3364761827499834e-05, "loss": 3.2477, "step": 132500 }, { "epoch": 0.4, "learning_rate": 4.3339723192886625e-05, "loss": 3.243, "step": 133000 }, { "epoch": 0.4, "learning_rate": 4.3314684558273415e-05, "loss": 3.2287, "step": 133500 }, { "epoch": 0.4, "learning_rate": 4.328964592366021e-05, "loss": 3.2613, "step": 134000 }, { "epoch": 0.4, "learning_rate": 4.3264607289046996e-05, "loss": 3.2242, "step": 134500 }, { "epoch": 0.41, "learning_rate": 4.3239568654433794e-05, "loss": 3.2481, "step": 135000 }, { "epoch": 0.41, "learning_rate": 4.3214530019820584e-05, "loss": 3.2482, "step": 135500 }, { "epoch": 0.41, "learning_rate": 4.3189491385207375e-05, "loss": 3.2439, "step": 136000 }, { "epoch": 0.41, "learning_rate": 4.316445275059417e-05, "loss": 3.2321, "step": 136500 }, { "epoch": 0.41, "learning_rate": 4.3139414115980956e-05, "loss": 3.2422, "step": 137000 }, { "epoch": 0.41, "learning_rate": 4.311437548136775e-05, "loss": 3.249, "step": 137500 }, { "epoch": 0.41, "learning_rate": 4.3089336846754544e-05, "loss": 3.2349, "step": 138000 }, { "epoch": 0.42, "learning_rate": 4.3064298212141334e-05, "loss": 3.218, "step": 138500 }, { "epoch": 0.42, "learning_rate": 4.303925957752813e-05, "loss": 3.2232, "step": 139000 }, { "epoch": 0.42, "learning_rate": 4.3014220942914916e-05, "loss": 3.2183, "step": 139500 }, { "epoch": 0.42, "learning_rate": 4.298918230830171e-05, "loss": 3.2547, "step": 140000 }, { "epoch": 0.42, "learning_rate": 4.2964143673688503e-05, "loss": 3.2168, "step": 140500 }, { "epoch": 0.42, "learning_rate": 4.2939105039075294e-05, "loss": 3.2378, "step": 141000 }, { "epoch": 0.43, "learning_rate": 4.291406640446209e-05, "loss": 3.2035, "step": 141500 }, { "epoch": 0.43, "learning_rate": 4.2889027769848875e-05, "loss": 3.2112, "step": 142000 }, { "epoch": 0.43, "learning_rate": 4.286398913523567e-05, "loss": 3.1962, "step": 142500 }, { "epoch": 0.43, "learning_rate": 4.283895050062246e-05, "loss": 3.2278, "step": 143000 }, { "epoch": 0.43, "learning_rate": 4.2813911866009254e-05, "loss": 3.2453, "step": 143500 }, { "epoch": 0.43, "learning_rate": 4.278887323139605e-05, "loss": 3.2265, "step": 144000 }, { "epoch": 0.43, "learning_rate": 4.2763834596782835e-05, "loss": 3.1941, "step": 144500 }, { "epoch": 0.44, "learning_rate": 4.273879596216963e-05, "loss": 3.2519, "step": 145000 }, { "epoch": 0.44, "learning_rate": 4.271375732755642e-05, "loss": 3.1941, "step": 145500 }, { "epoch": 0.44, "learning_rate": 4.268871869294321e-05, "loss": 3.2421, "step": 146000 }, { "epoch": 0.44, "learning_rate": 4.266368005833001e-05, "loss": 3.2396, "step": 146500 }, { "epoch": 0.44, "learning_rate": 4.2638641423716794e-05, "loss": 3.2021, "step": 147000 }, { "epoch": 0.44, "learning_rate": 4.261360278910359e-05, "loss": 3.2229, "step": 147500 }, { "epoch": 0.44, "learning_rate": 4.258856415449038e-05, "loss": 3.2251, "step": 148000 }, { "epoch": 0.45, "learning_rate": 4.256352551987717e-05, "loss": 3.1941, "step": 148500 }, { "epoch": 0.45, "learning_rate": 4.2538486885263963e-05, "loss": 3.2002, "step": 149000 }, { "epoch": 0.45, "learning_rate": 4.2513448250650754e-05, "loss": 3.2177, "step": 149500 }, { "epoch": 0.45, "learning_rate": 4.248840961603755e-05, "loss": 3.2386, "step": 150000 }, { "epoch": 0.45, "learning_rate": 4.246337098142434e-05, "loss": 3.1961, "step": 150500 }, { "epoch": 0.45, "learning_rate": 4.243833234681113e-05, "loss": 3.1943, "step": 151000 }, { "epoch": 0.46, "learning_rate": 4.241329371219792e-05, "loss": 3.1818, "step": 151500 }, { "epoch": 0.46, "learning_rate": 4.2388255077584714e-05, "loss": 3.2139, "step": 152000 }, { "epoch": 0.46, "learning_rate": 4.236321644297151e-05, "loss": 3.1935, "step": 152500 }, { "epoch": 0.46, "learning_rate": 4.2338177808358295e-05, "loss": 3.206, "step": 153000 }, { "epoch": 0.46, "learning_rate": 4.231313917374509e-05, "loss": 3.2269, "step": 153500 }, { "epoch": 0.46, "learning_rate": 4.228810053913188e-05, "loss": 3.2269, "step": 154000 }, { "epoch": 0.46, "learning_rate": 4.226306190451867e-05, "loss": 3.1957, "step": 154500 }, { "epoch": 0.47, "learning_rate": 4.2238023269905464e-05, "loss": 3.2306, "step": 155000 }, { "epoch": 0.47, "learning_rate": 4.2212984635292254e-05, "loss": 3.224, "step": 155500 }, { "epoch": 0.47, "learning_rate": 4.218794600067905e-05, "loss": 3.2183, "step": 156000 }, { "epoch": 0.47, "learning_rate": 4.216290736606584e-05, "loss": 3.2007, "step": 156500 }, { "epoch": 0.47, "learning_rate": 4.213786873145263e-05, "loss": 3.1717, "step": 157000 }, { "epoch": 0.47, "learning_rate": 4.211283009683942e-05, "loss": 3.1949, "step": 157500 }, { "epoch": 0.47, "learning_rate": 4.2087791462226214e-05, "loss": 3.1852, "step": 158000 }, { "epoch": 0.48, "learning_rate": 4.206275282761301e-05, "loss": 3.1967, "step": 158500 }, { "epoch": 0.48, "learning_rate": 4.20377141929998e-05, "loss": 3.1857, "step": 159000 }, { "epoch": 0.48, "learning_rate": 4.201267555838659e-05, "loss": 3.184, "step": 159500 }, { "epoch": 0.48, "learning_rate": 4.198763692377338e-05, "loss": 3.1921, "step": 160000 }, { "epoch": 0.48, "learning_rate": 4.1962598289160173e-05, "loss": 3.1994, "step": 160500 }, { "epoch": 0.48, "learning_rate": 4.193755965454697e-05, "loss": 3.1853, "step": 161000 }, { "epoch": 0.49, "learning_rate": 4.191252101993376e-05, "loss": 3.207, "step": 161500 }, { "epoch": 0.49, "learning_rate": 4.188748238532055e-05, "loss": 3.1822, "step": 162000 }, { "epoch": 0.49, "learning_rate": 4.186244375070734e-05, "loss": 3.1786, "step": 162500 }, { "epoch": 0.49, "learning_rate": 4.183740511609413e-05, "loss": 3.1686, "step": 163000 }, { "epoch": 0.49, "learning_rate": 4.1812366481480924e-05, "loss": 3.2091, "step": 163500 }, { "epoch": 0.49, "learning_rate": 4.178732784686772e-05, "loss": 3.1702, "step": 164000 }, { "epoch": 0.49, "learning_rate": 4.176228921225451e-05, "loss": 3.1915, "step": 164500 }, { "epoch": 0.5, "learning_rate": 4.17372505776413e-05, "loss": 3.1912, "step": 165000 }, { "epoch": 0.5, "learning_rate": 4.171221194302809e-05, "loss": 3.2141, "step": 165500 }, { "epoch": 0.5, "learning_rate": 4.168717330841488e-05, "loss": 3.1828, "step": 166000 }, { "epoch": 0.5, "learning_rate": 4.166213467380168e-05, "loss": 3.1877, "step": 166500 }, { "epoch": 0.5, "learning_rate": 4.163709603918847e-05, "loss": 3.1859, "step": 167000 }, { "epoch": 0.5, "learning_rate": 4.161205740457526e-05, "loss": 3.1695, "step": 167500 }, { "epoch": 0.5, "learning_rate": 4.158701876996205e-05, "loss": 3.227, "step": 168000 }, { "epoch": 0.51, "learning_rate": 4.156198013534884e-05, "loss": 3.1862, "step": 168500 }, { "epoch": 0.51, "learning_rate": 4.153694150073564e-05, "loss": 3.2209, "step": 169000 }, { "epoch": 0.51, "learning_rate": 4.1511902866122424e-05, "loss": 3.1829, "step": 169500 }, { "epoch": 0.51, "learning_rate": 4.148686423150922e-05, "loss": 3.1981, "step": 170000 }, { "epoch": 0.51, "learning_rate": 4.146182559689601e-05, "loss": 3.1563, "step": 170500 }, { "epoch": 0.51, "learning_rate": 4.14367869622828e-05, "loss": 3.1743, "step": 171000 }, { "epoch": 0.52, "learning_rate": 4.14117483276696e-05, "loss": 3.1754, "step": 171500 }, { "epoch": 0.52, "learning_rate": 4.1386709693056384e-05, "loss": 3.1729, "step": 172000 }, { "epoch": 0.52, "learning_rate": 4.136167105844318e-05, "loss": 3.1476, "step": 172500 }, { "epoch": 0.52, "learning_rate": 4.133663242382997e-05, "loss": 3.1561, "step": 173000 }, { "epoch": 0.52, "learning_rate": 4.131159378921676e-05, "loss": 3.1844, "step": 173500 }, { "epoch": 0.52, "learning_rate": 4.128655515460356e-05, "loss": 3.1756, "step": 174000 }, { "epoch": 0.52, "learning_rate": 4.126151651999034e-05, "loss": 3.181, "step": 174500 }, { "epoch": 0.53, "learning_rate": 4.123647788537714e-05, "loss": 3.1479, "step": 175000 }, { "epoch": 0.53, "learning_rate": 4.121143925076393e-05, "loss": 3.1654, "step": 175500 }, { "epoch": 0.53, "learning_rate": 4.118640061615072e-05, "loss": 3.1903, "step": 176000 }, { "epoch": 0.53, "learning_rate": 4.116136198153752e-05, "loss": 3.1761, "step": 176500 }, { "epoch": 0.53, "learning_rate": 4.11363233469243e-05, "loss": 3.1882, "step": 177000 }, { "epoch": 0.53, "learning_rate": 4.11112847123111e-05, "loss": 3.175, "step": 177500 }, { "epoch": 0.53, "learning_rate": 4.108624607769789e-05, "loss": 3.1517, "step": 178000 }, { "epoch": 0.54, "learning_rate": 4.106120744308468e-05, "loss": 3.1663, "step": 178500 }, { "epoch": 0.54, "learning_rate": 4.103616880847148e-05, "loss": 3.1669, "step": 179000 }, { "epoch": 0.54, "learning_rate": 4.101113017385826e-05, "loss": 3.1764, "step": 179500 }, { "epoch": 0.54, "learning_rate": 4.098609153924506e-05, "loss": 3.1554, "step": 180000 }, { "epoch": 0.54, "learning_rate": 4.096105290463185e-05, "loss": 3.1488, "step": 180500 }, { "epoch": 0.54, "learning_rate": 4.093601427001864e-05, "loss": 3.1657, "step": 181000 }, { "epoch": 0.55, "learning_rate": 4.091097563540544e-05, "loss": 3.1415, "step": 181500 }, { "epoch": 0.55, "learning_rate": 4.088593700079222e-05, "loss": 3.1519, "step": 182000 }, { "epoch": 0.55, "learning_rate": 4.086089836617902e-05, "loss": 3.1763, "step": 182500 }, { "epoch": 0.55, "learning_rate": 4.08358597315658e-05, "loss": 3.1373, "step": 183000 }, { "epoch": 0.55, "learning_rate": 4.08108210969526e-05, "loss": 3.1619, "step": 183500 }, { "epoch": 0.55, "learning_rate": 4.078578246233939e-05, "loss": 3.1682, "step": 184000 }, { "epoch": 0.55, "learning_rate": 4.076074382772618e-05, "loss": 3.1474, "step": 184500 }, { "epoch": 0.56, "learning_rate": 4.073570519311298e-05, "loss": 3.2058, "step": 185000 }, { "epoch": 0.56, "learning_rate": 4.071066655849976e-05, "loss": 3.1558, "step": 185500 }, { "epoch": 0.56, "learning_rate": 4.068562792388656e-05, "loss": 3.1667, "step": 186000 }, { "epoch": 0.56, "learning_rate": 4.066058928927335e-05, "loss": 3.1724, "step": 186500 }, { "epoch": 0.56, "learning_rate": 4.063555065466014e-05, "loss": 3.1421, "step": 187000 }, { "epoch": 0.56, "learning_rate": 4.061051202004694e-05, "loss": 3.1602, "step": 187500 }, { "epoch": 0.56, "learning_rate": 4.058547338543372e-05, "loss": 3.1505, "step": 188000 }, { "epoch": 0.57, "learning_rate": 4.056043475082052e-05, "loss": 3.1426, "step": 188500 }, { "epoch": 0.57, "learning_rate": 4.053539611620731e-05, "loss": 3.1507, "step": 189000 }, { "epoch": 0.57, "learning_rate": 4.05103574815941e-05, "loss": 3.1579, "step": 189500 }, { "epoch": 0.57, "learning_rate": 4.048531884698089e-05, "loss": 3.1455, "step": 190000 }, { "epoch": 0.57, "learning_rate": 4.046028021236768e-05, "loss": 3.1776, "step": 190500 }, { "epoch": 0.57, "learning_rate": 4.043524157775448e-05, "loss": 3.1343, "step": 191000 }, { "epoch": 0.58, "learning_rate": 4.041020294314127e-05, "loss": 3.1487, "step": 191500 }, { "epoch": 0.58, "learning_rate": 4.038516430852806e-05, "loss": 3.15, "step": 192000 }, { "epoch": 0.58, "learning_rate": 4.036012567391485e-05, "loss": 3.1351, "step": 192500 }, { "epoch": 0.58, "learning_rate": 4.033508703930164e-05, "loss": 3.1242, "step": 193000 }, { "epoch": 0.58, "learning_rate": 4.031004840468844e-05, "loss": 3.1655, "step": 193500 }, { "epoch": 0.58, "learning_rate": 4.028500977007523e-05, "loss": 3.1386, "step": 194000 }, { "epoch": 0.58, "learning_rate": 4.025997113546202e-05, "loss": 3.1384, "step": 194500 }, { "epoch": 0.59, "learning_rate": 4.023493250084881e-05, "loss": 3.1433, "step": 195000 }, { "epoch": 0.59, "learning_rate": 4.02098938662356e-05, "loss": 3.1139, "step": 195500 }, { "epoch": 0.59, "learning_rate": 4.018485523162239e-05, "loss": 3.1579, "step": 196000 }, { "epoch": 0.59, "learning_rate": 4.015981659700919e-05, "loss": 3.1283, "step": 196500 }, { "epoch": 0.59, "learning_rate": 4.013477796239598e-05, "loss": 3.138, "step": 197000 }, { "epoch": 0.59, "learning_rate": 4.010973932778277e-05, "loss": 3.1325, "step": 197500 }, { "epoch": 0.59, "learning_rate": 4.008470069316956e-05, "loss": 3.1043, "step": 198000 }, { "epoch": 0.6, "learning_rate": 4.005966205855635e-05, "loss": 3.1572, "step": 198500 }, { "epoch": 0.6, "learning_rate": 4.003462342394315e-05, "loss": 3.1539, "step": 199000 }, { "epoch": 0.6, "learning_rate": 4.000958478932994e-05, "loss": 3.1408, "step": 199500 }, { "epoch": 0.6, "learning_rate": 3.998454615471673e-05, "loss": 3.1204, "step": 200000 }, { "epoch": 0.6, "learning_rate": 3.995950752010352e-05, "loss": 3.1164, "step": 200500 }, { "epoch": 0.6, "learning_rate": 3.993446888549031e-05, "loss": 3.1518, "step": 201000 }, { "epoch": 0.61, "learning_rate": 3.990943025087711e-05, "loss": 3.1399, "step": 201500 }, { "epoch": 0.61, "learning_rate": 3.98843916162639e-05, "loss": 3.116, "step": 202000 }, { "epoch": 0.61, "learning_rate": 3.985935298165069e-05, "loss": 3.1405, "step": 202500 }, { "epoch": 0.61, "learning_rate": 3.983431434703748e-05, "loss": 3.116, "step": 203000 }, { "epoch": 0.61, "learning_rate": 3.980927571242427e-05, "loss": 3.1357, "step": 203500 }, { "epoch": 0.61, "learning_rate": 3.978423707781107e-05, "loss": 3.149, "step": 204000 }, { "epoch": 0.61, "learning_rate": 3.975919844319785e-05, "loss": 3.1418, "step": 204500 }, { "epoch": 0.62, "learning_rate": 3.973415980858465e-05, "loss": 3.1229, "step": 205000 }, { "epoch": 0.62, "learning_rate": 3.970912117397144e-05, "loss": 3.141, "step": 205500 }, { "epoch": 0.62, "learning_rate": 3.968408253935823e-05, "loss": 3.1376, "step": 206000 }, { "epoch": 0.62, "learning_rate": 3.965904390474503e-05, "loss": 3.1284, "step": 206500 }, { "epoch": 0.62, "learning_rate": 3.963400527013181e-05, "loss": 3.1405, "step": 207000 }, { "epoch": 0.62, "learning_rate": 3.960896663551861e-05, "loss": 3.1543, "step": 207500 }, { "epoch": 0.62, "learning_rate": 3.95839280009054e-05, "loss": 3.1693, "step": 208000 }, { "epoch": 0.63, "learning_rate": 3.955888936629219e-05, "loss": 3.116, "step": 208500 }, { "epoch": 0.63, "learning_rate": 3.953385073167899e-05, "loss": 3.125, "step": 209000 }, { "epoch": 0.63, "learning_rate": 3.950881209706577e-05, "loss": 3.1362, "step": 209500 }, { "epoch": 0.63, "learning_rate": 3.948377346245257e-05, "loss": 3.0917, "step": 210000 }, { "epoch": 0.63, "learning_rate": 3.945873482783936e-05, "loss": 3.121, "step": 210500 }, { "epoch": 0.63, "learning_rate": 3.943369619322615e-05, "loss": 3.0947, "step": 211000 }, { "epoch": 0.64, "learning_rate": 3.940865755861295e-05, "loss": 3.1065, "step": 211500 }, { "epoch": 0.64, "learning_rate": 3.938361892399973e-05, "loss": 3.1274, "step": 212000 }, { "epoch": 0.64, "learning_rate": 3.935858028938653e-05, "loss": 3.1071, "step": 212500 }, { "epoch": 0.64, "learning_rate": 3.933354165477331e-05, "loss": 3.1405, "step": 213000 }, { "epoch": 0.64, "learning_rate": 3.930850302016011e-05, "loss": 3.1544, "step": 213500 }, { "epoch": 0.64, "learning_rate": 3.9283464385546906e-05, "loss": 3.1427, "step": 214000 }, { "epoch": 0.64, "learning_rate": 3.925842575093369e-05, "loss": 3.1102, "step": 214500 }, { "epoch": 0.65, "learning_rate": 3.923338711632049e-05, "loss": 3.1475, "step": 215000 }, { "epoch": 0.65, "learning_rate": 3.920834848170727e-05, "loss": 3.125, "step": 215500 }, { "epoch": 0.65, "learning_rate": 3.918330984709407e-05, "loss": 3.1, "step": 216000 }, { "epoch": 0.65, "learning_rate": 3.9158271212480866e-05, "loss": 3.1193, "step": 216500 }, { "epoch": 0.65, "learning_rate": 3.913323257786765e-05, "loss": 3.1084, "step": 217000 }, { "epoch": 0.65, "learning_rate": 3.910819394325445e-05, "loss": 3.1027, "step": 217500 }, { "epoch": 0.66, "learning_rate": 3.908315530864123e-05, "loss": 3.1362, "step": 218000 }, { "epoch": 0.66, "learning_rate": 3.905811667402803e-05, "loss": 3.1049, "step": 218500 }, { "epoch": 0.66, "learning_rate": 3.903307803941482e-05, "loss": 3.1278, "step": 219000 }, { "epoch": 0.66, "learning_rate": 3.900803940480161e-05, "loss": 3.1058, "step": 219500 }, { "epoch": 0.66, "learning_rate": 3.8983000770188407e-05, "loss": 3.0892, "step": 220000 }, { "epoch": 0.66, "learning_rate": 3.895796213557519e-05, "loss": 3.1202, "step": 220500 }, { "epoch": 0.66, "learning_rate": 3.893292350096199e-05, "loss": 3.1286, "step": 221000 }, { "epoch": 0.67, "learning_rate": 3.890788486634878e-05, "loss": 3.1254, "step": 221500 }, { "epoch": 0.67, "learning_rate": 3.888284623173557e-05, "loss": 3.1017, "step": 222000 }, { "epoch": 0.67, "learning_rate": 3.8857807597122366e-05, "loss": 3.0974, "step": 222500 }, { "epoch": 0.67, "learning_rate": 3.883276896250915e-05, "loss": 3.0832, "step": 223000 }, { "epoch": 0.67, "learning_rate": 3.880773032789595e-05, "loss": 3.1195, "step": 223500 }, { "epoch": 0.67, "learning_rate": 3.878269169328274e-05, "loss": 3.1234, "step": 224000 }, { "epoch": 0.67, "learning_rate": 3.875765305866953e-05, "loss": 3.0852, "step": 224500 }, { "epoch": 0.68, "learning_rate": 3.873261442405632e-05, "loss": 3.1366, "step": 225000 }, { "epoch": 0.68, "learning_rate": 3.870757578944311e-05, "loss": 3.1166, "step": 225500 }, { "epoch": 0.68, "learning_rate": 3.868253715482991e-05, "loss": 3.1384, "step": 226000 }, { "epoch": 0.68, "learning_rate": 3.86574985202167e-05, "loss": 3.1121, "step": 226500 }, { "epoch": 0.68, "learning_rate": 3.863245988560349e-05, "loss": 3.1149, "step": 227000 }, { "epoch": 0.68, "learning_rate": 3.860742125099028e-05, "loss": 3.1072, "step": 227500 }, { "epoch": 0.69, "learning_rate": 3.858238261637707e-05, "loss": 3.0932, "step": 228000 }, { "epoch": 0.69, "learning_rate": 3.8557343981763866e-05, "loss": 3.0846, "step": 228500 }, { "epoch": 0.69, "learning_rate": 3.853230534715066e-05, "loss": 3.0819, "step": 229000 }, { "epoch": 0.69, "learning_rate": 3.850726671253745e-05, "loss": 3.104, "step": 229500 }, { "epoch": 0.69, "learning_rate": 3.848222807792424e-05, "loss": 3.1016, "step": 230000 }, { "epoch": 0.69, "learning_rate": 3.845718944331103e-05, "loss": 3.075, "step": 230500 }, { "epoch": 0.69, "learning_rate": 3.843215080869782e-05, "loss": 3.0945, "step": 231000 }, { "epoch": 0.7, "learning_rate": 3.840711217408462e-05, "loss": 3.0949, "step": 231500 }, { "epoch": 0.7, "learning_rate": 3.838207353947141e-05, "loss": 3.1097, "step": 232000 }, { "epoch": 0.7, "learning_rate": 3.83570349048582e-05, "loss": 3.0881, "step": 232500 }, { "epoch": 0.7, "learning_rate": 3.833199627024499e-05, "loss": 3.1087, "step": 233000 }, { "epoch": 0.7, "learning_rate": 3.830695763563178e-05, "loss": 3.1164, "step": 233500 }, { "epoch": 0.7, "learning_rate": 3.8281919001018576e-05, "loss": 3.1171, "step": 234000 }, { "epoch": 0.7, "learning_rate": 3.825688036640537e-05, "loss": 3.0979, "step": 234500 }, { "epoch": 0.71, "learning_rate": 3.823184173179216e-05, "loss": 3.0912, "step": 235000 }, { "epoch": 0.71, "learning_rate": 3.820680309717895e-05, "loss": 3.1041, "step": 235500 }, { "epoch": 0.71, "learning_rate": 3.818176446256574e-05, "loss": 3.0904, "step": 236000 }, { "epoch": 0.71, "learning_rate": 3.8156725827952536e-05, "loss": 3.0874, "step": 236500 }, { "epoch": 0.71, "learning_rate": 3.8131687193339326e-05, "loss": 3.0746, "step": 237000 }, { "epoch": 0.71, "learning_rate": 3.810664855872612e-05, "loss": 3.1111, "step": 237500 }, { "epoch": 0.72, "learning_rate": 3.808160992411291e-05, "loss": 3.0794, "step": 238000 }, { "epoch": 0.72, "learning_rate": 3.80565712894997e-05, "loss": 3.0809, "step": 238500 }, { "epoch": 0.72, "learning_rate": 3.8031532654886495e-05, "loss": 3.0907, "step": 239000 }, { "epoch": 0.72, "learning_rate": 3.800649402027328e-05, "loss": 3.0692, "step": 239500 }, { "epoch": 0.72, "learning_rate": 3.7981455385660077e-05, "loss": 3.1191, "step": 240000 }, { "epoch": 0.72, "learning_rate": 3.795641675104687e-05, "loss": 3.0884, "step": 240500 }, { "epoch": 0.72, "learning_rate": 3.793137811643366e-05, "loss": 3.0955, "step": 241000 }, { "epoch": 0.73, "learning_rate": 3.7906339481820455e-05, "loss": 3.1308, "step": 241500 }, { "epoch": 0.73, "learning_rate": 3.788130084720724e-05, "loss": 3.0821, "step": 242000 }, { "epoch": 0.73, "learning_rate": 3.7856262212594036e-05, "loss": 3.1044, "step": 242500 }, { "epoch": 0.73, "learning_rate": 3.783122357798083e-05, "loss": 3.0543, "step": 243000 }, { "epoch": 0.73, "learning_rate": 3.780618494336762e-05, "loss": 3.0943, "step": 243500 }, { "epoch": 0.73, "learning_rate": 3.7781146308754415e-05, "loss": 3.0787, "step": 244000 }, { "epoch": 0.73, "learning_rate": 3.77561076741412e-05, "loss": 3.0943, "step": 244500 }, { "epoch": 0.74, "learning_rate": 3.7731069039527996e-05, "loss": 3.0755, "step": 245000 }, { "epoch": 0.74, "learning_rate": 3.770603040491478e-05, "loss": 3.0912, "step": 245500 }, { "epoch": 0.74, "learning_rate": 3.768099177030158e-05, "loss": 3.0616, "step": 246000 }, { "epoch": 0.74, "learning_rate": 3.7655953135688374e-05, "loss": 3.0884, "step": 246500 }, { "epoch": 0.74, "learning_rate": 3.763091450107516e-05, "loss": 3.0833, "step": 247000 }, { "epoch": 0.74, "learning_rate": 3.7605875866461955e-05, "loss": 3.0648, "step": 247500 }, { "epoch": 0.75, "learning_rate": 3.758083723184874e-05, "loss": 3.1001, "step": 248000 }, { "epoch": 0.75, "learning_rate": 3.7555798597235537e-05, "loss": 3.0793, "step": 248500 }, { "epoch": 0.75, "learning_rate": 3.7530759962622334e-05, "loss": 3.0914, "step": 249000 }, { "epoch": 0.75, "learning_rate": 3.750572132800912e-05, "loss": 3.0715, "step": 249500 }, { "epoch": 0.75, "learning_rate": 3.7480682693395915e-05, "loss": 3.0778, "step": 250000 }, { "epoch": 0.75, "learning_rate": 3.74556440587827e-05, "loss": 3.0749, "step": 250500 }, { "epoch": 0.75, "learning_rate": 3.7430605424169496e-05, "loss": 3.0795, "step": 251000 }, { "epoch": 0.76, "learning_rate": 3.740556678955629e-05, "loss": 3.0838, "step": 251500 }, { "epoch": 0.76, "learning_rate": 3.738052815494308e-05, "loss": 3.0874, "step": 252000 }, { "epoch": 0.76, "learning_rate": 3.7355489520329875e-05, "loss": 3.1254, "step": 252500 }, { "epoch": 0.76, "learning_rate": 3.733045088571666e-05, "loss": 3.0686, "step": 253000 }, { "epoch": 0.76, "learning_rate": 3.7305412251103456e-05, "loss": 3.0645, "step": 253500 }, { "epoch": 0.76, "learning_rate": 3.7280373616490246e-05, "loss": 3.0608, "step": 254000 }, { "epoch": 0.76, "learning_rate": 3.725533498187704e-05, "loss": 3.0896, "step": 254500 }, { "epoch": 0.77, "learning_rate": 3.7230296347263834e-05, "loss": 3.0622, "step": 255000 }, { "epoch": 0.77, "learning_rate": 3.720525771265062e-05, "loss": 3.0935, "step": 255500 }, { "epoch": 0.77, "learning_rate": 3.7180219078037415e-05, "loss": 3.051, "step": 256000 }, { "epoch": 0.77, "learning_rate": 3.7155180443424206e-05, "loss": 3.05, "step": 256500 }, { "epoch": 0.77, "learning_rate": 3.7130141808810996e-05, "loss": 3.0711, "step": 257000 }, { "epoch": 0.77, "learning_rate": 3.7105103174197794e-05, "loss": 3.086, "step": 257500 }, { "epoch": 0.78, "learning_rate": 3.708006453958458e-05, "loss": 3.0711, "step": 258000 }, { "epoch": 0.78, "learning_rate": 3.7055025904971375e-05, "loss": 3.0573, "step": 258500 }, { "epoch": 0.78, "learning_rate": 3.7029987270358165e-05, "loss": 3.0937, "step": 259000 }, { "epoch": 0.78, "learning_rate": 3.7004948635744956e-05, "loss": 3.0803, "step": 259500 }, { "epoch": 0.78, "learning_rate": 3.6979910001131747e-05, "loss": 3.0894, "step": 260000 }, { "epoch": 0.78, "learning_rate": 3.695487136651854e-05, "loss": 3.0443, "step": 260500 }, { "epoch": 0.78, "learning_rate": 3.6929832731905335e-05, "loss": 3.0307, "step": 261000 }, { "epoch": 0.79, "learning_rate": 3.6904794097292125e-05, "loss": 3.0806, "step": 261500 }, { "epoch": 0.79, "learning_rate": 3.6879755462678916e-05, "loss": 3.0864, "step": 262000 }, { "epoch": 0.79, "learning_rate": 3.6854716828065706e-05, "loss": 3.0655, "step": 262500 }, { "epoch": 0.79, "learning_rate": 3.68296781934525e-05, "loss": 3.0769, "step": 263000 }, { "epoch": 0.79, "learning_rate": 3.6804639558839294e-05, "loss": 3.0842, "step": 263500 }, { "epoch": 0.79, "learning_rate": 3.6779600924226085e-05, "loss": 3.0599, "step": 264000 }, { "epoch": 0.79, "learning_rate": 3.6754562289612875e-05, "loss": 3.0444, "step": 264500 }, { "epoch": 0.8, "learning_rate": 3.6729523654999666e-05, "loss": 3.0919, "step": 265000 }, { "epoch": 0.8, "learning_rate": 3.6704485020386456e-05, "loss": 3.0803, "step": 265500 }, { "epoch": 0.8, "learning_rate": 3.667944638577325e-05, "loss": 3.0625, "step": 266000 }, { "epoch": 0.8, "learning_rate": 3.6654407751160044e-05, "loss": 3.0659, "step": 266500 }, { "epoch": 0.8, "learning_rate": 3.6629369116546835e-05, "loss": 3.058, "step": 267000 }, { "epoch": 0.8, "learning_rate": 3.6604330481933625e-05, "loss": 3.039, "step": 267500 }, { "epoch": 0.81, "learning_rate": 3.6579291847320416e-05, "loss": 3.0609, "step": 268000 }, { "epoch": 0.81, "learning_rate": 3.6554253212707207e-05, "loss": 3.0642, "step": 268500 }, { "epoch": 0.81, "learning_rate": 3.6529214578094004e-05, "loss": 3.0689, "step": 269000 }, { "epoch": 0.81, "learning_rate": 3.6504175943480794e-05, "loss": 3.0742, "step": 269500 }, { "epoch": 0.81, "learning_rate": 3.6479137308867585e-05, "loss": 3.0593, "step": 270000 }, { "epoch": 0.81, "learning_rate": 3.6454098674254376e-05, "loss": 3.0275, "step": 270500 }, { "epoch": 0.81, "learning_rate": 3.6429060039641166e-05, "loss": 3.0494, "step": 271000 }, { "epoch": 0.82, "learning_rate": 3.6404021405027963e-05, "loss": 3.0539, "step": 271500 }, { "epoch": 0.82, "learning_rate": 3.637898277041475e-05, "loss": 3.0219, "step": 272000 }, { "epoch": 0.82, "learning_rate": 3.6353944135801545e-05, "loss": 3.0594, "step": 272500 }, { "epoch": 0.82, "learning_rate": 3.6328905501188335e-05, "loss": 3.0226, "step": 273000 }, { "epoch": 0.82, "learning_rate": 3.6303866866575126e-05, "loss": 3.062, "step": 273500 }, { "epoch": 0.82, "learning_rate": 3.627882823196192e-05, "loss": 3.0278, "step": 274000 }, { "epoch": 0.82, "learning_rate": 3.625378959734871e-05, "loss": 3.0481, "step": 274500 }, { "epoch": 0.83, "learning_rate": 3.6228750962735504e-05, "loss": 3.0711, "step": 275000 }, { "epoch": 0.83, "learning_rate": 3.6203712328122295e-05, "loss": 3.0537, "step": 275500 }, { "epoch": 0.83, "learning_rate": 3.6178673693509085e-05, "loss": 3.0285, "step": 276000 }, { "epoch": 0.83, "learning_rate": 3.615363505889588e-05, "loss": 3.059, "step": 276500 }, { "epoch": 0.83, "learning_rate": 3.6128596424282666e-05, "loss": 3.0548, "step": 277000 }, { "epoch": 0.83, "learning_rate": 3.6103557789669464e-05, "loss": 3.0681, "step": 277500 }, { "epoch": 0.84, "learning_rate": 3.6078519155056254e-05, "loss": 3.0542, "step": 278000 }, { "epoch": 0.84, "learning_rate": 3.6053480520443045e-05, "loss": 3.0432, "step": 278500 }, { "epoch": 0.84, "learning_rate": 3.602844188582984e-05, "loss": 3.0262, "step": 279000 }, { "epoch": 0.84, "learning_rate": 3.6003403251216626e-05, "loss": 3.0477, "step": 279500 }, { "epoch": 0.84, "learning_rate": 3.5978364616603423e-05, "loss": 3.0659, "step": 280000 }, { "epoch": 0.84, "learning_rate": 3.595332598199021e-05, "loss": 3.0485, "step": 280500 }, { "epoch": 0.84, "learning_rate": 3.5928287347377005e-05, "loss": 3.0876, "step": 281000 }, { "epoch": 0.85, "learning_rate": 3.5903248712763795e-05, "loss": 3.0533, "step": 281500 }, { "epoch": 0.85, "learning_rate": 3.5878210078150586e-05, "loss": 3.0515, "step": 282000 }, { "epoch": 0.85, "learning_rate": 3.585317144353738e-05, "loss": 3.0581, "step": 282500 }, { "epoch": 0.85, "learning_rate": 3.582813280892417e-05, "loss": 3.0733, "step": 283000 }, { "epoch": 0.85, "learning_rate": 3.5803094174310964e-05, "loss": 3.0407, "step": 283500 }, { "epoch": 0.85, "learning_rate": 3.5778055539697755e-05, "loss": 3.0176, "step": 284000 }, { "epoch": 0.85, "learning_rate": 3.5753016905084545e-05, "loss": 3.0331, "step": 284500 }, { "epoch": 0.86, "learning_rate": 3.572797827047134e-05, "loss": 3.0587, "step": 285000 }, { "epoch": 0.86, "learning_rate": 3.5702939635858126e-05, "loss": 3.036, "step": 285500 }, { "epoch": 0.86, "learning_rate": 3.5677901001244924e-05, "loss": 3.0488, "step": 286000 }, { "epoch": 0.86, "learning_rate": 3.5652862366631714e-05, "loss": 3.0591, "step": 286500 }, { "epoch": 0.86, "learning_rate": 3.5627823732018505e-05, "loss": 3.0318, "step": 287000 }, { "epoch": 0.86, "learning_rate": 3.56027850974053e-05, "loss": 3.0268, "step": 287500 }, { "epoch": 0.87, "learning_rate": 3.5577746462792086e-05, "loss": 3.0655, "step": 288000 }, { "epoch": 0.87, "learning_rate": 3.555270782817888e-05, "loss": 3.0387, "step": 288500 }, { "epoch": 0.87, "learning_rate": 3.5527669193565674e-05, "loss": 3.0587, "step": 289000 }, { "epoch": 0.87, "learning_rate": 3.5502630558952464e-05, "loss": 3.0486, "step": 289500 }, { "epoch": 0.87, "learning_rate": 3.547759192433926e-05, "loss": 3.078, "step": 290000 }, { "epoch": 0.87, "learning_rate": 3.5452553289726046e-05, "loss": 3.0346, "step": 290500 }, { "epoch": 0.87, "learning_rate": 3.542751465511284e-05, "loss": 3.0631, "step": 291000 }, { "epoch": 0.88, "learning_rate": 3.5402476020499634e-05, "loss": 3.0286, "step": 291500 }, { "epoch": 0.88, "learning_rate": 3.5377437385886424e-05, "loss": 3.0451, "step": 292000 }, { "epoch": 0.88, "learning_rate": 3.5352398751273215e-05, "loss": 3.0457, "step": 292500 }, { "epoch": 0.88, "learning_rate": 3.5327360116660005e-05, "loss": 3.0523, "step": 293000 }, { "epoch": 0.88, "learning_rate": 3.53023214820468e-05, "loss": 3.0505, "step": 293500 }, { "epoch": 0.88, "learning_rate": 3.527728284743359e-05, "loss": 3.0391, "step": 294000 }, { "epoch": 0.88, "learning_rate": 3.5252244212820384e-05, "loss": 3.032, "step": 294500 }, { "epoch": 0.89, "learning_rate": 3.5227205578207174e-05, "loss": 3.048, "step": 295000 }, { "epoch": 0.89, "learning_rate": 3.5202166943593965e-05, "loss": 3.0276, "step": 295500 }, { "epoch": 0.89, "learning_rate": 3.517712830898076e-05, "loss": 3.0132, "step": 296000 }, { "epoch": 0.89, "learning_rate": 3.515208967436755e-05, "loss": 3.0059, "step": 296500 }, { "epoch": 0.89, "learning_rate": 3.512705103975434e-05, "loss": 3.0293, "step": 297000 }, { "epoch": 0.89, "learning_rate": 3.5102012405141134e-05, "loss": 3.0349, "step": 297500 }, { "epoch": 0.9, "learning_rate": 3.5076973770527924e-05, "loss": 3.0338, "step": 298000 }, { "epoch": 0.9, "learning_rate": 3.505193513591472e-05, "loss": 3.0429, "step": 298500 }, { "epoch": 0.9, "learning_rate": 3.502689650130151e-05, "loss": 3.017, "step": 299000 }, { "epoch": 0.9, "learning_rate": 3.50018578666883e-05, "loss": 3.0174, "step": 299500 }, { "epoch": 0.9, "learning_rate": 3.4976819232075093e-05, "loss": 3.0668, "step": 300000 }, { "epoch": 0.9, "learning_rate": 3.4951780597461884e-05, "loss": 3.0236, "step": 300500 }, { "epoch": 0.9, "learning_rate": 3.4926741962848675e-05, "loss": 3.0195, "step": 301000 }, { "epoch": 0.91, "learning_rate": 3.490170332823547e-05, "loss": 3.0443, "step": 301500 } ], "max_steps": 998457, "num_train_epochs": 3, "total_flos": 4.46482559989836e+18, "trial_name": null, "trial_params": null }