diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,23897 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 1989841, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9994974472834763e-05, + "loss": 2.0904, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9989948945669532e-05, + "loss": 2.0417, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9984923418504294e-05, + "loss": 2.0243, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 1.997989789133906e-05, + "loss": 2.0118, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9974872364173824e-05, + "loss": 2.012, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9969846837008586e-05, + "loss": 2.0111, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 1.996482130984335e-05, + "loss": 1.993, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9959795782678116e-05, + "loss": 1.9873, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9954770255512878e-05, + "loss": 1.9861, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9949744728347643e-05, + "loss": 1.983, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9944719201182408e-05, + "loss": 1.9875, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 1.993969367401717e-05, + "loss": 1.9791, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9934668146851935e-05, + "loss": 1.956, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 1.99296426196867e-05, + "loss": 1.9894, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9924617092521465e-05, + "loss": 1.9754, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 1.991959156535623e-05, + "loss": 1.9592, + "step": 8000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9914566038190992e-05, + "loss": 1.9787, + "step": 8500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9909540511025757e-05, + "loss": 1.9622, + "step": 9000 + }, + { + "epoch": 0.0, + "learning_rate": 1.9904514983860522e-05, + "loss": 1.9672, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9899489456695284e-05, + "loss": 1.9584, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 1.989446392953005e-05, + "loss": 1.9628, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9889438402364814e-05, + "loss": 1.9658, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9884412875199576e-05, + "loss": 1.951, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 1.987938734803434e-05, + "loss": 1.9457, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9874361820869106e-05, + "loss": 1.9566, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 1.986933629370387e-05, + "loss": 1.945, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9864310766538633e-05, + "loss": 1.9582, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 1.98592852393734e-05, + "loss": 1.9622, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9854259712208164e-05, + "loss": 1.9465, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 1.984923418504293e-05, + "loss": 1.9425, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 1.984420865787769e-05, + "loss": 1.9447, + "step": 15500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9839183130712456e-05, + "loss": 1.964, + "step": 16000 + }, + { + "epoch": 0.01, + "learning_rate": 1.983415760354722e-05, + "loss": 1.9522, + "step": 16500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9829132076381986e-05, + "loss": 1.9354, + "step": 17000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9824106549216748e-05, + "loss": 1.9332, + "step": 17500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9819081022051513e-05, + "loss": 1.9335, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9814055494886278e-05, + "loss": 1.9356, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 1.980902996772104e-05, + "loss": 1.9318, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9804004440555805e-05, + "loss": 1.9422, + "step": 19500 + }, + { + "epoch": 0.01, + "learning_rate": 1.979897891339057e-05, + "loss": 1.9365, + "step": 20000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9793953386225332e-05, + "loss": 1.9275, + "step": 20500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9788927859060097e-05, + "loss": 1.9465, + "step": 21000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9783902331894862e-05, + "loss": 1.9072, + "step": 21500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9778876804729624e-05, + "loss": 1.9066, + "step": 22000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9773851277564392e-05, + "loss": 1.9359, + "step": 22500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9768825750399154e-05, + "loss": 1.9215, + "step": 23000 + }, + { + "epoch": 0.01, + "learning_rate": 1.976380022323392e-05, + "loss": 1.9367, + "step": 23500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9758774696068684e-05, + "loss": 1.9225, + "step": 24000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9753749168903446e-05, + "loss": 1.9251, + "step": 24500 + }, + { + "epoch": 0.01, + "learning_rate": 1.974872364173821e-05, + "loss": 1.9133, + "step": 25000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9743698114572976e-05, + "loss": 1.9402, + "step": 25500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9738672587407738e-05, + "loss": 1.9268, + "step": 26000 + }, + { + "epoch": 0.01, + "learning_rate": 1.9733647060242503e-05, + "loss": 1.932, + "step": 26500 + }, + { + "epoch": 0.01, + "learning_rate": 1.972862153307727e-05, + "loss": 1.941, + "step": 27000 + }, + { + "epoch": 0.01, + "learning_rate": 1.972359600591203e-05, + "loss": 1.9119, + "step": 27500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9718570478746795e-05, + "loss": 1.9265, + "step": 28000 + }, + { + "epoch": 0.01, + "learning_rate": 1.971354495158156e-05, + "loss": 1.9221, + "step": 28500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9708519424416322e-05, + "loss": 1.9268, + "step": 29000 + }, + { + "epoch": 0.01, + "learning_rate": 1.970349389725109e-05, + "loss": 1.9034, + "step": 29500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9698468370085852e-05, + "loss": 1.933, + "step": 30000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9693442842920614e-05, + "loss": 1.9202, + "step": 30500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9688417315755383e-05, + "loss": 1.897, + "step": 31000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9683391788590145e-05, + "loss": 1.9342, + "step": 31500 + }, + { + "epoch": 0.02, + "learning_rate": 1.967836626142491e-05, + "loss": 1.9337, + "step": 32000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9673340734259675e-05, + "loss": 1.9169, + "step": 32500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9668315207094437e-05, + "loss": 1.8952, + "step": 33000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9663289679929202e-05, + "loss": 1.9047, + "step": 33500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9658264152763967e-05, + "loss": 1.9202, + "step": 34000 + }, + { + "epoch": 0.02, + "learning_rate": 1.965323862559873e-05, + "loss": 1.9152, + "step": 34500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9648213098433494e-05, + "loss": 1.909, + "step": 35000 + }, + { + "epoch": 0.02, + "learning_rate": 1.964318757126826e-05, + "loss": 1.9229, + "step": 35500 + }, + { + "epoch": 0.02, + "learning_rate": 1.963816204410302e-05, + "loss": 1.9128, + "step": 36000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9633136516937786e-05, + "loss": 1.9035, + "step": 36500 + }, + { + "epoch": 0.02, + "learning_rate": 1.962811098977255e-05, + "loss": 1.923, + "step": 37000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9623085462607316e-05, + "loss": 1.9089, + "step": 37500 + }, + { + "epoch": 0.02, + "learning_rate": 1.961805993544208e-05, + "loss": 1.9228, + "step": 38000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9613034408276843e-05, + "loss": 1.8999, + "step": 38500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9608008881111608e-05, + "loss": 1.8998, + "step": 39000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9602983353946373e-05, + "loss": 1.9048, + "step": 39500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9597957826781135e-05, + "loss": 1.9062, + "step": 40000 + }, + { + "epoch": 0.02, + "learning_rate": 1.95929322996159e-05, + "loss": 1.9114, + "step": 40500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9587906772450665e-05, + "loss": 1.9078, + "step": 41000 + }, + { + "epoch": 0.02, + "learning_rate": 1.958288124528543e-05, + "loss": 1.9103, + "step": 41500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9577855718120192e-05, + "loss": 1.9205, + "step": 42000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9572830190954957e-05, + "loss": 1.9134, + "step": 42500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9567804663789722e-05, + "loss": 1.9364, + "step": 43000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9562779136624484e-05, + "loss": 1.8845, + "step": 43500 + }, + { + "epoch": 0.02, + "learning_rate": 1.955775360945925e-05, + "loss": 1.9329, + "step": 44000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9552728082294014e-05, + "loss": 1.9152, + "step": 44500 + }, + { + "epoch": 0.02, + "learning_rate": 1.954770255512878e-05, + "loss": 1.8961, + "step": 45000 + }, + { + "epoch": 0.02, + "learning_rate": 1.954267702796354e-05, + "loss": 1.9093, + "step": 45500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9537651500798307e-05, + "loss": 1.9187, + "step": 46000 + }, + { + "epoch": 0.02, + "learning_rate": 1.953262597363307e-05, + "loss": 1.9211, + "step": 46500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9527600446467837e-05, + "loss": 1.9137, + "step": 47000 + }, + { + "epoch": 0.02, + "learning_rate": 1.95225749193026e-05, + "loss": 1.898, + "step": 47500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9517549392137364e-05, + "loss": 1.9002, + "step": 48000 + }, + { + "epoch": 0.02, + "learning_rate": 1.951252386497213e-05, + "loss": 1.9016, + "step": 48500 + }, + { + "epoch": 0.02, + "learning_rate": 1.950749833780689e-05, + "loss": 1.8961, + "step": 49000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9502472810641656e-05, + "loss": 1.8984, + "step": 49500 + }, + { + "epoch": 0.03, + "learning_rate": 1.949744728347642e-05, + "loss": 1.8957, + "step": 50000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9492421756311183e-05, + "loss": 1.9109, + "step": 50500 + }, + { + "epoch": 0.03, + "learning_rate": 1.948739622914595e-05, + "loss": 1.9257, + "step": 51000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9482370701980713e-05, + "loss": 1.9162, + "step": 51500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9477345174815475e-05, + "loss": 1.9119, + "step": 52000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9472319647650243e-05, + "loss": 1.9107, + "step": 52500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9467294120485005e-05, + "loss": 1.9131, + "step": 53000 + }, + { + "epoch": 0.03, + "learning_rate": 1.946226859331977e-05, + "loss": 1.8965, + "step": 53500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9457243066154535e-05, + "loss": 1.8955, + "step": 54000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9452217538989297e-05, + "loss": 1.9157, + "step": 54500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9447192011824062e-05, + "loss": 1.9221, + "step": 55000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9442166484658827e-05, + "loss": 1.9069, + "step": 55500 + }, + { + "epoch": 0.03, + "learning_rate": 1.943714095749359e-05, + "loss": 1.8829, + "step": 56000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9432115430328354e-05, + "loss": 1.8974, + "step": 56500 + }, + { + "epoch": 0.03, + "learning_rate": 1.942708990316312e-05, + "loss": 1.8964, + "step": 57000 + }, + { + "epoch": 0.03, + "learning_rate": 1.942206437599788e-05, + "loss": 1.905, + "step": 57500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9417038848832646e-05, + "loss": 1.898, + "step": 58000 + }, + { + "epoch": 0.03, + "learning_rate": 1.941201332166741e-05, + "loss": 1.9037, + "step": 58500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9406987794502173e-05, + "loss": 1.9025, + "step": 59000 + }, + { + "epoch": 0.03, + "learning_rate": 1.940196226733694e-05, + "loss": 1.9108, + "step": 59500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9396936740171703e-05, + "loss": 1.8908, + "step": 60000 + }, + { + "epoch": 0.03, + "learning_rate": 1.939191121300647e-05, + "loss": 1.9117, + "step": 60500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9386885685841234e-05, + "loss": 1.8895, + "step": 61000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9381860158675995e-05, + "loss": 1.9015, + "step": 61500 + }, + { + "epoch": 0.03, + "learning_rate": 1.937683463151076e-05, + "loss": 1.901, + "step": 62000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9371809104345526e-05, + "loss": 1.9025, + "step": 62500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9366783577180287e-05, + "loss": 1.8946, + "step": 63000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9361758050015053e-05, + "loss": 1.8812, + "step": 63500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9356732522849818e-05, + "loss": 1.9052, + "step": 64000 + }, + { + "epoch": 0.03, + "learning_rate": 1.935170699568458e-05, + "loss": 1.8981, + "step": 64500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9346681468519345e-05, + "loss": 1.9005, + "step": 65000 + }, + { + "epoch": 0.03, + "learning_rate": 1.934165594135411e-05, + "loss": 1.8953, + "step": 65500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9336630414188875e-05, + "loss": 1.9017, + "step": 66000 + }, + { + "epoch": 0.03, + "learning_rate": 1.933160488702364e-05, + "loss": 1.8989, + "step": 66500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9326579359858402e-05, + "loss": 1.9042, + "step": 67000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9321553832693167e-05, + "loss": 1.899, + "step": 67500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9316528305527932e-05, + "loss": 1.8968, + "step": 68000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9311502778362694e-05, + "loss": 1.8953, + "step": 68500 + }, + { + "epoch": 0.03, + "learning_rate": 1.930647725119746e-05, + "loss": 1.8935, + "step": 69000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9301451724032224e-05, + "loss": 1.9261, + "step": 69500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9296426196866986e-05, + "loss": 1.8913, + "step": 70000 + }, + { + "epoch": 0.04, + "learning_rate": 1.929140066970175e-05, + "loss": 1.8857, + "step": 70500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9286375142536516e-05, + "loss": 1.8972, + "step": 71000 + }, + { + "epoch": 0.04, + "learning_rate": 1.928134961537128e-05, + "loss": 1.8911, + "step": 71500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9276324088206043e-05, + "loss": 1.8819, + "step": 72000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9271298561040808e-05, + "loss": 1.8916, + "step": 72500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9266273033875573e-05, + "loss": 1.8763, + "step": 73000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9261247506710335e-05, + "loss": 1.8908, + "step": 73500 + }, + { + "epoch": 0.04, + "learning_rate": 1.92562219795451e-05, + "loss": 1.8898, + "step": 74000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9251196452379865e-05, + "loss": 1.9099, + "step": 74500 + }, + { + "epoch": 0.04, + "learning_rate": 1.924617092521463e-05, + "loss": 1.8797, + "step": 75000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9241145398049396e-05, + "loss": 1.873, + "step": 75500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9236119870884157e-05, + "loss": 1.9015, + "step": 76000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9231094343718923e-05, + "loss": 1.8896, + "step": 76500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9226068816553688e-05, + "loss": 1.8999, + "step": 77000 + }, + { + "epoch": 0.04, + "learning_rate": 1.922104328938845e-05, + "loss": 1.8923, + "step": 77500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9216017762223215e-05, + "loss": 1.9138, + "step": 78000 + }, + { + "epoch": 0.04, + "learning_rate": 1.921099223505798e-05, + "loss": 1.8817, + "step": 78500 + }, + { + "epoch": 0.04, + "learning_rate": 1.920596670789274e-05, + "loss": 1.8809, + "step": 79000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9200941180727507e-05, + "loss": 1.8895, + "step": 79500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9195915653562272e-05, + "loss": 1.8795, + "step": 80000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9190890126397033e-05, + "loss": 1.899, + "step": 80500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9185864599231802e-05, + "loss": 1.8938, + "step": 81000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9180839072066564e-05, + "loss": 1.8892, + "step": 81500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9175813544901325e-05, + "loss": 1.9154, + "step": 82000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9170788017736094e-05, + "loss": 1.8767, + "step": 82500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9165762490570856e-05, + "loss": 1.8842, + "step": 83000 + }, + { + "epoch": 0.04, + "learning_rate": 1.916073696340562e-05, + "loss": 1.8904, + "step": 83500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9155711436240386e-05, + "loss": 1.8961, + "step": 84000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9150685909075148e-05, + "loss": 1.8797, + "step": 84500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9145660381909913e-05, + "loss": 1.8881, + "step": 85000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9140634854744678e-05, + "loss": 1.8952, + "step": 85500 + }, + { + "epoch": 0.04, + "learning_rate": 1.913560932757944e-05, + "loss": 1.8861, + "step": 86000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9130583800414205e-05, + "loss": 1.8743, + "step": 86500 + }, + { + "epoch": 0.04, + "learning_rate": 1.912555827324897e-05, + "loss": 1.904, + "step": 87000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9120532746083732e-05, + "loss": 1.8834, + "step": 87500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9115507218918497e-05, + "loss": 1.889, + "step": 88000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9110481691753262e-05, + "loss": 1.8837, + "step": 88500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9105456164588024e-05, + "loss": 1.8753, + "step": 89000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9100430637422792e-05, + "loss": 1.8884, + "step": 89500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9095405110257554e-05, + "loss": 1.8979, + "step": 90000 + }, + { + "epoch": 0.05, + "learning_rate": 1.909037958309232e-05, + "loss": 1.8694, + "step": 90500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9085354055927084e-05, + "loss": 1.8822, + "step": 91000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9080328528761846e-05, + "loss": 1.8989, + "step": 91500 + }, + { + "epoch": 0.05, + "learning_rate": 1.907530300159661e-05, + "loss": 1.8772, + "step": 92000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9070277474431377e-05, + "loss": 1.8733, + "step": 92500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9065251947266138e-05, + "loss": 1.8879, + "step": 93000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9060226420100903e-05, + "loss": 1.8723, + "step": 93500 + }, + { + "epoch": 0.05, + "learning_rate": 1.905520089293567e-05, + "loss": 1.8817, + "step": 94000 + }, + { + "epoch": 0.05, + "learning_rate": 1.905017536577043e-05, + "loss": 1.8812, + "step": 94500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9045149838605195e-05, + "loss": 1.8955, + "step": 95000 + }, + { + "epoch": 0.05, + "learning_rate": 1.904012431143996e-05, + "loss": 1.8751, + "step": 95500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9035098784274726e-05, + "loss": 1.8895, + "step": 96000 + }, + { + "epoch": 0.05, + "learning_rate": 1.903007325710949e-05, + "loss": 1.8811, + "step": 96500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9025047729944253e-05, + "loss": 1.8867, + "step": 97000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9020022202779018e-05, + "loss": 1.8839, + "step": 97500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9014996675613783e-05, + "loss": 1.8969, + "step": 98000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9009971148448545e-05, + "loss": 1.864, + "step": 98500 + }, + { + "epoch": 0.05, + "learning_rate": 1.900494562128331e-05, + "loss": 1.8674, + "step": 99000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8999920094118075e-05, + "loss": 1.8979, + "step": 99500 + }, + { + "epoch": 0.05, + "learning_rate": 1.899489456695284e-05, + "loss": 1.8675, + "step": 100000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8989869039787602e-05, + "loss": 1.8796, + "step": 100500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8984843512622367e-05, + "loss": 1.8694, + "step": 101000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8979817985457132e-05, + "loss": 1.8775, + "step": 101500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8974792458291894e-05, + "loss": 1.8699, + "step": 102000 + }, + { + "epoch": 0.05, + "learning_rate": 1.896976693112666e-05, + "loss": 1.8876, + "step": 102500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8964741403961424e-05, + "loss": 1.8713, + "step": 103000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8959715876796186e-05, + "loss": 1.8815, + "step": 103500 + }, + { + "epoch": 0.05, + "learning_rate": 1.895469034963095e-05, + "loss": 1.891, + "step": 104000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8949664822465716e-05, + "loss": 1.8848, + "step": 104500 + }, + { + "epoch": 0.05, + "learning_rate": 1.894463929530048e-05, + "loss": 1.8723, + "step": 105000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8939613768135246e-05, + "loss": 1.8865, + "step": 105500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8934588240970008e-05, + "loss": 1.8874, + "step": 106000 + }, + { + "epoch": 0.05, + "learning_rate": 1.8929562713804773e-05, + "loss": 1.8692, + "step": 106500 + }, + { + "epoch": 0.05, + "learning_rate": 1.892453718663954e-05, + "loss": 1.9039, + "step": 107000 + }, + { + "epoch": 0.05, + "learning_rate": 1.89195116594743e-05, + "loss": 1.8861, + "step": 107500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8914486132309065e-05, + "loss": 1.8732, + "step": 108000 + }, + { + "epoch": 0.05, + "learning_rate": 1.890946060514383e-05, + "loss": 1.8689, + "step": 108500 + }, + { + "epoch": 0.05, + "learning_rate": 1.8904435077978592e-05, + "loss": 1.87, + "step": 109000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8899409550813357e-05, + "loss": 1.8856, + "step": 109500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8894384023648123e-05, + "loss": 1.8742, + "step": 110000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8889358496482884e-05, + "loss": 1.8779, + "step": 110500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8884332969317653e-05, + "loss": 1.8846, + "step": 111000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8879307442152415e-05, + "loss": 1.8935, + "step": 111500 + }, + { + "epoch": 0.06, + "learning_rate": 1.887428191498718e-05, + "loss": 1.8638, + "step": 112000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8869256387821945e-05, + "loss": 1.862, + "step": 112500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8864230860656707e-05, + "loss": 1.8861, + "step": 113000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8859205333491472e-05, + "loss": 1.8758, + "step": 113500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8854179806326237e-05, + "loss": 1.8917, + "step": 114000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8849154279161e-05, + "loss": 1.8827, + "step": 114500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8844128751995764e-05, + "loss": 1.8786, + "step": 115000 + }, + { + "epoch": 0.06, + "learning_rate": 1.883910322483053e-05, + "loss": 1.8671, + "step": 115500 + }, + { + "epoch": 0.06, + "learning_rate": 1.883407769766529e-05, + "loss": 1.8622, + "step": 116000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8829052170500056e-05, + "loss": 1.8671, + "step": 116500 + }, + { + "epoch": 0.06, + "learning_rate": 1.882402664333482e-05, + "loss": 1.8847, + "step": 117000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8819001116169583e-05, + "loss": 1.8845, + "step": 117500 + }, + { + "epoch": 0.06, + "learning_rate": 1.881397558900435e-05, + "loss": 1.8857, + "step": 118000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8808950061839113e-05, + "loss": 1.8914, + "step": 118500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8803924534673878e-05, + "loss": 1.879, + "step": 119000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8798899007508643e-05, + "loss": 1.8623, + "step": 119500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8793873480343405e-05, + "loss": 1.8866, + "step": 120000 + }, + { + "epoch": 0.06, + "learning_rate": 1.878884795317817e-05, + "loss": 1.8922, + "step": 120500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8783822426012935e-05, + "loss": 1.8772, + "step": 121000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8778796898847697e-05, + "loss": 1.9021, + "step": 121500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8773771371682462e-05, + "loss": 1.8769, + "step": 122000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8768745844517227e-05, + "loss": 1.8795, + "step": 122500 + }, + { + "epoch": 0.06, + "learning_rate": 1.876372031735199e-05, + "loss": 1.8494, + "step": 123000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8758694790186754e-05, + "loss": 1.8739, + "step": 123500 + }, + { + "epoch": 0.06, + "learning_rate": 1.875366926302152e-05, + "loss": 1.8763, + "step": 124000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8748643735856285e-05, + "loss": 1.8732, + "step": 124500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8743618208691046e-05, + "loss": 1.8514, + "step": 125000 + }, + { + "epoch": 0.06, + "learning_rate": 1.873859268152581e-05, + "loss": 1.8649, + "step": 125500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8733567154360577e-05, + "loss": 1.8643, + "step": 126000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8728541627195342e-05, + "loss": 1.8817, + "step": 126500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8723516100030103e-05, + "loss": 1.8706, + "step": 127000 + }, + { + "epoch": 0.06, + "learning_rate": 1.871849057286487e-05, + "loss": 1.8451, + "step": 127500 + }, + { + "epoch": 0.06, + "learning_rate": 1.8713465045699634e-05, + "loss": 1.8716, + "step": 128000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8708439518534396e-05, + "loss": 1.8586, + "step": 128500 + }, + { + "epoch": 0.06, + "learning_rate": 1.870341399136916e-05, + "loss": 1.8603, + "step": 129000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8698388464203926e-05, + "loss": 1.861, + "step": 129500 + }, + { + "epoch": 0.07, + "learning_rate": 1.869336293703869e-05, + "loss": 1.8868, + "step": 130000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8688337409873453e-05, + "loss": 1.8824, + "step": 130500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8683311882708218e-05, + "loss": 1.878, + "step": 131000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8678286355542983e-05, + "loss": 1.8657, + "step": 131500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8673260828377745e-05, + "loss": 1.8682, + "step": 132000 + }, + { + "epoch": 0.07, + "learning_rate": 1.866823530121251e-05, + "loss": 1.8608, + "step": 132500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8663209774047275e-05, + "loss": 1.8835, + "step": 133000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8658184246882037e-05, + "loss": 1.8526, + "step": 133500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8653158719716805e-05, + "loss": 1.8646, + "step": 134000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8648133192551567e-05, + "loss": 1.8618, + "step": 134500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8643107665386332e-05, + "loss": 1.8707, + "step": 135000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8638082138221097e-05, + "loss": 1.8836, + "step": 135500 + }, + { + "epoch": 0.07, + "learning_rate": 1.863305661105586e-05, + "loss": 1.8509, + "step": 136000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8628031083890624e-05, + "loss": 1.8819, + "step": 136500 + }, + { + "epoch": 0.07, + "learning_rate": 1.862300555672539e-05, + "loss": 1.8678, + "step": 137000 + }, + { + "epoch": 0.07, + "learning_rate": 1.861798002956015e-05, + "loss": 1.8604, + "step": 137500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8612954502394916e-05, + "loss": 1.8643, + "step": 138000 + }, + { + "epoch": 0.07, + "learning_rate": 1.860792897522968e-05, + "loss": 1.861, + "step": 138500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8602903448064443e-05, + "loss": 1.8675, + "step": 139000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8597877920899208e-05, + "loss": 1.871, + "step": 139500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8592852393733973e-05, + "loss": 1.8663, + "step": 140000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8587826866568735e-05, + "loss": 1.862, + "step": 140500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8582801339403504e-05, + "loss": 1.8583, + "step": 141000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8577775812238265e-05, + "loss": 1.8681, + "step": 141500 + }, + { + "epoch": 0.07, + "learning_rate": 1.857275028507303e-05, + "loss": 1.8674, + "step": 142000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8567724757907796e-05, + "loss": 1.8617, + "step": 142500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8562699230742557e-05, + "loss": 1.8725, + "step": 143000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8557673703577323e-05, + "loss": 1.8692, + "step": 143500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8552648176412088e-05, + "loss": 1.8668, + "step": 144000 + }, + { + "epoch": 0.07, + "learning_rate": 1.854762264924685e-05, + "loss": 1.8506, + "step": 144500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8542597122081615e-05, + "loss": 1.8729, + "step": 145000 + }, + { + "epoch": 0.07, + "learning_rate": 1.853757159491638e-05, + "loss": 1.8763, + "step": 145500 + }, + { + "epoch": 0.07, + "learning_rate": 1.853254606775114e-05, + "loss": 1.8538, + "step": 146000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8527520540585907e-05, + "loss": 1.8722, + "step": 146500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8522495013420672e-05, + "loss": 1.8532, + "step": 147000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8517469486255434e-05, + "loss": 1.8718, + "step": 147500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8512443959090202e-05, + "loss": 1.8735, + "step": 148000 + }, + { + "epoch": 0.07, + "learning_rate": 1.8507418431924964e-05, + "loss": 1.8591, + "step": 148500 + }, + { + "epoch": 0.07, + "learning_rate": 1.850239290475973e-05, + "loss": 1.8705, + "step": 149000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8497367377594494e-05, + "loss": 1.8545, + "step": 149500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8492341850429256e-05, + "loss": 1.8718, + "step": 150000 + }, + { + "epoch": 0.08, + "learning_rate": 1.848731632326402e-05, + "loss": 1.8659, + "step": 150500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8482290796098786e-05, + "loss": 1.8825, + "step": 151000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8477265268933548e-05, + "loss": 1.8764, + "step": 151500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8472239741768313e-05, + "loss": 1.8634, + "step": 152000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8467214214603078e-05, + "loss": 1.8831, + "step": 152500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8462188687437843e-05, + "loss": 1.864, + "step": 153000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8457163160272605e-05, + "loss": 1.8477, + "step": 153500 + }, + { + "epoch": 0.08, + "learning_rate": 1.845213763310737e-05, + "loss": 1.8615, + "step": 154000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8447112105942135e-05, + "loss": 1.848, + "step": 154500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8442086578776897e-05, + "loss": 1.8711, + "step": 155000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8437061051611662e-05, + "loss": 1.8638, + "step": 155500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8432035524446427e-05, + "loss": 1.8681, + "step": 156000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8427009997281193e-05, + "loss": 1.8569, + "step": 156500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8421984470115954e-05, + "loss": 1.856, + "step": 157000 + }, + { + "epoch": 0.08, + "learning_rate": 1.841695894295072e-05, + "loss": 1.877, + "step": 157500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8411933415785485e-05, + "loss": 1.8851, + "step": 158000 + }, + { + "epoch": 0.08, + "learning_rate": 1.840690788862025e-05, + "loss": 1.87, + "step": 158500 + }, + { + "epoch": 0.08, + "learning_rate": 1.840188236145501e-05, + "loss": 1.8607, + "step": 159000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8396856834289777e-05, + "loss": 1.8443, + "step": 159500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8391831307124542e-05, + "loss": 1.8687, + "step": 160000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8386805779959304e-05, + "loss": 1.8612, + "step": 160500 + }, + { + "epoch": 0.08, + "learning_rate": 1.838178025279407e-05, + "loss": 1.8628, + "step": 161000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8376754725628834e-05, + "loss": 1.8845, + "step": 161500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8371729198463596e-05, + "loss": 1.8728, + "step": 162000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8366703671298364e-05, + "loss": 1.8662, + "step": 162500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8361678144133126e-05, + "loss": 1.8597, + "step": 163000 + }, + { + "epoch": 0.08, + "learning_rate": 1.835665261696789e-05, + "loss": 1.8719, + "step": 163500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8351627089802656e-05, + "loss": 1.8624, + "step": 164000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8346601562637418e-05, + "loss": 1.8492, + "step": 164500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8341576035472183e-05, + "loss": 1.8578, + "step": 165000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8336550508306948e-05, + "loss": 1.8382, + "step": 165500 + }, + { + "epoch": 0.08, + "learning_rate": 1.833152498114171e-05, + "loss": 1.8715, + "step": 166000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8326499453976475e-05, + "loss": 1.8748, + "step": 166500 + }, + { + "epoch": 0.08, + "learning_rate": 1.832147392681124e-05, + "loss": 1.8695, + "step": 167000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8316448399646002e-05, + "loss": 1.8552, + "step": 167500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8311422872480767e-05, + "loss": 1.8675, + "step": 168000 + }, + { + "epoch": 0.08, + "learning_rate": 1.8306397345315532e-05, + "loss": 1.8896, + "step": 168500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8301371818150294e-05, + "loss": 1.8658, + "step": 169000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8296346290985063e-05, + "loss": 1.865, + "step": 169500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8291320763819824e-05, + "loss": 1.88, + "step": 170000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8286295236654586e-05, + "loss": 1.8562, + "step": 170500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8281269709489355e-05, + "loss": 1.8428, + "step": 171000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8276244182324116e-05, + "loss": 1.8545, + "step": 171500 + }, + { + "epoch": 0.09, + "learning_rate": 1.827121865515888e-05, + "loss": 1.8632, + "step": 172000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8266193127993647e-05, + "loss": 1.8737, + "step": 172500 + }, + { + "epoch": 0.09, + "learning_rate": 1.826116760082841e-05, + "loss": 1.8589, + "step": 173000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8256142073663173e-05, + "loss": 1.8507, + "step": 173500 + }, + { + "epoch": 0.09, + "learning_rate": 1.825111654649794e-05, + "loss": 1.8455, + "step": 174000 + }, + { + "epoch": 0.09, + "learning_rate": 1.82460910193327e-05, + "loss": 1.881, + "step": 174500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8241065492167466e-05, + "loss": 1.8615, + "step": 175000 + }, + { + "epoch": 0.09, + "learning_rate": 1.823603996500223e-05, + "loss": 1.87, + "step": 175500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8231014437836992e-05, + "loss": 1.8515, + "step": 176000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8225988910671758e-05, + "loss": 1.8616, + "step": 176500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8220963383506523e-05, + "loss": 1.8534, + "step": 177000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8215937856341288e-05, + "loss": 1.8558, + "step": 177500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8210912329176053e-05, + "loss": 1.8672, + "step": 178000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8205886802010815e-05, + "loss": 1.835, + "step": 178500 + }, + { + "epoch": 0.09, + "learning_rate": 1.820086127484558e-05, + "loss": 1.8528, + "step": 179000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8195835747680345e-05, + "loss": 1.869, + "step": 179500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8190810220515107e-05, + "loss": 1.8826, + "step": 180000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8185784693349872e-05, + "loss": 1.8816, + "step": 180500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8180759166184637e-05, + "loss": 1.8529, + "step": 181000 + }, + { + "epoch": 0.09, + "learning_rate": 1.81757336390194e-05, + "loss": 1.8505, + "step": 181500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8170708111854164e-05, + "loss": 1.8647, + "step": 182000 + }, + { + "epoch": 0.09, + "learning_rate": 1.816568258468893e-05, + "loss": 1.8589, + "step": 182500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8160657057523694e-05, + "loss": 1.8578, + "step": 183000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8155631530358456e-05, + "loss": 1.8513, + "step": 183500 + }, + { + "epoch": 0.09, + "learning_rate": 1.815060600319322e-05, + "loss": 1.8654, + "step": 184000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8145580476027986e-05, + "loss": 1.8339, + "step": 184500 + }, + { + "epoch": 0.09, + "learning_rate": 1.814055494886275e-05, + "loss": 1.8585, + "step": 185000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8135529421697513e-05, + "loss": 1.8543, + "step": 185500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8130503894532278e-05, + "loss": 1.8607, + "step": 186000 + }, + { + "epoch": 0.09, + "learning_rate": 1.8125478367367043e-05, + "loss": 1.8654, + "step": 186500 + }, + { + "epoch": 0.09, + "learning_rate": 1.812045284020181e-05, + "loss": 1.8567, + "step": 187000 + }, + { + "epoch": 0.09, + "learning_rate": 1.811542731303657e-05, + "loss": 1.8582, + "step": 187500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8110401785871335e-05, + "loss": 1.8639, + "step": 188000 + }, + { + "epoch": 0.09, + "learning_rate": 1.81053762587061e-05, + "loss": 1.864, + "step": 188500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8100350731540862e-05, + "loss": 1.8466, + "step": 189000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8095325204375628e-05, + "loss": 1.8363, + "step": 189500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8090299677210393e-05, + "loss": 1.8551, + "step": 190000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8085274150045154e-05, + "loss": 1.8396, + "step": 190500 + }, + { + "epoch": 0.1, + "learning_rate": 1.808024862287992e-05, + "loss": 1.8512, + "step": 191000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8075223095714685e-05, + "loss": 1.8564, + "step": 191500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8070197568549446e-05, + "loss": 1.8481, + "step": 192000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8065172041384215e-05, + "loss": 1.865, + "step": 192500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8060146514218977e-05, + "loss": 1.8696, + "step": 193000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8055120987053742e-05, + "loss": 1.8585, + "step": 193500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8050095459888507e-05, + "loss": 1.8592, + "step": 194000 + }, + { + "epoch": 0.1, + "learning_rate": 1.804506993272327e-05, + "loss": 1.8675, + "step": 194500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8040044405558034e-05, + "loss": 1.84, + "step": 195000 + }, + { + "epoch": 0.1, + "learning_rate": 1.80350188783928e-05, + "loss": 1.8697, + "step": 195500 + }, + { + "epoch": 0.1, + "learning_rate": 1.802999335122756e-05, + "loss": 1.8628, + "step": 196000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8024967824062326e-05, + "loss": 1.8578, + "step": 196500 + }, + { + "epoch": 0.1, + "learning_rate": 1.801994229689709e-05, + "loss": 1.849, + "step": 197000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8014916769731853e-05, + "loss": 1.8472, + "step": 197500 + }, + { + "epoch": 0.1, + "learning_rate": 1.8009891242566618e-05, + "loss": 1.8592, + "step": 198000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8004865715401383e-05, + "loss": 1.8533, + "step": 198500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7999840188236145e-05, + "loss": 1.8617, + "step": 199000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7994814661070913e-05, + "loss": 1.8425, + "step": 199500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7989789133905675e-05, + "loss": 1.8535, + "step": 200000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7984763606740437e-05, + "loss": 1.8509, + "step": 200500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7979738079575205e-05, + "loss": 1.8543, + "step": 201000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7974712552409967e-05, + "loss": 1.8628, + "step": 201500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7969687025244732e-05, + "loss": 1.849, + "step": 202000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7964661498079497e-05, + "loss": 1.8443, + "step": 202500 + }, + { + "epoch": 0.1, + "learning_rate": 1.795963597091426e-05, + "loss": 1.8426, + "step": 203000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7954610443749024e-05, + "loss": 1.8481, + "step": 203500 + }, + { + "epoch": 0.1, + "learning_rate": 1.794958491658379e-05, + "loss": 1.8338, + "step": 204000 + }, + { + "epoch": 0.1, + "learning_rate": 1.794455938941855e-05, + "loss": 1.8408, + "step": 204500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7939533862253316e-05, + "loss": 1.8436, + "step": 205000 + }, + { + "epoch": 0.1, + "learning_rate": 1.793450833508808e-05, + "loss": 1.8345, + "step": 205500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7929482807922843e-05, + "loss": 1.8583, + "step": 206000 + }, + { + "epoch": 0.1, + "learning_rate": 1.792445728075761e-05, + "loss": 1.8531, + "step": 206500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7919431753592374e-05, + "loss": 1.8581, + "step": 207000 + }, + { + "epoch": 0.1, + "learning_rate": 1.791440622642714e-05, + "loss": 1.8678, + "step": 207500 + }, + { + "epoch": 0.1, + "learning_rate": 1.7909380699261904e-05, + "loss": 1.8445, + "step": 208000 + }, + { + "epoch": 0.1, + "learning_rate": 1.7904355172096666e-05, + "loss": 1.8586, + "step": 208500 + }, + { + "epoch": 0.11, + "learning_rate": 1.789932964493143e-05, + "loss": 1.8505, + "step": 209000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7894304117766196e-05, + "loss": 1.8434, + "step": 209500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7889278590600958e-05, + "loss": 1.8502, + "step": 210000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7884253063435723e-05, + "loss": 1.838, + "step": 210500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7879227536270488e-05, + "loss": 1.8606, + "step": 211000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7874202009105253e-05, + "loss": 1.8517, + "step": 211500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7869176481940015e-05, + "loss": 1.8701, + "step": 212000 + }, + { + "epoch": 0.11, + "learning_rate": 1.786415095477478e-05, + "loss": 1.8432, + "step": 212500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7859125427609545e-05, + "loss": 1.8532, + "step": 213000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7854099900444307e-05, + "loss": 1.8604, + "step": 213500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7849074373279072e-05, + "loss": 1.8544, + "step": 214000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7844048846113837e-05, + "loss": 1.8496, + "step": 214500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7839023318948602e-05, + "loss": 1.8552, + "step": 215000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7833997791783364e-05, + "loss": 1.861, + "step": 215500 + }, + { + "epoch": 0.11, + "learning_rate": 1.782897226461813e-05, + "loss": 1.8418, + "step": 216000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7823946737452894e-05, + "loss": 1.845, + "step": 216500 + }, + { + "epoch": 0.11, + "learning_rate": 1.781892121028766e-05, + "loss": 1.8455, + "step": 217000 + }, + { + "epoch": 0.11, + "learning_rate": 1.781389568312242e-05, + "loss": 1.8347, + "step": 217500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7808870155957186e-05, + "loss": 1.8533, + "step": 218000 + }, + { + "epoch": 0.11, + "learning_rate": 1.780384462879195e-05, + "loss": 1.8587, + "step": 218500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7798819101626713e-05, + "loss": 1.8432, + "step": 219000 + }, + { + "epoch": 0.11, + "learning_rate": 1.779379357446148e-05, + "loss": 1.8522, + "step": 219500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7788768047296243e-05, + "loss": 1.8736, + "step": 220000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7783742520131005e-05, + "loss": 1.8751, + "step": 220500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7778716992965774e-05, + "loss": 1.8613, + "step": 221000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7773691465800536e-05, + "loss": 1.8499, + "step": 221500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7768665938635297e-05, + "loss": 1.8803, + "step": 222000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7763640411470066e-05, + "loss": 1.8346, + "step": 222500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7758614884304828e-05, + "loss": 1.8491, + "step": 223000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7753589357139593e-05, + "loss": 1.8387, + "step": 223500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7748563829974358e-05, + "loss": 1.8417, + "step": 224000 + }, + { + "epoch": 0.11, + "learning_rate": 1.774353830280912e-05, + "loss": 1.8604, + "step": 224500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7738512775643885e-05, + "loss": 1.854, + "step": 225000 + }, + { + "epoch": 0.11, + "learning_rate": 1.773348724847865e-05, + "loss": 1.8497, + "step": 225500 + }, + { + "epoch": 0.11, + "learning_rate": 1.772846172131341e-05, + "loss": 1.8533, + "step": 226000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7723436194148177e-05, + "loss": 1.8423, + "step": 226500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7718410666982942e-05, + "loss": 1.873, + "step": 227000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7713385139817704e-05, + "loss": 1.8505, + "step": 227500 + }, + { + "epoch": 0.11, + "learning_rate": 1.770835961265247e-05, + "loss": 1.8682, + "step": 228000 + }, + { + "epoch": 0.11, + "learning_rate": 1.7703334085487234e-05, + "loss": 1.8569, + "step": 228500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7698308558321996e-05, + "loss": 1.8466, + "step": 229000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7693283031156764e-05, + "loss": 1.869, + "step": 229500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7688257503991526e-05, + "loss": 1.8512, + "step": 230000 + }, + { + "epoch": 0.12, + "learning_rate": 1.768323197682629e-05, + "loss": 1.8533, + "step": 230500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7678206449661056e-05, + "loss": 1.8448, + "step": 231000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7673180922495818e-05, + "loss": 1.8499, + "step": 231500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7668155395330583e-05, + "loss": 1.8243, + "step": 232000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7663129868165348e-05, + "loss": 1.8182, + "step": 232500 + }, + { + "epoch": 0.12, + "learning_rate": 1.765810434100011e-05, + "loss": 1.844, + "step": 233000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7653078813834875e-05, + "loss": 1.8544, + "step": 233500 + }, + { + "epoch": 0.12, + "learning_rate": 1.764805328666964e-05, + "loss": 1.845, + "step": 234000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7643027759504402e-05, + "loss": 1.8416, + "step": 234500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7638002232339167e-05, + "loss": 1.8517, + "step": 235000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7632976705173932e-05, + "loss": 1.8577, + "step": 235500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7627951178008698e-05, + "loss": 1.8555, + "step": 236000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7622925650843463e-05, + "loss": 1.8421, + "step": 236500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7617900123678224e-05, + "loss": 1.85, + "step": 237000 + }, + { + "epoch": 0.12, + "learning_rate": 1.761287459651299e-05, + "loss": 1.8687, + "step": 237500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7607849069347755e-05, + "loss": 1.8493, + "step": 238000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7602823542182516e-05, + "loss": 1.8545, + "step": 238500 + }, + { + "epoch": 0.12, + "learning_rate": 1.759779801501728e-05, + "loss": 1.8335, + "step": 239000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7592772487852047e-05, + "loss": 1.8632, + "step": 239500 + }, + { + "epoch": 0.12, + "learning_rate": 1.758774696068681e-05, + "loss": 1.8491, + "step": 240000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7582721433521574e-05, + "loss": 1.8267, + "step": 240500 + }, + { + "epoch": 0.12, + "learning_rate": 1.757769590635634e-05, + "loss": 1.8315, + "step": 241000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7572670379191104e-05, + "loss": 1.8407, + "step": 241500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7567644852025866e-05, + "loss": 1.8259, + "step": 242000 + }, + { + "epoch": 0.12, + "learning_rate": 1.756261932486063e-05, + "loss": 1.8368, + "step": 242500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7557593797695396e-05, + "loss": 1.8522, + "step": 243000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7552568270530158e-05, + "loss": 1.8357, + "step": 243500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7547542743364923e-05, + "loss": 1.8476, + "step": 244000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7542517216199688e-05, + "loss": 1.851, + "step": 244500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7537491689034453e-05, + "loss": 1.8567, + "step": 245000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7532466161869218e-05, + "loss": 1.8462, + "step": 245500 + }, + { + "epoch": 0.12, + "learning_rate": 1.752744063470398e-05, + "loss": 1.8317, + "step": 246000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7522415107538745e-05, + "loss": 1.845, + "step": 246500 + }, + { + "epoch": 0.12, + "learning_rate": 1.751738958037351e-05, + "loss": 1.8484, + "step": 247000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7512364053208272e-05, + "loss": 1.8453, + "step": 247500 + }, + { + "epoch": 0.12, + "learning_rate": 1.7507338526043037e-05, + "loss": 1.8447, + "step": 248000 + }, + { + "epoch": 0.12, + "learning_rate": 1.7502312998877802e-05, + "loss": 1.832, + "step": 248500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7497287471712564e-05, + "loss": 1.8492, + "step": 249000 + }, + { + "epoch": 0.13, + "learning_rate": 1.749226194454733e-05, + "loss": 1.8462, + "step": 249500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7487236417382094e-05, + "loss": 1.8469, + "step": 250000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7482210890216856e-05, + "loss": 1.8653, + "step": 250500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7477185363051625e-05, + "loss": 1.8476, + "step": 251000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7472159835886386e-05, + "loss": 1.8562, + "step": 251500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7467134308721148e-05, + "loss": 1.8421, + "step": 252000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7462108781555917e-05, + "loss": 1.8422, + "step": 252500 + }, + { + "epoch": 0.13, + "learning_rate": 1.745708325439068e-05, + "loss": 1.8404, + "step": 253000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7452057727225444e-05, + "loss": 1.8372, + "step": 253500 + }, + { + "epoch": 0.13, + "learning_rate": 1.744703220006021e-05, + "loss": 1.8352, + "step": 254000 + }, + { + "epoch": 0.13, + "learning_rate": 1.744200667289497e-05, + "loss": 1.8324, + "step": 254500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7436981145729736e-05, + "loss": 1.8503, + "step": 255000 + }, + { + "epoch": 0.13, + "learning_rate": 1.74319556185645e-05, + "loss": 1.8572, + "step": 255500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7426930091399262e-05, + "loss": 1.8495, + "step": 256000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7421904564234028e-05, + "loss": 1.8594, + "step": 256500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7416879037068793e-05, + "loss": 1.8432, + "step": 257000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7411853509903555e-05, + "loss": 1.8335, + "step": 257500 + }, + { + "epoch": 0.13, + "learning_rate": 1.740682798273832e-05, + "loss": 1.8502, + "step": 258000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7401802455573085e-05, + "loss": 1.8371, + "step": 258500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7396776928407847e-05, + "loss": 1.8344, + "step": 259000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7391751401242615e-05, + "loss": 1.8513, + "step": 259500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7386725874077377e-05, + "loss": 1.8428, + "step": 260000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7381700346912142e-05, + "loss": 1.8424, + "step": 260500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7376674819746907e-05, + "loss": 1.8378, + "step": 261000 + }, + { + "epoch": 0.13, + "learning_rate": 1.737164929258167e-05, + "loss": 1.839, + "step": 261500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7366623765416434e-05, + "loss": 1.8342, + "step": 262000 + }, + { + "epoch": 0.13, + "learning_rate": 1.73615982382512e-05, + "loss": 1.841, + "step": 262500 + }, + { + "epoch": 0.13, + "learning_rate": 1.735657271108596e-05, + "loss": 1.8629, + "step": 263000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7351547183920726e-05, + "loss": 1.8491, + "step": 263500 + }, + { + "epoch": 0.13, + "learning_rate": 1.734652165675549e-05, + "loss": 1.8179, + "step": 264000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7341496129590256e-05, + "loss": 1.8313, + "step": 264500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7336470602425018e-05, + "loss": 1.8316, + "step": 265000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7331445075259783e-05, + "loss": 1.8567, + "step": 265500 + }, + { + "epoch": 0.13, + "learning_rate": 1.732641954809455e-05, + "loss": 1.8282, + "step": 266000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7321394020929314e-05, + "loss": 1.8493, + "step": 266500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7316368493764075e-05, + "loss": 1.8392, + "step": 267000 + }, + { + "epoch": 0.13, + "learning_rate": 1.731134296659884e-05, + "loss": 1.8473, + "step": 267500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7306317439433606e-05, + "loss": 1.8385, + "step": 268000 + }, + { + "epoch": 0.13, + "learning_rate": 1.7301291912268367e-05, + "loss": 1.8442, + "step": 268500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7296266385103132e-05, + "loss": 1.8565, + "step": 269000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7291240857937898e-05, + "loss": 1.8572, + "step": 269500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7286215330772663e-05, + "loss": 1.8485, + "step": 270000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7281189803607424e-05, + "loss": 1.8431, + "step": 270500 + }, + { + "epoch": 0.14, + "learning_rate": 1.727616427644219e-05, + "loss": 1.8604, + "step": 271000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7271138749276955e-05, + "loss": 1.8282, + "step": 271500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7266113222111717e-05, + "loss": 1.8369, + "step": 272000 + }, + { + "epoch": 0.14, + "learning_rate": 1.726108769494648e-05, + "loss": 1.861, + "step": 272500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7256062167781247e-05, + "loss": 1.8481, + "step": 273000 + }, + { + "epoch": 0.14, + "learning_rate": 1.725103664061601e-05, + "loss": 1.8273, + "step": 273500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7246011113450777e-05, + "loss": 1.849, + "step": 274000 + }, + { + "epoch": 0.14, + "learning_rate": 1.724098558628554e-05, + "loss": 1.8392, + "step": 274500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7235960059120304e-05, + "loss": 1.8157, + "step": 275000 + }, + { + "epoch": 0.14, + "learning_rate": 1.723093453195507e-05, + "loss": 1.8247, + "step": 275500 + }, + { + "epoch": 0.14, + "learning_rate": 1.722590900478983e-05, + "loss": 1.8424, + "step": 276000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7220883477624596e-05, + "loss": 1.8277, + "step": 276500 + }, + { + "epoch": 0.14, + "learning_rate": 1.721585795045936e-05, + "loss": 1.8326, + "step": 277000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7210832423294123e-05, + "loss": 1.8424, + "step": 277500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7205806896128888e-05, + "loss": 1.8361, + "step": 278000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7200781368963653e-05, + "loss": 1.829, + "step": 278500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7195755841798415e-05, + "loss": 1.8391, + "step": 279000 + }, + { + "epoch": 0.14, + "learning_rate": 1.719073031463318e-05, + "loss": 1.8372, + "step": 279500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7185704787467945e-05, + "loss": 1.8319, + "step": 280000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7180679260302707e-05, + "loss": 1.8629, + "step": 280500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7175653733137475e-05, + "loss": 1.8264, + "step": 281000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7170628205972237e-05, + "loss": 1.8448, + "step": 281500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7165602678807002e-05, + "loss": 1.8567, + "step": 282000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7160577151641768e-05, + "loss": 1.8347, + "step": 282500 + }, + { + "epoch": 0.14, + "learning_rate": 1.715555162447653e-05, + "loss": 1.8483, + "step": 283000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7150526097311294e-05, + "loss": 1.8387, + "step": 283500 + }, + { + "epoch": 0.14, + "learning_rate": 1.714550057014606e-05, + "loss": 1.8453, + "step": 284000 + }, + { + "epoch": 0.14, + "learning_rate": 1.714047504298082e-05, + "loss": 1.8551, + "step": 284500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7135449515815586e-05, + "loss": 1.8352, + "step": 285000 + }, + { + "epoch": 0.14, + "learning_rate": 1.713042398865035e-05, + "loss": 1.8554, + "step": 285500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7125398461485113e-05, + "loss": 1.8288, + "step": 286000 + }, + { + "epoch": 0.14, + "learning_rate": 1.712037293431988e-05, + "loss": 1.8497, + "step": 286500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7115347407154644e-05, + "loss": 1.8311, + "step": 287000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7110321879989405e-05, + "loss": 1.8565, + "step": 287500 + }, + { + "epoch": 0.14, + "learning_rate": 1.7105296352824174e-05, + "loss": 1.8409, + "step": 288000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7100270825658936e-05, + "loss": 1.8299, + "step": 288500 + }, + { + "epoch": 0.15, + "learning_rate": 1.70952452984937e-05, + "loss": 1.8387, + "step": 289000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7090219771328466e-05, + "loss": 1.8544, + "step": 289500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7085194244163228e-05, + "loss": 1.8324, + "step": 290000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7080168716997993e-05, + "loss": 1.8304, + "step": 290500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7075143189832758e-05, + "loss": 1.8356, + "step": 291000 + }, + { + "epoch": 0.15, + "learning_rate": 1.707011766266752e-05, + "loss": 1.8442, + "step": 291500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7065092135502285e-05, + "loss": 1.8301, + "step": 292000 + }, + { + "epoch": 0.15, + "learning_rate": 1.706006660833705e-05, + "loss": 1.832, + "step": 292500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7055041081171812e-05, + "loss": 1.8329, + "step": 293000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7050015554006577e-05, + "loss": 1.8369, + "step": 293500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7044990026841342e-05, + "loss": 1.832, + "step": 294000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7039964499676107e-05, + "loss": 1.829, + "step": 294500 + }, + { + "epoch": 0.15, + "learning_rate": 1.703493897251087e-05, + "loss": 1.8412, + "step": 295000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7029913445345634e-05, + "loss": 1.8259, + "step": 295500 + }, + { + "epoch": 0.15, + "learning_rate": 1.70248879181804e-05, + "loss": 1.8382, + "step": 296000 + }, + { + "epoch": 0.15, + "learning_rate": 1.7019862391015164e-05, + "loss": 1.8486, + "step": 296500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7014836863849926e-05, + "loss": 1.832, + "step": 297000 + }, + { + "epoch": 0.15, + "learning_rate": 1.700981133668469e-05, + "loss": 1.844, + "step": 297500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7004785809519456e-05, + "loss": 1.853, + "step": 298000 + }, + { + "epoch": 0.15, + "learning_rate": 1.699976028235422e-05, + "loss": 1.8268, + "step": 298500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6994734755188983e-05, + "loss": 1.8174, + "step": 299000 + }, + { + "epoch": 0.15, + "learning_rate": 1.698970922802375e-05, + "loss": 1.8529, + "step": 299500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6984683700858514e-05, + "loss": 1.8416, + "step": 300000 + }, + { + "epoch": 0.15, + "learning_rate": 1.6979658173693275e-05, + "loss": 1.8363, + "step": 300500 + }, + { + "epoch": 0.15, + "learning_rate": 1.697463264652804e-05, + "loss": 1.8531, + "step": 301000 + }, + { + "epoch": 0.15, + "learning_rate": 1.6969607119362806e-05, + "loss": 1.8338, + "step": 301500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6964581592197567e-05, + "loss": 1.8575, + "step": 302000 + }, + { + "epoch": 0.15, + "learning_rate": 1.6959556065032332e-05, + "loss": 1.8377, + "step": 302500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6954530537867098e-05, + "loss": 1.8154, + "step": 303000 + }, + { + "epoch": 0.15, + "learning_rate": 1.694950501070186e-05, + "loss": 1.8422, + "step": 303500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6944479483536628e-05, + "loss": 1.8385, + "step": 304000 + }, + { + "epoch": 0.15, + "learning_rate": 1.693945395637139e-05, + "loss": 1.8414, + "step": 304500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6934428429206155e-05, + "loss": 1.8627, + "step": 305000 + }, + { + "epoch": 0.15, + "learning_rate": 1.692940290204092e-05, + "loss": 1.8521, + "step": 305500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6924377374875682e-05, + "loss": 1.8354, + "step": 306000 + }, + { + "epoch": 0.15, + "learning_rate": 1.6919351847710447e-05, + "loss": 1.8481, + "step": 306500 + }, + { + "epoch": 0.15, + "learning_rate": 1.6914326320545212e-05, + "loss": 1.844, + "step": 307000 + }, + { + "epoch": 0.15, + "learning_rate": 1.6909300793379974e-05, + "loss": 1.8223, + "step": 307500 + }, + { + "epoch": 0.15, + "learning_rate": 1.690427526621474e-05, + "loss": 1.8369, + "step": 308000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6899249739049504e-05, + "loss": 1.8261, + "step": 308500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6894224211884266e-05, + "loss": 1.8251, + "step": 309000 + }, + { + "epoch": 0.16, + "learning_rate": 1.688919868471903e-05, + "loss": 1.8402, + "step": 309500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6884173157553796e-05, + "loss": 1.849, + "step": 310000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6879147630388558e-05, + "loss": 1.8428, + "step": 310500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6874122103223326e-05, + "loss": 1.8373, + "step": 311000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6869096576058088e-05, + "loss": 1.8402, + "step": 311500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6864071048892853e-05, + "loss": 1.8454, + "step": 312000 + }, + { + "epoch": 0.16, + "learning_rate": 1.685904552172762e-05, + "loss": 1.8372, + "step": 312500 + }, + { + "epoch": 0.16, + "learning_rate": 1.685401999456238e-05, + "loss": 1.8339, + "step": 313000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6848994467397145e-05, + "loss": 1.8197, + "step": 313500 + }, + { + "epoch": 0.16, + "learning_rate": 1.684396894023191e-05, + "loss": 1.8335, + "step": 314000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6838943413066672e-05, + "loss": 1.8341, + "step": 314500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6833917885901437e-05, + "loss": 1.8314, + "step": 315000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6828892358736202e-05, + "loss": 1.8424, + "step": 315500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6823866831570964e-05, + "loss": 1.8222, + "step": 316000 + }, + { + "epoch": 0.16, + "learning_rate": 1.681884130440573e-05, + "loss": 1.8412, + "step": 316500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6813815777240494e-05, + "loss": 1.834, + "step": 317000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6808790250075256e-05, + "loss": 1.8373, + "step": 317500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6803764722910025e-05, + "loss": 1.8308, + "step": 318000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6798739195744787e-05, + "loss": 1.8328, + "step": 318500 + }, + { + "epoch": 0.16, + "learning_rate": 1.679371366857955e-05, + "loss": 1.8497, + "step": 319000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6788688141414317e-05, + "loss": 1.8261, + "step": 319500 + }, + { + "epoch": 0.16, + "learning_rate": 1.678366261424908e-05, + "loss": 1.8325, + "step": 320000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6778637087083844e-05, + "loss": 1.833, + "step": 320500 + }, + { + "epoch": 0.16, + "learning_rate": 1.677361155991861e-05, + "loss": 1.8289, + "step": 321000 + }, + { + "epoch": 0.16, + "learning_rate": 1.676858603275337e-05, + "loss": 1.8316, + "step": 321500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6763560505588136e-05, + "loss": 1.8402, + "step": 322000 + }, + { + "epoch": 0.16, + "learning_rate": 1.67585349784229e-05, + "loss": 1.8425, + "step": 322500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6753509451257666e-05, + "loss": 1.8302, + "step": 323000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6748483924092428e-05, + "loss": 1.8302, + "step": 323500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6743458396927193e-05, + "loss": 1.8484, + "step": 324000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6738432869761958e-05, + "loss": 1.8255, + "step": 324500 + }, + { + "epoch": 0.16, + "learning_rate": 1.673340734259672e-05, + "loss": 1.8483, + "step": 325000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6728381815431485e-05, + "loss": 1.8378, + "step": 325500 + }, + { + "epoch": 0.16, + "learning_rate": 1.672335628826625e-05, + "loss": 1.8413, + "step": 326000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6718330761101015e-05, + "loss": 1.8404, + "step": 326500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6713305233935777e-05, + "loss": 1.822, + "step": 327000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6708279706770542e-05, + "loss": 1.8409, + "step": 327500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6703254179605307e-05, + "loss": 1.8273, + "step": 328000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6698228652440072e-05, + "loss": 1.8344, + "step": 328500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6693203125274834e-05, + "loss": 1.8289, + "step": 329000 + }, + { + "epoch": 0.17, + "learning_rate": 1.66881775981096e-05, + "loss": 1.8339, + "step": 329500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6683152070944364e-05, + "loss": 1.8062, + "step": 330000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6678126543779126e-05, + "loss": 1.8336, + "step": 330500 + }, + { + "epoch": 0.17, + "learning_rate": 1.667310101661389e-05, + "loss": 1.8492, + "step": 331000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6668075489448656e-05, + "loss": 1.816, + "step": 331500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6663049962283418e-05, + "loss": 1.8197, + "step": 332000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6658024435118187e-05, + "loss": 1.829, + "step": 332500 + }, + { + "epoch": 0.17, + "learning_rate": 1.665299890795295e-05, + "loss": 1.8328, + "step": 333000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6647973380787714e-05, + "loss": 1.8219, + "step": 333500 + }, + { + "epoch": 0.17, + "learning_rate": 1.664294785362248e-05, + "loss": 1.8508, + "step": 334000 + }, + { + "epoch": 0.17, + "learning_rate": 1.663792232645724e-05, + "loss": 1.8411, + "step": 334500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6632896799292006e-05, + "loss": 1.8369, + "step": 335000 + }, + { + "epoch": 0.17, + "learning_rate": 1.662787127212677e-05, + "loss": 1.8268, + "step": 335500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6622845744961533e-05, + "loss": 1.848, + "step": 336000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6617820217796298e-05, + "loss": 1.8288, + "step": 336500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6612794690631063e-05, + "loss": 1.8456, + "step": 337000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6607769163465825e-05, + "loss": 1.8233, + "step": 337500 + }, + { + "epoch": 0.17, + "learning_rate": 1.660274363630059e-05, + "loss": 1.8179, + "step": 338000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6597718109135355e-05, + "loss": 1.831, + "step": 338500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6592692581970117e-05, + "loss": 1.8341, + "step": 339000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6587667054804885e-05, + "loss": 1.8321, + "step": 339500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6582641527639647e-05, + "loss": 1.8295, + "step": 340000 + }, + { + "epoch": 0.17, + "learning_rate": 1.657761600047441e-05, + "loss": 1.8412, + "step": 340500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6572590473309177e-05, + "loss": 1.8317, + "step": 341000 + }, + { + "epoch": 0.17, + "learning_rate": 1.656756494614394e-05, + "loss": 1.8329, + "step": 341500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6562539418978704e-05, + "loss": 1.8123, + "step": 342000 + }, + { + "epoch": 0.17, + "learning_rate": 1.655751389181347e-05, + "loss": 1.8228, + "step": 342500 + }, + { + "epoch": 0.17, + "learning_rate": 1.655248836464823e-05, + "loss": 1.8336, + "step": 343000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6547462837482996e-05, + "loss": 1.8143, + "step": 343500 + }, + { + "epoch": 0.17, + "learning_rate": 1.654243731031776e-05, + "loss": 1.8229, + "step": 344000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6537411783152523e-05, + "loss": 1.8214, + "step": 344500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6532386255987288e-05, + "loss": 1.8328, + "step": 345000 + }, + { + "epoch": 0.17, + "learning_rate": 1.6527360728822053e-05, + "loss": 1.8501, + "step": 345500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6522335201656815e-05, + "loss": 1.8242, + "step": 346000 + }, + { + "epoch": 0.17, + "learning_rate": 1.651730967449158e-05, + "loss": 1.8269, + "step": 346500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6512284147326345e-05, + "loss": 1.8065, + "step": 347000 + }, + { + "epoch": 0.17, + "learning_rate": 1.650725862016111e-05, + "loss": 1.8397, + "step": 347500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6502233092995876e-05, + "loss": 1.8325, + "step": 348000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6497207565830637e-05, + "loss": 1.8428, + "step": 348500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6492182038665403e-05, + "loss": 1.8403, + "step": 349000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6487156511500168e-05, + "loss": 1.8149, + "step": 349500 + }, + { + "epoch": 0.18, + "learning_rate": 1.648213098433493e-05, + "loss": 1.826, + "step": 350000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6477105457169695e-05, + "loss": 1.8136, + "step": 350500 + }, + { + "epoch": 0.18, + "learning_rate": 1.647207993000446e-05, + "loss": 1.8425, + "step": 351000 + }, + { + "epoch": 0.18, + "learning_rate": 1.646705440283922e-05, + "loss": 1.8303, + "step": 351500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6462028875673987e-05, + "loss": 1.8436, + "step": 352000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6457003348508752e-05, + "loss": 1.8207, + "step": 352500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6451977821343517e-05, + "loss": 1.808, + "step": 353000 + }, + { + "epoch": 0.18, + "learning_rate": 1.644695229417828e-05, + "loss": 1.8089, + "step": 353500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6441926767013044e-05, + "loss": 1.8236, + "step": 354000 + }, + { + "epoch": 0.18, + "learning_rate": 1.643690123984781e-05, + "loss": 1.8097, + "step": 354500 + }, + { + "epoch": 0.18, + "learning_rate": 1.643187571268257e-05, + "loss": 1.8336, + "step": 355000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6426850185517336e-05, + "loss": 1.8299, + "step": 355500 + }, + { + "epoch": 0.18, + "learning_rate": 1.64218246583521e-05, + "loss": 1.84, + "step": 356000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6416799131186866e-05, + "loss": 1.8249, + "step": 356500 + }, + { + "epoch": 0.18, + "learning_rate": 1.641177360402163e-05, + "loss": 1.8243, + "step": 357000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6406748076856393e-05, + "loss": 1.8299, + "step": 357500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6401722549691158e-05, + "loss": 1.8081, + "step": 358000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6396697022525923e-05, + "loss": 1.8265, + "step": 358500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6391671495360685e-05, + "loss": 1.8222, + "step": 359000 + }, + { + "epoch": 0.18, + "learning_rate": 1.638664596819545e-05, + "loss": 1.8489, + "step": 359500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6381620441030215e-05, + "loss": 1.8269, + "step": 360000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6376594913864977e-05, + "loss": 1.8135, + "step": 360500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6371569386699742e-05, + "loss": 1.838, + "step": 361000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6366543859534507e-05, + "loss": 1.8218, + "step": 361500 + }, + { + "epoch": 0.18, + "learning_rate": 1.636151833236927e-05, + "loss": 1.8198, + "step": 362000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6356492805204038e-05, + "loss": 1.8216, + "step": 362500 + }, + { + "epoch": 0.18, + "learning_rate": 1.63514672780388e-05, + "loss": 1.8262, + "step": 363000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6346441750873564e-05, + "loss": 1.8223, + "step": 363500 + }, + { + "epoch": 0.18, + "learning_rate": 1.634141622370833e-05, + "loss": 1.8162, + "step": 364000 + }, + { + "epoch": 0.18, + "learning_rate": 1.633639069654309e-05, + "loss": 1.8292, + "step": 364500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6331365169377857e-05, + "loss": 1.8335, + "step": 365000 + }, + { + "epoch": 0.18, + "learning_rate": 1.632633964221262e-05, + "loss": 1.8223, + "step": 365500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6321314115047383e-05, + "loss": 1.8187, + "step": 366000 + }, + { + "epoch": 0.18, + "learning_rate": 1.631628858788215e-05, + "loss": 1.8315, + "step": 366500 + }, + { + "epoch": 0.18, + "learning_rate": 1.6311263060716914e-05, + "loss": 1.8245, + "step": 367000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6306237533551675e-05, + "loss": 1.8192, + "step": 367500 + }, + { + "epoch": 0.18, + "learning_rate": 1.630121200638644e-05, + "loss": 1.8581, + "step": 368000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6296186479221206e-05, + "loss": 1.8396, + "step": 368500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6291160952055967e-05, + "loss": 1.8272, + "step": 369000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6286135424890736e-05, + "loss": 1.8055, + "step": 369500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6281109897725498e-05, + "loss": 1.8339, + "step": 370000 + }, + { + "epoch": 0.19, + "learning_rate": 1.627608437056026e-05, + "loss": 1.8301, + "step": 370500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6271058843395028e-05, + "loss": 1.8417, + "step": 371000 + }, + { + "epoch": 0.19, + "learning_rate": 1.626603331622979e-05, + "loss": 1.8289, + "step": 371500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6261007789064555e-05, + "loss": 1.8282, + "step": 372000 + }, + { + "epoch": 0.19, + "learning_rate": 1.625598226189932e-05, + "loss": 1.8506, + "step": 372500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6250956734734082e-05, + "loss": 1.8479, + "step": 373000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6245931207568847e-05, + "loss": 1.8136, + "step": 373500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6240905680403612e-05, + "loss": 1.8174, + "step": 374000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6235880153238374e-05, + "loss": 1.8375, + "step": 374500 + }, + { + "epoch": 0.19, + "learning_rate": 1.623085462607314e-05, + "loss": 1.8208, + "step": 375000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6225829098907904e-05, + "loss": 1.8191, + "step": 375500 + }, + { + "epoch": 0.19, + "learning_rate": 1.622080357174267e-05, + "loss": 1.8033, + "step": 376000 + }, + { + "epoch": 0.19, + "learning_rate": 1.621577804457743e-05, + "loss": 1.8265, + "step": 376500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6210752517412196e-05, + "loss": 1.8279, + "step": 377000 + }, + { + "epoch": 0.19, + "learning_rate": 1.620572699024696e-05, + "loss": 1.8074, + "step": 377500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6200701463081726e-05, + "loss": 1.8317, + "step": 378000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6195675935916488e-05, + "loss": 1.8402, + "step": 378500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6190650408751253e-05, + "loss": 1.8458, + "step": 379000 + }, + { + "epoch": 0.19, + "learning_rate": 1.618562488158602e-05, + "loss": 1.842, + "step": 379500 + }, + { + "epoch": 0.19, + "learning_rate": 1.618059935442078e-05, + "loss": 1.8181, + "step": 380000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6175573827255545e-05, + "loss": 1.8175, + "step": 380500 + }, + { + "epoch": 0.19, + "learning_rate": 1.617054830009031e-05, + "loss": 1.83, + "step": 381000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6165522772925076e-05, + "loss": 1.8185, + "step": 381500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6160497245759837e-05, + "loss": 1.8293, + "step": 382000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6155471718594603e-05, + "loss": 1.8106, + "step": 382500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6150446191429368e-05, + "loss": 1.8069, + "step": 383000 + }, + { + "epoch": 0.19, + "learning_rate": 1.614542066426413e-05, + "loss": 1.807, + "step": 383500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6140395137098895e-05, + "loss": 1.8058, + "step": 384000 + }, + { + "epoch": 0.19, + "learning_rate": 1.613536960993366e-05, + "loss": 1.838, + "step": 384500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6130344082768425e-05, + "loss": 1.8316, + "step": 385000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6125318555603187e-05, + "loss": 1.7976, + "step": 385500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6120293028437952e-05, + "loss": 1.8289, + "step": 386000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6115267501272717e-05, + "loss": 1.8354, + "step": 386500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6110241974107482e-05, + "loss": 1.8176, + "step": 387000 + }, + { + "epoch": 0.19, + "learning_rate": 1.6105216446942244e-05, + "loss": 1.8228, + "step": 387500 + }, + { + "epoch": 0.19, + "learning_rate": 1.610019091977701e-05, + "loss": 1.814, + "step": 388000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6095165392611774e-05, + "loss": 1.8246, + "step": 388500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6090139865446536e-05, + "loss": 1.825, + "step": 389000 + }, + { + "epoch": 0.2, + "learning_rate": 1.60851143382813e-05, + "loss": 1.8169, + "step": 389500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6080088811116066e-05, + "loss": 1.8295, + "step": 390000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6075063283950828e-05, + "loss": 1.8368, + "step": 390500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6070037756785596e-05, + "loss": 1.8434, + "step": 391000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6065012229620358e-05, + "loss": 1.8078, + "step": 391500 + }, + { + "epoch": 0.2, + "learning_rate": 1.605998670245512e-05, + "loss": 1.8075, + "step": 392000 + }, + { + "epoch": 0.2, + "learning_rate": 1.605496117528989e-05, + "loss": 1.8183, + "step": 392500 + }, + { + "epoch": 0.2, + "learning_rate": 1.604993564812465e-05, + "loss": 1.8238, + "step": 393000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6044910120959415e-05, + "loss": 1.8407, + "step": 393500 + }, + { + "epoch": 0.2, + "learning_rate": 1.603988459379418e-05, + "loss": 1.8415, + "step": 394000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6034859066628942e-05, + "loss": 1.8078, + "step": 394500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6029833539463707e-05, + "loss": 1.8354, + "step": 395000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6024808012298473e-05, + "loss": 1.8244, + "step": 395500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6019782485133234e-05, + "loss": 1.8197, + "step": 396000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6014756957968e-05, + "loss": 1.8287, + "step": 396500 + }, + { + "epoch": 0.2, + "learning_rate": 1.6009731430802765e-05, + "loss": 1.8186, + "step": 397000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6004705903637526e-05, + "loss": 1.8212, + "step": 397500 + }, + { + "epoch": 0.2, + "learning_rate": 1.599968037647229e-05, + "loss": 1.824, + "step": 398000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5994654849307057e-05, + "loss": 1.8179, + "step": 398500 + }, + { + "epoch": 0.2, + "learning_rate": 1.598962932214182e-05, + "loss": 1.8258, + "step": 399000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5984603794976587e-05, + "loss": 1.8291, + "step": 399500 + }, + { + "epoch": 0.2, + "learning_rate": 1.597957826781135e-05, + "loss": 1.8193, + "step": 400000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5974552740646114e-05, + "loss": 1.8366, + "step": 400500 + }, + { + "epoch": 0.2, + "learning_rate": 1.596952721348088e-05, + "loss": 1.8253, + "step": 401000 + }, + { + "epoch": 0.2, + "learning_rate": 1.596450168631564e-05, + "loss": 1.8346, + "step": 401500 + }, + { + "epoch": 0.2, + "learning_rate": 1.5959476159150406e-05, + "loss": 1.8227, + "step": 402000 + }, + { + "epoch": 0.2, + "learning_rate": 1.595445063198517e-05, + "loss": 1.8052, + "step": 402500 + }, + { + "epoch": 0.2, + "learning_rate": 1.5949425104819933e-05, + "loss": 1.8086, + "step": 403000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5944399577654698e-05, + "loss": 1.812, + "step": 403500 + }, + { + "epoch": 0.2, + "learning_rate": 1.5939374050489463e-05, + "loss": 1.8277, + "step": 404000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5934348523324225e-05, + "loss": 1.8105, + "step": 404500 + }, + { + "epoch": 0.2, + "learning_rate": 1.592932299615899e-05, + "loss": 1.8326, + "step": 405000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5924297468993755e-05, + "loss": 1.8293, + "step": 405500 + }, + { + "epoch": 0.2, + "learning_rate": 1.591927194182852e-05, + "loss": 1.8382, + "step": 406000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5914246414663282e-05, + "loss": 1.8107, + "step": 406500 + }, + { + "epoch": 0.2, + "learning_rate": 1.5909220887498047e-05, + "loss": 1.8164, + "step": 407000 + }, + { + "epoch": 0.2, + "learning_rate": 1.5904195360332812e-05, + "loss": 1.8117, + "step": 407500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5899169833167577e-05, + "loss": 1.8233, + "step": 408000 + }, + { + "epoch": 0.21, + "learning_rate": 1.589414430600234e-05, + "loss": 1.8307, + "step": 408500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5889118778837104e-05, + "loss": 1.825, + "step": 409000 + }, + { + "epoch": 0.21, + "learning_rate": 1.588409325167187e-05, + "loss": 1.8286, + "step": 409500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5879067724506635e-05, + "loss": 1.7962, + "step": 410000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5874042197341396e-05, + "loss": 1.8096, + "step": 410500 + }, + { + "epoch": 0.21, + "learning_rate": 1.586901667017616e-05, + "loss": 1.8184, + "step": 411000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5863991143010927e-05, + "loss": 1.8205, + "step": 411500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5858965615845688e-05, + "loss": 1.8271, + "step": 412000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5853940088680453e-05, + "loss": 1.828, + "step": 412500 + }, + { + "epoch": 0.21, + "learning_rate": 1.584891456151522e-05, + "loss": 1.811, + "step": 413000 + }, + { + "epoch": 0.21, + "learning_rate": 1.584388903434998e-05, + "loss": 1.8392, + "step": 413500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5838863507184745e-05, + "loss": 1.7996, + "step": 414000 + }, + { + "epoch": 0.21, + "learning_rate": 1.583383798001951e-05, + "loss": 1.8283, + "step": 414500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5828812452854276e-05, + "loss": 1.8379, + "step": 415000 + }, + { + "epoch": 0.21, + "learning_rate": 1.582378692568904e-05, + "loss": 1.8285, + "step": 415500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5818761398523803e-05, + "loss": 1.8099, + "step": 416000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5813735871358568e-05, + "loss": 1.8121, + "step": 416500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5808710344193333e-05, + "loss": 1.8122, + "step": 417000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5803684817028095e-05, + "loss": 1.8138, + "step": 417500 + }, + { + "epoch": 0.21, + "learning_rate": 1.579865928986286e-05, + "loss": 1.8239, + "step": 418000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5793633762697625e-05, + "loss": 1.828, + "step": 418500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5788608235532387e-05, + "loss": 1.8311, + "step": 419000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5783582708367152e-05, + "loss": 1.8149, + "step": 419500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5778557181201917e-05, + "loss": 1.8195, + "step": 420000 + }, + { + "epoch": 0.21, + "learning_rate": 1.577353165403668e-05, + "loss": 1.824, + "step": 420500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5768506126871447e-05, + "loss": 1.8389, + "step": 421000 + }, + { + "epoch": 0.21, + "learning_rate": 1.576348059970621e-05, + "loss": 1.8225, + "step": 421500 + }, + { + "epoch": 0.21, + "learning_rate": 1.575845507254097e-05, + "loss": 1.8046, + "step": 422000 + }, + { + "epoch": 0.21, + "learning_rate": 1.575342954537574e-05, + "loss": 1.8326, + "step": 422500 + }, + { + "epoch": 0.21, + "learning_rate": 1.57484040182105e-05, + "loss": 1.8184, + "step": 423000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5743378491045266e-05, + "loss": 1.8198, + "step": 423500 + }, + { + "epoch": 0.21, + "learning_rate": 1.573835296388003e-05, + "loss": 1.8289, + "step": 424000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5733327436714793e-05, + "loss": 1.8159, + "step": 424500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5728301909549558e-05, + "loss": 1.8092, + "step": 425000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5723276382384323e-05, + "loss": 1.799, + "step": 425500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5718250855219085e-05, + "loss": 1.8137, + "step": 426000 + }, + { + "epoch": 0.21, + "learning_rate": 1.571322532805385e-05, + "loss": 1.8107, + "step": 426500 + }, + { + "epoch": 0.21, + "learning_rate": 1.5708199800888615e-05, + "loss": 1.814, + "step": 427000 + }, + { + "epoch": 0.21, + "learning_rate": 1.5703174273723377e-05, + "loss": 1.8314, + "step": 427500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5698148746558142e-05, + "loss": 1.8157, + "step": 428000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5693123219392907e-05, + "loss": 1.8035, + "step": 428500 + }, + { + "epoch": 0.22, + "learning_rate": 1.568809769222767e-05, + "loss": 1.8095, + "step": 429000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5683072165062438e-05, + "loss": 1.829, + "step": 429500 + }, + { + "epoch": 0.22, + "learning_rate": 1.56780466378972e-05, + "loss": 1.8159, + "step": 430000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5673021110731965e-05, + "loss": 1.8202, + "step": 430500 + }, + { + "epoch": 0.22, + "learning_rate": 1.566799558356673e-05, + "loss": 1.8205, + "step": 431000 + }, + { + "epoch": 0.22, + "learning_rate": 1.566297005640149e-05, + "loss": 1.8164, + "step": 431500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5657944529236257e-05, + "loss": 1.8306, + "step": 432000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5652919002071022e-05, + "loss": 1.8202, + "step": 432500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5647893474905784e-05, + "loss": 1.7925, + "step": 433000 + }, + { + "epoch": 0.22, + "learning_rate": 1.564286794774055e-05, + "loss": 1.8107, + "step": 433500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5637842420575314e-05, + "loss": 1.8371, + "step": 434000 + }, + { + "epoch": 0.22, + "learning_rate": 1.563281689341008e-05, + "loss": 1.8445, + "step": 434500 + }, + { + "epoch": 0.22, + "learning_rate": 1.562779136624484e-05, + "loss": 1.8323, + "step": 435000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5622765839079606e-05, + "loss": 1.8241, + "step": 435500 + }, + { + "epoch": 0.22, + "learning_rate": 1.561774031191437e-05, + "loss": 1.8277, + "step": 436000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5612714784749136e-05, + "loss": 1.8199, + "step": 436500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5607689257583898e-05, + "loss": 1.8281, + "step": 437000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5602663730418663e-05, + "loss": 1.83, + "step": 437500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5597638203253428e-05, + "loss": 1.8235, + "step": 438000 + }, + { + "epoch": 0.22, + "learning_rate": 1.559261267608819e-05, + "loss": 1.8236, + "step": 438500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5587587148922955e-05, + "loss": 1.8338, + "step": 439000 + }, + { + "epoch": 0.22, + "learning_rate": 1.558256162175772e-05, + "loss": 1.8202, + "step": 439500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5577536094592485e-05, + "loss": 1.8106, + "step": 440000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5572510567427247e-05, + "loss": 1.825, + "step": 440500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5567485040262012e-05, + "loss": 1.8249, + "step": 441000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5562459513096777e-05, + "loss": 1.8062, + "step": 441500 + }, + { + "epoch": 0.22, + "learning_rate": 1.555743398593154e-05, + "loss": 1.8276, + "step": 442000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5552408458766304e-05, + "loss": 1.8077, + "step": 442500 + }, + { + "epoch": 0.22, + "learning_rate": 1.554738293160107e-05, + "loss": 1.8216, + "step": 443000 + }, + { + "epoch": 0.22, + "learning_rate": 1.554235740443583e-05, + "loss": 1.7957, + "step": 443500 + }, + { + "epoch": 0.22, + "learning_rate": 1.55373318772706e-05, + "loss": 1.8267, + "step": 444000 + }, + { + "epoch": 0.22, + "learning_rate": 1.553230635010536e-05, + "loss": 1.8203, + "step": 444500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5527280822940127e-05, + "loss": 1.8231, + "step": 445000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5522255295774892e-05, + "loss": 1.8237, + "step": 445500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5517229768609653e-05, + "loss": 1.8045, + "step": 446000 + }, + { + "epoch": 0.22, + "learning_rate": 1.551220424144442e-05, + "loss": 1.8308, + "step": 446500 + }, + { + "epoch": 0.22, + "learning_rate": 1.5507178714279184e-05, + "loss": 1.8133, + "step": 447000 + }, + { + "epoch": 0.22, + "learning_rate": 1.5502153187113946e-05, + "loss": 1.8102, + "step": 447500 + }, + { + "epoch": 0.23, + "learning_rate": 1.549712765994871e-05, + "loss": 1.8058, + "step": 448000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5492102132783476e-05, + "loss": 1.828, + "step": 448500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5487076605618238e-05, + "loss": 1.7997, + "step": 449000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5482051078453003e-05, + "loss": 1.815, + "step": 449500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5477025551287768e-05, + "loss": 1.8028, + "step": 450000 + }, + { + "epoch": 0.23, + "learning_rate": 1.547200002412253e-05, + "loss": 1.813, + "step": 450500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5466974496957298e-05, + "loss": 1.8198, + "step": 451000 + }, + { + "epoch": 0.23, + "learning_rate": 1.546194896979206e-05, + "loss": 1.8128, + "step": 451500 + }, + { + "epoch": 0.23, + "learning_rate": 1.545692344262682e-05, + "loss": 1.8215, + "step": 452000 + }, + { + "epoch": 0.23, + "learning_rate": 1.545189791546159e-05, + "loss": 1.827, + "step": 452500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5446872388296352e-05, + "loss": 1.8143, + "step": 453000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5441846861131117e-05, + "loss": 1.8151, + "step": 453500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5436821333965882e-05, + "loss": 1.8232, + "step": 454000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5431795806800644e-05, + "loss": 1.8104, + "step": 454500 + }, + { + "epoch": 0.23, + "learning_rate": 1.542677027963541e-05, + "loss": 1.8138, + "step": 455000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5421744752470174e-05, + "loss": 1.8149, + "step": 455500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5416719225304936e-05, + "loss": 1.8448, + "step": 456000 + }, + { + "epoch": 0.23, + "learning_rate": 1.54116936981397e-05, + "loss": 1.828, + "step": 456500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5406668170974466e-05, + "loss": 1.7856, + "step": 457000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5401642643809228e-05, + "loss": 1.8214, + "step": 457500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5396617116643993e-05, + "loss": 1.8302, + "step": 458000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5391591589478758e-05, + "loss": 1.8158, + "step": 458500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5386566062313523e-05, + "loss": 1.8113, + "step": 459000 + }, + { + "epoch": 0.23, + "learning_rate": 1.538154053514829e-05, + "loss": 1.8024, + "step": 459500 + }, + { + "epoch": 0.23, + "learning_rate": 1.537651500798305e-05, + "loss": 1.8101, + "step": 460000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5371489480817815e-05, + "loss": 1.8209, + "step": 460500 + }, + { + "epoch": 0.23, + "learning_rate": 1.536646395365258e-05, + "loss": 1.8116, + "step": 461000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5361438426487342e-05, + "loss": 1.8183, + "step": 461500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5356412899322108e-05, + "loss": 1.8009, + "step": 462000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5351387372156873e-05, + "loss": 1.8135, + "step": 462500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5346361844991634e-05, + "loss": 1.8034, + "step": 463000 + }, + { + "epoch": 0.23, + "learning_rate": 1.53413363178264e-05, + "loss": 1.8004, + "step": 463500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5336310790661165e-05, + "loss": 1.836, + "step": 464000 + }, + { + "epoch": 0.23, + "learning_rate": 1.533128526349593e-05, + "loss": 1.8133, + "step": 464500 + }, + { + "epoch": 0.23, + "learning_rate": 1.532625973633069e-05, + "loss": 1.813, + "step": 465000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5321234209165457e-05, + "loss": 1.8014, + "step": 465500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5316208682000222e-05, + "loss": 1.7992, + "step": 466000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5311183154834987e-05, + "loss": 1.8245, + "step": 466500 + }, + { + "epoch": 0.23, + "learning_rate": 1.530615762766975e-05, + "loss": 1.7904, + "step": 467000 + }, + { + "epoch": 0.23, + "learning_rate": 1.5301132100504514e-05, + "loss": 1.8163, + "step": 467500 + }, + { + "epoch": 0.24, + "learning_rate": 1.529610657333928e-05, + "loss": 1.814, + "step": 468000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5291081046174044e-05, + "loss": 1.8048, + "step": 468500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5286055519008806e-05, + "loss": 1.8023, + "step": 469000 + }, + { + "epoch": 0.24, + "learning_rate": 1.528102999184357e-05, + "loss": 1.8132, + "step": 469500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5276004464678336e-05, + "loss": 1.8093, + "step": 470000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5270978937513098e-05, + "loss": 1.808, + "step": 470500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5265953410347863e-05, + "loss": 1.8281, + "step": 471000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5260927883182628e-05, + "loss": 1.8053, + "step": 471500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5255902356017392e-05, + "loss": 1.8076, + "step": 472000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5250876828852155e-05, + "loss": 1.7941, + "step": 472500 + }, + { + "epoch": 0.24, + "learning_rate": 1.524585130168692e-05, + "loss": 1.8084, + "step": 473000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5240825774521684e-05, + "loss": 1.7997, + "step": 473500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5235800247356449e-05, + "loss": 1.8243, + "step": 474000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5230774720191212e-05, + "loss": 1.813, + "step": 474500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5225749193025976e-05, + "loss": 1.8084, + "step": 475000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5220723665860741e-05, + "loss": 1.8237, + "step": 475500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5215698138695504e-05, + "loss": 1.8218, + "step": 476000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5210672611530268e-05, + "loss": 1.8194, + "step": 476500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5205647084365035e-05, + "loss": 1.8002, + "step": 477000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5200621557199798e-05, + "loss": 1.8029, + "step": 477500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5195596030034563e-05, + "loss": 1.8052, + "step": 478000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5190570502869327e-05, + "loss": 1.8242, + "step": 478500 + }, + { + "epoch": 0.24, + "learning_rate": 1.518554497570409e-05, + "loss": 1.8218, + "step": 479000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5180519448538855e-05, + "loss": 1.8158, + "step": 479500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5175493921373619e-05, + "loss": 1.8074, + "step": 480000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5170468394208382e-05, + "loss": 1.8074, + "step": 480500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5165442867043147e-05, + "loss": 1.8035, + "step": 481000 + }, + { + "epoch": 0.24, + "learning_rate": 1.516041733987791e-05, + "loss": 1.8346, + "step": 481500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5155391812712674e-05, + "loss": 1.8023, + "step": 482000 + }, + { + "epoch": 0.24, + "learning_rate": 1.515036628554744e-05, + "loss": 1.8075, + "step": 482500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5145340758382203e-05, + "loss": 1.8108, + "step": 483000 + }, + { + "epoch": 0.24, + "learning_rate": 1.514031523121697e-05, + "loss": 1.8065, + "step": 483500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5135289704051731e-05, + "loss": 1.8108, + "step": 484000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5130264176886495e-05, + "loss": 1.788, + "step": 484500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5125238649721262e-05, + "loss": 1.7876, + "step": 485000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5120213122556025e-05, + "loss": 1.8285, + "step": 485500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5115187595390789e-05, + "loss": 1.8123, + "step": 486000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5110162068225554e-05, + "loss": 1.8122, + "step": 486500 + }, + { + "epoch": 0.24, + "learning_rate": 1.5105136541060317e-05, + "loss": 1.798, + "step": 487000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5100111013895082e-05, + "loss": 1.797, + "step": 487500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5095085486729846e-05, + "loss": 1.8034, + "step": 488000 + }, + { + "epoch": 0.25, + "learning_rate": 1.509005995956461e-05, + "loss": 1.805, + "step": 488500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5085034432399374e-05, + "loss": 1.8172, + "step": 489000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5080008905234138e-05, + "loss": 1.7979, + "step": 489500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5074983378068901e-05, + "loss": 1.8007, + "step": 490000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5069957850903666e-05, + "loss": 1.8015, + "step": 490500 + }, + { + "epoch": 0.25, + "learning_rate": 1.506493232373843e-05, + "loss": 1.8089, + "step": 491000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5059906796573193e-05, + "loss": 1.8133, + "step": 491500 + }, + { + "epoch": 0.25, + "learning_rate": 1.505488126940796e-05, + "loss": 1.8081, + "step": 492000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5049855742242724e-05, + "loss": 1.8096, + "step": 492500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5044830215077489e-05, + "loss": 1.811, + "step": 493000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5039804687912252e-05, + "loss": 1.8029, + "step": 493500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5034779160747016e-05, + "loss": 1.8183, + "step": 494000 + }, + { + "epoch": 0.25, + "learning_rate": 1.502975363358178e-05, + "loss": 1.8158, + "step": 494500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5024728106416544e-05, + "loss": 1.8205, + "step": 495000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5019702579251308e-05, + "loss": 1.8134, + "step": 495500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5014677052086073e-05, + "loss": 1.8195, + "step": 496000 + }, + { + "epoch": 0.25, + "learning_rate": 1.5009651524920836e-05, + "loss": 1.8086, + "step": 496500 + }, + { + "epoch": 0.25, + "learning_rate": 1.50046259977556e-05, + "loss": 1.8062, + "step": 497000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4999600470590365e-05, + "loss": 1.8191, + "step": 497500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4994574943425128e-05, + "loss": 1.8179, + "step": 498000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4989549416259895e-05, + "loss": 1.7917, + "step": 498500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4984523889094657e-05, + "loss": 1.8096, + "step": 499000 + }, + { + "epoch": 0.25, + "learning_rate": 1.497949836192942e-05, + "loss": 1.8058, + "step": 499500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4974472834764187e-05, + "loss": 1.7932, + "step": 500000 + }, + { + "epoch": 0.25, + "learning_rate": 1.496944730759895e-05, + "loss": 1.8112, + "step": 500500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4964421780433714e-05, + "loss": 1.8253, + "step": 501000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4959396253268479e-05, + "loss": 1.7891, + "step": 501500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4954370726103243e-05, + "loss": 1.7912, + "step": 502000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4949345198938008e-05, + "loss": 1.7874, + "step": 502500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4944319671772771e-05, + "loss": 1.8071, + "step": 503000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4939294144607535e-05, + "loss": 1.7986, + "step": 503500 + }, + { + "epoch": 0.25, + "learning_rate": 1.49342686174423e-05, + "loss": 1.7973, + "step": 504000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4929243090277063e-05, + "loss": 1.8042, + "step": 504500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4924217563111827e-05, + "loss": 1.8085, + "step": 505000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4919192035946592e-05, + "loss": 1.784, + "step": 505500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4914166508781355e-05, + "loss": 1.794, + "step": 506000 + }, + { + "epoch": 0.25, + "learning_rate": 1.4909140981616119e-05, + "loss": 1.8226, + "step": 506500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4904115454450885e-05, + "loss": 1.8222, + "step": 507000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4899089927285649e-05, + "loss": 1.7918, + "step": 507500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4894064400120414e-05, + "loss": 1.8048, + "step": 508000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4889038872955178e-05, + "loss": 1.786, + "step": 508500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4884013345789941e-05, + "loss": 1.8332, + "step": 509000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4878987818624706e-05, + "loss": 1.7995, + "step": 509500 + }, + { + "epoch": 0.26, + "learning_rate": 1.487396229145947e-05, + "loss": 1.8055, + "step": 510000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4868936764294233e-05, + "loss": 1.7954, + "step": 510500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4863911237128998e-05, + "loss": 1.8198, + "step": 511000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4858885709963762e-05, + "loss": 1.792, + "step": 511500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4853860182798527e-05, + "loss": 1.8022, + "step": 512000 + }, + { + "epoch": 0.26, + "learning_rate": 1.484883465563329e-05, + "loss": 1.8072, + "step": 512500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4843809128468054e-05, + "loss": 1.8015, + "step": 513000 + }, + { + "epoch": 0.26, + "learning_rate": 1.483878360130282e-05, + "loss": 1.783, + "step": 513500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4833758074137584e-05, + "loss": 1.7882, + "step": 514000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4828732546972346e-05, + "loss": 1.8245, + "step": 514500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4823707019807112e-05, + "loss": 1.8037, + "step": 515000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4818681492641876e-05, + "loss": 1.7862, + "step": 515500 + }, + { + "epoch": 0.26, + "learning_rate": 1.481365596547664e-05, + "loss": 1.8053, + "step": 516000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4808630438311405e-05, + "loss": 1.8042, + "step": 516500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4803604911146168e-05, + "loss": 1.8028, + "step": 517000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4798579383980933e-05, + "loss": 1.8146, + "step": 517500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4793553856815697e-05, + "loss": 1.8149, + "step": 518000 + }, + { + "epoch": 0.26, + "learning_rate": 1.478852832965046e-05, + "loss": 1.7947, + "step": 518500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4783502802485225e-05, + "loss": 1.814, + "step": 519000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4778477275319989e-05, + "loss": 1.8, + "step": 519500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4773451748154752e-05, + "loss": 1.7947, + "step": 520000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4768426220989517e-05, + "loss": 1.8109, + "step": 520500 + }, + { + "epoch": 0.26, + "learning_rate": 1.476340069382428e-05, + "loss": 1.7981, + "step": 521000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4758375166659047e-05, + "loss": 1.8232, + "step": 521500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4753349639493811e-05, + "loss": 1.8186, + "step": 522000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4748324112328574e-05, + "loss": 1.8004, + "step": 522500 + }, + { + "epoch": 0.26, + "learning_rate": 1.474329858516334e-05, + "loss": 1.7999, + "step": 523000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4738273057998103e-05, + "loss": 1.7981, + "step": 523500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4733247530832866e-05, + "loss": 1.8138, + "step": 524000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4728222003667632e-05, + "loss": 1.8173, + "step": 524500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4723196476502395e-05, + "loss": 1.8066, + "step": 525000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4718170949337158e-05, + "loss": 1.8259, + "step": 525500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4713145422171924e-05, + "loss": 1.7837, + "step": 526000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4708119895006687e-05, + "loss": 1.813, + "step": 526500 + }, + { + "epoch": 0.26, + "learning_rate": 1.4703094367841452e-05, + "loss": 1.8178, + "step": 527000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4698068840676216e-05, + "loss": 1.8176, + "step": 527500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4693043313510979e-05, + "loss": 1.8055, + "step": 528000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4688017786345746e-05, + "loss": 1.8158, + "step": 528500 + }, + { + "epoch": 0.27, + "learning_rate": 1.468299225918051e-05, + "loss": 1.799, + "step": 529000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4677966732015271e-05, + "loss": 1.8, + "step": 529500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4672941204850038e-05, + "loss": 1.8001, + "step": 530000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4667915677684801e-05, + "loss": 1.8153, + "step": 530500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4662890150519567e-05, + "loss": 1.8023, + "step": 531000 + }, + { + "epoch": 0.27, + "learning_rate": 1.465786462335433e-05, + "loss": 1.8121, + "step": 531500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4652839096189093e-05, + "loss": 1.8109, + "step": 532000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4647813569023859e-05, + "loss": 1.7998, + "step": 532500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4642788041858622e-05, + "loss": 1.8168, + "step": 533000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4637762514693385e-05, + "loss": 1.787, + "step": 533500 + }, + { + "epoch": 0.27, + "learning_rate": 1.463273698752815e-05, + "loss": 1.8003, + "step": 534000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4627711460362914e-05, + "loss": 1.8102, + "step": 534500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4622685933197677e-05, + "loss": 1.7954, + "step": 535000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4617660406032443e-05, + "loss": 1.7964, + "step": 535500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4612634878867206e-05, + "loss": 1.8019, + "step": 536000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4607609351701973e-05, + "loss": 1.8049, + "step": 536500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4602583824536736e-05, + "loss": 1.8047, + "step": 537000 + }, + { + "epoch": 0.27, + "learning_rate": 1.45975582973715e-05, + "loss": 1.7901, + "step": 537500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4592532770206265e-05, + "loss": 1.8055, + "step": 538000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4587507243041028e-05, + "loss": 1.7971, + "step": 538500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4582481715875792e-05, + "loss": 1.7853, + "step": 539000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4577456188710557e-05, + "loss": 1.8009, + "step": 539500 + }, + { + "epoch": 0.27, + "learning_rate": 1.457243066154532e-05, + "loss": 1.8094, + "step": 540000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4567405134380084e-05, + "loss": 1.8076, + "step": 540500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4562379607214849e-05, + "loss": 1.7849, + "step": 541000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4557354080049612e-05, + "loss": 1.8004, + "step": 541500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4552328552884378e-05, + "loss": 1.7965, + "step": 542000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4547303025719141e-05, + "loss": 1.7963, + "step": 542500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4542277498553904e-05, + "loss": 1.8123, + "step": 543000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4537251971388671e-05, + "loss": 1.8157, + "step": 543500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4532226444223435e-05, + "loss": 1.7961, + "step": 544000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4527200917058197e-05, + "loss": 1.7998, + "step": 544500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4522175389892963e-05, + "loss": 1.792, + "step": 545000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4517149862727727e-05, + "loss": 1.8035, + "step": 545500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4512124335562492e-05, + "loss": 1.8167, + "step": 546000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4507098808397255e-05, + "loss": 1.8145, + "step": 546500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4502073281232019e-05, + "loss": 1.7929, + "step": 547000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4497047754066784e-05, + "loss": 1.7875, + "step": 547500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4492022226901547e-05, + "loss": 1.7891, + "step": 548000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4486996699736311e-05, + "loss": 1.8059, + "step": 548500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4481971172571076e-05, + "loss": 1.8214, + "step": 549000 + }, + { + "epoch": 0.28, + "learning_rate": 1.447694564540584e-05, + "loss": 1.8044, + "step": 549500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4471920118240603e-05, + "loss": 1.8159, + "step": 550000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4466894591075368e-05, + "loss": 1.8044, + "step": 550500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4461869063910131e-05, + "loss": 1.8237, + "step": 551000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4456843536744898e-05, + "loss": 1.8037, + "step": 551500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4451818009579662e-05, + "loss": 1.8071, + "step": 552000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4446792482414425e-05, + "loss": 1.7924, + "step": 552500 + }, + { + "epoch": 0.28, + "learning_rate": 1.444176695524919e-05, + "loss": 1.814, + "step": 553000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4436741428083954e-05, + "loss": 1.8061, + "step": 553500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4431715900918717e-05, + "loss": 1.7941, + "step": 554000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4426690373753482e-05, + "loss": 1.792, + "step": 554500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4421664846588246e-05, + "loss": 1.8136, + "step": 555000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4416639319423011e-05, + "loss": 1.7847, + "step": 555500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4411613792257774e-05, + "loss": 1.8279, + "step": 556000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4406588265092538e-05, + "loss": 1.7991, + "step": 556500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4401562737927303e-05, + "loss": 1.7941, + "step": 557000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4396537210762066e-05, + "loss": 1.8077, + "step": 557500 + }, + { + "epoch": 0.28, + "learning_rate": 1.439151168359683e-05, + "loss": 1.8, + "step": 558000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4386486156431597e-05, + "loss": 1.8011, + "step": 558500 + }, + { + "epoch": 0.28, + "learning_rate": 1.438146062926636e-05, + "loss": 1.7937, + "step": 559000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4376435102101124e-05, + "loss": 1.8097, + "step": 559500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4371409574935889e-05, + "loss": 1.8067, + "step": 560000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4366384047770652e-05, + "loss": 1.8013, + "step": 560500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4361358520605417e-05, + "loss": 1.8174, + "step": 561000 + }, + { + "epoch": 0.28, + "learning_rate": 1.435633299344018e-05, + "loss": 1.8104, + "step": 561500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4351307466274944e-05, + "loss": 1.7913, + "step": 562000 + }, + { + "epoch": 0.28, + "learning_rate": 1.434628193910971e-05, + "loss": 1.7919, + "step": 562500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4341256411944473e-05, + "loss": 1.811, + "step": 563000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4336230884779236e-05, + "loss": 1.8169, + "step": 563500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4331205357614001e-05, + "loss": 1.8135, + "step": 564000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4326179830448765e-05, + "loss": 1.8073, + "step": 564500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4321154303283532e-05, + "loss": 1.7861, + "step": 565000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4316128776118295e-05, + "loss": 1.7781, + "step": 565500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4311103248953057e-05, + "loss": 1.8095, + "step": 566000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4306077721787824e-05, + "loss": 1.7861, + "step": 566500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4301052194622587e-05, + "loss": 1.7978, + "step": 567000 + }, + { + "epoch": 0.29, + "learning_rate": 1.429602666745735e-05, + "loss": 1.7855, + "step": 567500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4291001140292116e-05, + "loss": 1.7996, + "step": 568000 + }, + { + "epoch": 0.29, + "learning_rate": 1.428597561312688e-05, + "loss": 1.8002, + "step": 568500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4280950085961643e-05, + "loss": 1.8317, + "step": 569000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4275924558796408e-05, + "loss": 1.8066, + "step": 569500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4270899031631171e-05, + "loss": 1.7853, + "step": 570000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4265873504465936e-05, + "loss": 1.7889, + "step": 570500 + }, + { + "epoch": 0.29, + "learning_rate": 1.42608479773007e-05, + "loss": 1.8095, + "step": 571000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4255822450135463e-05, + "loss": 1.7922, + "step": 571500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4250796922970228e-05, + "loss": 1.7945, + "step": 572000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4245771395804992e-05, + "loss": 1.8032, + "step": 572500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4240745868639755e-05, + "loss": 1.7983, + "step": 573000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4235720341474522e-05, + "loss": 1.8151, + "step": 573500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4230694814309286e-05, + "loss": 1.7918, + "step": 574000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4225669287144049e-05, + "loss": 1.7972, + "step": 574500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4220643759978814e-05, + "loss": 1.7788, + "step": 575000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4215618232813578e-05, + "loss": 1.8127, + "step": 575500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4210592705648343e-05, + "loss": 1.7873, + "step": 576000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4205567178483106e-05, + "loss": 1.8017, + "step": 576500 + }, + { + "epoch": 0.29, + "learning_rate": 1.420054165131787e-05, + "loss": 1.7934, + "step": 577000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4195516124152635e-05, + "loss": 1.8091, + "step": 577500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4190490596987398e-05, + "loss": 1.8119, + "step": 578000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4185465069822162e-05, + "loss": 1.8054, + "step": 578500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4180439542656927e-05, + "loss": 1.8012, + "step": 579000 + }, + { + "epoch": 0.29, + "learning_rate": 1.417541401549169e-05, + "loss": 1.8049, + "step": 579500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4170388488326457e-05, + "loss": 1.8045, + "step": 580000 + }, + { + "epoch": 0.29, + "learning_rate": 1.416536296116122e-05, + "loss": 1.7665, + "step": 580500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4160337433995982e-05, + "loss": 1.7997, + "step": 581000 + }, + { + "epoch": 0.29, + "learning_rate": 1.415531190683075e-05, + "loss": 1.8053, + "step": 581500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4150286379665513e-05, + "loss": 1.7994, + "step": 582000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4145260852500276e-05, + "loss": 1.8078, + "step": 582500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4140235325335041e-05, + "loss": 1.8169, + "step": 583000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4135209798169805e-05, + "loss": 1.7754, + "step": 583500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4130184271004568e-05, + "loss": 1.8257, + "step": 584000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4125158743839333e-05, + "loss": 1.807, + "step": 584500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4120133216674097e-05, + "loss": 1.8001, + "step": 585000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4115107689508862e-05, + "loss": 1.8142, + "step": 585500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4110082162343625e-05, + "loss": 1.8038, + "step": 586000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4105056635178389e-05, + "loss": 1.8016, + "step": 586500 + }, + { + "epoch": 0.29, + "learning_rate": 1.4100031108013154e-05, + "loss": 1.8011, + "step": 587000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4095005580847917e-05, + "loss": 1.8165, + "step": 587500 + }, + { + "epoch": 0.3, + "learning_rate": 1.408998005368268e-05, + "loss": 1.7999, + "step": 588000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4084954526517448e-05, + "loss": 1.7924, + "step": 588500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4079928999352211e-05, + "loss": 1.7915, + "step": 589000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4074903472186976e-05, + "loss": 1.8029, + "step": 589500 + }, + { + "epoch": 0.3, + "learning_rate": 1.406987794502174e-05, + "loss": 1.7881, + "step": 590000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4064852417856503e-05, + "loss": 1.8055, + "step": 590500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4059826890691268e-05, + "loss": 1.7635, + "step": 591000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4054801363526032e-05, + "loss": 1.791, + "step": 591500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4049775836360795e-05, + "loss": 1.8001, + "step": 592000 + }, + { + "epoch": 0.3, + "learning_rate": 1.404475030919556e-05, + "loss": 1.8099, + "step": 592500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4039724782030324e-05, + "loss": 1.8049, + "step": 593000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4034699254865087e-05, + "loss": 1.7991, + "step": 593500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4029673727699852e-05, + "loss": 1.7914, + "step": 594000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4024648200534616e-05, + "loss": 1.7984, + "step": 594500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4019622673369383e-05, + "loss": 1.8109, + "step": 595000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4014597146204146e-05, + "loss": 1.7932, + "step": 595500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4009571619038908e-05, + "loss": 1.7924, + "step": 596000 + }, + { + "epoch": 0.3, + "learning_rate": 1.4004546091873675e-05, + "loss": 1.7938, + "step": 596500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3999520564708438e-05, + "loss": 1.7947, + "step": 597000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3994495037543201e-05, + "loss": 1.8066, + "step": 597500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3989469510377967e-05, + "loss": 1.7973, + "step": 598000 + }, + { + "epoch": 0.3, + "learning_rate": 1.398444398321273e-05, + "loss": 1.802, + "step": 598500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3979418456047495e-05, + "loss": 1.8059, + "step": 599000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3974392928882259e-05, + "loss": 1.8041, + "step": 599500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3969367401717022e-05, + "loss": 1.8055, + "step": 600000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3964341874551787e-05, + "loss": 1.791, + "step": 600500 + }, + { + "epoch": 0.3, + "learning_rate": 1.395931634738655e-05, + "loss": 1.811, + "step": 601000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3954290820221314e-05, + "loss": 1.8263, + "step": 601500 + }, + { + "epoch": 0.3, + "learning_rate": 1.394926529305608e-05, + "loss": 1.7866, + "step": 602000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3944239765890843e-05, + "loss": 1.7904, + "step": 602500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3939214238725606e-05, + "loss": 1.7941, + "step": 603000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3934188711560373e-05, + "loss": 1.801, + "step": 603500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3929163184395136e-05, + "loss": 1.7788, + "step": 604000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3924137657229902e-05, + "loss": 1.7875, + "step": 604500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3919112130064665e-05, + "loss": 1.7893, + "step": 605000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3914086602899429e-05, + "loss": 1.7967, + "step": 605500 + }, + { + "epoch": 0.3, + "learning_rate": 1.3909061075734194e-05, + "loss": 1.7893, + "step": 606000 + }, + { + "epoch": 0.3, + "learning_rate": 1.3904035548568957e-05, + "loss": 1.8214, + "step": 606500 + }, + { + "epoch": 0.31, + "learning_rate": 1.389901002140372e-05, + "loss": 1.8052, + "step": 607000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3893984494238486e-05, + "loss": 1.7788, + "step": 607500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3888958967073249e-05, + "loss": 1.8136, + "step": 608000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3883933439908013e-05, + "loss": 1.7837, + "step": 608500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3878907912742778e-05, + "loss": 1.8098, + "step": 609000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3873882385577541e-05, + "loss": 1.7921, + "step": 609500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3868856858412308e-05, + "loss": 1.7973, + "step": 610000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3863831331247071e-05, + "loss": 1.7762, + "step": 610500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3858805804081835e-05, + "loss": 1.821, + "step": 611000 + }, + { + "epoch": 0.31, + "learning_rate": 1.38537802769166e-05, + "loss": 1.7839, + "step": 611500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3848754749751363e-05, + "loss": 1.7974, + "step": 612000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3843729222586127e-05, + "loss": 1.7895, + "step": 612500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3838703695420892e-05, + "loss": 1.7783, + "step": 613000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3833678168255656e-05, + "loss": 1.7962, + "step": 613500 + }, + { + "epoch": 0.31, + "learning_rate": 1.382865264109042e-05, + "loss": 1.8144, + "step": 614000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3823627113925184e-05, + "loss": 1.7685, + "step": 614500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3818601586759948e-05, + "loss": 1.7848, + "step": 615000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3813576059594713e-05, + "loss": 1.7989, + "step": 615500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3808550532429476e-05, + "loss": 1.8043, + "step": 616000 + }, + { + "epoch": 0.31, + "learning_rate": 1.380352500526424e-05, + "loss": 1.7911, + "step": 616500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3798499478099006e-05, + "loss": 1.7914, + "step": 617000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3793473950933768e-05, + "loss": 1.7943, + "step": 617500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3788448423768532e-05, + "loss": 1.7918, + "step": 618000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3783422896603298e-05, + "loss": 1.7857, + "step": 618500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3778397369438062e-05, + "loss": 1.7993, + "step": 619000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3773371842272827e-05, + "loss": 1.7906, + "step": 619500 + }, + { + "epoch": 0.31, + "learning_rate": 1.376834631510759e-05, + "loss": 1.7897, + "step": 620000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3763320787942354e-05, + "loss": 1.7793, + "step": 620500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3758295260777119e-05, + "loss": 1.7893, + "step": 621000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3753269733611883e-05, + "loss": 1.7854, + "step": 621500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3748244206446646e-05, + "loss": 1.7864, + "step": 622000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3743218679281411e-05, + "loss": 1.805, + "step": 622500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3738193152116175e-05, + "loss": 1.8178, + "step": 623000 + }, + { + "epoch": 0.31, + "learning_rate": 1.373316762495094e-05, + "loss": 1.7769, + "step": 623500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3728142097785703e-05, + "loss": 1.8115, + "step": 624000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3723116570620467e-05, + "loss": 1.7943, + "step": 624500 + }, + { + "epoch": 0.31, + "learning_rate": 1.3718091043455233e-05, + "loss": 1.7787, + "step": 625000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3713065516289997e-05, + "loss": 1.7871, + "step": 625500 + }, + { + "epoch": 0.31, + "learning_rate": 1.370803998912476e-05, + "loss": 1.7975, + "step": 626000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3703014461959525e-05, + "loss": 1.7829, + "step": 626500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3697988934794289e-05, + "loss": 1.7829, + "step": 627000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3692963407629052e-05, + "loss": 1.7839, + "step": 627500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3687937880463817e-05, + "loss": 1.8171, + "step": 628000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3682912353298581e-05, + "loss": 1.8002, + "step": 628500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3677886826133346e-05, + "loss": 1.8076, + "step": 629000 + }, + { + "epoch": 0.32, + "learning_rate": 1.367286129896811e-05, + "loss": 1.7875, + "step": 629500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3667835771802873e-05, + "loss": 1.7985, + "step": 630000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3662810244637638e-05, + "loss": 1.7807, + "step": 630500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3657784717472402e-05, + "loss": 1.7927, + "step": 631000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3652759190307165e-05, + "loss": 1.7897, + "step": 631500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3647733663141932e-05, + "loss": 1.7795, + "step": 632000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3642708135976694e-05, + "loss": 1.781, + "step": 632500 + }, + { + "epoch": 0.32, + "learning_rate": 1.363768260881146e-05, + "loss": 1.8021, + "step": 633000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3632657081646224e-05, + "loss": 1.8165, + "step": 633500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3627631554480987e-05, + "loss": 1.8208, + "step": 634000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3622606027315752e-05, + "loss": 1.7868, + "step": 634500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3617580500150516e-05, + "loss": 1.7998, + "step": 635000 + }, + { + "epoch": 0.32, + "learning_rate": 1.361255497298528e-05, + "loss": 1.7921, + "step": 635500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3607529445820044e-05, + "loss": 1.8094, + "step": 636000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3602503918654808e-05, + "loss": 1.8085, + "step": 636500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3597478391489571e-05, + "loss": 1.7801, + "step": 637000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3592452864324337e-05, + "loss": 1.7957, + "step": 637500 + }, + { + "epoch": 0.32, + "learning_rate": 1.35874273371591e-05, + "loss": 1.7763, + "step": 638000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3582401809993865e-05, + "loss": 1.7986, + "step": 638500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3577376282828629e-05, + "loss": 1.7898, + "step": 639000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3572350755663392e-05, + "loss": 1.7951, + "step": 639500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3567325228498159e-05, + "loss": 1.8042, + "step": 640000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3562299701332922e-05, + "loss": 1.7805, + "step": 640500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3557274174167686e-05, + "loss": 1.7937, + "step": 641000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3552248647002451e-05, + "loss": 1.7835, + "step": 641500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3547223119837214e-05, + "loss": 1.7797, + "step": 642000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3542197592671978e-05, + "loss": 1.7956, + "step": 642500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3537172065506743e-05, + "loss": 1.8129, + "step": 643000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3532146538341506e-05, + "loss": 1.8033, + "step": 643500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3527121011176272e-05, + "loss": 1.8017, + "step": 644000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3522095484011035e-05, + "loss": 1.7919, + "step": 644500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3517069956845798e-05, + "loss": 1.7981, + "step": 645000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3512044429680564e-05, + "loss": 1.7861, + "step": 645500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3507018902515327e-05, + "loss": 1.7785, + "step": 646000 + }, + { + "epoch": 0.32, + "learning_rate": 1.350199337535009e-05, + "loss": 1.7609, + "step": 646500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3496967848184857e-05, + "loss": 1.7809, + "step": 647000 + }, + { + "epoch": 0.33, + "learning_rate": 1.349194232101962e-05, + "loss": 1.8013, + "step": 647500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3486916793854386e-05, + "loss": 1.7805, + "step": 648000 + }, + { + "epoch": 0.33, + "learning_rate": 1.348189126668915e-05, + "loss": 1.8123, + "step": 648500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3476865739523913e-05, + "loss": 1.8077, + "step": 649000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3471840212358678e-05, + "loss": 1.7949, + "step": 649500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3466814685193441e-05, + "loss": 1.8042, + "step": 650000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3461789158028205e-05, + "loss": 1.7997, + "step": 650500 + }, + { + "epoch": 0.33, + "learning_rate": 1.345676363086297e-05, + "loss": 1.7845, + "step": 651000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3451738103697733e-05, + "loss": 1.7879, + "step": 651500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3446712576532497e-05, + "loss": 1.7861, + "step": 652000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3441687049367262e-05, + "loss": 1.78, + "step": 652500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3436661522202025e-05, + "loss": 1.7965, + "step": 653000 + }, + { + "epoch": 0.33, + "learning_rate": 1.343163599503679e-05, + "loss": 1.7929, + "step": 653500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3426610467871554e-05, + "loss": 1.7958, + "step": 654000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3421584940706317e-05, + "loss": 1.7945, + "step": 654500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3416559413541084e-05, + "loss": 1.7758, + "step": 655000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3411533886375848e-05, + "loss": 1.7768, + "step": 655500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3406508359210611e-05, + "loss": 1.7878, + "step": 656000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3401482832045376e-05, + "loss": 1.776, + "step": 656500 + }, + { + "epoch": 0.33, + "learning_rate": 1.339645730488014e-05, + "loss": 1.8006, + "step": 657000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3391431777714905e-05, + "loss": 1.7773, + "step": 657500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3386406250549668e-05, + "loss": 1.7997, + "step": 658000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3381380723384432e-05, + "loss": 1.7983, + "step": 658500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3376355196219197e-05, + "loss": 1.7946, + "step": 659000 + }, + { + "epoch": 0.33, + "learning_rate": 1.337132966905396e-05, + "loss": 1.788, + "step": 659500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3366304141888724e-05, + "loss": 1.7895, + "step": 660000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3361278614723489e-05, + "loss": 1.7723, + "step": 660500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3356253087558252e-05, + "loss": 1.7941, + "step": 661000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3351227560393016e-05, + "loss": 1.8041, + "step": 661500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3346202033227783e-05, + "loss": 1.7857, + "step": 662000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3341176506062546e-05, + "loss": 1.7927, + "step": 662500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3336150978897311e-05, + "loss": 1.8029, + "step": 663000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3331125451732075e-05, + "loss": 1.7861, + "step": 663500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3326099924566838e-05, + "loss": 1.7824, + "step": 664000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3321074397401603e-05, + "loss": 1.7953, + "step": 664500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3316048870236367e-05, + "loss": 1.791, + "step": 665000 + }, + { + "epoch": 0.33, + "learning_rate": 1.331102334307113e-05, + "loss": 1.7883, + "step": 665500 + }, + { + "epoch": 0.33, + "learning_rate": 1.3305997815905895e-05, + "loss": 1.7698, + "step": 666000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3300972288740659e-05, + "loss": 1.7773, + "step": 666500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3295946761575424e-05, + "loss": 1.7845, + "step": 667000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3290921234410187e-05, + "loss": 1.7934, + "step": 667500 + }, + { + "epoch": 0.34, + "learning_rate": 1.328589570724495e-05, + "loss": 1.7778, + "step": 668000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3280870180079718e-05, + "loss": 1.7924, + "step": 668500 + }, + { + "epoch": 0.34, + "learning_rate": 1.327584465291448e-05, + "loss": 1.7947, + "step": 669000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3270819125749243e-05, + "loss": 1.7741, + "step": 669500 + }, + { + "epoch": 0.34, + "learning_rate": 1.326579359858401e-05, + "loss": 1.8021, + "step": 670000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3260768071418773e-05, + "loss": 1.7965, + "step": 670500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3255742544253537e-05, + "loss": 1.7772, + "step": 671000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3250717017088302e-05, + "loss": 1.798, + "step": 671500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3245691489923065e-05, + "loss": 1.7809, + "step": 672000 + }, + { + "epoch": 0.34, + "learning_rate": 1.324066596275783e-05, + "loss": 1.7924, + "step": 672500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3235640435592594e-05, + "loss": 1.7766, + "step": 673000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3230614908427357e-05, + "loss": 1.8046, + "step": 673500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3225589381262122e-05, + "loss": 1.7639, + "step": 674000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3220563854096886e-05, + "loss": 1.7892, + "step": 674500 + }, + { + "epoch": 0.34, + "learning_rate": 1.321553832693165e-05, + "loss": 1.7802, + "step": 675000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3210512799766414e-05, + "loss": 1.788, + "step": 675500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3205487272601178e-05, + "loss": 1.7908, + "step": 676000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3200461745435945e-05, + "loss": 1.8166, + "step": 676500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3195436218270708e-05, + "loss": 1.7905, + "step": 677000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3190410691105472e-05, + "loss": 1.778, + "step": 677500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3185385163940237e-05, + "loss": 1.7822, + "step": 678000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3180359636775e-05, + "loss": 1.7942, + "step": 678500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3175334109609764e-05, + "loss": 1.7965, + "step": 679000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3170308582444529e-05, + "loss": 1.7656, + "step": 679500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3165283055279292e-05, + "loss": 1.7911, + "step": 680000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3160257528114056e-05, + "loss": 1.7907, + "step": 680500 + }, + { + "epoch": 0.34, + "learning_rate": 1.315523200094882e-05, + "loss": 1.7731, + "step": 681000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3150206473783584e-05, + "loss": 1.7816, + "step": 681500 + }, + { + "epoch": 0.34, + "learning_rate": 1.314518094661835e-05, + "loss": 1.7843, + "step": 682000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3140155419453113e-05, + "loss": 1.7837, + "step": 682500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3135129892287876e-05, + "loss": 1.7962, + "step": 683000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3130104365122643e-05, + "loss": 1.8018, + "step": 683500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3125078837957405e-05, + "loss": 1.7808, + "step": 684000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3120053310792168e-05, + "loss": 1.7821, + "step": 684500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3115027783626935e-05, + "loss": 1.7735, + "step": 685000 + }, + { + "epoch": 0.34, + "learning_rate": 1.3110002256461699e-05, + "loss": 1.7863, + "step": 685500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3104976729296462e-05, + "loss": 1.7685, + "step": 686000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3099951202131227e-05, + "loss": 1.7822, + "step": 686500 + }, + { + "epoch": 0.35, + "learning_rate": 1.309492567496599e-05, + "loss": 1.7756, + "step": 687000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3089900147800756e-05, + "loss": 1.8025, + "step": 687500 + }, + { + "epoch": 0.35, + "learning_rate": 1.308487462063552e-05, + "loss": 1.7742, + "step": 688000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3079849093470283e-05, + "loss": 1.7892, + "step": 688500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3074823566305048e-05, + "loss": 1.7942, + "step": 689000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3069798039139811e-05, + "loss": 1.7698, + "step": 689500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3064772511974575e-05, + "loss": 1.7748, + "step": 690000 + }, + { + "epoch": 0.35, + "learning_rate": 1.305974698480934e-05, + "loss": 1.7891, + "step": 690500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3054721457644103e-05, + "loss": 1.7868, + "step": 691000 + }, + { + "epoch": 0.35, + "learning_rate": 1.304969593047887e-05, + "loss": 1.7793, + "step": 691500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3044670403313634e-05, + "loss": 1.781, + "step": 692000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3039644876148397e-05, + "loss": 1.786, + "step": 692500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3034619348983162e-05, + "loss": 1.7757, + "step": 693000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3029593821817926e-05, + "loss": 1.7819, + "step": 693500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3024568294652689e-05, + "loss": 1.7891, + "step": 694000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3019542767487454e-05, + "loss": 1.7843, + "step": 694500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3014517240322218e-05, + "loss": 1.792, + "step": 695000 + }, + { + "epoch": 0.35, + "learning_rate": 1.3009491713156981e-05, + "loss": 1.7953, + "step": 695500 + }, + { + "epoch": 0.35, + "learning_rate": 1.3004466185991746e-05, + "loss": 1.7838, + "step": 696000 + }, + { + "epoch": 0.35, + "learning_rate": 1.299944065882651e-05, + "loss": 1.7732, + "step": 696500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2994415131661275e-05, + "loss": 1.7918, + "step": 697000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2989389604496038e-05, + "loss": 1.7921, + "step": 697500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2984364077330802e-05, + "loss": 1.7892, + "step": 698000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2979338550165569e-05, + "loss": 1.7963, + "step": 698500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2974313023000332e-05, + "loss": 1.7915, + "step": 699000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2969287495835094e-05, + "loss": 1.7855, + "step": 699500 + }, + { + "epoch": 0.35, + "learning_rate": 1.296426196866986e-05, + "loss": 1.784, + "step": 700000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2959236441504624e-05, + "loss": 1.7795, + "step": 700500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2954210914339389e-05, + "loss": 1.7968, + "step": 701000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2949185387174153e-05, + "loss": 1.7659, + "step": 701500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2944159860008916e-05, + "loss": 1.7714, + "step": 702000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2939134332843681e-05, + "loss": 1.7674, + "step": 702500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2934108805678445e-05, + "loss": 1.7882, + "step": 703000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2929083278513208e-05, + "loss": 1.7676, + "step": 703500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2924057751347973e-05, + "loss": 1.7879, + "step": 704000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2919032224182737e-05, + "loss": 1.7697, + "step": 704500 + }, + { + "epoch": 0.35, + "learning_rate": 1.29140066970175e-05, + "loss": 1.7859, + "step": 705000 + }, + { + "epoch": 0.35, + "learning_rate": 1.2908981169852265e-05, + "loss": 1.7973, + "step": 705500 + }, + { + "epoch": 0.35, + "learning_rate": 1.2903955642687029e-05, + "loss": 1.7612, + "step": 706000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2898930115521796e-05, + "loss": 1.7813, + "step": 706500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2893904588356559e-05, + "loss": 1.7927, + "step": 707000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2888879061191322e-05, + "loss": 1.79, + "step": 707500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2883853534026088e-05, + "loss": 1.8007, + "step": 708000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2878828006860851e-05, + "loss": 1.7726, + "step": 708500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2873802479695614e-05, + "loss": 1.7682, + "step": 709000 + }, + { + "epoch": 0.36, + "learning_rate": 1.286877695253038e-05, + "loss": 1.78, + "step": 709500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2863751425365143e-05, + "loss": 1.7892, + "step": 710000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2858725898199908e-05, + "loss": 1.7652, + "step": 710500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2853700371034672e-05, + "loss": 1.7841, + "step": 711000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2848674843869435e-05, + "loss": 1.7821, + "step": 711500 + }, + { + "epoch": 0.36, + "learning_rate": 1.28436493167042e-05, + "loss": 1.7822, + "step": 712000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2838623789538964e-05, + "loss": 1.7737, + "step": 712500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2833598262373727e-05, + "loss": 1.7867, + "step": 713000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2828572735208494e-05, + "loss": 1.7882, + "step": 713500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2823547208043257e-05, + "loss": 1.7792, + "step": 714000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2818521680878019e-05, + "loss": 1.7789, + "step": 714500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2813496153712786e-05, + "loss": 1.7655, + "step": 715000 + }, + { + "epoch": 0.36, + "learning_rate": 1.280847062654755e-05, + "loss": 1.7728, + "step": 715500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2803445099382315e-05, + "loss": 1.7781, + "step": 716000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2798419572217078e-05, + "loss": 1.7894, + "step": 716500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2793394045051841e-05, + "loss": 1.777, + "step": 717000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2788368517886607e-05, + "loss": 1.768, + "step": 717500 + }, + { + "epoch": 0.36, + "learning_rate": 1.278334299072137e-05, + "loss": 1.7897, + "step": 718000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2778317463556134e-05, + "loss": 1.7732, + "step": 718500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2773291936390899e-05, + "loss": 1.8003, + "step": 719000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2768266409225662e-05, + "loss": 1.7736, + "step": 719500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2763240882060426e-05, + "loss": 1.7661, + "step": 720000 + }, + { + "epoch": 0.36, + "learning_rate": 1.275821535489519e-05, + "loss": 1.786, + "step": 720500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2753189827729954e-05, + "loss": 1.7868, + "step": 721000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2748164300564721e-05, + "loss": 1.7842, + "step": 721500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2743138773399484e-05, + "loss": 1.7734, + "step": 722000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2738113246234248e-05, + "loss": 1.7816, + "step": 722500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2733087719069013e-05, + "loss": 1.778, + "step": 723000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2728062191903776e-05, + "loss": 1.786, + "step": 723500 + }, + { + "epoch": 0.36, + "learning_rate": 1.272303666473854e-05, + "loss": 1.7917, + "step": 724000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2718011137573305e-05, + "loss": 1.782, + "step": 724500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2712985610408068e-05, + "loss": 1.7829, + "step": 725000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2707960083242834e-05, + "loss": 1.7803, + "step": 725500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2702934556077597e-05, + "loss": 1.7804, + "step": 726000 + }, + { + "epoch": 0.37, + "learning_rate": 1.269790902891236e-05, + "loss": 1.7874, + "step": 726500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2692883501747126e-05, + "loss": 1.7909, + "step": 727000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2687857974581889e-05, + "loss": 1.7767, + "step": 727500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2682832447416653e-05, + "loss": 1.7621, + "step": 728000 + }, + { + "epoch": 0.37, + "learning_rate": 1.267780692025142e-05, + "loss": 1.7971, + "step": 728500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2672781393086183e-05, + "loss": 1.7809, + "step": 729000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2667755865920945e-05, + "loss": 1.7763, + "step": 729500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2662730338755711e-05, + "loss": 1.7818, + "step": 730000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2657704811590475e-05, + "loss": 1.7706, + "step": 730500 + }, + { + "epoch": 0.37, + "learning_rate": 1.265267928442524e-05, + "loss": 1.7802, + "step": 731000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2647653757260003e-05, + "loss": 1.7988, + "step": 731500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2642628230094767e-05, + "loss": 1.778, + "step": 732000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2637602702929532e-05, + "loss": 1.7989, + "step": 732500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2632577175764295e-05, + "loss": 1.7649, + "step": 733000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2627551648599059e-05, + "loss": 1.7704, + "step": 733500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2622526121433824e-05, + "loss": 1.7861, + "step": 734000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2617500594268588e-05, + "loss": 1.7707, + "step": 734500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2612475067103354e-05, + "loss": 1.7962, + "step": 735000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2607449539938116e-05, + "loss": 1.7691, + "step": 735500 + }, + { + "epoch": 0.37, + "learning_rate": 1.260242401277288e-05, + "loss": 1.7861, + "step": 736000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2597398485607646e-05, + "loss": 1.7998, + "step": 736500 + }, + { + "epoch": 0.37, + "learning_rate": 1.259237295844241e-05, + "loss": 1.8115, + "step": 737000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2587347431277173e-05, + "loss": 1.796, + "step": 737500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2582321904111938e-05, + "loss": 1.7824, + "step": 738000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2577296376946702e-05, + "loss": 1.7609, + "step": 738500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2572270849781465e-05, + "loss": 1.7934, + "step": 739000 + }, + { + "epoch": 0.37, + "learning_rate": 1.256724532261623e-05, + "loss": 1.7964, + "step": 739500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2562219795450994e-05, + "loss": 1.7984, + "step": 740000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2557194268285759e-05, + "loss": 1.7685, + "step": 740500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2552168741120522e-05, + "loss": 1.7909, + "step": 741000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2547143213955286e-05, + "loss": 1.7817, + "step": 741500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2542117686790051e-05, + "loss": 1.7534, + "step": 742000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2537092159624815e-05, + "loss": 1.7756, + "step": 742500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2532066632459578e-05, + "loss": 1.7649, + "step": 743000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2527041105294345e-05, + "loss": 1.7655, + "step": 743500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2522015578129108e-05, + "loss": 1.788, + "step": 744000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2516990050963873e-05, + "loss": 1.7853, + "step": 744500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2511964523798637e-05, + "loss": 1.7918, + "step": 745000 + }, + { + "epoch": 0.37, + "learning_rate": 1.25069389966334e-05, + "loss": 1.7738, + "step": 745500 + }, + { + "epoch": 0.37, + "learning_rate": 1.2501913469468165e-05, + "loss": 1.7885, + "step": 746000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2496887942302929e-05, + "loss": 1.7856, + "step": 746500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2491862415137692e-05, + "loss": 1.7689, + "step": 747000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2486836887972457e-05, + "loss": 1.7619, + "step": 747500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2481811360807221e-05, + "loss": 1.7795, + "step": 748000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2476785833641984e-05, + "loss": 1.783, + "step": 748500 + }, + { + "epoch": 0.38, + "learning_rate": 1.247176030647675e-05, + "loss": 1.7789, + "step": 749000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2466734779311513e-05, + "loss": 1.789, + "step": 749500 + }, + { + "epoch": 0.38, + "learning_rate": 1.246170925214628e-05, + "loss": 1.7632, + "step": 750000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2456683724981043e-05, + "loss": 1.7661, + "step": 750500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2451658197815805e-05, + "loss": 1.7827, + "step": 751000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2446632670650572e-05, + "loss": 1.7799, + "step": 751500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2441607143485335e-05, + "loss": 1.7875, + "step": 752000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2436581616320099e-05, + "loss": 1.7978, + "step": 752500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2431556089154864e-05, + "loss": 1.7887, + "step": 753000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2426530561989627e-05, + "loss": 1.7598, + "step": 753500 + }, + { + "epoch": 0.38, + "learning_rate": 1.242150503482439e-05, + "loss": 1.7878, + "step": 754000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2416479507659156e-05, + "loss": 1.7812, + "step": 754500 + }, + { + "epoch": 0.38, + "learning_rate": 1.241145398049392e-05, + "loss": 1.7628, + "step": 755000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2406428453328684e-05, + "loss": 1.7779, + "step": 755500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2401402926163448e-05, + "loss": 1.8025, + "step": 756000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2396377398998211e-05, + "loss": 1.7737, + "step": 756500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2391351871832977e-05, + "loss": 1.7862, + "step": 757000 + }, + { + "epoch": 0.38, + "learning_rate": 1.238632634466774e-05, + "loss": 1.7688, + "step": 757500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2381300817502503e-05, + "loss": 1.7893, + "step": 758000 + }, + { + "epoch": 0.38, + "learning_rate": 1.237627529033727e-05, + "loss": 1.7915, + "step": 758500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2371249763172034e-05, + "loss": 1.7815, + "step": 759000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2366224236006799e-05, + "loss": 1.7735, + "step": 759500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2361198708841562e-05, + "loss": 1.7746, + "step": 760000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2356173181676326e-05, + "loss": 1.7613, + "step": 760500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2351147654511091e-05, + "loss": 1.776, + "step": 761000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2346122127345854e-05, + "loss": 1.7884, + "step": 761500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2341096600180618e-05, + "loss": 1.7961, + "step": 762000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2336071073015383e-05, + "loss": 1.7668, + "step": 762500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2331045545850146e-05, + "loss": 1.7815, + "step": 763000 + }, + { + "epoch": 0.38, + "learning_rate": 1.232602001868491e-05, + "loss": 1.7705, + "step": 763500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2320994491519675e-05, + "loss": 1.7686, + "step": 764000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2315968964354438e-05, + "loss": 1.7752, + "step": 764500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2310943437189205e-05, + "loss": 1.7645, + "step": 765000 + }, + { + "epoch": 0.38, + "learning_rate": 1.2305917910023969e-05, + "loss": 1.7864, + "step": 765500 + }, + { + "epoch": 0.38, + "learning_rate": 1.230089238285873e-05, + "loss": 1.7817, + "step": 766000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2295866855693497e-05, + "loss": 1.7684, + "step": 766500 + }, + { + "epoch": 0.39, + "learning_rate": 1.229084132852826e-05, + "loss": 1.7758, + "step": 767000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2285815801363024e-05, + "loss": 1.7718, + "step": 767500 + }, + { + "epoch": 0.39, + "learning_rate": 1.228079027419779e-05, + "loss": 1.7852, + "step": 768000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2275764747032553e-05, + "loss": 1.776, + "step": 768500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2270739219867318e-05, + "loss": 1.7732, + "step": 769000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2265713692702081e-05, + "loss": 1.7723, + "step": 769500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2260688165536845e-05, + "loss": 1.7766, + "step": 770000 + }, + { + "epoch": 0.39, + "learning_rate": 1.225566263837161e-05, + "loss": 1.7843, + "step": 770500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2250637111206373e-05, + "loss": 1.7759, + "step": 771000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2245611584041137e-05, + "loss": 1.7778, + "step": 771500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2240586056875902e-05, + "loss": 1.7665, + "step": 772000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2235560529710665e-05, + "loss": 1.7897, + "step": 772500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2230535002545429e-05, + "loss": 1.7669, + "step": 773000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2225509475380196e-05, + "loss": 1.7997, + "step": 773500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2220483948214959e-05, + "loss": 1.7877, + "step": 774000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2215458421049724e-05, + "loss": 1.764, + "step": 774500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2210432893884488e-05, + "loss": 1.7897, + "step": 775000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2205407366719251e-05, + "loss": 1.7851, + "step": 775500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2200381839554016e-05, + "loss": 1.7784, + "step": 776000 + }, + { + "epoch": 0.39, + "learning_rate": 1.219535631238878e-05, + "loss": 1.784, + "step": 776500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2190330785223543e-05, + "loss": 1.7744, + "step": 777000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2185305258058308e-05, + "loss": 1.7649, + "step": 777500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2180279730893072e-05, + "loss": 1.7631, + "step": 778000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2175254203727837e-05, + "loss": 1.7804, + "step": 778500 + }, + { + "epoch": 0.39, + "learning_rate": 1.21702286765626e-05, + "loss": 1.7698, + "step": 779000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2165203149397364e-05, + "loss": 1.7754, + "step": 779500 + }, + { + "epoch": 0.39, + "learning_rate": 1.216017762223213e-05, + "loss": 1.7875, + "step": 780000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2155152095066894e-05, + "loss": 1.7672, + "step": 780500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2150126567901656e-05, + "loss": 1.7742, + "step": 781000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2145101040736423e-05, + "loss": 1.7842, + "step": 781500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2140075513571186e-05, + "loss": 1.7883, + "step": 782000 + }, + { + "epoch": 0.39, + "learning_rate": 1.213504998640595e-05, + "loss": 1.7738, + "step": 782500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2130024459240715e-05, + "loss": 1.7815, + "step": 783000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2124998932075478e-05, + "loss": 1.789, + "step": 783500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2119973404910243e-05, + "loss": 1.7847, + "step": 784000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2114947877745007e-05, + "loss": 1.7984, + "step": 784500 + }, + { + "epoch": 0.39, + "learning_rate": 1.210992235057977e-05, + "loss": 1.7988, + "step": 785000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2104896823414535e-05, + "loss": 1.7581, + "step": 785500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2099871296249299e-05, + "loss": 1.7957, + "step": 786000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2094845769084062e-05, + "loss": 1.79, + "step": 786500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2089820241918827e-05, + "loss": 1.7679, + "step": 787000 + }, + { + "epoch": 0.4, + "learning_rate": 1.208479471475359e-05, + "loss": 1.757, + "step": 787500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2079769187588354e-05, + "loss": 1.7732, + "step": 788000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2074743660423121e-05, + "loss": 1.7608, + "step": 788500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2069718133257885e-05, + "loss": 1.7767, + "step": 789000 + }, + { + "epoch": 0.4, + "learning_rate": 1.206469260609265e-05, + "loss": 1.764, + "step": 789500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2059667078927413e-05, + "loss": 1.788, + "step": 790000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2054641551762177e-05, + "loss": 1.7724, + "step": 790500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2049616024596942e-05, + "loss": 1.7646, + "step": 791000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2044590497431705e-05, + "loss": 1.7834, + "step": 791500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2039564970266469e-05, + "loss": 1.7859, + "step": 792000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2034539443101234e-05, + "loss": 1.7754, + "step": 792500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2029513915935997e-05, + "loss": 1.7386, + "step": 793000 + }, + { + "epoch": 0.4, + "learning_rate": 1.2024488388770762e-05, + "loss": 1.7763, + "step": 793500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2019462861605526e-05, + "loss": 1.7929, + "step": 794000 + }, + { + "epoch": 0.4, + "learning_rate": 1.201443733444029e-05, + "loss": 1.797, + "step": 794500 + }, + { + "epoch": 0.4, + "learning_rate": 1.2009411807275056e-05, + "loss": 1.7786, + "step": 795000 + }, + { + "epoch": 0.4, + "learning_rate": 1.200438628010982e-05, + "loss": 1.7712, + "step": 795500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1999360752944583e-05, + "loss": 1.7807, + "step": 796000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1994335225779348e-05, + "loss": 1.7895, + "step": 796500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1989309698614112e-05, + "loss": 1.7748, + "step": 797000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1984284171448875e-05, + "loss": 1.782, + "step": 797500 + }, + { + "epoch": 0.4, + "learning_rate": 1.197925864428364e-05, + "loss": 1.7613, + "step": 798000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1974233117118404e-05, + "loss": 1.7717, + "step": 798500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1969207589953169e-05, + "loss": 1.7574, + "step": 799000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1964182062787932e-05, + "loss": 1.7616, + "step": 799500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1959156535622696e-05, + "loss": 1.7604, + "step": 800000 + }, + { + "epoch": 0.4, + "learning_rate": 1.195413100845746e-05, + "loss": 1.7733, + "step": 800500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1949105481292224e-05, + "loss": 1.7777, + "step": 801000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1944079954126988e-05, + "loss": 1.7727, + "step": 801500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1939054426961754e-05, + "loss": 1.7815, + "step": 802000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1934028899796516e-05, + "loss": 1.7677, + "step": 802500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1929003372631283e-05, + "loss": 1.7906, + "step": 803000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1923977845466047e-05, + "loss": 1.7702, + "step": 803500 + }, + { + "epoch": 0.4, + "learning_rate": 1.191895231830081e-05, + "loss": 1.7696, + "step": 804000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1913926791135575e-05, + "loss": 1.7652, + "step": 804500 + }, + { + "epoch": 0.4, + "learning_rate": 1.1908901263970339e-05, + "loss": 1.7643, + "step": 805000 + }, + { + "epoch": 0.4, + "learning_rate": 1.1903875736805102e-05, + "loss": 1.7663, + "step": 805500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1898850209639867e-05, + "loss": 1.7614, + "step": 806000 + }, + { + "epoch": 0.41, + "learning_rate": 1.189382468247463e-05, + "loss": 1.7707, + "step": 806500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1888799155309394e-05, + "loss": 1.7822, + "step": 807000 + }, + { + "epoch": 0.41, + "learning_rate": 1.188377362814416e-05, + "loss": 1.7633, + "step": 807500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1878748100978923e-05, + "loss": 1.7823, + "step": 808000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1873722573813688e-05, + "loss": 1.7828, + "step": 808500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1868697046648451e-05, + "loss": 1.7767, + "step": 809000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1863671519483215e-05, + "loss": 1.772, + "step": 809500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1858645992317981e-05, + "loss": 1.7591, + "step": 810000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1853620465152745e-05, + "loss": 1.7906, + "step": 810500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1848594937987508e-05, + "loss": 1.7899, + "step": 811000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1843569410822274e-05, + "loss": 1.7911, + "step": 811500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1838543883657037e-05, + "loss": 1.7883, + "step": 812000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1833518356491802e-05, + "loss": 1.7886, + "step": 812500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1828492829326566e-05, + "loss": 1.7794, + "step": 813000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1823467302161329e-05, + "loss": 1.7683, + "step": 813500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1818441774996094e-05, + "loss": 1.7847, + "step": 814000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1813416247830858e-05, + "loss": 1.7959, + "step": 814500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1808390720665621e-05, + "loss": 1.7569, + "step": 815000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1803365193500386e-05, + "loss": 1.7704, + "step": 815500 + }, + { + "epoch": 0.41, + "learning_rate": 1.179833966633515e-05, + "loss": 1.7677, + "step": 816000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1793314139169913e-05, + "loss": 1.7801, + "step": 816500 + }, + { + "epoch": 0.41, + "learning_rate": 1.178828861200468e-05, + "loss": 1.7711, + "step": 817000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1783263084839442e-05, + "loss": 1.7721, + "step": 817500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1778237557674208e-05, + "loss": 1.778, + "step": 818000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1773212030508972e-05, + "loss": 1.7632, + "step": 818500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1768186503343735e-05, + "loss": 1.7739, + "step": 819000 + }, + { + "epoch": 0.41, + "learning_rate": 1.17631609761785e-05, + "loss": 1.7787, + "step": 819500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1758135449013264e-05, + "loss": 1.7656, + "step": 820000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1753109921848027e-05, + "loss": 1.7643, + "step": 820500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1748084394682793e-05, + "loss": 1.7817, + "step": 821000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1743058867517556e-05, + "loss": 1.777, + "step": 821500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1738033340352321e-05, + "loss": 1.7757, + "step": 822000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1733007813187085e-05, + "loss": 1.7704, + "step": 822500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1727982286021848e-05, + "loss": 1.7484, + "step": 823000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1722956758856613e-05, + "loss": 1.765, + "step": 823500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1717931231691377e-05, + "loss": 1.7603, + "step": 824000 + }, + { + "epoch": 0.41, + "learning_rate": 1.171290570452614e-05, + "loss": 1.7636, + "step": 824500 + }, + { + "epoch": 0.41, + "learning_rate": 1.1707880177360907e-05, + "loss": 1.7738, + "step": 825000 + }, + { + "epoch": 0.41, + "learning_rate": 1.170285465019567e-05, + "loss": 1.7707, + "step": 825500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1697829123030434e-05, + "loss": 1.7799, + "step": 826000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1692803595865199e-05, + "loss": 1.7986, + "step": 826500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1687778068699962e-05, + "loss": 1.7579, + "step": 827000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1682752541534728e-05, + "loss": 1.7743, + "step": 827500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1677727014369491e-05, + "loss": 1.7674, + "step": 828000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1672701487204254e-05, + "loss": 1.7649, + "step": 828500 + }, + { + "epoch": 0.42, + "learning_rate": 1.166767596003902e-05, + "loss": 1.7785, + "step": 829000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1662650432873783e-05, + "loss": 1.7611, + "step": 829500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1657624905708546e-05, + "loss": 1.7749, + "step": 830000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1652599378543312e-05, + "loss": 1.7621, + "step": 830500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1647573851378075e-05, + "loss": 1.754, + "step": 831000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1642548324212839e-05, + "loss": 1.7747, + "step": 831500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1637522797047605e-05, + "loss": 1.7668, + "step": 832000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1632497269882369e-05, + "loss": 1.7609, + "step": 832500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1627471742717134e-05, + "loss": 1.7732, + "step": 833000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1622446215551897e-05, + "loss": 1.7652, + "step": 833500 + }, + { + "epoch": 0.42, + "learning_rate": 1.161742068838666e-05, + "loss": 1.7851, + "step": 834000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1612395161221426e-05, + "loss": 1.7555, + "step": 834500 + }, + { + "epoch": 0.42, + "learning_rate": 1.160736963405619e-05, + "loss": 1.7765, + "step": 835000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1602344106890953e-05, + "loss": 1.777, + "step": 835500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1597318579725718e-05, + "loss": 1.7505, + "step": 836000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1592293052560481e-05, + "loss": 1.7763, + "step": 836500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1587267525395247e-05, + "loss": 1.7663, + "step": 837000 + }, + { + "epoch": 0.42, + "learning_rate": 1.158224199823001e-05, + "loss": 1.7719, + "step": 837500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1577216471064773e-05, + "loss": 1.7755, + "step": 838000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1572190943899539e-05, + "loss": 1.7865, + "step": 838500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1567165416734302e-05, + "loss": 1.7585, + "step": 839000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1562139889569066e-05, + "loss": 1.7637, + "step": 839500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1557114362403832e-05, + "loss": 1.7757, + "step": 840000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1552088835238596e-05, + "loss": 1.7441, + "step": 840500 + }, + { + "epoch": 0.42, + "learning_rate": 1.154706330807336e-05, + "loss": 1.7694, + "step": 841000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1542037780908124e-05, + "loss": 1.7562, + "step": 841500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1537012253742888e-05, + "loss": 1.7766, + "step": 842000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1531986726577653e-05, + "loss": 1.776, + "step": 842500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1526961199412416e-05, + "loss": 1.7579, + "step": 843000 + }, + { + "epoch": 0.42, + "learning_rate": 1.152193567224718e-05, + "loss": 1.7624, + "step": 843500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1516910145081945e-05, + "loss": 1.7831, + "step": 844000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1511884617916708e-05, + "loss": 1.7518, + "step": 844500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1506859090751472e-05, + "loss": 1.767, + "step": 845000 + }, + { + "epoch": 0.42, + "learning_rate": 1.1501833563586237e-05, + "loss": 1.7685, + "step": 845500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1496808036421e-05, + "loss": 1.7628, + "step": 846000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1491782509255767e-05, + "loss": 1.7624, + "step": 846500 + }, + { + "epoch": 0.43, + "learning_rate": 1.148675698209053e-05, + "loss": 1.7763, + "step": 847000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1481731454925294e-05, + "loss": 1.7769, + "step": 847500 + }, + { + "epoch": 0.43, + "learning_rate": 1.147670592776006e-05, + "loss": 1.763, + "step": 848000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1471680400594823e-05, + "loss": 1.771, + "step": 848500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1466654873429586e-05, + "loss": 1.7667, + "step": 849000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1461629346264351e-05, + "loss": 1.7661, + "step": 849500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1456603819099115e-05, + "loss": 1.7599, + "step": 850000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1451578291933878e-05, + "loss": 1.7618, + "step": 850500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1446552764768643e-05, + "loss": 1.7665, + "step": 851000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1441527237603407e-05, + "loss": 1.7695, + "step": 851500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1436501710438172e-05, + "loss": 1.7756, + "step": 852000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1431476183272935e-05, + "loss": 1.7505, + "step": 852500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1426450656107699e-05, + "loss": 1.7707, + "step": 853000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1421425128942466e-05, + "loss": 1.782, + "step": 853500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1416399601777227e-05, + "loss": 1.7883, + "step": 854000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1411374074611991e-05, + "loss": 1.7664, + "step": 854500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1406348547446758e-05, + "loss": 1.7581, + "step": 855000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1401323020281521e-05, + "loss": 1.7656, + "step": 855500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1396297493116286e-05, + "loss": 1.7475, + "step": 856000 + }, + { + "epoch": 0.43, + "learning_rate": 1.139127196595105e-05, + "loss": 1.7512, + "step": 856500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1386246438785813e-05, + "loss": 1.7656, + "step": 857000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1381220911620578e-05, + "loss": 1.7889, + "step": 857500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1376195384455342e-05, + "loss": 1.7481, + "step": 858000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1371169857290105e-05, + "loss": 1.7653, + "step": 858500 + }, + { + "epoch": 0.43, + "learning_rate": 1.136614433012487e-05, + "loss": 1.7775, + "step": 859000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1361118802959634e-05, + "loss": 1.7452, + "step": 859500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1356093275794397e-05, + "loss": 1.7604, + "step": 860000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1351067748629162e-05, + "loss": 1.7539, + "step": 860500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1346042221463926e-05, + "loss": 1.7813, + "step": 861000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1341016694298693e-05, + "loss": 1.7692, + "step": 861500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1335991167133456e-05, + "loss": 1.7657, + "step": 862000 + }, + { + "epoch": 0.43, + "learning_rate": 1.133096563996822e-05, + "loss": 1.7693, + "step": 862500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1325940112802985e-05, + "loss": 1.7643, + "step": 863000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1320914585637748e-05, + "loss": 1.777, + "step": 863500 + }, + { + "epoch": 0.43, + "learning_rate": 1.1315889058472512e-05, + "loss": 1.7665, + "step": 864000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1310863531307277e-05, + "loss": 1.77, + "step": 864500 + }, + { + "epoch": 0.43, + "learning_rate": 1.130583800414204e-05, + "loss": 1.7731, + "step": 865000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1300812476976804e-05, + "loss": 1.7614, + "step": 865500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1295786949811569e-05, + "loss": 1.752, + "step": 866000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1290761422646332e-05, + "loss": 1.7487, + "step": 866500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1285735895481097e-05, + "loss": 1.7675, + "step": 867000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1280710368315861e-05, + "loss": 1.7483, + "step": 867500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1275684841150624e-05, + "loss": 1.7623, + "step": 868000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1270659313985391e-05, + "loss": 1.7712, + "step": 868500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1265633786820153e-05, + "loss": 1.7765, + "step": 869000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1260608259654916e-05, + "loss": 1.7692, + "step": 869500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1255582732489683e-05, + "loss": 1.7653, + "step": 870000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1250557205324447e-05, + "loss": 1.758, + "step": 870500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1245531678159212e-05, + "loss": 1.7773, + "step": 871000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1240506150993975e-05, + "loss": 1.7673, + "step": 871500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1235480623828739e-05, + "loss": 1.7621, + "step": 872000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1230455096663504e-05, + "loss": 1.7628, + "step": 872500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1225429569498267e-05, + "loss": 1.7714, + "step": 873000 + }, + { + "epoch": 0.44, + "learning_rate": 1.122040404233303e-05, + "loss": 1.7604, + "step": 873500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1215378515167796e-05, + "loss": 1.7581, + "step": 874000 + }, + { + "epoch": 0.44, + "learning_rate": 1.121035298800256e-05, + "loss": 1.7808, + "step": 874500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1205327460837323e-05, + "loss": 1.7614, + "step": 875000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1200301933672088e-05, + "loss": 1.7809, + "step": 875500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1195276406506851e-05, + "loss": 1.7812, + "step": 876000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1190250879341618e-05, + "loss": 1.7769, + "step": 876500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1185225352176382e-05, + "loss": 1.7848, + "step": 877000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1180199825011145e-05, + "loss": 1.7693, + "step": 877500 + }, + { + "epoch": 0.44, + "learning_rate": 1.117517429784591e-05, + "loss": 1.7697, + "step": 878000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1170148770680674e-05, + "loss": 1.7566, + "step": 878500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1165123243515437e-05, + "loss": 1.7698, + "step": 879000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1160097716350202e-05, + "loss": 1.7768, + "step": 879500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1155072189184966e-05, + "loss": 1.7713, + "step": 880000 + }, + { + "epoch": 0.44, + "learning_rate": 1.115004666201973e-05, + "loss": 1.776, + "step": 880500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1145021134854494e-05, + "loss": 1.7734, + "step": 881000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1139995607689258e-05, + "loss": 1.7698, + "step": 881500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1134970080524023e-05, + "loss": 1.7641, + "step": 882000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1129944553358786e-05, + "loss": 1.7616, + "step": 882500 + }, + { + "epoch": 0.44, + "learning_rate": 1.112491902619355e-05, + "loss": 1.7721, + "step": 883000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1119893499028317e-05, + "loss": 1.7682, + "step": 883500 + }, + { + "epoch": 0.44, + "learning_rate": 1.111486797186308e-05, + "loss": 1.7737, + "step": 884000 + }, + { + "epoch": 0.44, + "learning_rate": 1.1109842444697842e-05, + "loss": 1.7518, + "step": 884500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1104816917532609e-05, + "loss": 1.7906, + "step": 885000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1099791390367372e-05, + "loss": 1.7556, + "step": 885500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1094765863202137e-05, + "loss": 1.7844, + "step": 886000 + }, + { + "epoch": 0.45, + "learning_rate": 1.10897403360369e-05, + "loss": 1.7558, + "step": 886500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1084714808871664e-05, + "loss": 1.7654, + "step": 887000 + }, + { + "epoch": 0.45, + "learning_rate": 1.107968928170643e-05, + "loss": 1.7686, + "step": 887500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1074663754541193e-05, + "loss": 1.7573, + "step": 888000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1069638227375956e-05, + "loss": 1.7632, + "step": 888500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1064612700210721e-05, + "loss": 1.7627, + "step": 889000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1059587173045485e-05, + "loss": 1.7649, + "step": 889500 + }, + { + "epoch": 0.45, + "learning_rate": 1.105456164588025e-05, + "loss": 1.7667, + "step": 890000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1049536118715013e-05, + "loss": 1.7465, + "step": 890500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1044510591549777e-05, + "loss": 1.7752, + "step": 891000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1039485064384544e-05, + "loss": 1.7675, + "step": 891500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1034459537219307e-05, + "loss": 1.7782, + "step": 892000 + }, + { + "epoch": 0.45, + "learning_rate": 1.102943401005407e-05, + "loss": 1.7555, + "step": 892500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1024408482888836e-05, + "loss": 1.7587, + "step": 893000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1019382955723599e-05, + "loss": 1.759, + "step": 893500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1014357428558363e-05, + "loss": 1.7611, + "step": 894000 + }, + { + "epoch": 0.45, + "learning_rate": 1.1009331901393128e-05, + "loss": 1.7484, + "step": 894500 + }, + { + "epoch": 0.45, + "learning_rate": 1.1004306374227891e-05, + "loss": 1.7565, + "step": 895000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0999280847062656e-05, + "loss": 1.7499, + "step": 895500 + }, + { + "epoch": 0.45, + "learning_rate": 1.099425531989742e-05, + "loss": 1.7882, + "step": 896000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0989229792732183e-05, + "loss": 1.7714, + "step": 896500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0984204265566948e-05, + "loss": 1.7586, + "step": 897000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0979178738401712e-05, + "loss": 1.7596, + "step": 897500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0974153211236475e-05, + "loss": 1.7555, + "step": 898000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0969127684071242e-05, + "loss": 1.7696, + "step": 898500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0964102156906005e-05, + "loss": 1.7661, + "step": 899000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0959076629740767e-05, + "loss": 1.7767, + "step": 899500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0954051102575534e-05, + "loss": 1.7515, + "step": 900000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0949025575410298e-05, + "loss": 1.7663, + "step": 900500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0944000048245063e-05, + "loss": 1.7512, + "step": 901000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0938974521079826e-05, + "loss": 1.7532, + "step": 901500 + }, + { + "epoch": 0.45, + "learning_rate": 1.093394899391459e-05, + "loss": 1.7586, + "step": 902000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0928923466749355e-05, + "loss": 1.765, + "step": 902500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0923897939584118e-05, + "loss": 1.7667, + "step": 903000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0918872412418882e-05, + "loss": 1.7633, + "step": 903500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0913846885253647e-05, + "loss": 1.7755, + "step": 904000 + }, + { + "epoch": 0.45, + "learning_rate": 1.090882135808841e-05, + "loss": 1.7596, + "step": 904500 + }, + { + "epoch": 0.45, + "learning_rate": 1.0903795830923177e-05, + "loss": 1.758, + "step": 905000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0898770303757939e-05, + "loss": 1.7634, + "step": 905500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0893744776592702e-05, + "loss": 1.7575, + "step": 906000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0888719249427469e-05, + "loss": 1.7535, + "step": 906500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0883693722262232e-05, + "loss": 1.7672, + "step": 907000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0878668195096996e-05, + "loss": 1.7474, + "step": 907500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0873642667931761e-05, + "loss": 1.7502, + "step": 908000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0868617140766525e-05, + "loss": 1.7469, + "step": 908500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0863591613601288e-05, + "loss": 1.7628, + "step": 909000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0858566086436053e-05, + "loss": 1.7658, + "step": 909500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0853540559270817e-05, + "loss": 1.7683, + "step": 910000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0848515032105582e-05, + "loss": 1.7756, + "step": 910500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0843489504940345e-05, + "loss": 1.7644, + "step": 911000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0838463977775109e-05, + "loss": 1.7542, + "step": 911500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0833438450609874e-05, + "loss": 1.7678, + "step": 912000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0828412923444637e-05, + "loss": 1.773, + "step": 912500 + }, + { + "epoch": 0.46, + "learning_rate": 1.08233873962794e-05, + "loss": 1.7692, + "step": 913000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0818361869114167e-05, + "loss": 1.7562, + "step": 913500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0813336341948931e-05, + "loss": 1.7646, + "step": 914000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0808310814783696e-05, + "loss": 1.7641, + "step": 914500 + }, + { + "epoch": 0.46, + "learning_rate": 1.080328528761846e-05, + "loss": 1.7553, + "step": 915000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0798259760453223e-05, + "loss": 1.7681, + "step": 915500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0793234233287988e-05, + "loss": 1.7649, + "step": 916000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0788208706122752e-05, + "loss": 1.7654, + "step": 916500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0783183178957515e-05, + "loss": 1.741, + "step": 917000 + }, + { + "epoch": 0.46, + "learning_rate": 1.077815765179228e-05, + "loss": 1.747, + "step": 917500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0773132124627044e-05, + "loss": 1.7651, + "step": 918000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0768106597461807e-05, + "loss": 1.7503, + "step": 918500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0763081070296572e-05, + "loss": 1.7721, + "step": 919000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0758055543131336e-05, + "loss": 1.7419, + "step": 919500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0753030015966102e-05, + "loss": 1.7635, + "step": 920000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0748004488800864e-05, + "loss": 1.7452, + "step": 920500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0742978961635628e-05, + "loss": 1.7386, + "step": 921000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0737953434470394e-05, + "loss": 1.7756, + "step": 921500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0732927907305158e-05, + "loss": 1.7551, + "step": 922000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0727902380139921e-05, + "loss": 1.7532, + "step": 922500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0722876852974686e-05, + "loss": 1.7499, + "step": 923000 + }, + { + "epoch": 0.46, + "learning_rate": 1.071785132580945e-05, + "loss": 1.7584, + "step": 923500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0712825798644215e-05, + "loss": 1.7677, + "step": 924000 + }, + { + "epoch": 0.46, + "learning_rate": 1.0707800271478979e-05, + "loss": 1.7288, + "step": 924500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0702774744313742e-05, + "loss": 1.7587, + "step": 925000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0697749217148507e-05, + "loss": 1.7738, + "step": 925500 + }, + { + "epoch": 0.47, + "learning_rate": 1.069272368998327e-05, + "loss": 1.7543, + "step": 926000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0687698162818034e-05, + "loss": 1.7494, + "step": 926500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0682672635652799e-05, + "loss": 1.7657, + "step": 927000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0677647108487563e-05, + "loss": 1.7742, + "step": 927500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0672621581322326e-05, + "loss": 1.7527, + "step": 928000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0667596054157093e-05, + "loss": 1.7681, + "step": 928500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0662570526991856e-05, + "loss": 1.7544, + "step": 929000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0657544999826621e-05, + "loss": 1.7631, + "step": 929500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0652519472661385e-05, + "loss": 1.7626, + "step": 930000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0647493945496148e-05, + "loss": 1.7706, + "step": 930500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0642468418330913e-05, + "loss": 1.7679, + "step": 931000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0637442891165677e-05, + "loss": 1.7411, + "step": 931500 + }, + { + "epoch": 0.47, + "learning_rate": 1.063241736400044e-05, + "loss": 1.7642, + "step": 932000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0627391836835206e-05, + "loss": 1.7745, + "step": 932500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0622366309669969e-05, + "loss": 1.7363, + "step": 933000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0617340782504734e-05, + "loss": 1.7533, + "step": 933500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0612315255339498e-05, + "loss": 1.7607, + "step": 934000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0607289728174261e-05, + "loss": 1.7565, + "step": 934500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0602264201009028e-05, + "loss": 1.7585, + "step": 935000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0597238673843791e-05, + "loss": 1.7675, + "step": 935500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0592213146678553e-05, + "loss": 1.7447, + "step": 936000 + }, + { + "epoch": 0.47, + "learning_rate": 1.058718761951332e-05, + "loss": 1.7671, + "step": 936500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0582162092348083e-05, + "loss": 1.776, + "step": 937000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0577136565182847e-05, + "loss": 1.7492, + "step": 937500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0572111038017612e-05, + "loss": 1.757, + "step": 938000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0567085510852375e-05, + "loss": 1.7551, + "step": 938500 + }, + { + "epoch": 0.47, + "learning_rate": 1.056205998368714e-05, + "loss": 1.7531, + "step": 939000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0557034456521904e-05, + "loss": 1.7617, + "step": 939500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0552008929356667e-05, + "loss": 1.7529, + "step": 940000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0546983402191433e-05, + "loss": 1.7507, + "step": 940500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0541957875026196e-05, + "loss": 1.7432, + "step": 941000 + }, + { + "epoch": 0.47, + "learning_rate": 1.053693234786096e-05, + "loss": 1.7636, + "step": 941500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0531906820695725e-05, + "loss": 1.7613, + "step": 942000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0526881293530488e-05, + "loss": 1.7599, + "step": 942500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0521855766365251e-05, + "loss": 1.7493, + "step": 943000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0516830239200018e-05, + "loss": 1.7835, + "step": 943500 + }, + { + "epoch": 0.47, + "learning_rate": 1.0511804712034782e-05, + "loss": 1.7647, + "step": 944000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0506779184869547e-05, + "loss": 1.7456, + "step": 944500 + }, + { + "epoch": 0.47, + "learning_rate": 1.050175365770431e-05, + "loss": 1.7648, + "step": 945000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0496728130539074e-05, + "loss": 1.7495, + "step": 945500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0491702603373839e-05, + "loss": 1.7602, + "step": 946000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0486677076208602e-05, + "loss": 1.7334, + "step": 946500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0481651549043366e-05, + "loss": 1.7638, + "step": 947000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0476626021878131e-05, + "loss": 1.744, + "step": 947500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0471600494712894e-05, + "loss": 1.7557, + "step": 948000 + }, + { + "epoch": 0.48, + "learning_rate": 1.046657496754766e-05, + "loss": 1.7279, + "step": 948500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0461549440382423e-05, + "loss": 1.761, + "step": 949000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0456523913217186e-05, + "loss": 1.7553, + "step": 949500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0451498386051953e-05, + "loss": 1.7589, + "step": 950000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0446472858886717e-05, + "loss": 1.7422, + "step": 950500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0441447331721478e-05, + "loss": 1.7639, + "step": 951000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0436421804556245e-05, + "loss": 1.7671, + "step": 951500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0431396277391009e-05, + "loss": 1.7444, + "step": 952000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0426370750225772e-05, + "loss": 1.7529, + "step": 952500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0421345223060537e-05, + "loss": 1.7437, + "step": 953000 + }, + { + "epoch": 0.48, + "learning_rate": 1.04163196958953e-05, + "loss": 1.7526, + "step": 953500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0411294168730066e-05, + "loss": 1.756, + "step": 954000 + }, + { + "epoch": 0.48, + "learning_rate": 1.040626864156483e-05, + "loss": 1.7522, + "step": 954500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0401243114399593e-05, + "loss": 1.7632, + "step": 955000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0396217587234358e-05, + "loss": 1.763, + "step": 955500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0391192060069121e-05, + "loss": 1.7578, + "step": 956000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0386166532903885e-05, + "loss": 1.7609, + "step": 956500 + }, + { + "epoch": 0.48, + "learning_rate": 1.038114100573865e-05, + "loss": 1.7697, + "step": 957000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0376115478573413e-05, + "loss": 1.7569, + "step": 957500 + }, + { + "epoch": 0.48, + "learning_rate": 1.037108995140818e-05, + "loss": 1.7574, + "step": 958000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0366064424242944e-05, + "loss": 1.7668, + "step": 958500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0361038897077707e-05, + "loss": 1.7487, + "step": 959000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0356013369912472e-05, + "loss": 1.7497, + "step": 959500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0350987842747236e-05, + "loss": 1.7729, + "step": 960000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0345962315582e-05, + "loss": 1.7566, + "step": 960500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0340936788416764e-05, + "loss": 1.7539, + "step": 961000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0335911261251528e-05, + "loss": 1.7723, + "step": 961500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0330885734086291e-05, + "loss": 1.758, + "step": 962000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0325860206921056e-05, + "loss": 1.7603, + "step": 962500 + }, + { + "epoch": 0.48, + "learning_rate": 1.032083467975582e-05, + "loss": 1.7628, + "step": 963000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0315809152590585e-05, + "loss": 1.7556, + "step": 963500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0310783625425348e-05, + "loss": 1.7537, + "step": 964000 + }, + { + "epoch": 0.48, + "learning_rate": 1.0305758098260112e-05, + "loss": 1.766, + "step": 964500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0300732571094879e-05, + "loss": 1.753, + "step": 965000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0295707043929642e-05, + "loss": 1.7519, + "step": 965500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0290681516764404e-05, + "loss": 1.755, + "step": 966000 + }, + { + "epoch": 0.49, + "learning_rate": 1.028565598959917e-05, + "loss": 1.7431, + "step": 966500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0280630462433934e-05, + "loss": 1.7583, + "step": 967000 + }, + { + "epoch": 0.49, + "learning_rate": 1.02756049352687e-05, + "loss": 1.7719, + "step": 967500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0270579408103463e-05, + "loss": 1.7559, + "step": 968000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0265553880938226e-05, + "loss": 1.755, + "step": 968500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0260528353772991e-05, + "loss": 1.7646, + "step": 969000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0255502826607755e-05, + "loss": 1.7634, + "step": 969500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0250477299442518e-05, + "loss": 1.7379, + "step": 970000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0245451772277283e-05, + "loss": 1.7661, + "step": 970500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0240426245112047e-05, + "loss": 1.7514, + "step": 971000 + }, + { + "epoch": 0.49, + "learning_rate": 1.023540071794681e-05, + "loss": 1.7607, + "step": 971500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0230375190781575e-05, + "loss": 1.7766, + "step": 972000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0225349663616339e-05, + "loss": 1.7431, + "step": 972500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0220324136451106e-05, + "loss": 1.765, + "step": 973000 + }, + { + "epoch": 0.49, + "learning_rate": 1.021529860928587e-05, + "loss": 1.7596, + "step": 973500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0210273082120633e-05, + "loss": 1.7663, + "step": 974000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0205247554955398e-05, + "loss": 1.7612, + "step": 974500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0200222027790161e-05, + "loss": 1.7517, + "step": 975000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0195196500624925e-05, + "loss": 1.7594, + "step": 975500 + }, + { + "epoch": 0.49, + "learning_rate": 1.019017097345969e-05, + "loss": 1.7598, + "step": 976000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0185145446294453e-05, + "loss": 1.7625, + "step": 976500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0180119919129217e-05, + "loss": 1.7481, + "step": 977000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0175094391963982e-05, + "loss": 1.755, + "step": 977500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0170068864798745e-05, + "loss": 1.7424, + "step": 978000 + }, + { + "epoch": 0.49, + "learning_rate": 1.016504333763351e-05, + "loss": 1.7527, + "step": 978500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0160017810468274e-05, + "loss": 1.7539, + "step": 979000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0154992283303037e-05, + "loss": 1.7487, + "step": 979500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0149966756137804e-05, + "loss": 1.7536, + "step": 980000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0144941228972568e-05, + "loss": 1.7581, + "step": 980500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0139915701807331e-05, + "loss": 1.7592, + "step": 981000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0134890174642096e-05, + "loss": 1.781, + "step": 981500 + }, + { + "epoch": 0.49, + "learning_rate": 1.012986464747686e-05, + "loss": 1.7585, + "step": 982000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0124839120311625e-05, + "loss": 1.7562, + "step": 982500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0119813593146388e-05, + "loss": 1.7675, + "step": 983000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0114788065981152e-05, + "loss": 1.7363, + "step": 983500 + }, + { + "epoch": 0.49, + "learning_rate": 1.0109762538815917e-05, + "loss": 1.752, + "step": 984000 + }, + { + "epoch": 0.49, + "learning_rate": 1.010473701165068e-05, + "loss": 1.753, + "step": 984500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0099711484485444e-05, + "loss": 1.7467, + "step": 985000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0094685957320209e-05, + "loss": 1.7542, + "step": 985500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0089660430154972e-05, + "loss": 1.77, + "step": 986000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0084634902989736e-05, + "loss": 1.7863, + "step": 986500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0079609375824503e-05, + "loss": 1.7428, + "step": 987000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0074583848659264e-05, + "loss": 1.7638, + "step": 987500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0069558321494031e-05, + "loss": 1.7611, + "step": 988000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0064532794328795e-05, + "loss": 1.7528, + "step": 988500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0059507267163558e-05, + "loss": 1.7624, + "step": 989000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0054481739998323e-05, + "loss": 1.7602, + "step": 989500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0049456212833087e-05, + "loss": 1.7581, + "step": 990000 + }, + { + "epoch": 0.5, + "learning_rate": 1.004443068566785e-05, + "loss": 1.7475, + "step": 990500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0039405158502615e-05, + "loss": 1.7538, + "step": 991000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0034379631337379e-05, + "loss": 1.7335, + "step": 991500 + }, + { + "epoch": 0.5, + "learning_rate": 1.0029354104172144e-05, + "loss": 1.7564, + "step": 992000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0024328577006907e-05, + "loss": 1.7547, + "step": 992500 + }, + { + "epoch": 0.5, + "learning_rate": 1.001930304984167e-05, + "loss": 1.7574, + "step": 993000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0014277522676436e-05, + "loss": 1.7552, + "step": 993500 + }, + { + "epoch": 0.5, + "learning_rate": 1.00092519955112e-05, + "loss": 1.7471, + "step": 994000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0004226468345963e-05, + "loss": 1.7509, + "step": 994500 + }, + { + "epoch": 0.5, + "learning_rate": 9.999200941180728e-06, + "loss": 1.7683, + "step": 995000 + }, + { + "epoch": 0.5, + "learning_rate": 9.994175414015493e-06, + "loss": 1.7508, + "step": 995500 + }, + { + "epoch": 0.5, + "learning_rate": 9.989149886850256e-06, + "loss": 1.7455, + "step": 996000 + }, + { + "epoch": 0.5, + "learning_rate": 9.984124359685022e-06, + "loss": 1.7548, + "step": 996500 + }, + { + "epoch": 0.5, + "learning_rate": 9.979098832519785e-06, + "loss": 1.7553, + "step": 997000 + }, + { + "epoch": 0.5, + "learning_rate": 9.974073305354548e-06, + "loss": 1.7553, + "step": 997500 + }, + { + "epoch": 0.5, + "learning_rate": 9.969047778189314e-06, + "loss": 1.7523, + "step": 998000 + }, + { + "epoch": 0.5, + "learning_rate": 9.964022251024079e-06, + "loss": 1.7645, + "step": 998500 + }, + { + "epoch": 0.5, + "learning_rate": 9.958996723858842e-06, + "loss": 1.756, + "step": 999000 + }, + { + "epoch": 0.5, + "learning_rate": 9.953971196693606e-06, + "loss": 1.7639, + "step": 999500 + }, + { + "epoch": 0.5, + "learning_rate": 9.94894566952837e-06, + "loss": 1.7579, + "step": 1000000 + }, + { + "epoch": 0.5, + "learning_rate": 9.943920142363134e-06, + "loss": 1.7532, + "step": 1000500 + }, + { + "epoch": 0.5, + "learning_rate": 9.938894615197898e-06, + "loss": 1.76, + "step": 1001000 + }, + { + "epoch": 0.5, + "learning_rate": 9.933869088032663e-06, + "loss": 1.7611, + "step": 1001500 + }, + { + "epoch": 0.5, + "learning_rate": 9.928843560867428e-06, + "loss": 1.7632, + "step": 1002000 + }, + { + "epoch": 0.5, + "learning_rate": 9.92381803370219e-06, + "loss": 1.7489, + "step": 1002500 + }, + { + "epoch": 0.5, + "learning_rate": 9.918792506536955e-06, + "loss": 1.7603, + "step": 1003000 + }, + { + "epoch": 0.5, + "learning_rate": 9.91376697937172e-06, + "loss": 1.7548, + "step": 1003500 + }, + { + "epoch": 0.5, + "learning_rate": 9.908741452206483e-06, + "loss": 1.7558, + "step": 1004000 + }, + { + "epoch": 0.5, + "learning_rate": 9.903715925041247e-06, + "loss": 1.7427, + "step": 1004500 + }, + { + "epoch": 0.51, + "learning_rate": 9.898690397876012e-06, + "loss": 1.7623, + "step": 1005000 + }, + { + "epoch": 0.51, + "learning_rate": 9.893664870710775e-06, + "loss": 1.7712, + "step": 1005500 + }, + { + "epoch": 0.51, + "learning_rate": 9.88863934354554e-06, + "loss": 1.7675, + "step": 1006000 + }, + { + "epoch": 0.51, + "learning_rate": 9.883613816380304e-06, + "loss": 1.7604, + "step": 1006500 + }, + { + "epoch": 0.51, + "learning_rate": 9.87858828921507e-06, + "loss": 1.7537, + "step": 1007000 + }, + { + "epoch": 0.51, + "learning_rate": 9.873562762049833e-06, + "loss": 1.7514, + "step": 1007500 + }, + { + "epoch": 0.51, + "learning_rate": 9.868537234884598e-06, + "loss": 1.7506, + "step": 1008000 + }, + { + "epoch": 0.51, + "learning_rate": 9.863511707719361e-06, + "loss": 1.7559, + "step": 1008500 + }, + { + "epoch": 0.51, + "learning_rate": 9.858486180554125e-06, + "loss": 1.7612, + "step": 1009000 + }, + { + "epoch": 0.51, + "learning_rate": 9.85346065338889e-06, + "loss": 1.7595, + "step": 1009500 + }, + { + "epoch": 0.51, + "learning_rate": 9.848435126223655e-06, + "loss": 1.7594, + "step": 1010000 + }, + { + "epoch": 0.51, + "learning_rate": 9.843409599058418e-06, + "loss": 1.7501, + "step": 1010500 + }, + { + "epoch": 0.51, + "learning_rate": 9.838384071893182e-06, + "loss": 1.7534, + "step": 1011000 + }, + { + "epoch": 0.51, + "learning_rate": 9.833358544727947e-06, + "loss": 1.7352, + "step": 1011500 + }, + { + "epoch": 0.51, + "learning_rate": 9.82833301756271e-06, + "loss": 1.7446, + "step": 1012000 + }, + { + "epoch": 0.51, + "learning_rate": 9.823307490397474e-06, + "loss": 1.7425, + "step": 1012500 + }, + { + "epoch": 0.51, + "learning_rate": 9.818281963232239e-06, + "loss": 1.7406, + "step": 1013000 + }, + { + "epoch": 0.51, + "learning_rate": 9.813256436067004e-06, + "loss": 1.7725, + "step": 1013500 + }, + { + "epoch": 0.51, + "learning_rate": 9.808230908901768e-06, + "loss": 1.7736, + "step": 1014000 + }, + { + "epoch": 0.51, + "learning_rate": 9.803205381736531e-06, + "loss": 1.7406, + "step": 1014500 + }, + { + "epoch": 0.51, + "learning_rate": 9.798179854571296e-06, + "loss": 1.7424, + "step": 1015000 + }, + { + "epoch": 0.51, + "learning_rate": 9.79315432740606e-06, + "loss": 1.75, + "step": 1015500 + }, + { + "epoch": 0.51, + "learning_rate": 9.788128800240823e-06, + "loss": 1.7617, + "step": 1016000 + }, + { + "epoch": 0.51, + "learning_rate": 9.783103273075588e-06, + "loss": 1.759, + "step": 1016500 + }, + { + "epoch": 0.51, + "learning_rate": 9.778077745910353e-06, + "loss": 1.7225, + "step": 1017000 + }, + { + "epoch": 0.51, + "learning_rate": 9.773052218745117e-06, + "loss": 1.7542, + "step": 1017500 + }, + { + "epoch": 0.51, + "learning_rate": 9.76802669157988e-06, + "loss": 1.7619, + "step": 1018000 + }, + { + "epoch": 0.51, + "learning_rate": 9.763001164414645e-06, + "loss": 1.7514, + "step": 1018500 + }, + { + "epoch": 0.51, + "learning_rate": 9.757975637249409e-06, + "loss": 1.7705, + "step": 1019000 + }, + { + "epoch": 0.51, + "learning_rate": 9.752950110084172e-06, + "loss": 1.7531, + "step": 1019500 + }, + { + "epoch": 0.51, + "learning_rate": 9.747924582918937e-06, + "loss": 1.7552, + "step": 1020000 + }, + { + "epoch": 0.51, + "learning_rate": 9.742899055753701e-06, + "loss": 1.7385, + "step": 1020500 + }, + { + "epoch": 0.51, + "learning_rate": 9.737873528588466e-06, + "loss": 1.7438, + "step": 1021000 + }, + { + "epoch": 0.51, + "learning_rate": 9.73284800142323e-06, + "loss": 1.7524, + "step": 1021500 + }, + { + "epoch": 0.51, + "learning_rate": 9.727822474257995e-06, + "loss": 1.7682, + "step": 1022000 + }, + { + "epoch": 0.51, + "learning_rate": 9.722796947092758e-06, + "loss": 1.7457, + "step": 1022500 + }, + { + "epoch": 0.51, + "learning_rate": 9.717771419927523e-06, + "loss": 1.7409, + "step": 1023000 + }, + { + "epoch": 0.51, + "learning_rate": 9.712745892762287e-06, + "loss": 1.7442, + "step": 1023500 + }, + { + "epoch": 0.51, + "learning_rate": 9.70772036559705e-06, + "loss": 1.7528, + "step": 1024000 + }, + { + "epoch": 0.51, + "learning_rate": 9.702694838431815e-06, + "loss": 1.7659, + "step": 1024500 + }, + { + "epoch": 0.52, + "learning_rate": 9.69766931126658e-06, + "loss": 1.7532, + "step": 1025000 + }, + { + "epoch": 0.52, + "learning_rate": 9.692643784101344e-06, + "loss": 1.7498, + "step": 1025500 + }, + { + "epoch": 0.52, + "learning_rate": 9.687618256936107e-06, + "loss": 1.7551, + "step": 1026000 + }, + { + "epoch": 0.52, + "learning_rate": 9.682592729770872e-06, + "loss": 1.7538, + "step": 1026500 + }, + { + "epoch": 0.52, + "learning_rate": 9.677567202605636e-06, + "loss": 1.7458, + "step": 1027000 + }, + { + "epoch": 0.52, + "learning_rate": 9.6725416754404e-06, + "loss": 1.7286, + "step": 1027500 + }, + { + "epoch": 0.52, + "learning_rate": 9.667516148275164e-06, + "loss": 1.7551, + "step": 1028000 + }, + { + "epoch": 0.52, + "learning_rate": 9.66249062110993e-06, + "loss": 1.7558, + "step": 1028500 + }, + { + "epoch": 0.52, + "learning_rate": 9.657465093944693e-06, + "loss": 1.7506, + "step": 1029000 + }, + { + "epoch": 0.52, + "learning_rate": 9.652439566779457e-06, + "loss": 1.746, + "step": 1029500 + }, + { + "epoch": 0.52, + "learning_rate": 9.647414039614222e-06, + "loss": 1.7564, + "step": 1030000 + }, + { + "epoch": 0.52, + "learning_rate": 9.642388512448985e-06, + "loss": 1.7541, + "step": 1030500 + }, + { + "epoch": 0.52, + "learning_rate": 9.637362985283749e-06, + "loss": 1.7445, + "step": 1031000 + }, + { + "epoch": 0.52, + "learning_rate": 9.632337458118514e-06, + "loss": 1.7497, + "step": 1031500 + }, + { + "epoch": 0.52, + "learning_rate": 9.627311930953279e-06, + "loss": 1.7493, + "step": 1032000 + }, + { + "epoch": 0.52, + "learning_rate": 9.622286403788042e-06, + "loss": 1.7496, + "step": 1032500 + }, + { + "epoch": 0.52, + "learning_rate": 9.617260876622806e-06, + "loss": 1.7196, + "step": 1033000 + }, + { + "epoch": 0.52, + "learning_rate": 9.612235349457571e-06, + "loss": 1.7521, + "step": 1033500 + }, + { + "epoch": 0.52, + "learning_rate": 9.607209822292334e-06, + "loss": 1.7328, + "step": 1034000 + }, + { + "epoch": 0.52, + "learning_rate": 9.6021842951271e-06, + "loss": 1.7365, + "step": 1034500 + }, + { + "epoch": 0.52, + "learning_rate": 9.597158767961863e-06, + "loss": 1.7377, + "step": 1035000 + }, + { + "epoch": 0.52, + "learning_rate": 9.592133240796628e-06, + "loss": 1.7494, + "step": 1035500 + }, + { + "epoch": 0.52, + "learning_rate": 9.587107713631391e-06, + "loss": 1.7567, + "step": 1036000 + }, + { + "epoch": 0.52, + "learning_rate": 9.582082186466155e-06, + "loss": 1.749, + "step": 1036500 + }, + { + "epoch": 0.52, + "learning_rate": 9.57705665930092e-06, + "loss": 1.7533, + "step": 1037000 + }, + { + "epoch": 0.52, + "learning_rate": 9.572031132135684e-06, + "loss": 1.7631, + "step": 1037500 + }, + { + "epoch": 0.52, + "learning_rate": 9.567005604970449e-06, + "loss": 1.748, + "step": 1038000 + }, + { + "epoch": 0.52, + "learning_rate": 9.561980077805212e-06, + "loss": 1.757, + "step": 1038500 + }, + { + "epoch": 0.52, + "learning_rate": 9.556954550639976e-06, + "loss": 1.7311, + "step": 1039000 + }, + { + "epoch": 0.52, + "learning_rate": 9.55192902347474e-06, + "loss": 1.7393, + "step": 1039500 + }, + { + "epoch": 0.52, + "learning_rate": 9.546903496309506e-06, + "loss": 1.733, + "step": 1040000 + }, + { + "epoch": 0.52, + "learning_rate": 9.54187796914427e-06, + "loss": 1.7567, + "step": 1040500 + }, + { + "epoch": 0.52, + "learning_rate": 9.536852441979033e-06, + "loss": 1.752, + "step": 1041000 + }, + { + "epoch": 0.52, + "learning_rate": 9.531826914813798e-06, + "loss": 1.7506, + "step": 1041500 + }, + { + "epoch": 0.52, + "learning_rate": 9.526801387648561e-06, + "loss": 1.7564, + "step": 1042000 + }, + { + "epoch": 0.52, + "learning_rate": 9.521775860483325e-06, + "loss": 1.7396, + "step": 1042500 + }, + { + "epoch": 0.52, + "learning_rate": 9.51675033331809e-06, + "loss": 1.7462, + "step": 1043000 + }, + { + "epoch": 0.52, + "learning_rate": 9.511724806152855e-06, + "loss": 1.7504, + "step": 1043500 + }, + { + "epoch": 0.52, + "learning_rate": 9.506699278987618e-06, + "loss": 1.7328, + "step": 1044000 + }, + { + "epoch": 0.52, + "learning_rate": 9.501673751822382e-06, + "loss": 1.7459, + "step": 1044500 + }, + { + "epoch": 0.53, + "learning_rate": 9.496648224657147e-06, + "loss": 1.7776, + "step": 1045000 + }, + { + "epoch": 0.53, + "learning_rate": 9.49162269749191e-06, + "loss": 1.753, + "step": 1045500 + }, + { + "epoch": 0.53, + "learning_rate": 9.486597170326674e-06, + "loss": 1.7432, + "step": 1046000 + }, + { + "epoch": 0.53, + "learning_rate": 9.481571643161439e-06, + "loss": 1.7478, + "step": 1046500 + }, + { + "epoch": 0.53, + "learning_rate": 9.476546115996204e-06, + "loss": 1.7576, + "step": 1047000 + }, + { + "epoch": 0.53, + "learning_rate": 9.471520588830968e-06, + "loss": 1.743, + "step": 1047500 + }, + { + "epoch": 0.53, + "learning_rate": 9.466495061665731e-06, + "loss": 1.7517, + "step": 1048000 + }, + { + "epoch": 0.53, + "learning_rate": 9.461469534500496e-06, + "loss": 1.7453, + "step": 1048500 + }, + { + "epoch": 0.53, + "learning_rate": 9.45644400733526e-06, + "loss": 1.7368, + "step": 1049000 + }, + { + "epoch": 0.53, + "learning_rate": 9.451418480170025e-06, + "loss": 1.7567, + "step": 1049500 + }, + { + "epoch": 0.53, + "learning_rate": 9.446392953004788e-06, + "loss": 1.7359, + "step": 1050000 + }, + { + "epoch": 0.53, + "learning_rate": 9.441367425839553e-06, + "loss": 1.74, + "step": 1050500 + }, + { + "epoch": 0.53, + "learning_rate": 9.436341898674317e-06, + "loss": 1.747, + "step": 1051000 + }, + { + "epoch": 0.53, + "learning_rate": 9.431316371509082e-06, + "loss": 1.749, + "step": 1051500 + }, + { + "epoch": 0.53, + "learning_rate": 9.426290844343846e-06, + "loss": 1.7375, + "step": 1052000 + }, + { + "epoch": 0.53, + "learning_rate": 9.421265317178609e-06, + "loss": 1.7665, + "step": 1052500 + }, + { + "epoch": 0.53, + "learning_rate": 9.416239790013374e-06, + "loss": 1.7566, + "step": 1053000 + }, + { + "epoch": 0.53, + "learning_rate": 9.411214262848138e-06, + "loss": 1.7522, + "step": 1053500 + }, + { + "epoch": 0.53, + "learning_rate": 9.406188735682901e-06, + "loss": 1.7322, + "step": 1054000 + }, + { + "epoch": 0.53, + "learning_rate": 9.401163208517666e-06, + "loss": 1.7645, + "step": 1054500 + }, + { + "epoch": 0.53, + "learning_rate": 9.396137681352431e-06, + "loss": 1.7416, + "step": 1055000 + }, + { + "epoch": 0.53, + "learning_rate": 9.391112154187195e-06, + "loss": 1.7561, + "step": 1055500 + }, + { + "epoch": 0.53, + "learning_rate": 9.386086627021958e-06, + "loss": 1.7593, + "step": 1056000 + }, + { + "epoch": 0.53, + "learning_rate": 9.381061099856723e-06, + "loss": 1.7529, + "step": 1056500 + }, + { + "epoch": 0.53, + "learning_rate": 9.376035572691487e-06, + "loss": 1.7599, + "step": 1057000 + }, + { + "epoch": 0.53, + "learning_rate": 9.37101004552625e-06, + "loss": 1.7398, + "step": 1057500 + }, + { + "epoch": 0.53, + "learning_rate": 9.365984518361015e-06, + "loss": 1.7524, + "step": 1058000 + }, + { + "epoch": 0.53, + "learning_rate": 9.36095899119578e-06, + "loss": 1.7523, + "step": 1058500 + }, + { + "epoch": 0.53, + "learning_rate": 9.355933464030544e-06, + "loss": 1.7421, + "step": 1059000 + }, + { + "epoch": 0.53, + "learning_rate": 9.350907936865307e-06, + "loss": 1.7362, + "step": 1059500 + }, + { + "epoch": 0.53, + "learning_rate": 9.345882409700073e-06, + "loss": 1.774, + "step": 1060000 + }, + { + "epoch": 0.53, + "learning_rate": 9.340856882534836e-06, + "loss": 1.7401, + "step": 1060500 + }, + { + "epoch": 0.53, + "learning_rate": 9.335831355369601e-06, + "loss": 1.7456, + "step": 1061000 + }, + { + "epoch": 0.53, + "learning_rate": 9.330805828204365e-06, + "loss": 1.7542, + "step": 1061500 + }, + { + "epoch": 0.53, + "learning_rate": 9.32578030103913e-06, + "loss": 1.7556, + "step": 1062000 + }, + { + "epoch": 0.53, + "learning_rate": 9.320754773873893e-06, + "loss": 1.7456, + "step": 1062500 + }, + { + "epoch": 0.53, + "learning_rate": 9.315729246708657e-06, + "loss": 1.7413, + "step": 1063000 + }, + { + "epoch": 0.53, + "learning_rate": 9.310703719543422e-06, + "loss": 1.7571, + "step": 1063500 + }, + { + "epoch": 0.53, + "learning_rate": 9.305678192378185e-06, + "loss": 1.7599, + "step": 1064000 + }, + { + "epoch": 0.53, + "learning_rate": 9.30065266521295e-06, + "loss": 1.7371, + "step": 1064500 + }, + { + "epoch": 0.54, + "learning_rate": 9.295627138047714e-06, + "loss": 1.7483, + "step": 1065000 + }, + { + "epoch": 0.54, + "learning_rate": 9.290601610882479e-06, + "loss": 1.7423, + "step": 1065500 + }, + { + "epoch": 0.54, + "learning_rate": 9.285576083717242e-06, + "loss": 1.7383, + "step": 1066000 + }, + { + "epoch": 0.54, + "learning_rate": 9.280550556552007e-06, + "loss": 1.7569, + "step": 1066500 + }, + { + "epoch": 0.54, + "learning_rate": 9.275525029386771e-06, + "loss": 1.7359, + "step": 1067000 + }, + { + "epoch": 0.54, + "learning_rate": 9.270499502221534e-06, + "loss": 1.7569, + "step": 1067500 + }, + { + "epoch": 0.54, + "learning_rate": 9.2654739750563e-06, + "loss": 1.7452, + "step": 1068000 + }, + { + "epoch": 0.54, + "learning_rate": 9.260448447891065e-06, + "loss": 1.7546, + "step": 1068500 + }, + { + "epoch": 0.54, + "learning_rate": 9.255422920725828e-06, + "loss": 1.7298, + "step": 1069000 + }, + { + "epoch": 0.54, + "learning_rate": 9.250397393560592e-06, + "loss": 1.7376, + "step": 1069500 + }, + { + "epoch": 0.54, + "learning_rate": 9.245371866395357e-06, + "loss": 1.757, + "step": 1070000 + }, + { + "epoch": 0.54, + "learning_rate": 9.24034633923012e-06, + "loss": 1.7653, + "step": 1070500 + }, + { + "epoch": 0.54, + "learning_rate": 9.235320812064884e-06, + "loss": 1.7427, + "step": 1071000 + }, + { + "epoch": 0.54, + "learning_rate": 9.230295284899649e-06, + "loss": 1.7189, + "step": 1071500 + }, + { + "epoch": 0.54, + "learning_rate": 9.225269757734414e-06, + "loss": 1.7624, + "step": 1072000 + }, + { + "epoch": 0.54, + "learning_rate": 9.220244230569176e-06, + "loss": 1.7544, + "step": 1072500 + }, + { + "epoch": 0.54, + "learning_rate": 9.21521870340394e-06, + "loss": 1.7387, + "step": 1073000 + }, + { + "epoch": 0.54, + "learning_rate": 9.210193176238706e-06, + "loss": 1.7286, + "step": 1073500 + }, + { + "epoch": 0.54, + "learning_rate": 9.20516764907347e-06, + "loss": 1.7381, + "step": 1074000 + }, + { + "epoch": 0.54, + "learning_rate": 9.200142121908233e-06, + "loss": 1.7402, + "step": 1074500 + }, + { + "epoch": 0.54, + "learning_rate": 9.195116594742998e-06, + "loss": 1.7324, + "step": 1075000 + }, + { + "epoch": 0.54, + "learning_rate": 9.190091067577761e-06, + "loss": 1.7443, + "step": 1075500 + }, + { + "epoch": 0.54, + "learning_rate": 9.185065540412527e-06, + "loss": 1.7422, + "step": 1076000 + }, + { + "epoch": 0.54, + "learning_rate": 9.18004001324729e-06, + "loss": 1.7432, + "step": 1076500 + }, + { + "epoch": 0.54, + "learning_rate": 9.175014486082055e-06, + "loss": 1.7438, + "step": 1077000 + }, + { + "epoch": 0.54, + "learning_rate": 9.169988958916819e-06, + "loss": 1.7511, + "step": 1077500 + }, + { + "epoch": 0.54, + "learning_rate": 9.164963431751584e-06, + "loss": 1.7504, + "step": 1078000 + }, + { + "epoch": 0.54, + "learning_rate": 9.159937904586347e-06, + "loss": 1.7333, + "step": 1078500 + }, + { + "epoch": 0.54, + "learning_rate": 9.15491237742111e-06, + "loss": 1.7355, + "step": 1079000 + }, + { + "epoch": 0.54, + "learning_rate": 9.149886850255876e-06, + "loss": 1.7547, + "step": 1079500 + }, + { + "epoch": 0.54, + "learning_rate": 9.14486132309064e-06, + "loss": 1.755, + "step": 1080000 + }, + { + "epoch": 0.54, + "learning_rate": 9.139835795925404e-06, + "loss": 1.7447, + "step": 1080500 + }, + { + "epoch": 0.54, + "learning_rate": 9.134810268760168e-06, + "loss": 1.7405, + "step": 1081000 + }, + { + "epoch": 0.54, + "learning_rate": 9.129784741594933e-06, + "loss": 1.7365, + "step": 1081500 + }, + { + "epoch": 0.54, + "learning_rate": 9.124759214429696e-06, + "loss": 1.7412, + "step": 1082000 + }, + { + "epoch": 0.54, + "learning_rate": 9.11973368726446e-06, + "loss": 1.7501, + "step": 1082500 + }, + { + "epoch": 0.54, + "learning_rate": 9.114708160099225e-06, + "loss": 1.7387, + "step": 1083000 + }, + { + "epoch": 0.54, + "learning_rate": 9.10968263293399e-06, + "loss": 1.7572, + "step": 1083500 + }, + { + "epoch": 0.54, + "learning_rate": 9.104657105768754e-06, + "loss": 1.761, + "step": 1084000 + }, + { + "epoch": 0.55, + "learning_rate": 9.099631578603517e-06, + "loss": 1.7452, + "step": 1084500 + }, + { + "epoch": 0.55, + "learning_rate": 9.094606051438282e-06, + "loss": 1.7494, + "step": 1085000 + }, + { + "epoch": 0.55, + "learning_rate": 9.089580524273046e-06, + "loss": 1.7326, + "step": 1085500 + }, + { + "epoch": 0.55, + "learning_rate": 9.084554997107809e-06, + "loss": 1.7446, + "step": 1086000 + }, + { + "epoch": 0.55, + "learning_rate": 9.079529469942574e-06, + "loss": 1.7337, + "step": 1086500 + }, + { + "epoch": 0.55, + "learning_rate": 9.07450394277734e-06, + "loss": 1.7387, + "step": 1087000 + }, + { + "epoch": 0.55, + "learning_rate": 9.069478415612103e-06, + "loss": 1.752, + "step": 1087500 + }, + { + "epoch": 0.55, + "learning_rate": 9.064452888446866e-06, + "loss": 1.7361, + "step": 1088000 + }, + { + "epoch": 0.55, + "learning_rate": 9.059427361281631e-06, + "loss": 1.7328, + "step": 1088500 + }, + { + "epoch": 0.55, + "learning_rate": 9.054401834116395e-06, + "loss": 1.7617, + "step": 1089000 + }, + { + "epoch": 0.55, + "learning_rate": 9.049376306951158e-06, + "loss": 1.7544, + "step": 1089500 + }, + { + "epoch": 0.55, + "learning_rate": 9.044350779785923e-06, + "loss": 1.7581, + "step": 1090000 + }, + { + "epoch": 0.55, + "learning_rate": 9.039325252620687e-06, + "loss": 1.7411, + "step": 1090500 + }, + { + "epoch": 0.55, + "learning_rate": 9.034299725455452e-06, + "loss": 1.7488, + "step": 1091000 + }, + { + "epoch": 0.55, + "learning_rate": 9.029274198290215e-06, + "loss": 1.743, + "step": 1091500 + }, + { + "epoch": 0.55, + "learning_rate": 9.02424867112498e-06, + "loss": 1.7313, + "step": 1092000 + }, + { + "epoch": 0.55, + "learning_rate": 9.019223143959744e-06, + "loss": 1.7347, + "step": 1092500 + }, + { + "epoch": 0.55, + "learning_rate": 9.014197616794509e-06, + "loss": 1.7547, + "step": 1093000 + }, + { + "epoch": 0.55, + "learning_rate": 9.009172089629273e-06, + "loss": 1.7451, + "step": 1093500 + }, + { + "epoch": 0.55, + "learning_rate": 9.004146562464036e-06, + "loss": 1.7255, + "step": 1094000 + }, + { + "epoch": 0.55, + "learning_rate": 8.999121035298801e-06, + "loss": 1.7386, + "step": 1094500 + }, + { + "epoch": 0.55, + "learning_rate": 8.994095508133566e-06, + "loss": 1.743, + "step": 1095000 + }, + { + "epoch": 0.55, + "learning_rate": 8.98906998096833e-06, + "loss": 1.7477, + "step": 1095500 + }, + { + "epoch": 0.55, + "learning_rate": 8.984044453803093e-06, + "loss": 1.7545, + "step": 1096000 + }, + { + "epoch": 0.55, + "learning_rate": 8.979018926637858e-06, + "loss": 1.7494, + "step": 1096500 + }, + { + "epoch": 0.55, + "learning_rate": 8.973993399472622e-06, + "loss": 1.7387, + "step": 1097000 + }, + { + "epoch": 0.55, + "learning_rate": 8.968967872307385e-06, + "loss": 1.7451, + "step": 1097500 + }, + { + "epoch": 0.55, + "learning_rate": 8.96394234514215e-06, + "loss": 1.7478, + "step": 1098000 + }, + { + "epoch": 0.55, + "learning_rate": 8.958916817976916e-06, + "loss": 1.7505, + "step": 1098500 + }, + { + "epoch": 0.55, + "learning_rate": 8.953891290811679e-06, + "loss": 1.7492, + "step": 1099000 + }, + { + "epoch": 0.55, + "learning_rate": 8.948865763646442e-06, + "loss": 1.7434, + "step": 1099500 + }, + { + "epoch": 0.55, + "learning_rate": 8.943840236481208e-06, + "loss": 1.7496, + "step": 1100000 + }, + { + "epoch": 0.55, + "learning_rate": 8.938814709315971e-06, + "loss": 1.7491, + "step": 1100500 + }, + { + "epoch": 0.55, + "learning_rate": 8.933789182150734e-06, + "loss": 1.7348, + "step": 1101000 + }, + { + "epoch": 0.55, + "learning_rate": 8.9287636549855e-06, + "loss": 1.7379, + "step": 1101500 + }, + { + "epoch": 0.55, + "learning_rate": 8.923738127820265e-06, + "loss": 1.7383, + "step": 1102000 + }, + { + "epoch": 0.55, + "learning_rate": 8.918712600655028e-06, + "loss": 1.7487, + "step": 1102500 + }, + { + "epoch": 0.55, + "learning_rate": 8.913687073489792e-06, + "loss": 1.7389, + "step": 1103000 + }, + { + "epoch": 0.55, + "learning_rate": 8.908661546324557e-06, + "loss": 1.7379, + "step": 1103500 + }, + { + "epoch": 0.55, + "learning_rate": 8.90363601915932e-06, + "loss": 1.7592, + "step": 1104000 + }, + { + "epoch": 0.56, + "learning_rate": 8.898610491994085e-06, + "loss": 1.7436, + "step": 1104500 + }, + { + "epoch": 0.56, + "learning_rate": 8.893584964828849e-06, + "loss": 1.7362, + "step": 1105000 + }, + { + "epoch": 0.56, + "learning_rate": 8.888559437663612e-06, + "loss": 1.755, + "step": 1105500 + }, + { + "epoch": 0.56, + "learning_rate": 8.883533910498377e-06, + "loss": 1.7397, + "step": 1106000 + }, + { + "epoch": 0.56, + "learning_rate": 8.87850838333314e-06, + "loss": 1.7336, + "step": 1106500 + }, + { + "epoch": 0.56, + "learning_rate": 8.873482856167906e-06, + "loss": 1.737, + "step": 1107000 + }, + { + "epoch": 0.56, + "learning_rate": 8.86845732900267e-06, + "loss": 1.7442, + "step": 1107500 + }, + { + "epoch": 0.56, + "learning_rate": 8.863431801837435e-06, + "loss": 1.7419, + "step": 1108000 + }, + { + "epoch": 0.56, + "learning_rate": 8.858406274672198e-06, + "loss": 1.7419, + "step": 1108500 + }, + { + "epoch": 0.56, + "learning_rate": 8.853380747506961e-06, + "loss": 1.7546, + "step": 1109000 + }, + { + "epoch": 0.56, + "learning_rate": 8.848355220341727e-06, + "loss": 1.7478, + "step": 1109500 + }, + { + "epoch": 0.56, + "learning_rate": 8.843329693176492e-06, + "loss": 1.7303, + "step": 1110000 + }, + { + "epoch": 0.56, + "learning_rate": 8.838304166011255e-06, + "loss": 1.7492, + "step": 1110500 + }, + { + "epoch": 0.56, + "learning_rate": 8.833278638846019e-06, + "loss": 1.7485, + "step": 1111000 + }, + { + "epoch": 0.56, + "learning_rate": 8.828253111680784e-06, + "loss": 1.7373, + "step": 1111500 + }, + { + "epoch": 0.56, + "learning_rate": 8.823227584515547e-06, + "loss": 1.7453, + "step": 1112000 + }, + { + "epoch": 0.56, + "learning_rate": 8.81820205735031e-06, + "loss": 1.7504, + "step": 1112500 + }, + { + "epoch": 0.56, + "learning_rate": 8.813176530185076e-06, + "loss": 1.7398, + "step": 1113000 + }, + { + "epoch": 0.56, + "learning_rate": 8.808151003019841e-06, + "loss": 1.7688, + "step": 1113500 + }, + { + "epoch": 0.56, + "learning_rate": 8.803125475854604e-06, + "loss": 1.7443, + "step": 1114000 + }, + { + "epoch": 0.56, + "learning_rate": 8.798099948689368e-06, + "loss": 1.744, + "step": 1114500 + }, + { + "epoch": 0.56, + "learning_rate": 8.793074421524133e-06, + "loss": 1.7511, + "step": 1115000 + }, + { + "epoch": 0.56, + "learning_rate": 8.788048894358896e-06, + "loss": 1.7382, + "step": 1115500 + }, + { + "epoch": 0.56, + "learning_rate": 8.78302336719366e-06, + "loss": 1.7459, + "step": 1116000 + }, + { + "epoch": 0.56, + "learning_rate": 8.777997840028425e-06, + "loss": 1.7371, + "step": 1116500 + }, + { + "epoch": 0.56, + "learning_rate": 8.77297231286319e-06, + "loss": 1.7467, + "step": 1117000 + }, + { + "epoch": 0.56, + "learning_rate": 8.767946785697954e-06, + "loss": 1.746, + "step": 1117500 + }, + { + "epoch": 0.56, + "learning_rate": 8.762921258532717e-06, + "loss": 1.7432, + "step": 1118000 + }, + { + "epoch": 0.56, + "learning_rate": 8.757895731367482e-06, + "loss": 1.7511, + "step": 1118500 + }, + { + "epoch": 0.56, + "learning_rate": 8.752870204202246e-06, + "loss": 1.744, + "step": 1119000 + }, + { + "epoch": 0.56, + "learning_rate": 8.74784467703701e-06, + "loss": 1.7254, + "step": 1119500 + }, + { + "epoch": 0.56, + "learning_rate": 8.742819149871774e-06, + "loss": 1.7293, + "step": 1120000 + }, + { + "epoch": 0.56, + "learning_rate": 8.73779362270654e-06, + "loss": 1.742, + "step": 1120500 + }, + { + "epoch": 0.56, + "learning_rate": 8.732768095541303e-06, + "loss": 1.7569, + "step": 1121000 + }, + { + "epoch": 0.56, + "learning_rate": 8.727742568376068e-06, + "loss": 1.752, + "step": 1121500 + }, + { + "epoch": 0.56, + "learning_rate": 8.722717041210831e-06, + "loss": 1.7342, + "step": 1122000 + }, + { + "epoch": 0.56, + "learning_rate": 8.717691514045595e-06, + "loss": 1.7349, + "step": 1122500 + }, + { + "epoch": 0.56, + "learning_rate": 8.71266598688036e-06, + "loss": 1.7584, + "step": 1123000 + }, + { + "epoch": 0.56, + "learning_rate": 8.707640459715123e-06, + "loss": 1.7225, + "step": 1123500 + }, + { + "epoch": 0.56, + "learning_rate": 8.702614932549887e-06, + "loss": 1.7365, + "step": 1124000 + }, + { + "epoch": 0.57, + "learning_rate": 8.697589405384652e-06, + "loss": 1.7324, + "step": 1124500 + }, + { + "epoch": 0.57, + "learning_rate": 8.692563878219417e-06, + "loss": 1.7576, + "step": 1125000 + }, + { + "epoch": 0.57, + "learning_rate": 8.68753835105418e-06, + "loss": 1.7472, + "step": 1125500 + }, + { + "epoch": 0.57, + "learning_rate": 8.682512823888944e-06, + "loss": 1.764, + "step": 1126000 + }, + { + "epoch": 0.57, + "learning_rate": 8.67748729672371e-06, + "loss": 1.7341, + "step": 1126500 + }, + { + "epoch": 0.57, + "learning_rate": 8.672461769558473e-06, + "loss": 1.7384, + "step": 1127000 + }, + { + "epoch": 0.57, + "learning_rate": 8.667436242393236e-06, + "loss": 1.7461, + "step": 1127500 + }, + { + "epoch": 0.57, + "learning_rate": 8.662410715228001e-06, + "loss": 1.7524, + "step": 1128000 + }, + { + "epoch": 0.57, + "learning_rate": 8.657385188062766e-06, + "loss": 1.739, + "step": 1128500 + }, + { + "epoch": 0.57, + "learning_rate": 8.65235966089753e-06, + "loss": 1.7622, + "step": 1129000 + }, + { + "epoch": 0.57, + "learning_rate": 8.647334133732293e-06, + "loss": 1.7665, + "step": 1129500 + }, + { + "epoch": 0.57, + "learning_rate": 8.642308606567058e-06, + "loss": 1.7565, + "step": 1130000 + }, + { + "epoch": 0.57, + "learning_rate": 8.637283079401822e-06, + "loss": 1.7314, + "step": 1130500 + }, + { + "epoch": 0.57, + "learning_rate": 8.632257552236585e-06, + "loss": 1.7601, + "step": 1131000 + }, + { + "epoch": 0.57, + "learning_rate": 8.62723202507135e-06, + "loss": 1.7287, + "step": 1131500 + }, + { + "epoch": 0.57, + "learning_rate": 8.622206497906116e-06, + "loss": 1.758, + "step": 1132000 + }, + { + "epoch": 0.57, + "learning_rate": 8.617180970740879e-06, + "loss": 1.7492, + "step": 1132500 + }, + { + "epoch": 0.57, + "learning_rate": 8.612155443575642e-06, + "loss": 1.7444, + "step": 1133000 + }, + { + "epoch": 0.57, + "learning_rate": 8.607129916410408e-06, + "loss": 1.7574, + "step": 1133500 + }, + { + "epoch": 0.57, + "learning_rate": 8.602104389245171e-06, + "loss": 1.7366, + "step": 1134000 + }, + { + "epoch": 0.57, + "learning_rate": 8.597078862079936e-06, + "loss": 1.7376, + "step": 1134500 + }, + { + "epoch": 0.57, + "learning_rate": 8.5920533349147e-06, + "loss": 1.7445, + "step": 1135000 + }, + { + "epoch": 0.57, + "learning_rate": 8.587027807749465e-06, + "loss": 1.7384, + "step": 1135500 + }, + { + "epoch": 0.57, + "learning_rate": 8.582002280584228e-06, + "loss": 1.737, + "step": 1136000 + }, + { + "epoch": 0.57, + "learning_rate": 8.576976753418993e-06, + "loss": 1.7583, + "step": 1136500 + }, + { + "epoch": 0.57, + "learning_rate": 8.571951226253757e-06, + "loss": 1.7345, + "step": 1137000 + }, + { + "epoch": 0.57, + "learning_rate": 8.56692569908852e-06, + "loss": 1.7338, + "step": 1137500 + }, + { + "epoch": 0.57, + "learning_rate": 8.561900171923285e-06, + "loss": 1.7332, + "step": 1138000 + }, + { + "epoch": 0.57, + "learning_rate": 8.55687464475805e-06, + "loss": 1.7379, + "step": 1138500 + }, + { + "epoch": 0.57, + "learning_rate": 8.551849117592812e-06, + "loss": 1.7479, + "step": 1139000 + }, + { + "epoch": 0.57, + "learning_rate": 8.546823590427577e-06, + "loss": 1.7533, + "step": 1139500 + }, + { + "epoch": 0.57, + "learning_rate": 8.541798063262343e-06, + "loss": 1.7498, + "step": 1140000 + }, + { + "epoch": 0.57, + "learning_rate": 8.536772536097106e-06, + "loss": 1.7315, + "step": 1140500 + }, + { + "epoch": 0.57, + "learning_rate": 8.53174700893187e-06, + "loss": 1.7431, + "step": 1141000 + }, + { + "epoch": 0.57, + "learning_rate": 8.526721481766635e-06, + "loss": 1.7304, + "step": 1141500 + }, + { + "epoch": 0.57, + "learning_rate": 8.521695954601398e-06, + "loss": 1.7381, + "step": 1142000 + }, + { + "epoch": 0.57, + "learning_rate": 8.516670427436162e-06, + "loss": 1.7366, + "step": 1142500 + }, + { + "epoch": 0.57, + "learning_rate": 8.511644900270927e-06, + "loss": 1.754, + "step": 1143000 + }, + { + "epoch": 0.57, + "learning_rate": 8.506619373105692e-06, + "loss": 1.7423, + "step": 1143500 + }, + { + "epoch": 0.57, + "learning_rate": 8.501593845940455e-06, + "loss": 1.7347, + "step": 1144000 + }, + { + "epoch": 0.58, + "learning_rate": 8.496568318775219e-06, + "loss": 1.7429, + "step": 1144500 + }, + { + "epoch": 0.58, + "learning_rate": 8.491542791609984e-06, + "loss": 1.7222, + "step": 1145000 + }, + { + "epoch": 0.58, + "learning_rate": 8.486517264444747e-06, + "loss": 1.7298, + "step": 1145500 + }, + { + "epoch": 0.58, + "learning_rate": 8.481491737279512e-06, + "loss": 1.7356, + "step": 1146000 + }, + { + "epoch": 0.58, + "learning_rate": 8.476466210114276e-06, + "loss": 1.7173, + "step": 1146500 + }, + { + "epoch": 0.58, + "learning_rate": 8.471440682949041e-06, + "loss": 1.734, + "step": 1147000 + }, + { + "epoch": 0.58, + "learning_rate": 8.466415155783804e-06, + "loss": 1.7201, + "step": 1147500 + }, + { + "epoch": 0.58, + "learning_rate": 8.461389628618568e-06, + "loss": 1.7495, + "step": 1148000 + }, + { + "epoch": 0.58, + "learning_rate": 8.456364101453333e-06, + "loss": 1.7292, + "step": 1148500 + }, + { + "epoch": 0.58, + "learning_rate": 8.451338574288096e-06, + "loss": 1.7608, + "step": 1149000 + }, + { + "epoch": 0.58, + "learning_rate": 8.446313047122862e-06, + "loss": 1.7475, + "step": 1149500 + }, + { + "epoch": 0.58, + "learning_rate": 8.441287519957625e-06, + "loss": 1.7172, + "step": 1150000 + }, + { + "epoch": 0.58, + "learning_rate": 8.43626199279239e-06, + "loss": 1.7366, + "step": 1150500 + }, + { + "epoch": 0.58, + "learning_rate": 8.431236465627154e-06, + "loss": 1.7342, + "step": 1151000 + }, + { + "epoch": 0.58, + "learning_rate": 8.426210938461919e-06, + "loss": 1.7119, + "step": 1151500 + }, + { + "epoch": 0.58, + "learning_rate": 8.421185411296682e-06, + "loss": 1.745, + "step": 1152000 + }, + { + "epoch": 0.58, + "learning_rate": 8.416159884131446e-06, + "loss": 1.7546, + "step": 1152500 + }, + { + "epoch": 0.58, + "learning_rate": 8.41113435696621e-06, + "loss": 1.7346, + "step": 1153000 + }, + { + "epoch": 0.58, + "learning_rate": 8.406108829800976e-06, + "loss": 1.7389, + "step": 1153500 + }, + { + "epoch": 0.58, + "learning_rate": 8.401083302635738e-06, + "loss": 1.7545, + "step": 1154000 + }, + { + "epoch": 0.58, + "learning_rate": 8.396057775470503e-06, + "loss": 1.7254, + "step": 1154500 + }, + { + "epoch": 0.58, + "learning_rate": 8.391032248305268e-06, + "loss": 1.7453, + "step": 1155000 + }, + { + "epoch": 0.58, + "learning_rate": 8.386006721140031e-06, + "loss": 1.7434, + "step": 1155500 + }, + { + "epoch": 0.58, + "learning_rate": 8.380981193974795e-06, + "loss": 1.7256, + "step": 1156000 + }, + { + "epoch": 0.58, + "learning_rate": 8.37595566680956e-06, + "loss": 1.7473, + "step": 1156500 + }, + { + "epoch": 0.58, + "learning_rate": 8.370930139644323e-06, + "loss": 1.7503, + "step": 1157000 + }, + { + "epoch": 0.58, + "learning_rate": 8.365904612479087e-06, + "loss": 1.7566, + "step": 1157500 + }, + { + "epoch": 0.58, + "learning_rate": 8.360879085313852e-06, + "loss": 1.7499, + "step": 1158000 + }, + { + "epoch": 0.58, + "learning_rate": 8.355853558148617e-06, + "loss": 1.7416, + "step": 1158500 + }, + { + "epoch": 0.58, + "learning_rate": 8.35082803098338e-06, + "loss": 1.7367, + "step": 1159000 + }, + { + "epoch": 0.58, + "learning_rate": 8.345802503818144e-06, + "loss": 1.7305, + "step": 1159500 + }, + { + "epoch": 0.58, + "learning_rate": 8.34077697665291e-06, + "loss": 1.7359, + "step": 1160000 + }, + { + "epoch": 0.58, + "learning_rate": 8.335751449487673e-06, + "loss": 1.7382, + "step": 1160500 + }, + { + "epoch": 0.58, + "learning_rate": 8.330725922322438e-06, + "loss": 1.7324, + "step": 1161000 + }, + { + "epoch": 0.58, + "learning_rate": 8.325700395157201e-06, + "loss": 1.7313, + "step": 1161500 + }, + { + "epoch": 0.58, + "learning_rate": 8.320674867991966e-06, + "loss": 1.7354, + "step": 1162000 + }, + { + "epoch": 0.58, + "learning_rate": 8.31564934082673e-06, + "loss": 1.728, + "step": 1162500 + }, + { + "epoch": 0.58, + "learning_rate": 8.310623813661495e-06, + "loss": 1.7334, + "step": 1163000 + }, + { + "epoch": 0.58, + "learning_rate": 8.305598286496258e-06, + "loss": 1.7427, + "step": 1163500 + }, + { + "epoch": 0.58, + "learning_rate": 8.300572759331022e-06, + "loss": 1.7398, + "step": 1164000 + }, + { + "epoch": 0.59, + "learning_rate": 8.295547232165787e-06, + "loss": 1.7302, + "step": 1164500 + }, + { + "epoch": 0.59, + "learning_rate": 8.29052170500055e-06, + "loss": 1.7541, + "step": 1165000 + }, + { + "epoch": 0.59, + "learning_rate": 8.285496177835316e-06, + "loss": 1.7392, + "step": 1165500 + }, + { + "epoch": 0.59, + "learning_rate": 8.280470650670079e-06, + "loss": 1.7507, + "step": 1166000 + }, + { + "epoch": 0.59, + "learning_rate": 8.275445123504844e-06, + "loss": 1.7341, + "step": 1166500 + }, + { + "epoch": 0.59, + "learning_rate": 8.270419596339608e-06, + "loss": 1.7548, + "step": 1167000 + }, + { + "epoch": 0.59, + "learning_rate": 8.265394069174371e-06, + "loss": 1.7388, + "step": 1167500 + }, + { + "epoch": 0.59, + "learning_rate": 8.260368542009136e-06, + "loss": 1.7489, + "step": 1168000 + }, + { + "epoch": 0.59, + "learning_rate": 8.255343014843901e-06, + "loss": 1.7061, + "step": 1168500 + }, + { + "epoch": 0.59, + "learning_rate": 8.250317487678665e-06, + "loss": 1.7284, + "step": 1169000 + }, + { + "epoch": 0.59, + "learning_rate": 8.245291960513428e-06, + "loss": 1.7443, + "step": 1169500 + }, + { + "epoch": 0.59, + "learning_rate": 8.240266433348193e-06, + "loss": 1.7279, + "step": 1170000 + }, + { + "epoch": 0.59, + "learning_rate": 8.235240906182957e-06, + "loss": 1.7357, + "step": 1170500 + }, + { + "epoch": 0.59, + "learning_rate": 8.23021537901772e-06, + "loss": 1.7449, + "step": 1171000 + }, + { + "epoch": 0.59, + "learning_rate": 8.225189851852485e-06, + "loss": 1.7356, + "step": 1171500 + }, + { + "epoch": 0.59, + "learning_rate": 8.22016432468725e-06, + "loss": 1.7308, + "step": 1172000 + }, + { + "epoch": 0.59, + "learning_rate": 8.215138797522014e-06, + "loss": 1.7435, + "step": 1172500 + }, + { + "epoch": 0.59, + "learning_rate": 8.210113270356778e-06, + "loss": 1.7541, + "step": 1173000 + }, + { + "epoch": 0.59, + "learning_rate": 8.205087743191543e-06, + "loss": 1.7373, + "step": 1173500 + }, + { + "epoch": 0.59, + "learning_rate": 8.200062216026306e-06, + "loss": 1.7407, + "step": 1174000 + }, + { + "epoch": 0.59, + "learning_rate": 8.19503668886107e-06, + "loss": 1.7541, + "step": 1174500 + }, + { + "epoch": 0.59, + "learning_rate": 8.190011161695835e-06, + "loss": 1.7327, + "step": 1175000 + }, + { + "epoch": 0.59, + "learning_rate": 8.184985634530598e-06, + "loss": 1.7372, + "step": 1175500 + }, + { + "epoch": 0.59, + "learning_rate": 8.179960107365363e-06, + "loss": 1.7406, + "step": 1176000 + }, + { + "epoch": 0.59, + "learning_rate": 8.174934580200127e-06, + "loss": 1.7322, + "step": 1176500 + }, + { + "epoch": 0.59, + "learning_rate": 8.169909053034892e-06, + "loss": 1.761, + "step": 1177000 + }, + { + "epoch": 0.59, + "learning_rate": 8.164883525869655e-06, + "loss": 1.7381, + "step": 1177500 + }, + { + "epoch": 0.59, + "learning_rate": 8.15985799870442e-06, + "loss": 1.7374, + "step": 1178000 + }, + { + "epoch": 0.59, + "learning_rate": 8.154832471539184e-06, + "loss": 1.7316, + "step": 1178500 + }, + { + "epoch": 0.59, + "learning_rate": 8.149806944373947e-06, + "loss": 1.7326, + "step": 1179000 + }, + { + "epoch": 0.59, + "learning_rate": 8.144781417208712e-06, + "loss": 1.7361, + "step": 1179500 + }, + { + "epoch": 0.59, + "learning_rate": 8.139755890043478e-06, + "loss": 1.7326, + "step": 1180000 + }, + { + "epoch": 0.59, + "learning_rate": 8.134730362878241e-06, + "loss": 1.733, + "step": 1180500 + }, + { + "epoch": 0.59, + "learning_rate": 8.129704835713005e-06, + "loss": 1.728, + "step": 1181000 + }, + { + "epoch": 0.59, + "learning_rate": 8.12467930854777e-06, + "loss": 1.7349, + "step": 1181500 + }, + { + "epoch": 0.59, + "learning_rate": 8.119653781382533e-06, + "loss": 1.7267, + "step": 1182000 + }, + { + "epoch": 0.59, + "learning_rate": 8.114628254217297e-06, + "loss": 1.7385, + "step": 1182500 + }, + { + "epoch": 0.59, + "learning_rate": 8.109602727052062e-06, + "loss": 1.7328, + "step": 1183000 + }, + { + "epoch": 0.59, + "learning_rate": 8.104577199886827e-06, + "loss": 1.7496, + "step": 1183500 + }, + { + "epoch": 0.6, + "learning_rate": 8.09955167272159e-06, + "loss": 1.7339, + "step": 1184000 + }, + { + "epoch": 0.6, + "learning_rate": 8.094526145556354e-06, + "loss": 1.7534, + "step": 1184500 + }, + { + "epoch": 0.6, + "learning_rate": 8.089500618391119e-06, + "loss": 1.7303, + "step": 1185000 + }, + { + "epoch": 0.6, + "learning_rate": 8.084475091225882e-06, + "loss": 1.7375, + "step": 1185500 + }, + { + "epoch": 0.6, + "learning_rate": 8.079449564060646e-06, + "loss": 1.7448, + "step": 1186000 + }, + { + "epoch": 0.6, + "learning_rate": 8.074424036895411e-06, + "loss": 1.7406, + "step": 1186500 + }, + { + "epoch": 0.6, + "learning_rate": 8.069398509730176e-06, + "loss": 1.7275, + "step": 1187000 + }, + { + "epoch": 0.6, + "learning_rate": 8.06437298256494e-06, + "loss": 1.7251, + "step": 1187500 + }, + { + "epoch": 0.6, + "learning_rate": 8.059347455399703e-06, + "loss": 1.7221, + "step": 1188000 + }, + { + "epoch": 0.6, + "learning_rate": 8.054321928234468e-06, + "loss": 1.7329, + "step": 1188500 + }, + { + "epoch": 0.6, + "learning_rate": 8.049296401069232e-06, + "loss": 1.743, + "step": 1189000 + }, + { + "epoch": 0.6, + "learning_rate": 8.044270873903997e-06, + "loss": 1.7579, + "step": 1189500 + }, + { + "epoch": 0.6, + "learning_rate": 8.03924534673876e-06, + "loss": 1.7454, + "step": 1190000 + }, + { + "epoch": 0.6, + "learning_rate": 8.034219819573524e-06, + "loss": 1.7329, + "step": 1190500 + }, + { + "epoch": 0.6, + "learning_rate": 8.029194292408289e-06, + "loss": 1.7309, + "step": 1191000 + }, + { + "epoch": 0.6, + "learning_rate": 8.024168765243052e-06, + "loss": 1.7382, + "step": 1191500 + }, + { + "epoch": 0.6, + "learning_rate": 8.019143238077817e-06, + "loss": 1.7343, + "step": 1192000 + }, + { + "epoch": 0.6, + "learning_rate": 8.01411771091258e-06, + "loss": 1.7332, + "step": 1192500 + }, + { + "epoch": 0.6, + "learning_rate": 8.009092183747346e-06, + "loss": 1.757, + "step": 1193000 + }, + { + "epoch": 0.6, + "learning_rate": 8.00406665658211e-06, + "loss": 1.7379, + "step": 1193500 + }, + { + "epoch": 0.6, + "learning_rate": 7.999041129416873e-06, + "loss": 1.7352, + "step": 1194000 + }, + { + "epoch": 0.6, + "learning_rate": 7.994015602251638e-06, + "loss": 1.7324, + "step": 1194500 + }, + { + "epoch": 0.6, + "learning_rate": 7.988990075086403e-06, + "loss": 1.7286, + "step": 1195000 + }, + { + "epoch": 0.6, + "learning_rate": 7.983964547921166e-06, + "loss": 1.7523, + "step": 1195500 + }, + { + "epoch": 0.6, + "learning_rate": 7.97893902075593e-06, + "loss": 1.7461, + "step": 1196000 + }, + { + "epoch": 0.6, + "learning_rate": 7.973913493590695e-06, + "loss": 1.7481, + "step": 1196500 + }, + { + "epoch": 0.6, + "learning_rate": 7.968887966425459e-06, + "loss": 1.729, + "step": 1197000 + }, + { + "epoch": 0.6, + "learning_rate": 7.963862439260222e-06, + "loss": 1.7138, + "step": 1197500 + }, + { + "epoch": 0.6, + "learning_rate": 7.958836912094987e-06, + "loss": 1.7392, + "step": 1198000 + }, + { + "epoch": 0.6, + "learning_rate": 7.953811384929752e-06, + "loss": 1.735, + "step": 1198500 + }, + { + "epoch": 0.6, + "learning_rate": 7.948785857764516e-06, + "loss": 1.7189, + "step": 1199000 + }, + { + "epoch": 0.6, + "learning_rate": 7.94376033059928e-06, + "loss": 1.7179, + "step": 1199500 + }, + { + "epoch": 0.6, + "learning_rate": 7.938734803434044e-06, + "loss": 1.7239, + "step": 1200000 + }, + { + "epoch": 0.6, + "learning_rate": 7.933709276268808e-06, + "loss": 1.7426, + "step": 1200500 + }, + { + "epoch": 0.6, + "learning_rate": 7.928683749103571e-06, + "loss": 1.74, + "step": 1201000 + }, + { + "epoch": 0.6, + "learning_rate": 7.923658221938336e-06, + "loss": 1.7418, + "step": 1201500 + }, + { + "epoch": 0.6, + "learning_rate": 7.918632694773101e-06, + "loss": 1.7234, + "step": 1202000 + }, + { + "epoch": 0.6, + "learning_rate": 7.913607167607865e-06, + "loss": 1.7397, + "step": 1202500 + }, + { + "epoch": 0.6, + "learning_rate": 7.908581640442628e-06, + "loss": 1.7336, + "step": 1203000 + }, + { + "epoch": 0.6, + "learning_rate": 7.903556113277394e-06, + "loss": 1.7319, + "step": 1203500 + }, + { + "epoch": 0.61, + "learning_rate": 7.898530586112157e-06, + "loss": 1.726, + "step": 1204000 + }, + { + "epoch": 0.61, + "learning_rate": 7.893505058946922e-06, + "loss": 1.7397, + "step": 1204500 + }, + { + "epoch": 0.61, + "learning_rate": 7.888479531781686e-06, + "loss": 1.7398, + "step": 1205000 + }, + { + "epoch": 0.61, + "learning_rate": 7.883454004616449e-06, + "loss": 1.728, + "step": 1205500 + }, + { + "epoch": 0.61, + "learning_rate": 7.878428477451214e-06, + "loss": 1.7238, + "step": 1206000 + }, + { + "epoch": 0.61, + "learning_rate": 7.87340295028598e-06, + "loss": 1.7362, + "step": 1206500 + }, + { + "epoch": 0.61, + "learning_rate": 7.868377423120743e-06, + "loss": 1.7234, + "step": 1207000 + }, + { + "epoch": 0.61, + "learning_rate": 7.863351895955506e-06, + "loss": 1.7295, + "step": 1207500 + }, + { + "epoch": 0.61, + "learning_rate": 7.858326368790271e-06, + "loss": 1.7376, + "step": 1208000 + }, + { + "epoch": 0.61, + "learning_rate": 7.853300841625035e-06, + "loss": 1.7322, + "step": 1208500 + }, + { + "epoch": 0.61, + "learning_rate": 7.848275314459798e-06, + "loss": 1.7193, + "step": 1209000 + }, + { + "epoch": 0.61, + "learning_rate": 7.843249787294563e-06, + "loss": 1.7394, + "step": 1209500 + }, + { + "epoch": 0.61, + "learning_rate": 7.838224260129328e-06, + "loss": 1.7326, + "step": 1210000 + }, + { + "epoch": 0.61, + "learning_rate": 7.833198732964092e-06, + "loss": 1.7166, + "step": 1210500 + }, + { + "epoch": 0.61, + "learning_rate": 7.828173205798855e-06, + "loss": 1.7345, + "step": 1211000 + }, + { + "epoch": 0.61, + "learning_rate": 7.82314767863362e-06, + "loss": 1.721, + "step": 1211500 + }, + { + "epoch": 0.61, + "learning_rate": 7.818122151468384e-06, + "loss": 1.7252, + "step": 1212000 + }, + { + "epoch": 0.61, + "learning_rate": 7.813096624303147e-06, + "loss": 1.7343, + "step": 1212500 + }, + { + "epoch": 0.61, + "learning_rate": 7.808071097137913e-06, + "loss": 1.7309, + "step": 1213000 + }, + { + "epoch": 0.61, + "learning_rate": 7.803045569972678e-06, + "loss": 1.7227, + "step": 1213500 + }, + { + "epoch": 0.61, + "learning_rate": 7.798020042807441e-06, + "loss": 1.7302, + "step": 1214000 + }, + { + "epoch": 0.61, + "learning_rate": 7.792994515642205e-06, + "loss": 1.7349, + "step": 1214500 + }, + { + "epoch": 0.61, + "learning_rate": 7.78796898847697e-06, + "loss": 1.7346, + "step": 1215000 + }, + { + "epoch": 0.61, + "learning_rate": 7.782943461311733e-06, + "loss": 1.7393, + "step": 1215500 + }, + { + "epoch": 0.61, + "learning_rate": 7.777917934146498e-06, + "loss": 1.7357, + "step": 1216000 + }, + { + "epoch": 0.61, + "learning_rate": 7.772892406981262e-06, + "loss": 1.7342, + "step": 1216500 + }, + { + "epoch": 0.61, + "learning_rate": 7.767866879816027e-06, + "loss": 1.725, + "step": 1217000 + }, + { + "epoch": 0.61, + "learning_rate": 7.76284135265079e-06, + "loss": 1.7415, + "step": 1217500 + }, + { + "epoch": 0.61, + "learning_rate": 7.757815825485554e-06, + "loss": 1.7207, + "step": 1218000 + }, + { + "epoch": 0.61, + "learning_rate": 7.752790298320319e-06, + "loss": 1.7397, + "step": 1218500 + }, + { + "epoch": 0.61, + "learning_rate": 7.747764771155082e-06, + "loss": 1.7185, + "step": 1219000 + }, + { + "epoch": 0.61, + "learning_rate": 7.742739243989848e-06, + "loss": 1.7375, + "step": 1219500 + }, + { + "epoch": 0.61, + "learning_rate": 7.737713716824611e-06, + "loss": 1.7436, + "step": 1220000 + }, + { + "epoch": 0.61, + "learning_rate": 7.732688189659376e-06, + "loss": 1.7209, + "step": 1220500 + }, + { + "epoch": 0.61, + "learning_rate": 7.72766266249414e-06, + "loss": 1.7191, + "step": 1221000 + }, + { + "epoch": 0.61, + "learning_rate": 7.722637135328905e-06, + "loss": 1.7361, + "step": 1221500 + }, + { + "epoch": 0.61, + "learning_rate": 7.717611608163668e-06, + "loss": 1.734, + "step": 1222000 + }, + { + "epoch": 0.61, + "learning_rate": 7.712586080998432e-06, + "loss": 1.7361, + "step": 1222500 + }, + { + "epoch": 0.61, + "learning_rate": 7.707560553833197e-06, + "loss": 1.7391, + "step": 1223000 + }, + { + "epoch": 0.61, + "learning_rate": 7.702535026667962e-06, + "loss": 1.735, + "step": 1223500 + }, + { + "epoch": 0.62, + "learning_rate": 7.697509499502724e-06, + "loss": 1.7341, + "step": 1224000 + }, + { + "epoch": 0.62, + "learning_rate": 7.692483972337489e-06, + "loss": 1.7308, + "step": 1224500 + }, + { + "epoch": 0.62, + "learning_rate": 7.687458445172254e-06, + "loss": 1.7318, + "step": 1225000 + }, + { + "epoch": 0.62, + "learning_rate": 7.682432918007017e-06, + "loss": 1.7383, + "step": 1225500 + }, + { + "epoch": 0.62, + "learning_rate": 7.67740739084178e-06, + "loss": 1.7356, + "step": 1226000 + }, + { + "epoch": 0.62, + "learning_rate": 7.672381863676546e-06, + "loss": 1.7394, + "step": 1226500 + }, + { + "epoch": 0.62, + "learning_rate": 7.66735633651131e-06, + "loss": 1.7498, + "step": 1227000 + }, + { + "epoch": 0.62, + "learning_rate": 7.662330809346073e-06, + "loss": 1.7406, + "step": 1227500 + }, + { + "epoch": 0.62, + "learning_rate": 7.657305282180838e-06, + "loss": 1.7502, + "step": 1228000 + }, + { + "epoch": 0.62, + "learning_rate": 7.652279755015603e-06, + "loss": 1.7481, + "step": 1228500 + }, + { + "epoch": 0.62, + "learning_rate": 7.647254227850367e-06, + "loss": 1.7251, + "step": 1229000 + }, + { + "epoch": 0.62, + "learning_rate": 7.64222870068513e-06, + "loss": 1.7322, + "step": 1229500 + }, + { + "epoch": 0.62, + "learning_rate": 7.637203173519895e-06, + "loss": 1.7337, + "step": 1230000 + }, + { + "epoch": 0.62, + "learning_rate": 7.632177646354659e-06, + "loss": 1.7229, + "step": 1230500 + }, + { + "epoch": 0.62, + "learning_rate": 7.627152119189424e-06, + "loss": 1.7382, + "step": 1231000 + }, + { + "epoch": 0.62, + "learning_rate": 7.622126592024187e-06, + "loss": 1.7245, + "step": 1231500 + }, + { + "epoch": 0.62, + "learning_rate": 7.6171010648589515e-06, + "loss": 1.7283, + "step": 1232000 + }, + { + "epoch": 0.62, + "learning_rate": 7.612075537693716e-06, + "loss": 1.7377, + "step": 1232500 + }, + { + "epoch": 0.62, + "learning_rate": 7.607050010528481e-06, + "loss": 1.7206, + "step": 1233000 + }, + { + "epoch": 0.62, + "learning_rate": 7.602024483363244e-06, + "loss": 1.7171, + "step": 1233500 + }, + { + "epoch": 0.62, + "learning_rate": 7.596998956198009e-06, + "loss": 1.7322, + "step": 1234000 + }, + { + "epoch": 0.62, + "learning_rate": 7.591973429032773e-06, + "loss": 1.7119, + "step": 1234500 + }, + { + "epoch": 0.62, + "learning_rate": 7.586947901867536e-06, + "loss": 1.7434, + "step": 1235000 + }, + { + "epoch": 0.62, + "learning_rate": 7.581922374702301e-06, + "loss": 1.7439, + "step": 1235500 + }, + { + "epoch": 0.62, + "learning_rate": 7.576896847537065e-06, + "loss": 1.7322, + "step": 1236000 + }, + { + "epoch": 0.62, + "learning_rate": 7.57187132037183e-06, + "loss": 1.7352, + "step": 1236500 + }, + { + "epoch": 0.62, + "learning_rate": 7.566845793206593e-06, + "loss": 1.739, + "step": 1237000 + }, + { + "epoch": 0.62, + "learning_rate": 7.561820266041358e-06, + "loss": 1.7448, + "step": 1237500 + }, + { + "epoch": 0.62, + "learning_rate": 7.556794738876122e-06, + "loss": 1.7505, + "step": 1238000 + }, + { + "epoch": 0.62, + "learning_rate": 7.5517692117108865e-06, + "loss": 1.7162, + "step": 1238500 + }, + { + "epoch": 0.62, + "learning_rate": 7.54674368454565e-06, + "loss": 1.7166, + "step": 1239000 + }, + { + "epoch": 0.62, + "learning_rate": 7.541718157380414e-06, + "loss": 1.7395, + "step": 1239500 + }, + { + "epoch": 0.62, + "learning_rate": 7.5366926302151785e-06, + "loss": 1.7327, + "step": 1240000 + }, + { + "epoch": 0.62, + "learning_rate": 7.531667103049944e-06, + "loss": 1.7286, + "step": 1240500 + }, + { + "epoch": 0.62, + "learning_rate": 7.526641575884707e-06, + "loss": 1.7287, + "step": 1241000 + }, + { + "epoch": 0.62, + "learning_rate": 7.521616048719471e-06, + "loss": 1.7273, + "step": 1241500 + }, + { + "epoch": 0.62, + "learning_rate": 7.516590521554236e-06, + "loss": 1.7405, + "step": 1242000 + }, + { + "epoch": 0.62, + "learning_rate": 7.511564994388999e-06, + "loss": 1.7264, + "step": 1242500 + }, + { + "epoch": 0.62, + "learning_rate": 7.506539467223763e-06, + "loss": 1.7318, + "step": 1243000 + }, + { + "epoch": 0.62, + "learning_rate": 7.501513940058528e-06, + "loss": 1.7193, + "step": 1243500 + }, + { + "epoch": 0.63, + "learning_rate": 7.496488412893293e-06, + "loss": 1.733, + "step": 1244000 + }, + { + "epoch": 0.63, + "learning_rate": 7.4914628857280554e-06, + "loss": 1.7388, + "step": 1244500 + }, + { + "epoch": 0.63, + "learning_rate": 7.486437358562821e-06, + "loss": 1.7098, + "step": 1245000 + }, + { + "epoch": 0.63, + "learning_rate": 7.481411831397585e-06, + "loss": 1.7346, + "step": 1245500 + }, + { + "epoch": 0.63, + "learning_rate": 7.476386304232349e-06, + "loss": 1.7331, + "step": 1246000 + }, + { + "epoch": 0.63, + "learning_rate": 7.471360777067113e-06, + "loss": 1.7336, + "step": 1246500 + }, + { + "epoch": 0.63, + "learning_rate": 7.466335249901877e-06, + "loss": 1.7227, + "step": 1247000 + }, + { + "epoch": 0.63, + "learning_rate": 7.461309722736641e-06, + "loss": 1.7225, + "step": 1247500 + }, + { + "epoch": 0.63, + "learning_rate": 7.456284195571406e-06, + "loss": 1.724, + "step": 1248000 + }, + { + "epoch": 0.63, + "learning_rate": 7.45125866840617e-06, + "loss": 1.74, + "step": 1248500 + }, + { + "epoch": 0.63, + "learning_rate": 7.446233141240934e-06, + "loss": 1.7329, + "step": 1249000 + }, + { + "epoch": 0.63, + "learning_rate": 7.441207614075698e-06, + "loss": 1.7467, + "step": 1249500 + }, + { + "epoch": 0.63, + "learning_rate": 7.436182086910463e-06, + "loss": 1.7159, + "step": 1250000 + }, + { + "epoch": 0.63, + "learning_rate": 7.431156559745226e-06, + "loss": 1.7313, + "step": 1250500 + }, + { + "epoch": 0.63, + "learning_rate": 7.42613103257999e-06, + "loss": 1.734, + "step": 1251000 + }, + { + "epoch": 0.63, + "learning_rate": 7.4211055054147555e-06, + "loss": 1.7181, + "step": 1251500 + }, + { + "epoch": 0.63, + "learning_rate": 7.416079978249518e-06, + "loss": 1.734, + "step": 1252000 + }, + { + "epoch": 0.63, + "learning_rate": 7.411054451084283e-06, + "loss": 1.7278, + "step": 1252500 + }, + { + "epoch": 0.63, + "learning_rate": 7.406028923919048e-06, + "loss": 1.7196, + "step": 1253000 + }, + { + "epoch": 0.63, + "learning_rate": 7.401003396753812e-06, + "loss": 1.7312, + "step": 1253500 + }, + { + "epoch": 0.63, + "learning_rate": 7.395977869588575e-06, + "loss": 1.7402, + "step": 1254000 + }, + { + "epoch": 0.63, + "learning_rate": 7.39095234242334e-06, + "loss": 1.7354, + "step": 1254500 + }, + { + "epoch": 0.63, + "learning_rate": 7.385926815258104e-06, + "loss": 1.7313, + "step": 1255000 + }, + { + "epoch": 0.63, + "learning_rate": 7.380901288092869e-06, + "loss": 1.7254, + "step": 1255500 + }, + { + "epoch": 0.63, + "learning_rate": 7.3758757609276325e-06, + "loss": 1.7326, + "step": 1256000 + }, + { + "epoch": 0.63, + "learning_rate": 7.370850233762397e-06, + "loss": 1.7359, + "step": 1256500 + }, + { + "epoch": 0.63, + "learning_rate": 7.365824706597161e-06, + "loss": 1.7196, + "step": 1257000 + }, + { + "epoch": 0.63, + "learning_rate": 7.360799179431925e-06, + "loss": 1.7285, + "step": 1257500 + }, + { + "epoch": 0.63, + "learning_rate": 7.355773652266689e-06, + "loss": 1.724, + "step": 1258000 + }, + { + "epoch": 0.63, + "learning_rate": 7.350748125101453e-06, + "loss": 1.7355, + "step": 1258500 + }, + { + "epoch": 0.63, + "learning_rate": 7.345722597936218e-06, + "loss": 1.7217, + "step": 1259000 + }, + { + "epoch": 0.63, + "learning_rate": 7.340697070770981e-06, + "loss": 1.7319, + "step": 1259500 + }, + { + "epoch": 0.63, + "learning_rate": 7.335671543605746e-06, + "loss": 1.721, + "step": 1260000 + }, + { + "epoch": 0.63, + "learning_rate": 7.33064601644051e-06, + "loss": 1.7331, + "step": 1260500 + }, + { + "epoch": 0.63, + "learning_rate": 7.325620489275275e-06, + "loss": 1.7177, + "step": 1261000 + }, + { + "epoch": 0.63, + "learning_rate": 7.320594962110038e-06, + "loss": 1.7481, + "step": 1261500 + }, + { + "epoch": 0.63, + "learning_rate": 7.315569434944802e-06, + "loss": 1.7217, + "step": 1262000 + }, + { + "epoch": 0.63, + "learning_rate": 7.310543907779567e-06, + "loss": 1.7143, + "step": 1262500 + }, + { + "epoch": 0.63, + "learning_rate": 7.305518380614332e-06, + "loss": 1.7337, + "step": 1263000 + }, + { + "epoch": 0.63, + "learning_rate": 7.300492853449095e-06, + "loss": 1.7274, + "step": 1263500 + }, + { + "epoch": 0.64, + "learning_rate": 7.2954673262838595e-06, + "loss": 1.7209, + "step": 1264000 + }, + { + "epoch": 0.64, + "learning_rate": 7.290441799118624e-06, + "loss": 1.7133, + "step": 1264500 + }, + { + "epoch": 0.64, + "learning_rate": 7.285416271953388e-06, + "loss": 1.7403, + "step": 1265000 + }, + { + "epoch": 0.64, + "learning_rate": 7.2803907447881515e-06, + "loss": 1.7323, + "step": 1265500 + }, + { + "epoch": 0.64, + "learning_rate": 7.275365217622916e-06, + "loss": 1.7288, + "step": 1266000 + }, + { + "epoch": 0.64, + "learning_rate": 7.270339690457681e-06, + "loss": 1.7317, + "step": 1266500 + }, + { + "epoch": 0.64, + "learning_rate": 7.265314163292445e-06, + "loss": 1.755, + "step": 1267000 + }, + { + "epoch": 0.64, + "learning_rate": 7.260288636127209e-06, + "loss": 1.7174, + "step": 1267500 + }, + { + "epoch": 0.64, + "learning_rate": 7.255263108961973e-06, + "loss": 1.7447, + "step": 1268000 + }, + { + "epoch": 0.64, + "learning_rate": 7.250237581796737e-06, + "loss": 1.7225, + "step": 1268500 + }, + { + "epoch": 0.64, + "learning_rate": 7.245212054631501e-06, + "loss": 1.7213, + "step": 1269000 + }, + { + "epoch": 0.64, + "learning_rate": 7.240186527466265e-06, + "loss": 1.727, + "step": 1269500 + }, + { + "epoch": 0.64, + "learning_rate": 7.235161000301029e-06, + "loss": 1.722, + "step": 1270000 + }, + { + "epoch": 0.64, + "learning_rate": 7.2301354731357945e-06, + "loss": 1.7105, + "step": 1270500 + }, + { + "epoch": 0.64, + "learning_rate": 7.225109945970558e-06, + "loss": 1.7153, + "step": 1271000 + }, + { + "epoch": 0.64, + "learning_rate": 7.220084418805322e-06, + "loss": 1.7297, + "step": 1271500 + }, + { + "epoch": 0.64, + "learning_rate": 7.2150588916400865e-06, + "loss": 1.7373, + "step": 1272000 + }, + { + "epoch": 0.64, + "learning_rate": 7.210033364474851e-06, + "loss": 1.7184, + "step": 1272500 + }, + { + "epoch": 0.64, + "learning_rate": 7.205007837309614e-06, + "loss": 1.7186, + "step": 1273000 + }, + { + "epoch": 0.64, + "learning_rate": 7.1999823101443786e-06, + "loss": 1.7386, + "step": 1273500 + }, + { + "epoch": 0.64, + "learning_rate": 7.194956782979144e-06, + "loss": 1.717, + "step": 1274000 + }, + { + "epoch": 0.64, + "learning_rate": 7.189931255813908e-06, + "loss": 1.7259, + "step": 1274500 + }, + { + "epoch": 0.64, + "learning_rate": 7.1849057286486714e-06, + "loss": 1.7299, + "step": 1275000 + }, + { + "epoch": 0.64, + "learning_rate": 7.179880201483436e-06, + "loss": 1.7268, + "step": 1275500 + }, + { + "epoch": 0.64, + "learning_rate": 7.1748546743182e-06, + "loss": 1.72, + "step": 1276000 + }, + { + "epoch": 0.64, + "learning_rate": 7.1698291471529635e-06, + "loss": 1.7224, + "step": 1276500 + }, + { + "epoch": 0.64, + "learning_rate": 7.164803619987728e-06, + "loss": 1.7447, + "step": 1277000 + }, + { + "epoch": 0.64, + "learning_rate": 7.159778092822492e-06, + "loss": 1.7432, + "step": 1277500 + }, + { + "epoch": 0.64, + "learning_rate": 7.154752565657257e-06, + "loss": 1.7409, + "step": 1278000 + }, + { + "epoch": 0.64, + "learning_rate": 7.149727038492021e-06, + "loss": 1.7218, + "step": 1278500 + }, + { + "epoch": 0.64, + "learning_rate": 7.144701511326785e-06, + "loss": 1.7303, + "step": 1279000 + }, + { + "epoch": 0.64, + "learning_rate": 7.139675984161549e-06, + "loss": 1.7261, + "step": 1279500 + }, + { + "epoch": 0.64, + "learning_rate": 7.1346504569963135e-06, + "loss": 1.7374, + "step": 1280000 + }, + { + "epoch": 0.64, + "learning_rate": 7.129624929831077e-06, + "loss": 1.7401, + "step": 1280500 + }, + { + "epoch": 0.64, + "learning_rate": 7.124599402665841e-06, + "loss": 1.7327, + "step": 1281000 + }, + { + "epoch": 0.64, + "learning_rate": 7.119573875500606e-06, + "loss": 1.7201, + "step": 1281500 + }, + { + "epoch": 0.64, + "learning_rate": 7.114548348335371e-06, + "loss": 1.7062, + "step": 1282000 + }, + { + "epoch": 0.64, + "learning_rate": 7.109522821170134e-06, + "loss": 1.7339, + "step": 1282500 + }, + { + "epoch": 0.64, + "learning_rate": 7.1044972940048984e-06, + "loss": 1.7346, + "step": 1283000 + }, + { + "epoch": 0.65, + "learning_rate": 7.099471766839663e-06, + "loss": 1.7184, + "step": 1283500 + }, + { + "epoch": 0.65, + "learning_rate": 7.094446239674427e-06, + "loss": 1.7214, + "step": 1284000 + }, + { + "epoch": 0.65, + "learning_rate": 7.0894207125091905e-06, + "loss": 1.7213, + "step": 1284500 + }, + { + "epoch": 0.65, + "learning_rate": 7.084395185343956e-06, + "loss": 1.712, + "step": 1285000 + }, + { + "epoch": 0.65, + "learning_rate": 7.07936965817872e-06, + "loss": 1.7284, + "step": 1285500 + }, + { + "epoch": 0.65, + "learning_rate": 7.074344131013483e-06, + "loss": 1.7239, + "step": 1286000 + }, + { + "epoch": 0.65, + "learning_rate": 7.069318603848248e-06, + "loss": 1.7264, + "step": 1286500 + }, + { + "epoch": 0.65, + "learning_rate": 7.064293076683012e-06, + "loss": 1.7205, + "step": 1287000 + }, + { + "epoch": 0.65, + "learning_rate": 7.059267549517776e-06, + "loss": 1.7308, + "step": 1287500 + }, + { + "epoch": 0.65, + "learning_rate": 7.05424202235254e-06, + "loss": 1.7274, + "step": 1288000 + }, + { + "epoch": 0.65, + "learning_rate": 7.049216495187304e-06, + "loss": 1.7367, + "step": 1288500 + }, + { + "epoch": 0.65, + "learning_rate": 7.044190968022069e-06, + "loss": 1.72, + "step": 1289000 + }, + { + "epoch": 0.65, + "learning_rate": 7.039165440856833e-06, + "loss": 1.7389, + "step": 1289500 + }, + { + "epoch": 0.65, + "learning_rate": 7.034139913691597e-06, + "loss": 1.7299, + "step": 1290000 + }, + { + "epoch": 0.65, + "learning_rate": 7.029114386526361e-06, + "loss": 1.7272, + "step": 1290500 + }, + { + "epoch": 0.65, + "learning_rate": 7.0240888593611254e-06, + "loss": 1.7475, + "step": 1291000 + }, + { + "epoch": 0.65, + "learning_rate": 7.01906333219589e-06, + "loss": 1.7242, + "step": 1291500 + }, + { + "epoch": 0.65, + "learning_rate": 7.014037805030653e-06, + "loss": 1.7294, + "step": 1292000 + }, + { + "epoch": 0.65, + "learning_rate": 7.009012277865418e-06, + "loss": 1.7208, + "step": 1292500 + }, + { + "epoch": 0.65, + "learning_rate": 7.003986750700183e-06, + "loss": 1.7313, + "step": 1293000 + }, + { + "epoch": 0.65, + "learning_rate": 6.998961223534946e-06, + "loss": 1.7164, + "step": 1293500 + }, + { + "epoch": 0.65, + "learning_rate": 6.99393569636971e-06, + "loss": 1.7343, + "step": 1294000 + }, + { + "epoch": 0.65, + "learning_rate": 6.988910169204475e-06, + "loss": 1.7169, + "step": 1294500 + }, + { + "epoch": 0.65, + "learning_rate": 6.983884642039239e-06, + "loss": 1.7163, + "step": 1295000 + }, + { + "epoch": 0.65, + "learning_rate": 6.978859114874002e-06, + "loss": 1.732, + "step": 1295500 + }, + { + "epoch": 0.65, + "learning_rate": 6.973833587708767e-06, + "loss": 1.7315, + "step": 1296000 + }, + { + "epoch": 0.65, + "learning_rate": 6.968808060543532e-06, + "loss": 1.7195, + "step": 1296500 + }, + { + "epoch": 0.65, + "learning_rate": 6.963782533378296e-06, + "loss": 1.7231, + "step": 1297000 + }, + { + "epoch": 0.65, + "learning_rate": 6.9587570062130596e-06, + "loss": 1.7219, + "step": 1297500 + }, + { + "epoch": 0.65, + "learning_rate": 6.953731479047824e-06, + "loss": 1.728, + "step": 1298000 + }, + { + "epoch": 0.65, + "learning_rate": 6.948705951882588e-06, + "loss": 1.7227, + "step": 1298500 + }, + { + "epoch": 0.65, + "learning_rate": 6.9436804247173525e-06, + "loss": 1.7214, + "step": 1299000 + }, + { + "epoch": 0.65, + "learning_rate": 6.938654897552116e-06, + "loss": 1.7299, + "step": 1299500 + }, + { + "epoch": 0.65, + "learning_rate": 6.933629370386881e-06, + "loss": 1.7228, + "step": 1300000 + }, + { + "epoch": 0.65, + "learning_rate": 6.928603843221645e-06, + "loss": 1.7165, + "step": 1300500 + }, + { + "epoch": 0.65, + "learning_rate": 6.92357831605641e-06, + "loss": 1.7378, + "step": 1301000 + }, + { + "epoch": 0.65, + "learning_rate": 6.918552788891173e-06, + "loss": 1.7063, + "step": 1301500 + }, + { + "epoch": 0.65, + "learning_rate": 6.913527261725937e-06, + "loss": 1.7229, + "step": 1302000 + }, + { + "epoch": 0.65, + "learning_rate": 6.908501734560702e-06, + "loss": 1.7105, + "step": 1302500 + }, + { + "epoch": 0.65, + "learning_rate": 6.903476207395465e-06, + "loss": 1.7401, + "step": 1303000 + }, + { + "epoch": 0.66, + "learning_rate": 6.898450680230229e-06, + "loss": 1.7405, + "step": 1303500 + }, + { + "epoch": 0.66, + "learning_rate": 6.8934251530649945e-06, + "loss": 1.7249, + "step": 1304000 + }, + { + "epoch": 0.66, + "learning_rate": 6.888399625899759e-06, + "loss": 1.7163, + "step": 1304500 + }, + { + "epoch": 0.66, + "learning_rate": 6.883374098734522e-06, + "loss": 1.7064, + "step": 1305000 + }, + { + "epoch": 0.66, + "learning_rate": 6.878348571569287e-06, + "loss": 1.7199, + "step": 1305500 + }, + { + "epoch": 0.66, + "learning_rate": 6.873323044404051e-06, + "loss": 1.7232, + "step": 1306000 + }, + { + "epoch": 0.66, + "learning_rate": 6.868297517238815e-06, + "loss": 1.7338, + "step": 1306500 + }, + { + "epoch": 0.66, + "learning_rate": 6.863271990073579e-06, + "loss": 1.744, + "step": 1307000 + }, + { + "epoch": 0.66, + "learning_rate": 6.858246462908344e-06, + "loss": 1.7275, + "step": 1307500 + }, + { + "epoch": 0.66, + "learning_rate": 6.853220935743108e-06, + "loss": 1.7423, + "step": 1308000 + }, + { + "epoch": 0.66, + "learning_rate": 6.848195408577872e-06, + "loss": 1.717, + "step": 1308500 + }, + { + "epoch": 0.66, + "learning_rate": 6.843169881412636e-06, + "loss": 1.7321, + "step": 1309000 + }, + { + "epoch": 0.66, + "learning_rate": 6.8381443542474e-06, + "loss": 1.7051, + "step": 1309500 + }, + { + "epoch": 0.66, + "learning_rate": 6.833118827082164e-06, + "loss": 1.7243, + "step": 1310000 + }, + { + "epoch": 0.66, + "learning_rate": 6.828093299916928e-06, + "loss": 1.7255, + "step": 1310500 + }, + { + "epoch": 0.66, + "learning_rate": 6.823067772751692e-06, + "loss": 1.7226, + "step": 1311000 + }, + { + "epoch": 0.66, + "learning_rate": 6.818042245586457e-06, + "loss": 1.7325, + "step": 1311500 + }, + { + "epoch": 0.66, + "learning_rate": 6.8130167184212216e-06, + "loss": 1.7352, + "step": 1312000 + }, + { + "epoch": 0.66, + "learning_rate": 6.807991191255985e-06, + "loss": 1.709, + "step": 1312500 + }, + { + "epoch": 0.66, + "learning_rate": 6.802965664090749e-06, + "loss": 1.7316, + "step": 1313000 + }, + { + "epoch": 0.66, + "learning_rate": 6.797940136925514e-06, + "loss": 1.7263, + "step": 1313500 + }, + { + "epoch": 0.66, + "learning_rate": 6.792914609760278e-06, + "loss": 1.7234, + "step": 1314000 + }, + { + "epoch": 0.66, + "learning_rate": 6.787889082595041e-06, + "loss": 1.7352, + "step": 1314500 + }, + { + "epoch": 0.66, + "learning_rate": 6.7828635554298065e-06, + "loss": 1.726, + "step": 1315000 + }, + { + "epoch": 0.66, + "learning_rate": 6.777838028264571e-06, + "loss": 1.7404, + "step": 1315500 + }, + { + "epoch": 0.66, + "learning_rate": 6.772812501099335e-06, + "loss": 1.7024, + "step": 1316000 + }, + { + "epoch": 0.66, + "learning_rate": 6.7677869739340985e-06, + "loss": 1.726, + "step": 1316500 + }, + { + "epoch": 0.66, + "learning_rate": 6.762761446768863e-06, + "loss": 1.7152, + "step": 1317000 + }, + { + "epoch": 0.66, + "learning_rate": 6.757735919603627e-06, + "loss": 1.7169, + "step": 1317500 + }, + { + "epoch": 0.66, + "learning_rate": 6.752710392438392e-06, + "loss": 1.7234, + "step": 1318000 + }, + { + "epoch": 0.66, + "learning_rate": 6.747684865273155e-06, + "loss": 1.7207, + "step": 1318500 + }, + { + "epoch": 0.66, + "learning_rate": 6.74265933810792e-06, + "loss": 1.7175, + "step": 1319000 + }, + { + "epoch": 0.66, + "learning_rate": 6.737633810942684e-06, + "loss": 1.7096, + "step": 1319500 + }, + { + "epoch": 0.66, + "learning_rate": 6.732608283777448e-06, + "loss": 1.7384, + "step": 1320000 + }, + { + "epoch": 0.66, + "learning_rate": 6.727582756612212e-06, + "loss": 1.7261, + "step": 1320500 + }, + { + "epoch": 0.66, + "learning_rate": 6.722557229446976e-06, + "loss": 1.7281, + "step": 1321000 + }, + { + "epoch": 0.66, + "learning_rate": 6.717531702281741e-06, + "loss": 1.7381, + "step": 1321500 + }, + { + "epoch": 0.66, + "learning_rate": 6.712506175116504e-06, + "loss": 1.7458, + "step": 1322000 + }, + { + "epoch": 0.66, + "learning_rate": 6.707480647951269e-06, + "loss": 1.7322, + "step": 1322500 + }, + { + "epoch": 0.66, + "learning_rate": 6.7024551207860335e-06, + "loss": 1.7196, + "step": 1323000 + }, + { + "epoch": 0.67, + "learning_rate": 6.697429593620798e-06, + "loss": 1.7122, + "step": 1323500 + }, + { + "epoch": 0.67, + "learning_rate": 6.692404066455561e-06, + "loss": 1.7165, + "step": 1324000 + }, + { + "epoch": 0.67, + "learning_rate": 6.6873785392903255e-06, + "loss": 1.7267, + "step": 1324500 + }, + { + "epoch": 0.67, + "learning_rate": 6.68235301212509e-06, + "loss": 1.74, + "step": 1325000 + }, + { + "epoch": 0.67, + "learning_rate": 6.677327484959855e-06, + "loss": 1.7218, + "step": 1325500 + }, + { + "epoch": 0.67, + "learning_rate": 6.672301957794618e-06, + "loss": 1.7332, + "step": 1326000 + }, + { + "epoch": 0.67, + "learning_rate": 6.667276430629383e-06, + "loss": 1.715, + "step": 1326500 + }, + { + "epoch": 0.67, + "learning_rate": 6.662250903464147e-06, + "loss": 1.7108, + "step": 1327000 + }, + { + "epoch": 0.67, + "learning_rate": 6.657225376298911e-06, + "loss": 1.7086, + "step": 1327500 + }, + { + "epoch": 0.67, + "learning_rate": 6.652199849133675e-06, + "loss": 1.7229, + "step": 1328000 + }, + { + "epoch": 0.67, + "learning_rate": 6.647174321968439e-06, + "loss": 1.7051, + "step": 1328500 + }, + { + "epoch": 0.67, + "learning_rate": 6.642148794803204e-06, + "loss": 1.728, + "step": 1329000 + }, + { + "epoch": 0.67, + "learning_rate": 6.637123267637967e-06, + "loss": 1.743, + "step": 1329500 + }, + { + "epoch": 0.67, + "learning_rate": 6.632097740472732e-06, + "loss": 1.7381, + "step": 1330000 + }, + { + "epoch": 0.67, + "learning_rate": 6.627072213307496e-06, + "loss": 1.706, + "step": 1330500 + }, + { + "epoch": 0.67, + "learning_rate": 6.6220466861422605e-06, + "loss": 1.7121, + "step": 1331000 + }, + { + "epoch": 0.67, + "learning_rate": 6.617021158977024e-06, + "loss": 1.6993, + "step": 1331500 + }, + { + "epoch": 0.67, + "learning_rate": 6.611995631811788e-06, + "loss": 1.7132, + "step": 1332000 + }, + { + "epoch": 0.67, + "learning_rate": 6.6069701046465525e-06, + "loss": 1.724, + "step": 1332500 + }, + { + "epoch": 0.67, + "learning_rate": 6.601944577481318e-06, + "loss": 1.7193, + "step": 1333000 + }, + { + "epoch": 0.67, + "learning_rate": 6.596919050316081e-06, + "loss": 1.719, + "step": 1333500 + }, + { + "epoch": 0.67, + "learning_rate": 6.591893523150845e-06, + "loss": 1.7087, + "step": 1334000 + }, + { + "epoch": 0.67, + "learning_rate": 6.58686799598561e-06, + "loss": 1.7175, + "step": 1334500 + }, + { + "epoch": 0.67, + "learning_rate": 6.581842468820374e-06, + "loss": 1.7188, + "step": 1335000 + }, + { + "epoch": 0.67, + "learning_rate": 6.5768169416551374e-06, + "loss": 1.7356, + "step": 1335500 + }, + { + "epoch": 0.67, + "learning_rate": 6.571791414489902e-06, + "loss": 1.7072, + "step": 1336000 + }, + { + "epoch": 0.67, + "learning_rate": 6.566765887324667e-06, + "loss": 1.7239, + "step": 1336500 + }, + { + "epoch": 0.67, + "learning_rate": 6.5617403601594295e-06, + "loss": 1.7392, + "step": 1337000 + }, + { + "epoch": 0.67, + "learning_rate": 6.556714832994195e-06, + "loss": 1.7028, + "step": 1337500 + }, + { + "epoch": 0.67, + "learning_rate": 6.551689305828959e-06, + "loss": 1.7238, + "step": 1338000 + }, + { + "epoch": 0.67, + "learning_rate": 6.546663778663723e-06, + "loss": 1.7188, + "step": 1338500 + }, + { + "epoch": 0.67, + "learning_rate": 6.541638251498487e-06, + "loss": 1.7133, + "step": 1339000 + }, + { + "epoch": 0.67, + "learning_rate": 6.536612724333251e-06, + "loss": 1.7191, + "step": 1339500 + }, + { + "epoch": 0.67, + "learning_rate": 6.531587197168015e-06, + "loss": 1.7127, + "step": 1340000 + }, + { + "epoch": 0.67, + "learning_rate": 6.52656167000278e-06, + "loss": 1.7203, + "step": 1340500 + }, + { + "epoch": 0.67, + "learning_rate": 6.521536142837544e-06, + "loss": 1.7251, + "step": 1341000 + }, + { + "epoch": 0.67, + "learning_rate": 6.516510615672308e-06, + "loss": 1.7434, + "step": 1341500 + }, + { + "epoch": 0.67, + "learning_rate": 6.511485088507072e-06, + "loss": 1.7262, + "step": 1342000 + }, + { + "epoch": 0.67, + "learning_rate": 6.506459561341837e-06, + "loss": 1.7144, + "step": 1342500 + }, + { + "epoch": 0.67, + "learning_rate": 6.5014340341766e-06, + "loss": 1.7254, + "step": 1343000 + }, + { + "epoch": 0.68, + "learning_rate": 6.4964085070113644e-06, + "loss": 1.7019, + "step": 1343500 + }, + { + "epoch": 0.68, + "learning_rate": 6.49138297984613e-06, + "loss": 1.7329, + "step": 1344000 + }, + { + "epoch": 0.68, + "learning_rate": 6.486357452680894e-06, + "loss": 1.7373, + "step": 1344500 + }, + { + "epoch": 0.68, + "learning_rate": 6.481331925515657e-06, + "loss": 1.7032, + "step": 1345000 + }, + { + "epoch": 0.68, + "learning_rate": 6.476306398350422e-06, + "loss": 1.7076, + "step": 1345500 + }, + { + "epoch": 0.68, + "learning_rate": 6.471280871185186e-06, + "loss": 1.6985, + "step": 1346000 + }, + { + "epoch": 0.68, + "learning_rate": 6.466255344019949e-06, + "loss": 1.7408, + "step": 1346500 + }, + { + "epoch": 0.68, + "learning_rate": 6.461229816854714e-06, + "loss": 1.7439, + "step": 1347000 + }, + { + "epoch": 0.68, + "learning_rate": 6.456204289689478e-06, + "loss": 1.7203, + "step": 1347500 + }, + { + "epoch": 0.68, + "learning_rate": 6.451178762524243e-06, + "loss": 1.7221, + "step": 1348000 + }, + { + "epoch": 0.68, + "learning_rate": 6.4461532353590065e-06, + "loss": 1.7252, + "step": 1348500 + }, + { + "epoch": 0.68, + "learning_rate": 6.441127708193771e-06, + "loss": 1.7285, + "step": 1349000 + }, + { + "epoch": 0.68, + "learning_rate": 6.436102181028535e-06, + "loss": 1.7381, + "step": 1349500 + }, + { + "epoch": 0.68, + "learning_rate": 6.431076653863299e-06, + "loss": 1.7243, + "step": 1350000 + }, + { + "epoch": 0.68, + "learning_rate": 6.426051126698063e-06, + "loss": 1.7223, + "step": 1350500 + }, + { + "epoch": 0.68, + "learning_rate": 6.421025599532827e-06, + "loss": 1.7313, + "step": 1351000 + }, + { + "epoch": 0.68, + "learning_rate": 6.416000072367592e-06, + "loss": 1.7172, + "step": 1351500 + }, + { + "epoch": 0.68, + "learning_rate": 6.410974545202357e-06, + "loss": 1.7306, + "step": 1352000 + }, + { + "epoch": 0.68, + "learning_rate": 6.40594901803712e-06, + "loss": 1.709, + "step": 1352500 + }, + { + "epoch": 0.68, + "learning_rate": 6.400923490871884e-06, + "loss": 1.7381, + "step": 1353000 + }, + { + "epoch": 0.68, + "learning_rate": 6.395897963706649e-06, + "loss": 1.7199, + "step": 1353500 + }, + { + "epoch": 0.68, + "learning_rate": 6.390872436541412e-06, + "loss": 1.7227, + "step": 1354000 + }, + { + "epoch": 0.68, + "learning_rate": 6.385846909376176e-06, + "loss": 1.7253, + "step": 1354500 + }, + { + "epoch": 0.68, + "learning_rate": 6.380821382210941e-06, + "loss": 1.7188, + "step": 1355000 + }, + { + "epoch": 0.68, + "learning_rate": 6.375795855045706e-06, + "loss": 1.7109, + "step": 1355500 + }, + { + "epoch": 0.68, + "learning_rate": 6.370770327880469e-06, + "loss": 1.726, + "step": 1356000 + }, + { + "epoch": 0.68, + "learning_rate": 6.3657448007152335e-06, + "loss": 1.7013, + "step": 1356500 + }, + { + "epoch": 0.68, + "learning_rate": 6.360719273549998e-06, + "loss": 1.7237, + "step": 1357000 + }, + { + "epoch": 0.68, + "learning_rate": 6.355693746384762e-06, + "loss": 1.7355, + "step": 1357500 + }, + { + "epoch": 0.68, + "learning_rate": 6.3506682192195256e-06, + "loss": 1.7277, + "step": 1358000 + }, + { + "epoch": 0.68, + "learning_rate": 6.34564269205429e-06, + "loss": 1.6967, + "step": 1358500 + }, + { + "epoch": 0.68, + "learning_rate": 6.340617164889055e-06, + "loss": 1.7103, + "step": 1359000 + }, + { + "epoch": 0.68, + "learning_rate": 6.335591637723819e-06, + "loss": 1.7256, + "step": 1359500 + }, + { + "epoch": 0.68, + "learning_rate": 6.330566110558583e-06, + "loss": 1.732, + "step": 1360000 + }, + { + "epoch": 0.68, + "learning_rate": 6.325540583393347e-06, + "loss": 1.7193, + "step": 1360500 + }, + { + "epoch": 0.68, + "learning_rate": 6.320515056228111e-06, + "loss": 1.7192, + "step": 1361000 + }, + { + "epoch": 0.68, + "learning_rate": 6.315489529062876e-06, + "loss": 1.7102, + "step": 1361500 + }, + { + "epoch": 0.68, + "learning_rate": 6.310464001897639e-06, + "loss": 1.714, + "step": 1362000 + }, + { + "epoch": 0.68, + "learning_rate": 6.305438474732403e-06, + "loss": 1.7206, + "step": 1362500 + }, + { + "epoch": 0.68, + "learning_rate": 6.3004129475671685e-06, + "loss": 1.7267, + "step": 1363000 + }, + { + "epoch": 0.69, + "learning_rate": 6.295387420401932e-06, + "loss": 1.7038, + "step": 1363500 + }, + { + "epoch": 0.69, + "learning_rate": 6.290361893236696e-06, + "loss": 1.7208, + "step": 1364000 + }, + { + "epoch": 0.69, + "learning_rate": 6.2853363660714605e-06, + "loss": 1.735, + "step": 1364500 + }, + { + "epoch": 0.69, + "learning_rate": 6.280310838906225e-06, + "loss": 1.7229, + "step": 1365000 + }, + { + "epoch": 0.69, + "learning_rate": 6.275285311740988e-06, + "loss": 1.7105, + "step": 1365500 + }, + { + "epoch": 0.69, + "learning_rate": 6.270259784575753e-06, + "loss": 1.7266, + "step": 1366000 + }, + { + "epoch": 0.69, + "learning_rate": 6.265234257410518e-06, + "loss": 1.7279, + "step": 1366500 + }, + { + "epoch": 0.69, + "learning_rate": 6.260208730245282e-06, + "loss": 1.7013, + "step": 1367000 + }, + { + "epoch": 0.69, + "learning_rate": 6.2551832030800455e-06, + "loss": 1.7147, + "step": 1367500 + }, + { + "epoch": 0.69, + "learning_rate": 6.25015767591481e-06, + "loss": 1.715, + "step": 1368000 + }, + { + "epoch": 0.69, + "learning_rate": 6.245132148749574e-06, + "loss": 1.7361, + "step": 1368500 + }, + { + "epoch": 0.69, + "learning_rate": 6.240106621584338e-06, + "loss": 1.7212, + "step": 1369000 + }, + { + "epoch": 0.69, + "learning_rate": 6.235081094419102e-06, + "loss": 1.7098, + "step": 1369500 + }, + { + "epoch": 0.69, + "learning_rate": 6.230055567253866e-06, + "loss": 1.7105, + "step": 1370000 + }, + { + "epoch": 0.69, + "learning_rate": 6.225030040088631e-06, + "loss": 1.7328, + "step": 1370500 + }, + { + "epoch": 0.69, + "learning_rate": 6.220004512923395e-06, + "loss": 1.716, + "step": 1371000 + }, + { + "epoch": 0.69, + "learning_rate": 6.214978985758159e-06, + "loss": 1.7226, + "step": 1371500 + }, + { + "epoch": 0.69, + "learning_rate": 6.209953458592923e-06, + "loss": 1.7247, + "step": 1372000 + }, + { + "epoch": 0.69, + "learning_rate": 6.2049279314276876e-06, + "loss": 1.7401, + "step": 1372500 + }, + { + "epoch": 0.69, + "learning_rate": 6.199902404262451e-06, + "loss": 1.7348, + "step": 1373000 + }, + { + "epoch": 0.69, + "learning_rate": 6.194876877097215e-06, + "loss": 1.7253, + "step": 1373500 + }, + { + "epoch": 0.69, + "learning_rate": 6.1898513499319804e-06, + "loss": 1.7311, + "step": 1374000 + }, + { + "epoch": 0.69, + "learning_rate": 6.184825822766745e-06, + "loss": 1.7248, + "step": 1374500 + }, + { + "epoch": 0.69, + "learning_rate": 6.179800295601508e-06, + "loss": 1.7048, + "step": 1375000 + }, + { + "epoch": 0.69, + "learning_rate": 6.1747747684362725e-06, + "loss": 1.7113, + "step": 1375500 + }, + { + "epoch": 0.69, + "learning_rate": 6.169749241271037e-06, + "loss": 1.7322, + "step": 1376000 + }, + { + "epoch": 0.69, + "learning_rate": 6.164723714105801e-06, + "loss": 1.7207, + "step": 1376500 + }, + { + "epoch": 0.69, + "learning_rate": 6.1596981869405645e-06, + "loss": 1.7125, + "step": 1377000 + }, + { + "epoch": 0.69, + "learning_rate": 6.15467265977533e-06, + "loss": 1.7392, + "step": 1377500 + }, + { + "epoch": 0.69, + "learning_rate": 6.149647132610094e-06, + "loss": 1.7183, + "step": 1378000 + }, + { + "epoch": 0.69, + "learning_rate": 6.144621605444858e-06, + "loss": 1.7167, + "step": 1378500 + }, + { + "epoch": 0.69, + "learning_rate": 6.139596078279622e-06, + "loss": 1.7188, + "step": 1379000 + }, + { + "epoch": 0.69, + "learning_rate": 6.134570551114386e-06, + "loss": 1.7149, + "step": 1379500 + }, + { + "epoch": 0.69, + "learning_rate": 6.12954502394915e-06, + "loss": 1.7262, + "step": 1380000 + }, + { + "epoch": 0.69, + "learning_rate": 6.124519496783914e-06, + "loss": 1.7129, + "step": 1380500 + }, + { + "epoch": 0.69, + "learning_rate": 6.119493969618678e-06, + "loss": 1.712, + "step": 1381000 + }, + { + "epoch": 0.69, + "learning_rate": 6.114468442453443e-06, + "loss": 1.727, + "step": 1381500 + }, + { + "epoch": 0.69, + "learning_rate": 6.1094429152882074e-06, + "loss": 1.7083, + "step": 1382000 + }, + { + "epoch": 0.69, + "learning_rate": 6.104417388122971e-06, + "loss": 1.7232, + "step": 1382500 + }, + { + "epoch": 0.7, + "learning_rate": 6.099391860957735e-06, + "loss": 1.7036, + "step": 1383000 + }, + { + "epoch": 0.7, + "learning_rate": 6.0943663337924995e-06, + "loss": 1.723, + "step": 1383500 + }, + { + "epoch": 0.7, + "learning_rate": 6.089340806627264e-06, + "loss": 1.7237, + "step": 1384000 + }, + { + "epoch": 0.7, + "learning_rate": 6.084315279462027e-06, + "loss": 1.7317, + "step": 1384500 + }, + { + "epoch": 0.7, + "learning_rate": 6.079289752296792e-06, + "loss": 1.7256, + "step": 1385000 + }, + { + "epoch": 0.7, + "learning_rate": 6.074264225131557e-06, + "loss": 1.7376, + "step": 1385500 + }, + { + "epoch": 0.7, + "learning_rate": 6.069238697966321e-06, + "loss": 1.714, + "step": 1386000 + }, + { + "epoch": 0.7, + "learning_rate": 6.064213170801084e-06, + "loss": 1.7202, + "step": 1386500 + }, + { + "epoch": 0.7, + "learning_rate": 6.059187643635849e-06, + "loss": 1.7142, + "step": 1387000 + }, + { + "epoch": 0.7, + "learning_rate": 6.054162116470613e-06, + "loss": 1.7214, + "step": 1387500 + }, + { + "epoch": 0.7, + "learning_rate": 6.0491365893053764e-06, + "loss": 1.7172, + "step": 1388000 + }, + { + "epoch": 0.7, + "learning_rate": 6.044111062140141e-06, + "loss": 1.7397, + "step": 1388500 + }, + { + "epoch": 0.7, + "learning_rate": 6.039085534974906e-06, + "loss": 1.7373, + "step": 1389000 + }, + { + "epoch": 0.7, + "learning_rate": 6.03406000780967e-06, + "loss": 1.7089, + "step": 1389500 + }, + { + "epoch": 0.7, + "learning_rate": 6.029034480644434e-06, + "loss": 1.722, + "step": 1390000 + }, + { + "epoch": 0.7, + "learning_rate": 6.024008953479198e-06, + "loss": 1.6921, + "step": 1390500 + }, + { + "epoch": 0.7, + "learning_rate": 6.018983426313962e-06, + "loss": 1.7291, + "step": 1391000 + }, + { + "epoch": 0.7, + "learning_rate": 6.0139578991487265e-06, + "loss": 1.7156, + "step": 1391500 + }, + { + "epoch": 0.7, + "learning_rate": 6.00893237198349e-06, + "loss": 1.7216, + "step": 1392000 + }, + { + "epoch": 0.7, + "learning_rate": 6.003906844818255e-06, + "loss": 1.7108, + "step": 1392500 + }, + { + "epoch": 0.7, + "learning_rate": 5.998881317653019e-06, + "loss": 1.7187, + "step": 1393000 + }, + { + "epoch": 0.7, + "learning_rate": 5.993855790487784e-06, + "loss": 1.7103, + "step": 1393500 + }, + { + "epoch": 0.7, + "learning_rate": 5.988830263322547e-06, + "loss": 1.7076, + "step": 1394000 + }, + { + "epoch": 0.7, + "learning_rate": 5.983804736157311e-06, + "loss": 1.7199, + "step": 1394500 + }, + { + "epoch": 0.7, + "learning_rate": 5.978779208992076e-06, + "loss": 1.6986, + "step": 1395000 + }, + { + "epoch": 0.7, + "learning_rate": 5.973753681826841e-06, + "loss": 1.6946, + "step": 1395500 + }, + { + "epoch": 0.7, + "learning_rate": 5.9687281546616034e-06, + "loss": 1.7401, + "step": 1396000 + }, + { + "epoch": 0.7, + "learning_rate": 5.963702627496369e-06, + "loss": 1.7072, + "step": 1396500 + }, + { + "epoch": 0.7, + "learning_rate": 5.958677100331133e-06, + "loss": 1.7061, + "step": 1397000 + }, + { + "epoch": 0.7, + "learning_rate": 5.953651573165896e-06, + "loss": 1.7034, + "step": 1397500 + }, + { + "epoch": 0.7, + "learning_rate": 5.948626046000661e-06, + "loss": 1.7195, + "step": 1398000 + }, + { + "epoch": 0.7, + "learning_rate": 5.943600518835425e-06, + "loss": 1.729, + "step": 1398500 + }, + { + "epoch": 0.7, + "learning_rate": 5.938574991670189e-06, + "loss": 1.7075, + "step": 1399000 + }, + { + "epoch": 0.7, + "learning_rate": 5.933549464504953e-06, + "loss": 1.7368, + "step": 1399500 + }, + { + "epoch": 0.7, + "learning_rate": 5.928523937339718e-06, + "loss": 1.7363, + "step": 1400000 + }, + { + "epoch": 0.7, + "learning_rate": 5.923498410174482e-06, + "loss": 1.7051, + "step": 1400500 + }, + { + "epoch": 0.7, + "learning_rate": 5.918472883009246e-06, + "loss": 1.7119, + "step": 1401000 + }, + { + "epoch": 0.7, + "learning_rate": 5.91344735584401e-06, + "loss": 1.7055, + "step": 1401500 + }, + { + "epoch": 0.7, + "learning_rate": 5.908421828678774e-06, + "loss": 1.7135, + "step": 1402000 + }, + { + "epoch": 0.7, + "learning_rate": 5.903396301513538e-06, + "loss": 1.7115, + "step": 1402500 + }, + { + "epoch": 0.71, + "learning_rate": 5.8983707743483036e-06, + "loss": 1.7081, + "step": 1403000 + }, + { + "epoch": 0.71, + "learning_rate": 5.893345247183066e-06, + "loss": 1.7217, + "step": 1403500 + }, + { + "epoch": 0.71, + "learning_rate": 5.888319720017831e-06, + "loss": 1.7086, + "step": 1404000 + }, + { + "epoch": 0.71, + "learning_rate": 5.883294192852596e-06, + "loss": 1.7161, + "step": 1404500 + }, + { + "epoch": 0.71, + "learning_rate": 5.878268665687359e-06, + "loss": 1.7128, + "step": 1405000 + }, + { + "epoch": 0.71, + "learning_rate": 5.873243138522123e-06, + "loss": 1.7244, + "step": 1405500 + }, + { + "epoch": 0.71, + "learning_rate": 5.868217611356888e-06, + "loss": 1.7003, + "step": 1406000 + }, + { + "epoch": 0.71, + "learning_rate": 5.863192084191652e-06, + "loss": 1.7265, + "step": 1406500 + }, + { + "epoch": 0.71, + "learning_rate": 5.858166557026415e-06, + "loss": 1.705, + "step": 1407000 + }, + { + "epoch": 0.71, + "learning_rate": 5.8531410298611805e-06, + "loss": 1.7163, + "step": 1407500 + }, + { + "epoch": 0.71, + "learning_rate": 5.848115502695945e-06, + "loss": 1.7175, + "step": 1408000 + }, + { + "epoch": 0.71, + "learning_rate": 5.843089975530709e-06, + "loss": 1.7207, + "step": 1408500 + }, + { + "epoch": 0.71, + "learning_rate": 5.8380644483654725e-06, + "loss": 1.7126, + "step": 1409000 + }, + { + "epoch": 0.71, + "learning_rate": 5.833038921200237e-06, + "loss": 1.7173, + "step": 1409500 + }, + { + "epoch": 0.71, + "learning_rate": 5.828013394035001e-06, + "loss": 1.7148, + "step": 1410000 + }, + { + "epoch": 0.71, + "learning_rate": 5.822987866869766e-06, + "loss": 1.6933, + "step": 1410500 + }, + { + "epoch": 0.71, + "learning_rate": 5.817962339704529e-06, + "loss": 1.7191, + "step": 1411000 + }, + { + "epoch": 0.71, + "learning_rate": 5.812936812539294e-06, + "loss": 1.7205, + "step": 1411500 + }, + { + "epoch": 0.71, + "learning_rate": 5.807911285374058e-06, + "loss": 1.7027, + "step": 1412000 + }, + { + "epoch": 0.71, + "learning_rate": 5.802885758208823e-06, + "loss": 1.7246, + "step": 1412500 + }, + { + "epoch": 0.71, + "learning_rate": 5.797860231043586e-06, + "loss": 1.7234, + "step": 1413000 + }, + { + "epoch": 0.71, + "learning_rate": 5.79283470387835e-06, + "loss": 1.7151, + "step": 1413500 + }, + { + "epoch": 0.71, + "learning_rate": 5.787809176713115e-06, + "loss": 1.7118, + "step": 1414000 + }, + { + "epoch": 0.71, + "learning_rate": 5.782783649547878e-06, + "loss": 1.7134, + "step": 1414500 + }, + { + "epoch": 0.71, + "learning_rate": 5.777758122382643e-06, + "loss": 1.7098, + "step": 1415000 + }, + { + "epoch": 0.71, + "learning_rate": 5.7727325952174075e-06, + "loss": 1.7243, + "step": 1415500 + }, + { + "epoch": 0.71, + "learning_rate": 5.767707068052172e-06, + "loss": 1.7253, + "step": 1416000 + }, + { + "epoch": 0.71, + "learning_rate": 5.762681540886935e-06, + "loss": 1.7199, + "step": 1416500 + }, + { + "epoch": 0.71, + "learning_rate": 5.7576560137216995e-06, + "loss": 1.7226, + "step": 1417000 + }, + { + "epoch": 0.71, + "learning_rate": 5.752630486556464e-06, + "loss": 1.7105, + "step": 1417500 + }, + { + "epoch": 0.71, + "learning_rate": 5.747604959391229e-06, + "loss": 1.7151, + "step": 1418000 + }, + { + "epoch": 0.71, + "learning_rate": 5.7425794322259924e-06, + "loss": 1.7175, + "step": 1418500 + }, + { + "epoch": 0.71, + "learning_rate": 5.737553905060757e-06, + "loss": 1.7293, + "step": 1419000 + }, + { + "epoch": 0.71, + "learning_rate": 5.732528377895521e-06, + "loss": 1.7151, + "step": 1419500 + }, + { + "epoch": 0.71, + "learning_rate": 5.727502850730285e-06, + "loss": 1.7067, + "step": 1420000 + }, + { + "epoch": 0.71, + "learning_rate": 5.722477323565049e-06, + "loss": 1.7179, + "step": 1420500 + }, + { + "epoch": 0.71, + "learning_rate": 5.717451796399813e-06, + "loss": 1.7256, + "step": 1421000 + }, + { + "epoch": 0.71, + "learning_rate": 5.712426269234578e-06, + "loss": 1.7164, + "step": 1421500 + }, + { + "epoch": 0.71, + "learning_rate": 5.707400742069341e-06, + "loss": 1.7237, + "step": 1422000 + }, + { + "epoch": 0.71, + "learning_rate": 5.702375214904106e-06, + "loss": 1.6989, + "step": 1422500 + }, + { + "epoch": 0.72, + "learning_rate": 5.69734968773887e-06, + "loss": 1.7063, + "step": 1423000 + }, + { + "epoch": 0.72, + "learning_rate": 5.6923241605736345e-06, + "loss": 1.723, + "step": 1423500 + }, + { + "epoch": 0.72, + "learning_rate": 5.687298633408398e-06, + "loss": 1.7046, + "step": 1424000 + }, + { + "epoch": 0.72, + "learning_rate": 5.682273106243162e-06, + "loss": 1.7013, + "step": 1424500 + }, + { + "epoch": 0.72, + "learning_rate": 5.6772475790779266e-06, + "loss": 1.728, + "step": 1425000 + }, + { + "epoch": 0.72, + "learning_rate": 5.672222051912692e-06, + "loss": 1.7254, + "step": 1425500 + }, + { + "epoch": 0.72, + "learning_rate": 5.667196524747455e-06, + "loss": 1.722, + "step": 1426000 + }, + { + "epoch": 0.72, + "learning_rate": 5.6621709975822194e-06, + "loss": 1.7179, + "step": 1426500 + }, + { + "epoch": 0.72, + "learning_rate": 5.657145470416984e-06, + "loss": 1.7165, + "step": 1427000 + }, + { + "epoch": 0.72, + "learning_rate": 5.652119943251748e-06, + "loss": 1.705, + "step": 1427500 + }, + { + "epoch": 0.72, + "learning_rate": 5.6470944160865115e-06, + "loss": 1.7015, + "step": 1428000 + }, + { + "epoch": 0.72, + "learning_rate": 5.642068888921276e-06, + "loss": 1.7023, + "step": 1428500 + }, + { + "epoch": 0.72, + "learning_rate": 5.637043361756041e-06, + "loss": 1.7148, + "step": 1429000 + }, + { + "epoch": 0.72, + "learning_rate": 5.632017834590805e-06, + "loss": 1.7168, + "step": 1429500 + }, + { + "epoch": 0.72, + "learning_rate": 5.626992307425569e-06, + "loss": 1.7113, + "step": 1430000 + }, + { + "epoch": 0.72, + "learning_rate": 5.621966780260333e-06, + "loss": 1.7131, + "step": 1430500 + }, + { + "epoch": 0.72, + "learning_rate": 5.616941253095097e-06, + "loss": 1.7119, + "step": 1431000 + }, + { + "epoch": 0.72, + "learning_rate": 5.611915725929861e-06, + "loss": 1.6993, + "step": 1431500 + }, + { + "epoch": 0.72, + "learning_rate": 5.606890198764625e-06, + "loss": 1.687, + "step": 1432000 + }, + { + "epoch": 0.72, + "learning_rate": 5.601864671599389e-06, + "loss": 1.7054, + "step": 1432500 + }, + { + "epoch": 0.72, + "learning_rate": 5.596839144434154e-06, + "loss": 1.7172, + "step": 1433000 + }, + { + "epoch": 0.72, + "learning_rate": 5.591813617268918e-06, + "loss": 1.7066, + "step": 1433500 + }, + { + "epoch": 0.72, + "learning_rate": 5.586788090103682e-06, + "loss": 1.7172, + "step": 1434000 + }, + { + "epoch": 0.72, + "learning_rate": 5.5817625629384464e-06, + "loss": 1.7003, + "step": 1434500 + }, + { + "epoch": 0.72, + "learning_rate": 5.576737035773211e-06, + "loss": 1.7152, + "step": 1435000 + }, + { + "epoch": 0.72, + "learning_rate": 5.571711508607974e-06, + "loss": 1.7088, + "step": 1435500 + }, + { + "epoch": 0.72, + "learning_rate": 5.5666859814427385e-06, + "loss": 1.723, + "step": 1436000 + }, + { + "epoch": 0.72, + "learning_rate": 5.561660454277504e-06, + "loss": 1.7115, + "step": 1436500 + }, + { + "epoch": 0.72, + "learning_rate": 5.556634927112268e-06, + "loss": 1.7339, + "step": 1437000 + }, + { + "epoch": 0.72, + "learning_rate": 5.551609399947031e-06, + "loss": 1.7112, + "step": 1437500 + }, + { + "epoch": 0.72, + "learning_rate": 5.546583872781796e-06, + "loss": 1.7317, + "step": 1438000 + }, + { + "epoch": 0.72, + "learning_rate": 5.54155834561656e-06, + "loss": 1.7112, + "step": 1438500 + }, + { + "epoch": 0.72, + "learning_rate": 5.536532818451323e-06, + "loss": 1.7206, + "step": 1439000 + }, + { + "epoch": 0.72, + "learning_rate": 5.531507291286088e-06, + "loss": 1.713, + "step": 1439500 + }, + { + "epoch": 0.72, + "learning_rate": 5.526481764120852e-06, + "loss": 1.7117, + "step": 1440000 + }, + { + "epoch": 0.72, + "learning_rate": 5.521456236955617e-06, + "loss": 1.6947, + "step": 1440500 + }, + { + "epoch": 0.72, + "learning_rate": 5.5164307097903806e-06, + "loss": 1.7193, + "step": 1441000 + }, + { + "epoch": 0.72, + "learning_rate": 5.511405182625145e-06, + "loss": 1.7295, + "step": 1441500 + }, + { + "epoch": 0.72, + "learning_rate": 5.506379655459909e-06, + "loss": 1.7154, + "step": 1442000 + }, + { + "epoch": 0.72, + "learning_rate": 5.5013541282946734e-06, + "loss": 1.7212, + "step": 1442500 + }, + { + "epoch": 0.73, + "learning_rate": 5.496328601129437e-06, + "loss": 1.7066, + "step": 1443000 + }, + { + "epoch": 0.73, + "learning_rate": 5.491303073964201e-06, + "loss": 1.727, + "step": 1443500 + }, + { + "epoch": 0.73, + "learning_rate": 5.486277546798966e-06, + "loss": 1.7155, + "step": 1444000 + }, + { + "epoch": 0.73, + "learning_rate": 5.481252019633731e-06, + "loss": 1.7015, + "step": 1444500 + }, + { + "epoch": 0.73, + "learning_rate": 5.476226492468494e-06, + "loss": 1.7013, + "step": 1445000 + }, + { + "epoch": 0.73, + "learning_rate": 5.471200965303258e-06, + "loss": 1.7013, + "step": 1445500 + }, + { + "epoch": 0.73, + "learning_rate": 5.466175438138023e-06, + "loss": 1.7184, + "step": 1446000 + }, + { + "epoch": 0.73, + "learning_rate": 5.461149910972787e-06, + "loss": 1.7255, + "step": 1446500 + }, + { + "epoch": 0.73, + "learning_rate": 5.45612438380755e-06, + "loss": 1.7216, + "step": 1447000 + }, + { + "epoch": 0.73, + "learning_rate": 5.451098856642315e-06, + "loss": 1.7228, + "step": 1447500 + }, + { + "epoch": 0.73, + "learning_rate": 5.44607332947708e-06, + "loss": 1.7162, + "step": 1448000 + }, + { + "epoch": 0.73, + "learning_rate": 5.441047802311843e-06, + "loss": 1.6996, + "step": 1448500 + }, + { + "epoch": 0.73, + "learning_rate": 5.4360222751466076e-06, + "loss": 1.7021, + "step": 1449000 + }, + { + "epoch": 0.73, + "learning_rate": 5.430996747981372e-06, + "loss": 1.7027, + "step": 1449500 + }, + { + "epoch": 0.73, + "learning_rate": 5.425971220816136e-06, + "loss": 1.6924, + "step": 1450000 + }, + { + "epoch": 0.73, + "learning_rate": 5.4209456936509e-06, + "loss": 1.7176, + "step": 1450500 + }, + { + "epoch": 0.73, + "learning_rate": 5.415920166485664e-06, + "loss": 1.7206, + "step": 1451000 + }, + { + "epoch": 0.73, + "learning_rate": 5.410894639320429e-06, + "loss": 1.7052, + "step": 1451500 + }, + { + "epoch": 0.73, + "learning_rate": 5.405869112155193e-06, + "loss": 1.7136, + "step": 1452000 + }, + { + "epoch": 0.73, + "learning_rate": 5.400843584989957e-06, + "loss": 1.7095, + "step": 1452500 + }, + { + "epoch": 0.73, + "learning_rate": 5.395818057824721e-06, + "loss": 1.7095, + "step": 1453000 + }, + { + "epoch": 0.73, + "learning_rate": 5.390792530659485e-06, + "loss": 1.709, + "step": 1453500 + }, + { + "epoch": 0.73, + "learning_rate": 5.38576700349425e-06, + "loss": 1.7117, + "step": 1454000 + }, + { + "epoch": 0.73, + "learning_rate": 5.380741476329013e-06, + "loss": 1.7197, + "step": 1454500 + }, + { + "epoch": 0.73, + "learning_rate": 5.375715949163777e-06, + "loss": 1.688, + "step": 1455000 + }, + { + "epoch": 0.73, + "learning_rate": 5.3706904219985425e-06, + "loss": 1.697, + "step": 1455500 + }, + { + "epoch": 0.73, + "learning_rate": 5.365664894833307e-06, + "loss": 1.7192, + "step": 1456000 + }, + { + "epoch": 0.73, + "learning_rate": 5.36063936766807e-06, + "loss": 1.7277, + "step": 1456500 + }, + { + "epoch": 0.73, + "learning_rate": 5.355613840502835e-06, + "loss": 1.7308, + "step": 1457000 + }, + { + "epoch": 0.73, + "learning_rate": 5.350588313337599e-06, + "loss": 1.7, + "step": 1457500 + }, + { + "epoch": 0.73, + "learning_rate": 5.345562786172362e-06, + "loss": 1.7134, + "step": 1458000 + }, + { + "epoch": 0.73, + "learning_rate": 5.340537259007127e-06, + "loss": 1.7172, + "step": 1458500 + }, + { + "epoch": 0.73, + "learning_rate": 5.335511731841892e-06, + "loss": 1.7165, + "step": 1459000 + }, + { + "epoch": 0.73, + "learning_rate": 5.330486204676656e-06, + "loss": 1.6984, + "step": 1459500 + }, + { + "epoch": 0.73, + "learning_rate": 5.3254606775114195e-06, + "loss": 1.7141, + "step": 1460000 + }, + { + "epoch": 0.73, + "learning_rate": 5.320435150346184e-06, + "loss": 1.7139, + "step": 1460500 + }, + { + "epoch": 0.73, + "learning_rate": 5.315409623180948e-06, + "loss": 1.6815, + "step": 1461000 + }, + { + "epoch": 0.73, + "learning_rate": 5.310384096015712e-06, + "loss": 1.7189, + "step": 1461500 + }, + { + "epoch": 0.73, + "learning_rate": 5.305358568850476e-06, + "loss": 1.724, + "step": 1462000 + }, + { + "epoch": 0.73, + "learning_rate": 5.30033304168524e-06, + "loss": 1.7024, + "step": 1462500 + }, + { + "epoch": 0.74, + "learning_rate": 5.295307514520005e-06, + "loss": 1.728, + "step": 1463000 + }, + { + "epoch": 0.74, + "learning_rate": 5.2902819873547696e-06, + "loss": 1.7013, + "step": 1463500 + }, + { + "epoch": 0.74, + "learning_rate": 5.285256460189533e-06, + "loss": 1.7142, + "step": 1464000 + }, + { + "epoch": 0.74, + "learning_rate": 5.280230933024297e-06, + "loss": 1.7176, + "step": 1464500 + }, + { + "epoch": 0.74, + "learning_rate": 5.275205405859062e-06, + "loss": 1.7257, + "step": 1465000 + }, + { + "epoch": 0.74, + "learning_rate": 5.270179878693825e-06, + "loss": 1.7159, + "step": 1465500 + }, + { + "epoch": 0.74, + "learning_rate": 5.265154351528589e-06, + "loss": 1.7139, + "step": 1466000 + }, + { + "epoch": 0.74, + "learning_rate": 5.2601288243633545e-06, + "loss": 1.7182, + "step": 1466500 + }, + { + "epoch": 0.74, + "learning_rate": 5.255103297198119e-06, + "loss": 1.7084, + "step": 1467000 + }, + { + "epoch": 0.74, + "learning_rate": 5.250077770032882e-06, + "loss": 1.6989, + "step": 1467500 + }, + { + "epoch": 0.74, + "learning_rate": 5.2450522428676465e-06, + "loss": 1.7124, + "step": 1468000 + }, + { + "epoch": 0.74, + "learning_rate": 5.240026715702411e-06, + "loss": 1.7283, + "step": 1468500 + }, + { + "epoch": 0.74, + "learning_rate": 5.235001188537175e-06, + "loss": 1.6828, + "step": 1469000 + }, + { + "epoch": 0.74, + "learning_rate": 5.2299756613719385e-06, + "loss": 1.7145, + "step": 1469500 + }, + { + "epoch": 0.74, + "learning_rate": 5.224950134206704e-06, + "loss": 1.7017, + "step": 1470000 + }, + { + "epoch": 0.74, + "learning_rate": 5.219924607041468e-06, + "loss": 1.7013, + "step": 1470500 + }, + { + "epoch": 0.74, + "learning_rate": 5.214899079876232e-06, + "loss": 1.7108, + "step": 1471000 + }, + { + "epoch": 0.74, + "learning_rate": 5.209873552710996e-06, + "loss": 1.6973, + "step": 1471500 + }, + { + "epoch": 0.74, + "learning_rate": 5.20484802554576e-06, + "loss": 1.7099, + "step": 1472000 + }, + { + "epoch": 0.74, + "learning_rate": 5.199822498380524e-06, + "loss": 1.7182, + "step": 1472500 + }, + { + "epoch": 0.74, + "learning_rate": 5.1947969712152894e-06, + "loss": 1.7042, + "step": 1473000 + }, + { + "epoch": 0.74, + "learning_rate": 5.189771444050052e-06, + "loss": 1.709, + "step": 1473500 + }, + { + "epoch": 0.74, + "learning_rate": 5.184745916884817e-06, + "loss": 1.7215, + "step": 1474000 + }, + { + "epoch": 0.74, + "learning_rate": 5.1797203897195815e-06, + "loss": 1.7084, + "step": 1474500 + }, + { + "epoch": 0.74, + "learning_rate": 5.174694862554345e-06, + "loss": 1.7229, + "step": 1475000 + }, + { + "epoch": 0.74, + "learning_rate": 5.169669335389109e-06, + "loss": 1.6923, + "step": 1475500 + }, + { + "epoch": 0.74, + "learning_rate": 5.1646438082238735e-06, + "loss": 1.7088, + "step": 1476000 + }, + { + "epoch": 0.74, + "learning_rate": 5.159618281058638e-06, + "loss": 1.7031, + "step": 1476500 + }, + { + "epoch": 0.74, + "learning_rate": 5.154592753893401e-06, + "loss": 1.7061, + "step": 1477000 + }, + { + "epoch": 0.74, + "learning_rate": 5.149567226728166e-06, + "loss": 1.7001, + "step": 1477500 + }, + { + "epoch": 0.74, + "learning_rate": 5.144541699562931e-06, + "loss": 1.705, + "step": 1478000 + }, + { + "epoch": 0.74, + "learning_rate": 5.139516172397695e-06, + "loss": 1.718, + "step": 1478500 + }, + { + "epoch": 0.74, + "learning_rate": 5.1344906452324584e-06, + "loss": 1.7042, + "step": 1479000 + }, + { + "epoch": 0.74, + "learning_rate": 5.129465118067223e-06, + "loss": 1.7109, + "step": 1479500 + }, + { + "epoch": 0.74, + "learning_rate": 5.124439590901987e-06, + "loss": 1.7202, + "step": 1480000 + }, + { + "epoch": 0.74, + "learning_rate": 5.119414063736752e-06, + "loss": 1.7195, + "step": 1480500 + }, + { + "epoch": 0.74, + "learning_rate": 5.114388536571515e-06, + "loss": 1.7076, + "step": 1481000 + }, + { + "epoch": 0.74, + "learning_rate": 5.10936300940628e-06, + "loss": 1.7151, + "step": 1481500 + }, + { + "epoch": 0.74, + "learning_rate": 5.104337482241044e-06, + "loss": 1.6981, + "step": 1482000 + }, + { + "epoch": 0.75, + "learning_rate": 5.099311955075808e-06, + "loss": 1.7075, + "step": 1482500 + }, + { + "epoch": 0.75, + "learning_rate": 5.094286427910572e-06, + "loss": 1.7229, + "step": 1483000 + }, + { + "epoch": 0.75, + "learning_rate": 5.089260900745336e-06, + "loss": 1.7268, + "step": 1483500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0842353735801005e-06, + "loss": 1.7246, + "step": 1484000 + }, + { + "epoch": 0.75, + "learning_rate": 5.079209846414864e-06, + "loss": 1.7241, + "step": 1484500 + }, + { + "epoch": 0.75, + "learning_rate": 5.074184319249629e-06, + "loss": 1.7105, + "step": 1485000 + }, + { + "epoch": 0.75, + "learning_rate": 5.069158792084393e-06, + "loss": 1.7127, + "step": 1485500 + }, + { + "epoch": 0.75, + "learning_rate": 5.064133264919158e-06, + "loss": 1.7321, + "step": 1486000 + }, + { + "epoch": 0.75, + "learning_rate": 5.059107737753921e-06, + "loss": 1.7109, + "step": 1486500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0540822105886854e-06, + "loss": 1.7071, + "step": 1487000 + }, + { + "epoch": 0.75, + "learning_rate": 5.04905668342345e-06, + "loss": 1.7247, + "step": 1487500 + }, + { + "epoch": 0.75, + "learning_rate": 5.044031156258215e-06, + "loss": 1.729, + "step": 1488000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0390056290929775e-06, + "loss": 1.7097, + "step": 1488500 + }, + { + "epoch": 0.75, + "learning_rate": 5.033980101927743e-06, + "loss": 1.7071, + "step": 1489000 + }, + { + "epoch": 0.75, + "learning_rate": 5.028954574762507e-06, + "loss": 1.7121, + "step": 1489500 + }, + { + "epoch": 0.75, + "learning_rate": 5.023929047597271e-06, + "loss": 1.719, + "step": 1490000 + }, + { + "epoch": 0.75, + "learning_rate": 5.018903520432035e-06, + "loss": 1.725, + "step": 1490500 + }, + { + "epoch": 0.75, + "learning_rate": 5.013877993266799e-06, + "loss": 1.7115, + "step": 1491000 + }, + { + "epoch": 0.75, + "learning_rate": 5.008852466101563e-06, + "loss": 1.7046, + "step": 1491500 + }, + { + "epoch": 0.75, + "learning_rate": 5.003826938936327e-06, + "loss": 1.7121, + "step": 1492000 + }, + { + "epoch": 0.75, + "learning_rate": 4.998801411771092e-06, + "loss": 1.6941, + "step": 1492500 + }, + { + "epoch": 0.75, + "learning_rate": 4.993775884605856e-06, + "loss": 1.7092, + "step": 1493000 + }, + { + "epoch": 0.75, + "learning_rate": 4.9887503574406196e-06, + "loss": 1.7086, + "step": 1493500 + }, + { + "epoch": 0.75, + "learning_rate": 4.983724830275385e-06, + "loss": 1.6829, + "step": 1494000 + }, + { + "epoch": 0.75, + "learning_rate": 4.978699303110148e-06, + "loss": 1.7086, + "step": 1494500 + }, + { + "epoch": 0.75, + "learning_rate": 4.9736737759449124e-06, + "loss": 1.7409, + "step": 1495000 + }, + { + "epoch": 0.75, + "learning_rate": 4.968648248779677e-06, + "loss": 1.705, + "step": 1495500 + }, + { + "epoch": 0.75, + "learning_rate": 4.963622721614441e-06, + "loss": 1.7092, + "step": 1496000 + }, + { + "epoch": 0.75, + "learning_rate": 4.958597194449205e-06, + "loss": 1.7175, + "step": 1496500 + }, + { + "epoch": 0.75, + "learning_rate": 4.95357166728397e-06, + "loss": 1.7279, + "step": 1497000 + }, + { + "epoch": 0.75, + "learning_rate": 4.948546140118733e-06, + "loss": 1.7174, + "step": 1497500 + }, + { + "epoch": 0.75, + "learning_rate": 4.943520612953498e-06, + "loss": 1.7125, + "step": 1498000 + }, + { + "epoch": 0.75, + "learning_rate": 4.938495085788262e-06, + "loss": 1.7036, + "step": 1498500 + }, + { + "epoch": 0.75, + "learning_rate": 4.933469558623026e-06, + "loss": 1.7141, + "step": 1499000 + }, + { + "epoch": 0.75, + "learning_rate": 4.92844403145779e-06, + "loss": 1.6992, + "step": 1499500 + }, + { + "epoch": 0.75, + "learning_rate": 4.9234185042925545e-06, + "loss": 1.6905, + "step": 1500000 + }, + { + "epoch": 0.75, + "learning_rate": 4.918392977127319e-06, + "loss": 1.7025, + "step": 1500500 + }, + { + "epoch": 0.75, + "learning_rate": 4.913367449962082e-06, + "loss": 1.7026, + "step": 1501000 + }, + { + "epoch": 0.75, + "learning_rate": 4.908341922796847e-06, + "loss": 1.7191, + "step": 1501500 + }, + { + "epoch": 0.75, + "learning_rate": 4.903316395631611e-06, + "loss": 1.6906, + "step": 1502000 + }, + { + "epoch": 0.76, + "learning_rate": 4.898290868466375e-06, + "loss": 1.6973, + "step": 1502500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8932653413011395e-06, + "loss": 1.713, + "step": 1503000 + }, + { + "epoch": 0.76, + "learning_rate": 4.888239814135904e-06, + "loss": 1.7085, + "step": 1503500 + }, + { + "epoch": 0.76, + "learning_rate": 4.883214286970668e-06, + "loss": 1.71, + "step": 1504000 + }, + { + "epoch": 0.76, + "learning_rate": 4.878188759805432e-06, + "loss": 1.7039, + "step": 1504500 + }, + { + "epoch": 0.76, + "learning_rate": 4.873163232640196e-06, + "loss": 1.7214, + "step": 1505000 + }, + { + "epoch": 0.76, + "learning_rate": 4.868137705474961e-06, + "loss": 1.7166, + "step": 1505500 + }, + { + "epoch": 0.76, + "learning_rate": 4.863112178309724e-06, + "loss": 1.7074, + "step": 1506000 + }, + { + "epoch": 0.76, + "learning_rate": 4.858086651144489e-06, + "loss": 1.6968, + "step": 1506500 + }, + { + "epoch": 0.76, + "learning_rate": 4.853061123979253e-06, + "loss": 1.7299, + "step": 1507000 + }, + { + "epoch": 0.76, + "learning_rate": 4.848035596814017e-06, + "loss": 1.7129, + "step": 1507500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8430100696487815e-06, + "loss": 1.7015, + "step": 1508000 + }, + { + "epoch": 0.76, + "learning_rate": 4.837984542483545e-06, + "loss": 1.7327, + "step": 1508500 + }, + { + "epoch": 0.76, + "learning_rate": 4.83295901531831e-06, + "loss": 1.7069, + "step": 1509000 + }, + { + "epoch": 0.76, + "learning_rate": 4.827933488153074e-06, + "loss": 1.7198, + "step": 1509500 + }, + { + "epoch": 0.76, + "learning_rate": 4.822907960987838e-06, + "loss": 1.7081, + "step": 1510000 + }, + { + "epoch": 0.76, + "learning_rate": 4.817882433822602e-06, + "loss": 1.6928, + "step": 1510500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8128569066573665e-06, + "loss": 1.6888, + "step": 1511000 + }, + { + "epoch": 0.76, + "learning_rate": 4.807831379492131e-06, + "loss": 1.6777, + "step": 1511500 + }, + { + "epoch": 0.76, + "learning_rate": 4.802805852326895e-06, + "loss": 1.7097, + "step": 1512000 + }, + { + "epoch": 0.76, + "learning_rate": 4.797780325161659e-06, + "loss": 1.7023, + "step": 1512500 + }, + { + "epoch": 0.76, + "learning_rate": 4.792754797996424e-06, + "loss": 1.6993, + "step": 1513000 + }, + { + "epoch": 0.76, + "learning_rate": 4.787729270831187e-06, + "loss": 1.7267, + "step": 1513500 + }, + { + "epoch": 0.76, + "learning_rate": 4.782703743665952e-06, + "loss": 1.7178, + "step": 1514000 + }, + { + "epoch": 0.76, + "learning_rate": 4.777678216500716e-06, + "loss": 1.7064, + "step": 1514500 + }, + { + "epoch": 0.76, + "learning_rate": 4.77265268933548e-06, + "loss": 1.6967, + "step": 1515000 + }, + { + "epoch": 0.76, + "learning_rate": 4.767627162170244e-06, + "loss": 1.714, + "step": 1515500 + }, + { + "epoch": 0.76, + "learning_rate": 4.762601635005008e-06, + "loss": 1.7137, + "step": 1516000 + }, + { + "epoch": 0.76, + "learning_rate": 4.757576107839773e-06, + "loss": 1.6956, + "step": 1516500 + }, + { + "epoch": 0.76, + "learning_rate": 4.752550580674536e-06, + "loss": 1.7171, + "step": 1517000 + }, + { + "epoch": 0.76, + "learning_rate": 4.747525053509301e-06, + "loss": 1.7083, + "step": 1517500 + }, + { + "epoch": 0.76, + "learning_rate": 4.742499526344065e-06, + "loss": 1.7174, + "step": 1518000 + }, + { + "epoch": 0.76, + "learning_rate": 4.737473999178829e-06, + "loss": 1.6969, + "step": 1518500 + }, + { + "epoch": 0.76, + "learning_rate": 4.7324484720135935e-06, + "loss": 1.7119, + "step": 1519000 + }, + { + "epoch": 0.76, + "learning_rate": 4.727422944848358e-06, + "loss": 1.6962, + "step": 1519500 + }, + { + "epoch": 0.76, + "learning_rate": 4.722397417683122e-06, + "loss": 1.7071, + "step": 1520000 + }, + { + "epoch": 0.76, + "learning_rate": 4.717371890517886e-06, + "loss": 1.7185, + "step": 1520500 + }, + { + "epoch": 0.76, + "learning_rate": 4.71234636335265e-06, + "loss": 1.7063, + "step": 1521000 + }, + { + "epoch": 0.76, + "learning_rate": 4.707320836187415e-06, + "loss": 1.7145, + "step": 1521500 + }, + { + "epoch": 0.76, + "learning_rate": 4.702295309022178e-06, + "loss": 1.6981, + "step": 1522000 + }, + { + "epoch": 0.77, + "learning_rate": 4.697269781856943e-06, + "loss": 1.7032, + "step": 1522500 + }, + { + "epoch": 0.77, + "learning_rate": 4.692244254691707e-06, + "loss": 1.7007, + "step": 1523000 + }, + { + "epoch": 0.77, + "learning_rate": 4.687218727526471e-06, + "loss": 1.7125, + "step": 1523500 + }, + { + "epoch": 0.77, + "learning_rate": 4.6821932003612356e-06, + "loss": 1.702, + "step": 1524000 + }, + { + "epoch": 0.77, + "learning_rate": 4.677167673196e-06, + "loss": 1.6936, + "step": 1524500 + }, + { + "epoch": 0.77, + "learning_rate": 4.672142146030763e-06, + "loss": 1.7117, + "step": 1525000 + }, + { + "epoch": 0.77, + "learning_rate": 4.667116618865528e-06, + "loss": 1.6959, + "step": 1525500 + }, + { + "epoch": 0.77, + "learning_rate": 4.662091091700292e-06, + "loss": 1.7147, + "step": 1526000 + }, + { + "epoch": 0.77, + "learning_rate": 4.657065564535056e-06, + "loss": 1.7041, + "step": 1526500 + }, + { + "epoch": 0.77, + "learning_rate": 4.6520400373698205e-06, + "loss": 1.7075, + "step": 1527000 + }, + { + "epoch": 0.77, + "learning_rate": 4.647014510204585e-06, + "loss": 1.7072, + "step": 1527500 + }, + { + "epoch": 0.77, + "learning_rate": 4.641988983039349e-06, + "loss": 1.7052, + "step": 1528000 + }, + { + "epoch": 0.77, + "learning_rate": 4.6369634558741125e-06, + "loss": 1.7206, + "step": 1528500 + }, + { + "epoch": 0.77, + "learning_rate": 4.631937928708878e-06, + "loss": 1.7248, + "step": 1529000 + }, + { + "epoch": 0.77, + "learning_rate": 4.626912401543641e-06, + "loss": 1.7035, + "step": 1529500 + }, + { + "epoch": 0.77, + "learning_rate": 4.621886874378405e-06, + "loss": 1.7084, + "step": 1530000 + }, + { + "epoch": 0.77, + "learning_rate": 4.61686134721317e-06, + "loss": 1.6849, + "step": 1530500 + }, + { + "epoch": 0.77, + "learning_rate": 4.611835820047934e-06, + "loss": 1.7226, + "step": 1531000 + }, + { + "epoch": 0.77, + "learning_rate": 4.606810292882698e-06, + "loss": 1.7205, + "step": 1531500 + }, + { + "epoch": 0.77, + "learning_rate": 4.6017847657174626e-06, + "loss": 1.7169, + "step": 1532000 + }, + { + "epoch": 0.77, + "learning_rate": 4.596759238552226e-06, + "loss": 1.6964, + "step": 1532500 + }, + { + "epoch": 0.77, + "learning_rate": 4.591733711386991e-06, + "loss": 1.7186, + "step": 1533000 + }, + { + "epoch": 0.77, + "learning_rate": 4.586708184221755e-06, + "loss": 1.7095, + "step": 1533500 + }, + { + "epoch": 0.77, + "learning_rate": 4.581682657056519e-06, + "loss": 1.7025, + "step": 1534000 + }, + { + "epoch": 0.77, + "learning_rate": 4.576657129891283e-06, + "loss": 1.7106, + "step": 1534500 + }, + { + "epoch": 0.77, + "learning_rate": 4.5716316027260475e-06, + "loss": 1.6944, + "step": 1535000 + }, + { + "epoch": 0.77, + "learning_rate": 4.566606075560812e-06, + "loss": 1.7194, + "step": 1535500 + }, + { + "epoch": 0.77, + "learning_rate": 4.561580548395575e-06, + "loss": 1.7376, + "step": 1536000 + }, + { + "epoch": 0.77, + "learning_rate": 4.55655502123034e-06, + "loss": 1.7112, + "step": 1536500 + }, + { + "epoch": 0.77, + "learning_rate": 4.551529494065104e-06, + "loss": 1.699, + "step": 1537000 + }, + { + "epoch": 0.77, + "learning_rate": 4.546503966899868e-06, + "loss": 1.7291, + "step": 1537500 + }, + { + "epoch": 0.77, + "learning_rate": 4.541478439734632e-06, + "loss": 1.703, + "step": 1538000 + }, + { + "epoch": 0.77, + "learning_rate": 4.536452912569397e-06, + "loss": 1.7016, + "step": 1538500 + }, + { + "epoch": 0.77, + "learning_rate": 4.531427385404161e-06, + "loss": 1.7018, + "step": 1539000 + }, + { + "epoch": 0.77, + "learning_rate": 4.526401858238925e-06, + "loss": 1.7181, + "step": 1539500 + }, + { + "epoch": 0.77, + "learning_rate": 4.521376331073689e-06, + "loss": 1.6906, + "step": 1540000 + }, + { + "epoch": 0.77, + "learning_rate": 4.516350803908454e-06, + "loss": 1.7073, + "step": 1540500 + }, + { + "epoch": 0.77, + "learning_rate": 4.511325276743217e-06, + "loss": 1.7043, + "step": 1541000 + }, + { + "epoch": 0.77, + "learning_rate": 4.506299749577982e-06, + "loss": 1.6925, + "step": 1541500 + }, + { + "epoch": 0.77, + "learning_rate": 4.501274222412746e-06, + "loss": 1.7102, + "step": 1542000 + }, + { + "epoch": 0.78, + "learning_rate": 4.49624869524751e-06, + "loss": 1.7194, + "step": 1542500 + }, + { + "epoch": 0.78, + "learning_rate": 4.4912231680822745e-06, + "loss": 1.7133, + "step": 1543000 + }, + { + "epoch": 0.78, + "learning_rate": 4.486197640917038e-06, + "loss": 1.7266, + "step": 1543500 + }, + { + "epoch": 0.78, + "learning_rate": 4.481172113751803e-06, + "loss": 1.7022, + "step": 1544000 + }, + { + "epoch": 0.78, + "learning_rate": 4.4761465865865665e-06, + "loss": 1.7087, + "step": 1544500 + }, + { + "epoch": 0.78, + "learning_rate": 4.471121059421331e-06, + "loss": 1.7069, + "step": 1545000 + }, + { + "epoch": 0.78, + "learning_rate": 4.466095532256095e-06, + "loss": 1.7052, + "step": 1545500 + }, + { + "epoch": 0.78, + "learning_rate": 4.461070005090859e-06, + "loss": 1.7086, + "step": 1546000 + }, + { + "epoch": 0.78, + "learning_rate": 4.456044477925624e-06, + "loss": 1.6825, + "step": 1546500 + }, + { + "epoch": 0.78, + "learning_rate": 4.451018950760388e-06, + "loss": 1.7168, + "step": 1547000 + }, + { + "epoch": 0.78, + "learning_rate": 4.4459934235951514e-06, + "loss": 1.6971, + "step": 1547500 + }, + { + "epoch": 0.78, + "learning_rate": 4.440967896429917e-06, + "loss": 1.703, + "step": 1548000 + }, + { + "epoch": 0.78, + "learning_rate": 4.43594236926468e-06, + "loss": 1.7157, + "step": 1548500 + }, + { + "epoch": 0.78, + "learning_rate": 4.430916842099444e-06, + "loss": 1.6926, + "step": 1549000 + }, + { + "epoch": 0.78, + "learning_rate": 4.425891314934209e-06, + "loss": 1.7109, + "step": 1549500 + }, + { + "epoch": 0.78, + "learning_rate": 4.420865787768973e-06, + "loss": 1.7161, + "step": 1550000 + }, + { + "epoch": 0.78, + "learning_rate": 4.415840260603737e-06, + "loss": 1.7165, + "step": 1550500 + }, + { + "epoch": 0.78, + "learning_rate": 4.410814733438501e-06, + "loss": 1.6981, + "step": 1551000 + }, + { + "epoch": 0.78, + "learning_rate": 4.405789206273266e-06, + "loss": 1.7136, + "step": 1551500 + }, + { + "epoch": 0.78, + "learning_rate": 4.400763679108029e-06, + "loss": 1.6931, + "step": 1552000 + }, + { + "epoch": 0.78, + "learning_rate": 4.3957381519427935e-06, + "loss": 1.7124, + "step": 1552500 + }, + { + "epoch": 0.78, + "learning_rate": 4.390712624777558e-06, + "loss": 1.7137, + "step": 1553000 + }, + { + "epoch": 0.78, + "learning_rate": 4.385687097612322e-06, + "loss": 1.7067, + "step": 1553500 + }, + { + "epoch": 0.78, + "learning_rate": 4.380661570447086e-06, + "loss": 1.7104, + "step": 1554000 + }, + { + "epoch": 0.78, + "learning_rate": 4.375636043281851e-06, + "loss": 1.6938, + "step": 1554500 + }, + { + "epoch": 0.78, + "learning_rate": 4.370610516116614e-06, + "loss": 1.7108, + "step": 1555000 + }, + { + "epoch": 0.78, + "learning_rate": 4.365584988951379e-06, + "loss": 1.6993, + "step": 1555500 + }, + { + "epoch": 0.78, + "learning_rate": 4.360559461786143e-06, + "loss": 1.6979, + "step": 1556000 + }, + { + "epoch": 0.78, + "learning_rate": 4.355533934620907e-06, + "loss": 1.7071, + "step": 1556500 + }, + { + "epoch": 0.78, + "learning_rate": 4.350508407455671e-06, + "loss": 1.6868, + "step": 1557000 + }, + { + "epoch": 0.78, + "learning_rate": 4.345482880290436e-06, + "loss": 1.6955, + "step": 1557500 + }, + { + "epoch": 0.78, + "learning_rate": 4.3404573531252e-06, + "loss": 1.7025, + "step": 1558000 + }, + { + "epoch": 0.78, + "learning_rate": 4.335431825959964e-06, + "loss": 1.6994, + "step": 1558500 + }, + { + "epoch": 0.78, + "learning_rate": 4.3304062987947285e-06, + "loss": 1.6983, + "step": 1559000 + }, + { + "epoch": 0.78, + "learning_rate": 4.325380771629492e-06, + "loss": 1.703, + "step": 1559500 + }, + { + "epoch": 0.78, + "learning_rate": 4.320355244464256e-06, + "loss": 1.6899, + "step": 1560000 + }, + { + "epoch": 0.78, + "learning_rate": 4.3153297172990205e-06, + "loss": 1.6882, + "step": 1560500 + }, + { + "epoch": 0.78, + "learning_rate": 4.310304190133785e-06, + "loss": 1.7136, + "step": 1561000 + }, + { + "epoch": 0.78, + "learning_rate": 4.305278662968549e-06, + "loss": 1.7219, + "step": 1561500 + }, + { + "epoch": 0.78, + "learning_rate": 4.300253135803313e-06, + "loss": 1.6865, + "step": 1562000 + }, + { + "epoch": 0.79, + "learning_rate": 4.295227608638078e-06, + "loss": 1.7153, + "step": 1562500 + }, + { + "epoch": 0.79, + "learning_rate": 4.290202081472842e-06, + "loss": 1.7043, + "step": 1563000 + }, + { + "epoch": 0.79, + "learning_rate": 4.2851765543076055e-06, + "loss": 1.6962, + "step": 1563500 + }, + { + "epoch": 0.79, + "learning_rate": 4.280151027142371e-06, + "loss": 1.7147, + "step": 1564000 + }, + { + "epoch": 0.79, + "learning_rate": 4.275125499977134e-06, + "loss": 1.6991, + "step": 1564500 + }, + { + "epoch": 0.79, + "learning_rate": 4.270099972811898e-06, + "loss": 1.6902, + "step": 1565000 + }, + { + "epoch": 0.79, + "learning_rate": 4.265074445646663e-06, + "loss": 1.6838, + "step": 1565500 + }, + { + "epoch": 0.79, + "learning_rate": 4.260048918481427e-06, + "loss": 1.6928, + "step": 1566000 + }, + { + "epoch": 0.79, + "learning_rate": 4.255023391316191e-06, + "loss": 1.6937, + "step": 1566500 + }, + { + "epoch": 0.79, + "learning_rate": 4.2499978641509555e-06, + "loss": 1.7004, + "step": 1567000 + }, + { + "epoch": 0.79, + "learning_rate": 4.244972336985719e-06, + "loss": 1.6967, + "step": 1567500 + }, + { + "epoch": 0.79, + "learning_rate": 4.239946809820483e-06, + "loss": 1.6977, + "step": 1568000 + }, + { + "epoch": 0.79, + "learning_rate": 4.2349212826552475e-06, + "loss": 1.7122, + "step": 1568500 + }, + { + "epoch": 0.79, + "learning_rate": 4.229895755490012e-06, + "loss": 1.7079, + "step": 1569000 + }, + { + "epoch": 0.79, + "learning_rate": 4.224870228324776e-06, + "loss": 1.709, + "step": 1569500 + }, + { + "epoch": 0.79, + "learning_rate": 4.2198447011595404e-06, + "loss": 1.704, + "step": 1570000 + }, + { + "epoch": 0.79, + "learning_rate": 4.214819173994305e-06, + "loss": 1.7075, + "step": 1570500 + }, + { + "epoch": 0.79, + "learning_rate": 4.209793646829068e-06, + "loss": 1.6917, + "step": 1571000 + }, + { + "epoch": 0.79, + "learning_rate": 4.204768119663833e-06, + "loss": 1.6998, + "step": 1571500 + }, + { + "epoch": 0.79, + "learning_rate": 4.199742592498597e-06, + "loss": 1.6996, + "step": 1572000 + }, + { + "epoch": 0.79, + "learning_rate": 4.194717065333361e-06, + "loss": 1.7037, + "step": 1572500 + }, + { + "epoch": 0.79, + "learning_rate": 4.189691538168125e-06, + "loss": 1.7203, + "step": 1573000 + }, + { + "epoch": 0.79, + "learning_rate": 4.18466601100289e-06, + "loss": 1.7127, + "step": 1573500 + }, + { + "epoch": 0.79, + "learning_rate": 4.179640483837654e-06, + "loss": 1.7102, + "step": 1574000 + }, + { + "epoch": 0.79, + "learning_rate": 4.174614956672418e-06, + "loss": 1.7126, + "step": 1574500 + }, + { + "epoch": 0.79, + "learning_rate": 4.169589429507182e-06, + "loss": 1.6951, + "step": 1575000 + }, + { + "epoch": 0.79, + "learning_rate": 4.164563902341947e-06, + "loss": 1.6937, + "step": 1575500 + }, + { + "epoch": 0.79, + "learning_rate": 4.15953837517671e-06, + "loss": 1.6995, + "step": 1576000 + }, + { + "epoch": 0.79, + "learning_rate": 4.1545128480114746e-06, + "loss": 1.6997, + "step": 1576500 + }, + { + "epoch": 0.79, + "learning_rate": 4.149487320846239e-06, + "loss": 1.6845, + "step": 1577000 + }, + { + "epoch": 0.79, + "learning_rate": 4.144461793681003e-06, + "loss": 1.6972, + "step": 1577500 + }, + { + "epoch": 0.79, + "learning_rate": 4.1394362665157674e-06, + "loss": 1.7184, + "step": 1578000 + }, + { + "epoch": 0.79, + "learning_rate": 4.134410739350531e-06, + "loss": 1.7217, + "step": 1578500 + }, + { + "epoch": 0.79, + "learning_rate": 4.129385212185296e-06, + "loss": 1.7114, + "step": 1579000 + }, + { + "epoch": 0.79, + "learning_rate": 4.1243596850200595e-06, + "loss": 1.6807, + "step": 1579500 + }, + { + "epoch": 0.79, + "learning_rate": 4.119334157854824e-06, + "loss": 1.7022, + "step": 1580000 + }, + { + "epoch": 0.79, + "learning_rate": 4.114308630689588e-06, + "loss": 1.7007, + "step": 1580500 + }, + { + "epoch": 0.79, + "learning_rate": 4.109283103524352e-06, + "loss": 1.6906, + "step": 1581000 + }, + { + "epoch": 0.79, + "learning_rate": 4.104257576359117e-06, + "loss": 1.7046, + "step": 1581500 + }, + { + "epoch": 0.8, + "learning_rate": 4.099232049193881e-06, + "loss": 1.6968, + "step": 1582000 + }, + { + "epoch": 0.8, + "learning_rate": 4.094206522028644e-06, + "loss": 1.7092, + "step": 1582500 + }, + { + "epoch": 0.8, + "learning_rate": 4.0891809948634095e-06, + "loss": 1.6957, + "step": 1583000 + }, + { + "epoch": 0.8, + "learning_rate": 4.084155467698173e-06, + "loss": 1.7062, + "step": 1583500 + }, + { + "epoch": 0.8, + "learning_rate": 4.079129940532937e-06, + "loss": 1.7186, + "step": 1584000 + }, + { + "epoch": 0.8, + "learning_rate": 4.0741044133677016e-06, + "loss": 1.7064, + "step": 1584500 + }, + { + "epoch": 0.8, + "learning_rate": 4.069078886202466e-06, + "loss": 1.6973, + "step": 1585000 + }, + { + "epoch": 0.8, + "learning_rate": 4.06405335903723e-06, + "loss": 1.6938, + "step": 1585500 + }, + { + "epoch": 0.8, + "learning_rate": 4.059027831871994e-06, + "loss": 1.6952, + "step": 1586000 + }, + { + "epoch": 0.8, + "learning_rate": 4.054002304706759e-06, + "loss": 1.6881, + "step": 1586500 + }, + { + "epoch": 0.8, + "learning_rate": 4.048976777541522e-06, + "loss": 1.6836, + "step": 1587000 + }, + { + "epoch": 0.8, + "learning_rate": 4.0439512503762865e-06, + "loss": 1.7088, + "step": 1587500 + }, + { + "epoch": 0.8, + "learning_rate": 4.038925723211051e-06, + "loss": 1.713, + "step": 1588000 + }, + { + "epoch": 0.8, + "learning_rate": 4.033900196045815e-06, + "loss": 1.6875, + "step": 1588500 + }, + { + "epoch": 0.8, + "learning_rate": 4.028874668880579e-06, + "loss": 1.7076, + "step": 1589000 + }, + { + "epoch": 0.8, + "learning_rate": 4.023849141715344e-06, + "loss": 1.6906, + "step": 1589500 + }, + { + "epoch": 0.8, + "learning_rate": 4.018823614550107e-06, + "loss": 1.6828, + "step": 1590000 + }, + { + "epoch": 0.8, + "learning_rate": 4.013798087384872e-06, + "loss": 1.7158, + "step": 1590500 + }, + { + "epoch": 0.8, + "learning_rate": 4.008772560219636e-06, + "loss": 1.6784, + "step": 1591000 + }, + { + "epoch": 0.8, + "learning_rate": 4.0037470330544e-06, + "loss": 1.7008, + "step": 1591500 + }, + { + "epoch": 0.8, + "learning_rate": 3.998721505889164e-06, + "loss": 1.6891, + "step": 1592000 + }, + { + "epoch": 0.8, + "learning_rate": 3.9936959787239286e-06, + "loss": 1.704, + "step": 1592500 + }, + { + "epoch": 0.8, + "learning_rate": 3.988670451558693e-06, + "loss": 1.7092, + "step": 1593000 + }, + { + "epoch": 0.8, + "learning_rate": 3.983644924393456e-06, + "loss": 1.6986, + "step": 1593500 + }, + { + "epoch": 0.8, + "learning_rate": 3.9786193972282215e-06, + "loss": 1.6908, + "step": 1594000 + }, + { + "epoch": 0.8, + "learning_rate": 3.973593870062985e-06, + "loss": 1.7247, + "step": 1594500 + }, + { + "epoch": 0.8, + "learning_rate": 3.968568342897749e-06, + "loss": 1.6983, + "step": 1595000 + }, + { + "epoch": 0.8, + "learning_rate": 3.9635428157325135e-06, + "loss": 1.7171, + "step": 1595500 + }, + { + "epoch": 0.8, + "learning_rate": 3.958517288567278e-06, + "loss": 1.6974, + "step": 1596000 + }, + { + "epoch": 0.8, + "learning_rate": 3.953491761402042e-06, + "loss": 1.7143, + "step": 1596500 + }, + { + "epoch": 0.8, + "learning_rate": 3.948466234236806e-06, + "loss": 1.6968, + "step": 1597000 + }, + { + "epoch": 0.8, + "learning_rate": 3.94344070707157e-06, + "loss": 1.6892, + "step": 1597500 + }, + { + "epoch": 0.8, + "learning_rate": 3.938415179906335e-06, + "loss": 1.7162, + "step": 1598000 + }, + { + "epoch": 0.8, + "learning_rate": 3.933389652741098e-06, + "loss": 1.695, + "step": 1598500 + }, + { + "epoch": 0.8, + "learning_rate": 3.928364125575863e-06, + "loss": 1.7045, + "step": 1599000 + }, + { + "epoch": 0.8, + "learning_rate": 3.923338598410627e-06, + "loss": 1.6892, + "step": 1599500 + }, + { + "epoch": 0.8, + "learning_rate": 3.918313071245391e-06, + "loss": 1.7136, + "step": 1600000 + }, + { + "epoch": 0.8, + "learning_rate": 3.913287544080156e-06, + "loss": 1.7079, + "step": 1600500 + }, + { + "epoch": 0.8, + "learning_rate": 3.90826201691492e-06, + "loss": 1.6808, + "step": 1601000 + }, + { + "epoch": 0.8, + "learning_rate": 3.903236489749684e-06, + "loss": 1.7134, + "step": 1601500 + }, + { + "epoch": 0.81, + "learning_rate": 3.898210962584448e-06, + "loss": 1.6902, + "step": 1602000 + }, + { + "epoch": 0.81, + "learning_rate": 3.893185435419212e-06, + "loss": 1.7061, + "step": 1602500 + }, + { + "epoch": 0.81, + "learning_rate": 3.888159908253976e-06, + "loss": 1.6991, + "step": 1603000 + }, + { + "epoch": 0.81, + "learning_rate": 3.8831343810887405e-06, + "loss": 1.7079, + "step": 1603500 + }, + { + "epoch": 0.81, + "learning_rate": 3.878108853923505e-06, + "loss": 1.7008, + "step": 1604000 + }, + { + "epoch": 0.81, + "learning_rate": 3.873083326758269e-06, + "loss": 1.7027, + "step": 1604500 + }, + { + "epoch": 0.81, + "learning_rate": 3.868057799593033e-06, + "loss": 1.6897, + "step": 1605000 + }, + { + "epoch": 0.81, + "learning_rate": 3.863032272427798e-06, + "loss": 1.6958, + "step": 1605500 + }, + { + "epoch": 0.81, + "learning_rate": 3.858006745262561e-06, + "loss": 1.704, + "step": 1606000 + }, + { + "epoch": 0.81, + "learning_rate": 3.852981218097326e-06, + "loss": 1.7234, + "step": 1606500 + }, + { + "epoch": 0.81, + "learning_rate": 3.84795569093209e-06, + "loss": 1.6925, + "step": 1607000 + }, + { + "epoch": 0.81, + "learning_rate": 3.842930163766854e-06, + "loss": 1.7023, + "step": 1607500 + }, + { + "epoch": 0.81, + "learning_rate": 3.837904636601618e-06, + "loss": 1.7098, + "step": 1608000 + }, + { + "epoch": 0.81, + "learning_rate": 3.832879109436383e-06, + "loss": 1.7019, + "step": 1608500 + }, + { + "epoch": 0.81, + "learning_rate": 3.827853582271147e-06, + "loss": 1.6912, + "step": 1609000 + }, + { + "epoch": 0.81, + "learning_rate": 3.822828055105911e-06, + "loss": 1.717, + "step": 1609500 + }, + { + "epoch": 0.81, + "learning_rate": 3.817802527940675e-06, + "loss": 1.6964, + "step": 1610000 + }, + { + "epoch": 0.81, + "learning_rate": 3.812777000775439e-06, + "loss": 1.7123, + "step": 1610500 + }, + { + "epoch": 0.81, + "learning_rate": 3.807751473610203e-06, + "loss": 1.692, + "step": 1611000 + }, + { + "epoch": 0.81, + "learning_rate": 3.802725946444967e-06, + "loss": 1.6622, + "step": 1611500 + }, + { + "epoch": 0.81, + "learning_rate": 3.797700419279732e-06, + "loss": 1.7017, + "step": 1612000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7926748921144957e-06, + "loss": 1.7064, + "step": 1612500 + }, + { + "epoch": 0.81, + "learning_rate": 3.78764936494926e-06, + "loss": 1.6921, + "step": 1613000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7826238377840243e-06, + "loss": 1.6872, + "step": 1613500 + }, + { + "epoch": 0.81, + "learning_rate": 3.7775983106187885e-06, + "loss": 1.7016, + "step": 1614000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7725727834535524e-06, + "loss": 1.6942, + "step": 1614500 + }, + { + "epoch": 0.81, + "learning_rate": 3.767547256288317e-06, + "loss": 1.6975, + "step": 1615000 + }, + { + "epoch": 0.81, + "learning_rate": 3.762521729123081e-06, + "loss": 1.7089, + "step": 1615500 + }, + { + "epoch": 0.81, + "learning_rate": 3.7574962019578453e-06, + "loss": 1.7174, + "step": 1616000 + }, + { + "epoch": 0.81, + "learning_rate": 3.752470674792609e-06, + "loss": 1.709, + "step": 1616500 + }, + { + "epoch": 0.81, + "learning_rate": 3.747445147627374e-06, + "loss": 1.7125, + "step": 1617000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7424196204621378e-06, + "loss": 1.7057, + "step": 1617500 + }, + { + "epoch": 0.81, + "learning_rate": 3.737394093296902e-06, + "loss": 1.7083, + "step": 1618000 + }, + { + "epoch": 0.81, + "learning_rate": 3.732368566131666e-06, + "loss": 1.6874, + "step": 1618500 + }, + { + "epoch": 0.81, + "learning_rate": 3.7273430389664302e-06, + "loss": 1.7136, + "step": 1619000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7223175118011945e-06, + "loss": 1.6811, + "step": 1619500 + }, + { + "epoch": 0.81, + "learning_rate": 3.7172919846359584e-06, + "loss": 1.6919, + "step": 1620000 + }, + { + "epoch": 0.81, + "learning_rate": 3.712266457470723e-06, + "loss": 1.7144, + "step": 1620500 + }, + { + "epoch": 0.81, + "learning_rate": 3.707240930305487e-06, + "loss": 1.7156, + "step": 1621000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7022154031402513e-06, + "loss": 1.6806, + "step": 1621500 + }, + { + "epoch": 0.82, + "learning_rate": 3.697189875975015e-06, + "loss": 1.7125, + "step": 1622000 + }, + { + "epoch": 0.82, + "learning_rate": 3.69216434880978e-06, + "loss": 1.7045, + "step": 1622500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6871388216445437e-06, + "loss": 1.6886, + "step": 1623000 + }, + { + "epoch": 0.82, + "learning_rate": 3.682113294479308e-06, + "loss": 1.7094, + "step": 1623500 + }, + { + "epoch": 0.82, + "learning_rate": 3.677087767314072e-06, + "loss": 1.6887, + "step": 1624000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6720622401488366e-06, + "loss": 1.6974, + "step": 1624500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6670367129836005e-06, + "loss": 1.7076, + "step": 1625000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6620111858183648e-06, + "loss": 1.7019, + "step": 1625500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6569856586531286e-06, + "loss": 1.6875, + "step": 1626000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6519601314878934e-06, + "loss": 1.6877, + "step": 1626500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6469346043226572e-06, + "loss": 1.7081, + "step": 1627000 + }, + { + "epoch": 0.82, + "learning_rate": 3.641909077157421e-06, + "loss": 1.6764, + "step": 1627500 + }, + { + "epoch": 0.82, + "learning_rate": 3.636883549992186e-06, + "loss": 1.6954, + "step": 1628000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6318580228269497e-06, + "loss": 1.6872, + "step": 1628500 + }, + { + "epoch": 0.82, + "learning_rate": 3.626832495661714e-06, + "loss": 1.6932, + "step": 1629000 + }, + { + "epoch": 0.82, + "learning_rate": 3.621806968496478e-06, + "loss": 1.6974, + "step": 1629500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6167814413312426e-06, + "loss": 1.7117, + "step": 1630000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6117559141660064e-06, + "loss": 1.6999, + "step": 1630500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6067303870007707e-06, + "loss": 1.7113, + "step": 1631000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6017048598355346e-06, + "loss": 1.6989, + "step": 1631500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5966793326702993e-06, + "loss": 1.6967, + "step": 1632000 + }, + { + "epoch": 0.82, + "learning_rate": 3.591653805505063e-06, + "loss": 1.707, + "step": 1632500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5866282783398275e-06, + "loss": 1.6761, + "step": 1633000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5816027511745913e-06, + "loss": 1.717, + "step": 1633500 + }, + { + "epoch": 0.82, + "learning_rate": 3.576577224009356e-06, + "loss": 1.7118, + "step": 1634000 + }, + { + "epoch": 0.82, + "learning_rate": 3.57155169684412e-06, + "loss": 1.691, + "step": 1634500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5665261696788842e-06, + "loss": 1.6835, + "step": 1635000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5615006425136485e-06, + "loss": 1.6861, + "step": 1635500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5564751153484124e-06, + "loss": 1.7022, + "step": 1636000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5514495881831767e-06, + "loss": 1.7019, + "step": 1636500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5464240610179406e-06, + "loss": 1.7117, + "step": 1637000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5413985338527053e-06, + "loss": 1.6844, + "step": 1637500 + }, + { + "epoch": 0.82, + "learning_rate": 3.536373006687469e-06, + "loss": 1.6987, + "step": 1638000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5313474795222334e-06, + "loss": 1.6796, + "step": 1638500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5263219523569973e-06, + "loss": 1.701, + "step": 1639000 + }, + { + "epoch": 0.82, + "learning_rate": 3.521296425191762e-06, + "loss": 1.7153, + "step": 1639500 + }, + { + "epoch": 0.82, + "learning_rate": 3.516270898026526e-06, + "loss": 1.6977, + "step": 1640000 + }, + { + "epoch": 0.82, + "learning_rate": 3.51124537086129e-06, + "loss": 1.7074, + "step": 1640500 + }, + { + "epoch": 0.82, + "learning_rate": 3.5062198436960545e-06, + "loss": 1.6895, + "step": 1641000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5011943165308188e-06, + "loss": 1.6912, + "step": 1641500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4961687893655826e-06, + "loss": 1.7011, + "step": 1642000 + }, + { + "epoch": 0.83, + "learning_rate": 3.4911432622003474e-06, + "loss": 1.6877, + "step": 1642500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4861177350351112e-06, + "loss": 1.7201, + "step": 1643000 + }, + { + "epoch": 0.83, + "learning_rate": 3.4810922078698755e-06, + "loss": 1.6995, + "step": 1643500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4760666807046394e-06, + "loss": 1.6959, + "step": 1644000 + }, + { + "epoch": 0.83, + "learning_rate": 3.4710411535394033e-06, + "loss": 1.7057, + "step": 1644500 + }, + { + "epoch": 0.83, + "learning_rate": 3.466015626374168e-06, + "loss": 1.7114, + "step": 1645000 + }, + { + "epoch": 0.83, + "learning_rate": 3.460990099208932e-06, + "loss": 1.6999, + "step": 1645500 + }, + { + "epoch": 0.83, + "learning_rate": 3.455964572043696e-06, + "loss": 1.694, + "step": 1646000 + }, + { + "epoch": 0.83, + "learning_rate": 3.45093904487846e-06, + "loss": 1.6896, + "step": 1646500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4459135177132247e-06, + "loss": 1.6893, + "step": 1647000 + }, + { + "epoch": 0.83, + "learning_rate": 3.4408879905479886e-06, + "loss": 1.7171, + "step": 1647500 + }, + { + "epoch": 0.83, + "learning_rate": 3.435862463382753e-06, + "loss": 1.6965, + "step": 1648000 + }, + { + "epoch": 0.83, + "learning_rate": 3.430836936217517e-06, + "loss": 1.6961, + "step": 1648500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4258114090522815e-06, + "loss": 1.6858, + "step": 1649000 + }, + { + "epoch": 0.83, + "learning_rate": 3.4207858818870454e-06, + "loss": 1.6946, + "step": 1649500 + }, + { + "epoch": 0.83, + "learning_rate": 3.41576035472181e-06, + "loss": 1.7144, + "step": 1650000 + }, + { + "epoch": 0.83, + "learning_rate": 3.410734827556574e-06, + "loss": 1.6937, + "step": 1650500 + }, + { + "epoch": 0.83, + "learning_rate": 3.4057093003913382e-06, + "loss": 1.7031, + "step": 1651000 + }, + { + "epoch": 0.83, + "learning_rate": 3.400683773226102e-06, + "loss": 1.7207, + "step": 1651500 + }, + { + "epoch": 0.83, + "learning_rate": 3.395658246060867e-06, + "loss": 1.7179, + "step": 1652000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3906327188956307e-06, + "loss": 1.6886, + "step": 1652500 + }, + { + "epoch": 0.83, + "learning_rate": 3.385607191730395e-06, + "loss": 1.6953, + "step": 1653000 + }, + { + "epoch": 0.83, + "learning_rate": 3.380581664565159e-06, + "loss": 1.7185, + "step": 1653500 + }, + { + "epoch": 0.83, + "learning_rate": 3.3755561373999227e-06, + "loss": 1.6866, + "step": 1654000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3705306102346875e-06, + "loss": 1.6995, + "step": 1654500 + }, + { + "epoch": 0.83, + "learning_rate": 3.3655050830694513e-06, + "loss": 1.6841, + "step": 1655000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3604795559042156e-06, + "loss": 1.6798, + "step": 1655500 + }, + { + "epoch": 0.83, + "learning_rate": 3.35545402873898e-06, + "loss": 1.7163, + "step": 1656000 + }, + { + "epoch": 0.83, + "learning_rate": 3.350428501573744e-06, + "loss": 1.6892, + "step": 1656500 + }, + { + "epoch": 0.83, + "learning_rate": 3.345402974408508e-06, + "loss": 1.6886, + "step": 1657000 + }, + { + "epoch": 0.83, + "learning_rate": 3.340377447243273e-06, + "loss": 1.7029, + "step": 1657500 + }, + { + "epoch": 0.83, + "learning_rate": 3.3353519200780367e-06, + "loss": 1.6987, + "step": 1658000 + }, + { + "epoch": 0.83, + "learning_rate": 3.330326392912801e-06, + "loss": 1.6974, + "step": 1658500 + }, + { + "epoch": 0.83, + "learning_rate": 3.325300865747565e-06, + "loss": 1.697, + "step": 1659000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3202753385823295e-06, + "loss": 1.6909, + "step": 1659500 + }, + { + "epoch": 0.83, + "learning_rate": 3.3152498114170934e-06, + "loss": 1.6894, + "step": 1660000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3102242842518577e-06, + "loss": 1.7207, + "step": 1660500 + }, + { + "epoch": 0.83, + "learning_rate": 3.3051987570866216e-06, + "loss": 1.7003, + "step": 1661000 + }, + { + "epoch": 0.83, + "learning_rate": 3.3001732299213863e-06, + "loss": 1.7032, + "step": 1661500 + }, + { + "epoch": 0.84, + "learning_rate": 3.29514770275615e-06, + "loss": 1.7083, + "step": 1662000 + }, + { + "epoch": 0.84, + "learning_rate": 3.290122175590914e-06, + "loss": 1.706, + "step": 1662500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2850966484256788e-06, + "loss": 1.6975, + "step": 1663000 + }, + { + "epoch": 0.84, + "learning_rate": 3.2800711212604426e-06, + "loss": 1.7063, + "step": 1663500 + }, + { + "epoch": 0.84, + "learning_rate": 3.275045594095207e-06, + "loss": 1.6937, + "step": 1664000 + }, + { + "epoch": 0.84, + "learning_rate": 3.270020066929971e-06, + "loss": 1.6866, + "step": 1664500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2649945397647355e-06, + "loss": 1.6877, + "step": 1665000 + }, + { + "epoch": 0.84, + "learning_rate": 3.2599690125994994e-06, + "loss": 1.6966, + "step": 1665500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2549434854342637e-06, + "loss": 1.6844, + "step": 1666000 + }, + { + "epoch": 0.84, + "learning_rate": 3.2499179582690275e-06, + "loss": 1.6894, + "step": 1666500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2448924311037923e-06, + "loss": 1.693, + "step": 1667000 + }, + { + "epoch": 0.84, + "learning_rate": 3.239866903938556e-06, + "loss": 1.7046, + "step": 1667500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2348413767733204e-06, + "loss": 1.6986, + "step": 1668000 + }, + { + "epoch": 0.84, + "learning_rate": 3.2298158496080843e-06, + "loss": 1.6931, + "step": 1668500 + }, + { + "epoch": 0.84, + "learning_rate": 3.224790322442849e-06, + "loss": 1.7134, + "step": 1669000 + }, + { + "epoch": 0.84, + "learning_rate": 3.219764795277613e-06, + "loss": 1.6982, + "step": 1669500 + }, + { + "epoch": 0.84, + "learning_rate": 3.214739268112377e-06, + "loss": 1.6833, + "step": 1670000 + }, + { + "epoch": 0.84, + "learning_rate": 3.2097137409471415e-06, + "loss": 1.6897, + "step": 1670500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2046882137819053e-06, + "loss": 1.693, + "step": 1671000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1996626866166696e-06, + "loss": 1.6992, + "step": 1671500 + }, + { + "epoch": 0.84, + "learning_rate": 3.1946371594514335e-06, + "loss": 1.7041, + "step": 1672000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1896116322861982e-06, + "loss": 1.706, + "step": 1672500 + }, + { + "epoch": 0.84, + "learning_rate": 3.184586105120962e-06, + "loss": 1.6876, + "step": 1673000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1795605779557264e-06, + "loss": 1.6766, + "step": 1673500 + }, + { + "epoch": 0.84, + "learning_rate": 3.1745350507904903e-06, + "loss": 1.6945, + "step": 1674000 + }, + { + "epoch": 0.84, + "learning_rate": 3.169509523625255e-06, + "loss": 1.6829, + "step": 1674500 + }, + { + "epoch": 0.84, + "learning_rate": 3.164483996460019e-06, + "loss": 1.7064, + "step": 1675000 + }, + { + "epoch": 0.84, + "learning_rate": 3.159458469294783e-06, + "loss": 1.704, + "step": 1675500 + }, + { + "epoch": 0.84, + "learning_rate": 3.154432942129547e-06, + "loss": 1.6948, + "step": 1676000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1494074149643117e-06, + "loss": 1.701, + "step": 1676500 + }, + { + "epoch": 0.84, + "learning_rate": 3.1443818877990756e-06, + "loss": 1.7078, + "step": 1677000 + }, + { + "epoch": 0.84, + "learning_rate": 3.13935636063384e-06, + "loss": 1.712, + "step": 1677500 + }, + { + "epoch": 0.84, + "learning_rate": 3.134330833468604e-06, + "loss": 1.7009, + "step": 1678000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1293053063033685e-06, + "loss": 1.6889, + "step": 1678500 + }, + { + "epoch": 0.84, + "learning_rate": 3.1242797791381323e-06, + "loss": 1.6841, + "step": 1679000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1192542519728962e-06, + "loss": 1.7059, + "step": 1679500 + }, + { + "epoch": 0.84, + "learning_rate": 3.114228724807661e-06, + "loss": 1.6918, + "step": 1680000 + }, + { + "epoch": 0.84, + "learning_rate": 3.109203197642425e-06, + "loss": 1.7067, + "step": 1680500 + }, + { + "epoch": 0.84, + "learning_rate": 3.104177670477189e-06, + "loss": 1.7031, + "step": 1681000 + }, + { + "epoch": 0.85, + "learning_rate": 3.099152143311953e-06, + "loss": 1.6842, + "step": 1681500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0941266161467177e-06, + "loss": 1.7099, + "step": 1682000 + }, + { + "epoch": 0.85, + "learning_rate": 3.0891010889814816e-06, + "loss": 1.6881, + "step": 1682500 + }, + { + "epoch": 0.85, + "learning_rate": 3.084075561816246e-06, + "loss": 1.6891, + "step": 1683000 + }, + { + "epoch": 0.85, + "learning_rate": 3.07905003465101e-06, + "loss": 1.7085, + "step": 1683500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0740245074857744e-06, + "loss": 1.7129, + "step": 1684000 + }, + { + "epoch": 0.85, + "learning_rate": 3.0689989803205383e-06, + "loss": 1.7017, + "step": 1684500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0639734531553026e-06, + "loss": 1.713, + "step": 1685000 + }, + { + "epoch": 0.85, + "learning_rate": 3.058947925990067e-06, + "loss": 1.6812, + "step": 1685500 + }, + { + "epoch": 0.85, + "learning_rate": 3.053922398824831e-06, + "loss": 1.704, + "step": 1686000 + }, + { + "epoch": 0.85, + "learning_rate": 3.048896871659595e-06, + "loss": 1.6964, + "step": 1686500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0438713444943598e-06, + "loss": 1.6939, + "step": 1687000 + }, + { + "epoch": 0.85, + "learning_rate": 3.0388458173291236e-06, + "loss": 1.6905, + "step": 1687500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0338202901638875e-06, + "loss": 1.6837, + "step": 1688000 + }, + { + "epoch": 0.85, + "learning_rate": 3.028794762998652e-06, + "loss": 1.6798, + "step": 1688500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0237692358334157e-06, + "loss": 1.721, + "step": 1689000 + }, + { + "epoch": 0.85, + "learning_rate": 3.0187437086681804e-06, + "loss": 1.7029, + "step": 1689500 + }, + { + "epoch": 0.85, + "learning_rate": 3.0137181815029443e-06, + "loss": 1.6995, + "step": 1690000 + }, + { + "epoch": 0.85, + "learning_rate": 3.0086926543377086e-06, + "loss": 1.6688, + "step": 1690500 + }, + { + "epoch": 0.85, + "learning_rate": 3.003667127172473e-06, + "loss": 1.6931, + "step": 1691000 + }, + { + "epoch": 0.85, + "learning_rate": 2.998641600007237e-06, + "loss": 1.7024, + "step": 1691500 + }, + { + "epoch": 0.85, + "learning_rate": 2.993616072842001e-06, + "loss": 1.6985, + "step": 1692000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9885905456767657e-06, + "loss": 1.6845, + "step": 1692500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9835650185115296e-06, + "loss": 1.7033, + "step": 1693000 + }, + { + "epoch": 0.85, + "learning_rate": 2.978539491346294e-06, + "loss": 1.7036, + "step": 1693500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9735139641810578e-06, + "loss": 1.6994, + "step": 1694000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9684884370158225e-06, + "loss": 1.7082, + "step": 1694500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9634629098505864e-06, + "loss": 1.6831, + "step": 1695000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9584373826853507e-06, + "loss": 1.6906, + "step": 1695500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9534118555201145e-06, + "loss": 1.6895, + "step": 1696000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9483863283548784e-06, + "loss": 1.6992, + "step": 1696500 + }, + { + "epoch": 0.85, + "learning_rate": 2.943360801189643e-06, + "loss": 1.7045, + "step": 1697000 + }, + { + "epoch": 0.85, + "learning_rate": 2.938335274024407e-06, + "loss": 1.6893, + "step": 1697500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9333097468591713e-06, + "loss": 1.6983, + "step": 1698000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9282842196939356e-06, + "loss": 1.6947, + "step": 1698500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9232586925287e-06, + "loss": 1.7043, + "step": 1699000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9182331653634637e-06, + "loss": 1.6939, + "step": 1699500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9132076381982285e-06, + "loss": 1.6855, + "step": 1700000 + }, + { + "epoch": 0.85, + "learning_rate": 2.9081821110329923e-06, + "loss": 1.6939, + "step": 1700500 + }, + { + "epoch": 0.85, + "learning_rate": 2.9031565838677566e-06, + "loss": 1.6925, + "step": 1701000 + }, + { + "epoch": 0.86, + "learning_rate": 2.8981310567025205e-06, + "loss": 1.7074, + "step": 1701500 + }, + { + "epoch": 0.86, + "learning_rate": 2.893105529537285e-06, + "loss": 1.6908, + "step": 1702000 + }, + { + "epoch": 0.86, + "learning_rate": 2.888080002372049e-06, + "loss": 1.6887, + "step": 1702500 + }, + { + "epoch": 0.86, + "learning_rate": 2.8830544752068134e-06, + "loss": 1.6925, + "step": 1703000 + }, + { + "epoch": 0.86, + "learning_rate": 2.8780289480415772e-06, + "loss": 1.6984, + "step": 1703500 + }, + { + "epoch": 0.86, + "learning_rate": 2.873003420876342e-06, + "loss": 1.7003, + "step": 1704000 + }, + { + "epoch": 0.86, + "learning_rate": 2.867977893711106e-06, + "loss": 1.6905, + "step": 1704500 + }, + { + "epoch": 0.86, + "learning_rate": 2.8629523665458697e-06, + "loss": 1.6924, + "step": 1705000 + }, + { + "epoch": 0.86, + "learning_rate": 2.857926839380634e-06, + "loss": 1.6913, + "step": 1705500 + }, + { + "epoch": 0.86, + "learning_rate": 2.8529013122153983e-06, + "loss": 1.6992, + "step": 1706000 + }, + { + "epoch": 0.86, + "learning_rate": 2.8478757850501626e-06, + "loss": 1.7061, + "step": 1706500 + }, + { + "epoch": 0.86, + "learning_rate": 2.8428502578849264e-06, + "loss": 1.7018, + "step": 1707000 + }, + { + "epoch": 0.86, + "learning_rate": 2.837824730719691e-06, + "loss": 1.6844, + "step": 1707500 + }, + { + "epoch": 0.86, + "learning_rate": 2.832799203554455e-06, + "loss": 1.6775, + "step": 1708000 + }, + { + "epoch": 0.86, + "learning_rate": 2.8277736763892193e-06, + "loss": 1.6889, + "step": 1708500 + }, + { + "epoch": 0.86, + "learning_rate": 2.822748149223983e-06, + "loss": 1.6777, + "step": 1709000 + }, + { + "epoch": 0.86, + "learning_rate": 2.817722622058748e-06, + "loss": 1.6762, + "step": 1709500 + }, + { + "epoch": 0.86, + "learning_rate": 2.812697094893512e-06, + "loss": 1.6965, + "step": 1710000 + }, + { + "epoch": 0.86, + "learning_rate": 2.807671567728276e-06, + "loss": 1.7157, + "step": 1710500 + }, + { + "epoch": 0.86, + "learning_rate": 2.80264604056304e-06, + "loss": 1.6945, + "step": 1711000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7976205133978047e-06, + "loss": 1.6854, + "step": 1711500 + }, + { + "epoch": 0.86, + "learning_rate": 2.7925949862325685e-06, + "loss": 1.7035, + "step": 1712000 + }, + { + "epoch": 0.86, + "learning_rate": 2.787569459067333e-06, + "loss": 1.6827, + "step": 1712500 + }, + { + "epoch": 0.86, + "learning_rate": 2.782543931902097e-06, + "loss": 1.698, + "step": 1713000 + }, + { + "epoch": 0.86, + "learning_rate": 2.777518404736861e-06, + "loss": 1.6803, + "step": 1713500 + }, + { + "epoch": 0.86, + "learning_rate": 2.7724928775716253e-06, + "loss": 1.6922, + "step": 1714000 + }, + { + "epoch": 0.86, + "learning_rate": 2.767467350406389e-06, + "loss": 1.6824, + "step": 1714500 + }, + { + "epoch": 0.86, + "learning_rate": 2.762441823241154e-06, + "loss": 1.6994, + "step": 1715000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7574162960759178e-06, + "loss": 1.6993, + "step": 1715500 + }, + { + "epoch": 0.86, + "learning_rate": 2.752390768910682e-06, + "loss": 1.702, + "step": 1716000 + }, + { + "epoch": 0.86, + "learning_rate": 2.747365241745446e-06, + "loss": 1.7004, + "step": 1716500 + }, + { + "epoch": 0.86, + "learning_rate": 2.7423397145802106e-06, + "loss": 1.6963, + "step": 1717000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7373141874149745e-06, + "loss": 1.6955, + "step": 1717500 + }, + { + "epoch": 0.86, + "learning_rate": 2.732288660249739e-06, + "loss": 1.7005, + "step": 1718000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7272631330845027e-06, + "loss": 1.6941, + "step": 1718500 + }, + { + "epoch": 0.86, + "learning_rate": 2.7222376059192674e-06, + "loss": 1.6921, + "step": 1719000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7172120787540313e-06, + "loss": 1.7126, + "step": 1719500 + }, + { + "epoch": 0.86, + "learning_rate": 2.7121865515887955e-06, + "loss": 1.6935, + "step": 1720000 + }, + { + "epoch": 0.86, + "learning_rate": 2.70716102442356e-06, + "loss": 1.6889, + "step": 1720500 + }, + { + "epoch": 0.86, + "learning_rate": 2.702135497258324e-06, + "loss": 1.7065, + "step": 1721000 + }, + { + "epoch": 0.87, + "learning_rate": 2.697109970093088e-06, + "loss": 1.6976, + "step": 1721500 + }, + { + "epoch": 0.87, + "learning_rate": 2.692084442927852e-06, + "loss": 1.6987, + "step": 1722000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6870589157626166e-06, + "loss": 1.6852, + "step": 1722500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6820333885973805e-06, + "loss": 1.6974, + "step": 1723000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6770078614321448e-06, + "loss": 1.6856, + "step": 1723500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6719823342669086e-06, + "loss": 1.6875, + "step": 1724000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6669568071016733e-06, + "loss": 1.697, + "step": 1724500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6619312799364372e-06, + "loss": 1.7025, + "step": 1725000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6569057527712015e-06, + "loss": 1.6929, + "step": 1725500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6518802256059654e-06, + "loss": 1.6901, + "step": 1726000 + }, + { + "epoch": 0.87, + "learning_rate": 2.64685469844073e-06, + "loss": 1.6866, + "step": 1726500 + }, + { + "epoch": 0.87, + "learning_rate": 2.641829171275494e-06, + "loss": 1.7017, + "step": 1727000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6368036441102583e-06, + "loss": 1.7006, + "step": 1727500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6317781169450226e-06, + "loss": 1.6837, + "step": 1728000 + }, + { + "epoch": 0.87, + "learning_rate": 2.626752589779787e-06, + "loss": 1.7012, + "step": 1728500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6217270626145507e-06, + "loss": 1.6834, + "step": 1729000 + }, + { + "epoch": 0.87, + "learning_rate": 2.6167015354493154e-06, + "loss": 1.7085, + "step": 1729500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6116760082840793e-06, + "loss": 1.6857, + "step": 1730000 + }, + { + "epoch": 0.87, + "learning_rate": 2.606650481118843e-06, + "loss": 1.7078, + "step": 1730500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6016249539536075e-06, + "loss": 1.7059, + "step": 1731000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5965994267883713e-06, + "loss": 1.6869, + "step": 1731500 + }, + { + "epoch": 0.87, + "learning_rate": 2.591573899623136e-06, + "loss": 1.6907, + "step": 1732000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5865483724579e-06, + "loss": 1.691, + "step": 1732500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5815228452926642e-06, + "loss": 1.684, + "step": 1733000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5764973181274285e-06, + "loss": 1.6954, + "step": 1733500 + }, + { + "epoch": 0.87, + "learning_rate": 2.571471790962193e-06, + "loss": 1.6979, + "step": 1734000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5664462637969567e-06, + "loss": 1.691, + "step": 1734500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5614207366317214e-06, + "loss": 1.6957, + "step": 1735000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5563952094664853e-06, + "loss": 1.7016, + "step": 1735500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5513696823012496e-06, + "loss": 1.6768, + "step": 1736000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5463441551360134e-06, + "loss": 1.6838, + "step": 1736500 + }, + { + "epoch": 0.87, + "learning_rate": 2.541318627970778e-06, + "loss": 1.691, + "step": 1737000 + }, + { + "epoch": 0.87, + "learning_rate": 2.536293100805542e-06, + "loss": 1.6784, + "step": 1737500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5312675736403063e-06, + "loss": 1.7008, + "step": 1738000 + }, + { + "epoch": 0.87, + "learning_rate": 2.52624204647507e-06, + "loss": 1.6978, + "step": 1738500 + }, + { + "epoch": 0.87, + "learning_rate": 2.521216519309834e-06, + "loss": 1.6971, + "step": 1739000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5161909921445988e-06, + "loss": 1.6913, + "step": 1739500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5111654649793626e-06, + "loss": 1.6978, + "step": 1740000 + }, + { + "epoch": 0.87, + "learning_rate": 2.506139937814127e-06, + "loss": 1.6702, + "step": 1740500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5011144106488912e-06, + "loss": 1.6933, + "step": 1741000 + }, + { + "epoch": 0.88, + "learning_rate": 2.4960888834836555e-06, + "loss": 1.688, + "step": 1741500 + }, + { + "epoch": 0.88, + "learning_rate": 2.49106335631842e-06, + "loss": 1.7022, + "step": 1742000 + }, + { + "epoch": 0.88, + "learning_rate": 2.486037829153184e-06, + "loss": 1.6944, + "step": 1742500 + }, + { + "epoch": 0.88, + "learning_rate": 2.481012301987948e-06, + "loss": 1.6806, + "step": 1743000 + }, + { + "epoch": 0.88, + "learning_rate": 2.475986774822712e-06, + "loss": 1.6815, + "step": 1743500 + }, + { + "epoch": 0.88, + "learning_rate": 2.470961247657476e-06, + "loss": 1.702, + "step": 1744000 + }, + { + "epoch": 0.88, + "learning_rate": 2.4659357204922404e-06, + "loss": 1.7213, + "step": 1744500 + }, + { + "epoch": 0.88, + "learning_rate": 2.4609101933270047e-06, + "loss": 1.684, + "step": 1745000 + }, + { + "epoch": 0.88, + "learning_rate": 2.455884666161769e-06, + "loss": 1.6988, + "step": 1745500 + }, + { + "epoch": 0.88, + "learning_rate": 2.450859138996533e-06, + "loss": 1.6831, + "step": 1746000 + }, + { + "epoch": 0.88, + "learning_rate": 2.445833611831297e-06, + "loss": 1.6758, + "step": 1746500 + }, + { + "epoch": 0.88, + "learning_rate": 2.4408080846660615e-06, + "loss": 1.6861, + "step": 1747000 + }, + { + "epoch": 0.88, + "learning_rate": 2.4357825575008258e-06, + "loss": 1.7089, + "step": 1747500 + }, + { + "epoch": 0.88, + "learning_rate": 2.4307570303355897e-06, + "loss": 1.6999, + "step": 1748000 + }, + { + "epoch": 0.88, + "learning_rate": 2.425731503170354e-06, + "loss": 1.6869, + "step": 1748500 + }, + { + "epoch": 0.88, + "learning_rate": 2.4207059760051182e-06, + "loss": 1.6955, + "step": 1749000 + }, + { + "epoch": 0.88, + "learning_rate": 2.4156804488398825e-06, + "loss": 1.6756, + "step": 1749500 + }, + { + "epoch": 0.88, + "learning_rate": 2.410654921674647e-06, + "loss": 1.6813, + "step": 1750000 + }, + { + "epoch": 0.88, + "learning_rate": 2.4056293945094107e-06, + "loss": 1.7004, + "step": 1750500 + }, + { + "epoch": 0.88, + "learning_rate": 2.400603867344175e-06, + "loss": 1.6848, + "step": 1751000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3955783401789393e-06, + "loss": 1.6816, + "step": 1751500 + }, + { + "epoch": 0.88, + "learning_rate": 2.390552813013703e-06, + "loss": 1.6913, + "step": 1752000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3855272858484674e-06, + "loss": 1.6898, + "step": 1752500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3805017586832317e-06, + "loss": 1.7117, + "step": 1753000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3754762315179956e-06, + "loss": 1.6923, + "step": 1753500 + }, + { + "epoch": 0.88, + "learning_rate": 2.37045070435276e-06, + "loss": 1.6902, + "step": 1754000 + }, + { + "epoch": 0.88, + "learning_rate": 2.365425177187524e-06, + "loss": 1.7003, + "step": 1754500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3603996500222885e-06, + "loss": 1.6758, + "step": 1755000 + }, + { + "epoch": 0.88, + "learning_rate": 2.355374122857053e-06, + "loss": 1.6922, + "step": 1755500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3503485956918167e-06, + "loss": 1.6984, + "step": 1756000 + }, + { + "epoch": 0.88, + "learning_rate": 2.345323068526581e-06, + "loss": 1.682, + "step": 1756500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3402975413613452e-06, + "loss": 1.6936, + "step": 1757000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3352720141961095e-06, + "loss": 1.6835, + "step": 1757500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3302464870308734e-06, + "loss": 1.6917, + "step": 1758000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3252209598656377e-06, + "loss": 1.6992, + "step": 1758500 + }, + { + "epoch": 0.88, + "learning_rate": 2.320195432700402e-06, + "loss": 1.6996, + "step": 1759000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3151699055351663e-06, + "loss": 1.6723, + "step": 1759500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3101443783699306e-06, + "loss": 1.6654, + "step": 1760000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3051188512046945e-06, + "loss": 1.6979, + "step": 1760500 + }, + { + "epoch": 0.88, + "learning_rate": 2.3000933240394583e-06, + "loss": 1.6823, + "step": 1761000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2950677968742226e-06, + "loss": 1.6949, + "step": 1761500 + }, + { + "epoch": 0.89, + "learning_rate": 2.290042269708987e-06, + "loss": 1.6976, + "step": 1762000 + }, + { + "epoch": 0.89, + "learning_rate": 2.285016742543751e-06, + "loss": 1.7006, + "step": 1762500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2799912153785155e-06, + "loss": 1.6917, + "step": 1763000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2749656882132794e-06, + "loss": 1.7008, + "step": 1763500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2699401610480437e-06, + "loss": 1.6882, + "step": 1764000 + }, + { + "epoch": 0.89, + "learning_rate": 2.264914633882808e-06, + "loss": 1.6823, + "step": 1764500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2598891067175723e-06, + "loss": 1.6794, + "step": 1765000 + }, + { + "epoch": 0.89, + "learning_rate": 2.254863579552336e-06, + "loss": 1.6912, + "step": 1765500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2498380523871004e-06, + "loss": 1.6722, + "step": 1766000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2448125252218647e-06, + "loss": 1.6825, + "step": 1766500 + }, + { + "epoch": 0.89, + "learning_rate": 2.239786998056629e-06, + "loss": 1.6776, + "step": 1767000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2347614708913933e-06, + "loss": 1.6861, + "step": 1767500 + }, + { + "epoch": 0.89, + "learning_rate": 2.229735943726157e-06, + "loss": 1.6907, + "step": 1768000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2247104165609215e-06, + "loss": 1.6901, + "step": 1768500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2196848893956853e-06, + "loss": 1.6749, + "step": 1769000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2146593622304496e-06, + "loss": 1.6687, + "step": 1769500 + }, + { + "epoch": 0.89, + "learning_rate": 2.209633835065214e-06, + "loss": 1.7042, + "step": 1770000 + }, + { + "epoch": 0.89, + "learning_rate": 2.2046083078999782e-06, + "loss": 1.6827, + "step": 1770500 + }, + { + "epoch": 0.89, + "learning_rate": 2.199582780734742e-06, + "loss": 1.69, + "step": 1771000 + }, + { + "epoch": 0.89, + "learning_rate": 2.1945572535695064e-06, + "loss": 1.6927, + "step": 1771500 + }, + { + "epoch": 0.89, + "learning_rate": 2.1895317264042707e-06, + "loss": 1.6954, + "step": 1772000 + }, + { + "epoch": 0.89, + "learning_rate": 2.184506199239035e-06, + "loss": 1.7015, + "step": 1772500 + }, + { + "epoch": 0.89, + "learning_rate": 2.179480672073799e-06, + "loss": 1.6896, + "step": 1773000 + }, + { + "epoch": 0.89, + "learning_rate": 2.174455144908563e-06, + "loss": 1.7152, + "step": 1773500 + }, + { + "epoch": 0.89, + "learning_rate": 2.1694296177433274e-06, + "loss": 1.6839, + "step": 1774000 + }, + { + "epoch": 0.89, + "learning_rate": 2.1644040905780917e-06, + "loss": 1.7138, + "step": 1774500 + }, + { + "epoch": 0.89, + "learning_rate": 2.159378563412856e-06, + "loss": 1.6722, + "step": 1775000 + }, + { + "epoch": 0.89, + "learning_rate": 2.15435303624762e-06, + "loss": 1.6893, + "step": 1775500 + }, + { + "epoch": 0.89, + "learning_rate": 2.149327509082384e-06, + "loss": 1.6851, + "step": 1776000 + }, + { + "epoch": 0.89, + "learning_rate": 2.1443019819171485e-06, + "loss": 1.6899, + "step": 1776500 + }, + { + "epoch": 0.89, + "learning_rate": 2.1392764547519128e-06, + "loss": 1.677, + "step": 1777000 + }, + { + "epoch": 0.89, + "learning_rate": 2.1342509275866766e-06, + "loss": 1.6847, + "step": 1777500 + }, + { + "epoch": 0.89, + "learning_rate": 2.129225400421441e-06, + "loss": 1.6982, + "step": 1778000 + }, + { + "epoch": 0.89, + "learning_rate": 2.124199873256205e-06, + "loss": 1.6892, + "step": 1778500 + }, + { + "epoch": 0.89, + "learning_rate": 2.119174346090969e-06, + "loss": 1.6983, + "step": 1779000 + }, + { + "epoch": 0.89, + "learning_rate": 2.1141488189257334e-06, + "loss": 1.6918, + "step": 1779500 + }, + { + "epoch": 0.89, + "learning_rate": 2.1091232917604977e-06, + "loss": 1.6741, + "step": 1780000 + }, + { + "epoch": 0.89, + "learning_rate": 2.104097764595262e-06, + "loss": 1.702, + "step": 1780500 + }, + { + "epoch": 0.9, + "learning_rate": 2.099072237430026e-06, + "loss": 1.6643, + "step": 1781000 + }, + { + "epoch": 0.9, + "learning_rate": 2.09404671026479e-06, + "loss": 1.6904, + "step": 1781500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0890211830995544e-06, + "loss": 1.6938, + "step": 1782000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0839956559343187e-06, + "loss": 1.6919, + "step": 1782500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0789701287690826e-06, + "loss": 1.6838, + "step": 1783000 + }, + { + "epoch": 0.9, + "learning_rate": 2.073944601603847e-06, + "loss": 1.6941, + "step": 1783500 + }, + { + "epoch": 0.9, + "learning_rate": 2.068919074438611e-06, + "loss": 1.7116, + "step": 1784000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0638935472733755e-06, + "loss": 1.6957, + "step": 1784500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0588680201081398e-06, + "loss": 1.6829, + "step": 1785000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0538424929429036e-06, + "loss": 1.6972, + "step": 1785500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0488169657776675e-06, + "loss": 1.6718, + "step": 1786000 + }, + { + "epoch": 0.9, + "learning_rate": 2.043791438612432e-06, + "loss": 1.6841, + "step": 1786500 + }, + { + "epoch": 0.9, + "learning_rate": 2.038765911447196e-06, + "loss": 1.7022, + "step": 1787000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0337403842819604e-06, + "loss": 1.6901, + "step": 1787500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0287148571167247e-06, + "loss": 1.6866, + "step": 1788000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0236893299514886e-06, + "loss": 1.6893, + "step": 1788500 + }, + { + "epoch": 0.9, + "learning_rate": 2.018663802786253e-06, + "loss": 1.7018, + "step": 1789000 + }, + { + "epoch": 0.9, + "learning_rate": 2.013638275621017e-06, + "loss": 1.6704, + "step": 1789500 + }, + { + "epoch": 0.9, + "learning_rate": 2.0086127484557814e-06, + "loss": 1.6841, + "step": 1790000 + }, + { + "epoch": 0.9, + "learning_rate": 2.0035872212905453e-06, + "loss": 1.6881, + "step": 1790500 + }, + { + "epoch": 0.9, + "learning_rate": 1.9985616941253096e-06, + "loss": 1.674, + "step": 1791000 + }, + { + "epoch": 0.9, + "learning_rate": 1.993536166960074e-06, + "loss": 1.6806, + "step": 1791500 + }, + { + "epoch": 0.9, + "learning_rate": 1.988510639794838e-06, + "loss": 1.6919, + "step": 1792000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9834851126296025e-06, + "loss": 1.6835, + "step": 1792500 + }, + { + "epoch": 0.9, + "learning_rate": 1.9784595854643664e-06, + "loss": 1.6882, + "step": 1793000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9734340582991307e-06, + "loss": 1.6753, + "step": 1793500 + }, + { + "epoch": 0.9, + "learning_rate": 1.968408531133895e-06, + "loss": 1.6863, + "step": 1794000 + }, + { + "epoch": 0.9, + "learning_rate": 1.963383003968659e-06, + "loss": 1.6836, + "step": 1794500 + }, + { + "epoch": 0.9, + "learning_rate": 1.958357476803423e-06, + "loss": 1.6761, + "step": 1795000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9533319496381874e-06, + "loss": 1.6942, + "step": 1795500 + }, + { + "epoch": 0.9, + "learning_rate": 1.9483064224729513e-06, + "loss": 1.6828, + "step": 1796000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9432808953077156e-06, + "loss": 1.693, + "step": 1796500 + }, + { + "epoch": 0.9, + "learning_rate": 1.93825536814248e-06, + "loss": 1.6828, + "step": 1797000 + }, + { + "epoch": 0.9, + "learning_rate": 1.933229840977244e-06, + "loss": 1.6773, + "step": 1797500 + }, + { + "epoch": 0.9, + "learning_rate": 1.928204313812008e-06, + "loss": 1.678, + "step": 1798000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9231787866467723e-06, + "loss": 1.6748, + "step": 1798500 + }, + { + "epoch": 0.9, + "learning_rate": 1.9181532594815366e-06, + "loss": 1.6938, + "step": 1799000 + }, + { + "epoch": 0.9, + "learning_rate": 1.913127732316301e-06, + "loss": 1.7007, + "step": 1799500 + }, + { + "epoch": 0.9, + "learning_rate": 1.908102205151065e-06, + "loss": 1.6881, + "step": 1800000 + }, + { + "epoch": 0.9, + "learning_rate": 1.9030766779858293e-06, + "loss": 1.6871, + "step": 1800500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8980511508205934e-06, + "loss": 1.6845, + "step": 1801000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8930256236553577e-06, + "loss": 1.6902, + "step": 1801500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8880000964901217e-06, + "loss": 1.6838, + "step": 1802000 + }, + { + "epoch": 0.91, + "learning_rate": 1.882974569324886e-06, + "loss": 1.6798, + "step": 1802500 + }, + { + "epoch": 0.91, + "learning_rate": 1.87794904215965e-06, + "loss": 1.6858, + "step": 1803000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8729235149944142e-06, + "loss": 1.7039, + "step": 1803500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8678979878291783e-06, + "loss": 1.6786, + "step": 1804000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8628724606639426e-06, + "loss": 1.6898, + "step": 1804500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8578469334987067e-06, + "loss": 1.6891, + "step": 1805000 + }, + { + "epoch": 0.91, + "learning_rate": 1.852821406333471e-06, + "loss": 1.6831, + "step": 1805500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8477958791682352e-06, + "loss": 1.6928, + "step": 1806000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8427703520029993e-06, + "loss": 1.6945, + "step": 1806500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8377448248377636e-06, + "loss": 1.6792, + "step": 1807000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8327192976725277e-06, + "loss": 1.6849, + "step": 1807500 + }, + { + "epoch": 0.91, + "learning_rate": 1.827693770507292e-06, + "loss": 1.6882, + "step": 1808000 + }, + { + "epoch": 0.91, + "learning_rate": 1.822668243342056e-06, + "loss": 1.7077, + "step": 1808500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8176427161768204e-06, + "loss": 1.7045, + "step": 1809000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8126171890115847e-06, + "loss": 1.6976, + "step": 1809500 + }, + { + "epoch": 0.91, + "learning_rate": 1.8075916618463487e-06, + "loss": 1.6906, + "step": 1810000 + }, + { + "epoch": 0.91, + "learning_rate": 1.802566134681113e-06, + "loss": 1.683, + "step": 1810500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7975406075158771e-06, + "loss": 1.6991, + "step": 1811000 + }, + { + "epoch": 0.91, + "learning_rate": 1.792515080350641e-06, + "loss": 1.6774, + "step": 1811500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7874895531854053e-06, + "loss": 1.6821, + "step": 1812000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7824640260201696e-06, + "loss": 1.6897, + "step": 1812500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7774384988549337e-06, + "loss": 1.6987, + "step": 1813000 + }, + { + "epoch": 0.91, + "learning_rate": 1.772412971689698e-06, + "loss": 1.6837, + "step": 1813500 + }, + { + "epoch": 0.91, + "learning_rate": 1.767387444524462e-06, + "loss": 1.6832, + "step": 1814000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7623619173592263e-06, + "loss": 1.6862, + "step": 1814500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7573363901939904e-06, + "loss": 1.6904, + "step": 1815000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7523108630287547e-06, + "loss": 1.6895, + "step": 1815500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7472853358635188e-06, + "loss": 1.6967, + "step": 1816000 + }, + { + "epoch": 0.91, + "learning_rate": 1.742259808698283e-06, + "loss": 1.6736, + "step": 1816500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7372342815330474e-06, + "loss": 1.6951, + "step": 1817000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7322087543678115e-06, + "loss": 1.6789, + "step": 1817500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7271832272025758e-06, + "loss": 1.7094, + "step": 1818000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7221577000373398e-06, + "loss": 1.6881, + "step": 1818500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7171321728721041e-06, + "loss": 1.6914, + "step": 1819000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7121066457068682e-06, + "loss": 1.6806, + "step": 1819500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7070811185416325e-06, + "loss": 1.6795, + "step": 1820000 + }, + { + "epoch": 0.91, + "learning_rate": 1.7020555913763964e-06, + "loss": 1.6556, + "step": 1820500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6970300642111607e-06, + "loss": 1.6841, + "step": 1821000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6920045370459248e-06, + "loss": 1.6794, + "step": 1821500 + }, + { + "epoch": 0.92, + "learning_rate": 1.686979009880689e-06, + "loss": 1.6862, + "step": 1822000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6819534827154531e-06, + "loss": 1.7003, + "step": 1822500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6769279555502174e-06, + "loss": 1.6843, + "step": 1823000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6719024283849817e-06, + "loss": 1.6841, + "step": 1823500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6668769012197458e-06, + "loss": 1.692, + "step": 1824000 + }, + { + "epoch": 0.92, + "learning_rate": 1.66185137405451e-06, + "loss": 1.6956, + "step": 1824500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6568258468892742e-06, + "loss": 1.6964, + "step": 1825000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6518003197240385e-06, + "loss": 1.6924, + "step": 1825500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6467747925588026e-06, + "loss": 1.6795, + "step": 1826000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6417492653935668e-06, + "loss": 1.6793, + "step": 1826500 + }, + { + "epoch": 0.92, + "learning_rate": 1.636723738228331e-06, + "loss": 1.6779, + "step": 1827000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6316982110630952e-06, + "loss": 1.7041, + "step": 1827500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6266726838978595e-06, + "loss": 1.7017, + "step": 1828000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6216471567326236e-06, + "loss": 1.686, + "step": 1828500 + }, + { + "epoch": 0.92, + "learning_rate": 1.6166216295673875e-06, + "loss": 1.6748, + "step": 1829000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6115961024021518e-06, + "loss": 1.6849, + "step": 1829500 + }, + { + "epoch": 0.92, + "learning_rate": 1.606570575236916e-06, + "loss": 1.6867, + "step": 1830000 + }, + { + "epoch": 0.92, + "learning_rate": 1.6015450480716801e-06, + "loss": 1.6892, + "step": 1830500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5965195209064444e-06, + "loss": 1.7048, + "step": 1831000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5914939937412085e-06, + "loss": 1.6881, + "step": 1831500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5864684665759728e-06, + "loss": 1.6941, + "step": 1832000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5814429394107369e-06, + "loss": 1.6866, + "step": 1832500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5764174122455012e-06, + "loss": 1.6849, + "step": 1833000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5713918850802653e-06, + "loss": 1.6701, + "step": 1833500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5663663579150296e-06, + "loss": 1.6854, + "step": 1834000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5613408307497939e-06, + "loss": 1.662, + "step": 1834500 + }, + { + "epoch": 0.92, + "learning_rate": 1.556315303584558e-06, + "loss": 1.7037, + "step": 1835000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5512897764193222e-06, + "loss": 1.6926, + "step": 1835500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5462642492540863e-06, + "loss": 1.6875, + "step": 1836000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5412387220888506e-06, + "loss": 1.6869, + "step": 1836500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5362131949236147e-06, + "loss": 1.6926, + "step": 1837000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5311876677583788e-06, + "loss": 1.6838, + "step": 1837500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5261621405931429e-06, + "loss": 1.7062, + "step": 1838000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5211366134279071e-06, + "loss": 1.6747, + "step": 1838500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5161110862626712e-06, + "loss": 1.6798, + "step": 1839000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5110855590974355e-06, + "loss": 1.6835, + "step": 1839500 + }, + { + "epoch": 0.92, + "learning_rate": 1.5060600319321996e-06, + "loss": 1.6894, + "step": 1840000 + }, + { + "epoch": 0.92, + "learning_rate": 1.501034504766964e-06, + "loss": 1.6816, + "step": 1840500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4960089776017282e-06, + "loss": 1.6867, + "step": 1841000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4909834504364923e-06, + "loss": 1.6873, + "step": 1841500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4859579232712566e-06, + "loss": 1.6908, + "step": 1842000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4809323961060206e-06, + "loss": 1.6718, + "step": 1842500 + }, + { + "epoch": 0.93, + "learning_rate": 1.475906868940785e-06, + "loss": 1.6923, + "step": 1843000 + }, + { + "epoch": 0.93, + "learning_rate": 1.470881341775549e-06, + "loss": 1.6828, + "step": 1843500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4658558146103133e-06, + "loss": 1.6907, + "step": 1844000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4608302874450774e-06, + "loss": 1.6965, + "step": 1844500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4558047602798417e-06, + "loss": 1.6916, + "step": 1845000 + }, + { + "epoch": 0.93, + "learning_rate": 1.450779233114606e-06, + "loss": 1.6967, + "step": 1845500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4457537059493699e-06, + "loss": 1.7036, + "step": 1846000 + }, + { + "epoch": 0.93, + "learning_rate": 1.440728178784134e-06, + "loss": 1.692, + "step": 1846500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4357026516188982e-06, + "loss": 1.6829, + "step": 1847000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4306771244536623e-06, + "loss": 1.7074, + "step": 1847500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4256515972884266e-06, + "loss": 1.6774, + "step": 1848000 + }, + { + "epoch": 0.93, + "learning_rate": 1.420626070123191e-06, + "loss": 1.691, + "step": 1848500 + }, + { + "epoch": 0.93, + "learning_rate": 1.415600542957955e-06, + "loss": 1.6857, + "step": 1849000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4105750157927193e-06, + "loss": 1.6802, + "step": 1849500 + }, + { + "epoch": 0.93, + "learning_rate": 1.4055494886274834e-06, + "loss": 1.7081, + "step": 1850000 + }, + { + "epoch": 0.93, + "learning_rate": 1.4005239614622477e-06, + "loss": 1.6747, + "step": 1850500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3954984342970117e-06, + "loss": 1.6819, + "step": 1851000 + }, + { + "epoch": 0.93, + "learning_rate": 1.390472907131776e-06, + "loss": 1.6805, + "step": 1851500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3854473799665401e-06, + "loss": 1.6893, + "step": 1852000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3804218528013044e-06, + "loss": 1.6725, + "step": 1852500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3753963256360687e-06, + "loss": 1.6851, + "step": 1853000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3703707984708328e-06, + "loss": 1.6977, + "step": 1853500 + }, + { + "epoch": 0.93, + "learning_rate": 1.365345271305597e-06, + "loss": 1.6839, + "step": 1854000 + }, + { + "epoch": 0.93, + "learning_rate": 1.360319744140361e-06, + "loss": 1.7006, + "step": 1854500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3552942169751252e-06, + "loss": 1.6682, + "step": 1855000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3502686898098893e-06, + "loss": 1.6917, + "step": 1855500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3452431626446536e-06, + "loss": 1.6845, + "step": 1856000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3402176354794177e-06, + "loss": 1.7012, + "step": 1856500 + }, + { + "epoch": 0.93, + "learning_rate": 1.335192108314182e-06, + "loss": 1.6928, + "step": 1857000 + }, + { + "epoch": 0.93, + "learning_rate": 1.330166581148946e-06, + "loss": 1.68, + "step": 1857500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3251410539837104e-06, + "loss": 1.6909, + "step": 1858000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3201155268184745e-06, + "loss": 1.7077, + "step": 1858500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3150899996532387e-06, + "loss": 1.6902, + "step": 1859000 + }, + { + "epoch": 0.93, + "learning_rate": 1.310064472488003e-06, + "loss": 1.6896, + "step": 1859500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3050389453227671e-06, + "loss": 1.6861, + "step": 1860000 + }, + { + "epoch": 0.93, + "learning_rate": 1.3000134181575314e-06, + "loss": 1.6923, + "step": 1860500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2949878909922955e-06, + "loss": 1.6869, + "step": 1861000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2899623638270598e-06, + "loss": 1.6834, + "step": 1861500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2849368366618239e-06, + "loss": 1.6835, + "step": 1862000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2799113094965882e-06, + "loss": 1.6762, + "step": 1862500 + }, + { + "epoch": 0.94, + "learning_rate": 1.274885782331352e-06, + "loss": 1.6905, + "step": 1863000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2698602551661163e-06, + "loss": 1.6831, + "step": 1863500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2648347280008804e-06, + "loss": 1.6685, + "step": 1864000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2598092008356447e-06, + "loss": 1.6942, + "step": 1864500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2547836736704088e-06, + "loss": 1.6771, + "step": 1865000 + }, + { + "epoch": 0.94, + "learning_rate": 1.249758146505173e-06, + "loss": 1.6994, + "step": 1865500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2447326193399374e-06, + "loss": 1.6696, + "step": 1866000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2397070921747015e-06, + "loss": 1.6796, + "step": 1866500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2346815650094658e-06, + "loss": 1.6863, + "step": 1867000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2296560378442298e-06, + "loss": 1.6902, + "step": 1867500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2246305106789941e-06, + "loss": 1.6852, + "step": 1868000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2196049835137582e-06, + "loss": 1.666, + "step": 1868500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2145794563485225e-06, + "loss": 1.6863, + "step": 1869000 + }, + { + "epoch": 0.94, + "learning_rate": 1.2095539291832866e-06, + "loss": 1.6942, + "step": 1869500 + }, + { + "epoch": 0.94, + "learning_rate": 1.2045284020180507e-06, + "loss": 1.6801, + "step": 1870000 + }, + { + "epoch": 0.94, + "learning_rate": 1.199502874852815e-06, + "loss": 1.6777, + "step": 1870500 + }, + { + "epoch": 0.94, + "learning_rate": 1.194477347687579e-06, + "loss": 1.6963, + "step": 1871000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1894518205223433e-06, + "loss": 1.6735, + "step": 1871500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1844262933571076e-06, + "loss": 1.6699, + "step": 1872000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1794007661918717e-06, + "loss": 1.678, + "step": 1872500 + }, + { + "epoch": 0.94, + "learning_rate": 1.174375239026636e-06, + "loss": 1.6712, + "step": 1873000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1693497118614e-06, + "loss": 1.6841, + "step": 1873500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1643241846961642e-06, + "loss": 1.681, + "step": 1874000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1592986575309285e-06, + "loss": 1.6775, + "step": 1874500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1542731303656925e-06, + "loss": 1.691, + "step": 1875000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1492476032004568e-06, + "loss": 1.6859, + "step": 1875500 + }, + { + "epoch": 0.94, + "learning_rate": 1.144222076035221e-06, + "loss": 1.6799, + "step": 1876000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1391965488699852e-06, + "loss": 1.6691, + "step": 1876500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1341710217047495e-06, + "loss": 1.6886, + "step": 1877000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1291454945395136e-06, + "loss": 1.679, + "step": 1877500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1241199673742777e-06, + "loss": 1.683, + "step": 1878000 + }, + { + "epoch": 0.94, + "learning_rate": 1.119094440209042e-06, + "loss": 1.6906, + "step": 1878500 + }, + { + "epoch": 0.94, + "learning_rate": 1.114068913043806e-06, + "loss": 1.6791, + "step": 1879000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1090433858785703e-06, + "loss": 1.6834, + "step": 1879500 + }, + { + "epoch": 0.94, + "learning_rate": 1.1040178587133344e-06, + "loss": 1.6822, + "step": 1880000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0989923315480987e-06, + "loss": 1.6921, + "step": 1880500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0939668043828628e-06, + "loss": 1.6787, + "step": 1881000 + }, + { + "epoch": 0.95, + "learning_rate": 1.088941277217627e-06, + "loss": 1.6645, + "step": 1881500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0839157500523912e-06, + "loss": 1.6643, + "step": 1882000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0788902228871553e-06, + "loss": 1.6897, + "step": 1882500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0738646957219196e-06, + "loss": 1.6779, + "step": 1883000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0688391685566836e-06, + "loss": 1.6776, + "step": 1883500 + }, + { + "epoch": 0.95, + "learning_rate": 1.063813641391448e-06, + "loss": 1.6874, + "step": 1884000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0587881142262122e-06, + "loss": 1.6859, + "step": 1884500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0537625870609763e-06, + "loss": 1.6999, + "step": 1885000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0487370598957406e-06, + "loss": 1.6679, + "step": 1885500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0437115327305047e-06, + "loss": 1.6787, + "step": 1886000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0386860055652688e-06, + "loss": 1.6705, + "step": 1886500 + }, + { + "epoch": 0.95, + "learning_rate": 1.033660478400033e-06, + "loss": 1.6698, + "step": 1887000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0286349512347971e-06, + "loss": 1.6669, + "step": 1887500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0236094240695614e-06, + "loss": 1.6573, + "step": 1888000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0185838969043255e-06, + "loss": 1.6889, + "step": 1888500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0135583697390898e-06, + "loss": 1.6836, + "step": 1889000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0085328425738541e-06, + "loss": 1.6906, + "step": 1889500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0035073154086182e-06, + "loss": 1.6952, + "step": 1890000 + }, + { + "epoch": 0.95, + "learning_rate": 9.984817882433825e-07, + "loss": 1.6814, + "step": 1890500 + }, + { + "epoch": 0.95, + "learning_rate": 9.934562610781466e-07, + "loss": 1.685, + "step": 1891000 + }, + { + "epoch": 0.95, + "learning_rate": 9.884307339129106e-07, + "loss": 1.6861, + "step": 1891500 + }, + { + "epoch": 0.95, + "learning_rate": 9.83405206747675e-07, + "loss": 1.6827, + "step": 1892000 + }, + { + "epoch": 0.95, + "learning_rate": 9.78379679582439e-07, + "loss": 1.6627, + "step": 1892500 + }, + { + "epoch": 0.95, + "learning_rate": 9.733541524172033e-07, + "loss": 1.6922, + "step": 1893000 + }, + { + "epoch": 0.95, + "learning_rate": 9.683286252519674e-07, + "loss": 1.6966, + "step": 1893500 + }, + { + "epoch": 0.95, + "learning_rate": 9.633030980867317e-07, + "loss": 1.6748, + "step": 1894000 + }, + { + "epoch": 0.95, + "learning_rate": 9.582775709214958e-07, + "loss": 1.6658, + "step": 1894500 + }, + { + "epoch": 0.95, + "learning_rate": 9.5325204375626e-07, + "loss": 1.6801, + "step": 1895000 + }, + { + "epoch": 0.95, + "learning_rate": 9.482265165910242e-07, + "loss": 1.676, + "step": 1895500 + }, + { + "epoch": 0.95, + "learning_rate": 9.432009894257883e-07, + "loss": 1.6737, + "step": 1896000 + }, + { + "epoch": 0.95, + "learning_rate": 9.381754622605525e-07, + "loss": 1.6822, + "step": 1896500 + }, + { + "epoch": 0.95, + "learning_rate": 9.331499350953167e-07, + "loss": 1.6844, + "step": 1897000 + }, + { + "epoch": 0.95, + "learning_rate": 9.281244079300809e-07, + "loss": 1.6793, + "step": 1897500 + }, + { + "epoch": 0.95, + "learning_rate": 9.230988807648451e-07, + "loss": 1.7039, + "step": 1898000 + }, + { + "epoch": 0.95, + "learning_rate": 9.180733535996094e-07, + "loss": 1.7046, + "step": 1898500 + }, + { + "epoch": 0.95, + "learning_rate": 9.130478264343736e-07, + "loss": 1.6903, + "step": 1899000 + }, + { + "epoch": 0.95, + "learning_rate": 9.080222992691375e-07, + "loss": 1.6684, + "step": 1899500 + }, + { + "epoch": 0.95, + "learning_rate": 9.029967721039018e-07, + "loss": 1.69, + "step": 1900000 + }, + { + "epoch": 0.96, + "learning_rate": 8.97971244938666e-07, + "loss": 1.6728, + "step": 1900500 + }, + { + "epoch": 0.96, + "learning_rate": 8.929457177734302e-07, + "loss": 1.6851, + "step": 1901000 + }, + { + "epoch": 0.96, + "learning_rate": 8.879201906081944e-07, + "loss": 1.6928, + "step": 1901500 + }, + { + "epoch": 0.96, + "learning_rate": 8.828946634429586e-07, + "loss": 1.6791, + "step": 1902000 + }, + { + "epoch": 0.96, + "learning_rate": 8.778691362777228e-07, + "loss": 1.6827, + "step": 1902500 + }, + { + "epoch": 0.96, + "learning_rate": 8.72843609112487e-07, + "loss": 1.6927, + "step": 1903000 + }, + { + "epoch": 0.96, + "learning_rate": 8.678180819472512e-07, + "loss": 1.679, + "step": 1903500 + }, + { + "epoch": 0.96, + "learning_rate": 8.627925547820152e-07, + "loss": 1.6715, + "step": 1904000 + }, + { + "epoch": 0.96, + "learning_rate": 8.577670276167794e-07, + "loss": 1.6804, + "step": 1904500 + }, + { + "epoch": 0.96, + "learning_rate": 8.527415004515436e-07, + "loss": 1.7004, + "step": 1905000 + }, + { + "epoch": 0.96, + "learning_rate": 8.477159732863079e-07, + "loss": 1.6792, + "step": 1905500 + }, + { + "epoch": 0.96, + "learning_rate": 8.426904461210721e-07, + "loss": 1.6672, + "step": 1906000 + }, + { + "epoch": 0.96, + "learning_rate": 8.376649189558363e-07, + "loss": 1.7027, + "step": 1906500 + }, + { + "epoch": 0.96, + "learning_rate": 8.326393917906005e-07, + "loss": 1.7093, + "step": 1907000 + }, + { + "epoch": 0.96, + "learning_rate": 8.276138646253647e-07, + "loss": 1.6663, + "step": 1907500 + }, + { + "epoch": 0.96, + "learning_rate": 8.225883374601287e-07, + "loss": 1.6764, + "step": 1908000 + }, + { + "epoch": 0.96, + "learning_rate": 8.175628102948929e-07, + "loss": 1.6873, + "step": 1908500 + }, + { + "epoch": 0.96, + "learning_rate": 8.125372831296571e-07, + "loss": 1.6772, + "step": 1909000 + }, + { + "epoch": 0.96, + "learning_rate": 8.075117559644213e-07, + "loss": 1.6818, + "step": 1909500 + }, + { + "epoch": 0.96, + "learning_rate": 8.024862287991855e-07, + "loss": 1.693, + "step": 1910000 + }, + { + "epoch": 0.96, + "learning_rate": 7.974607016339497e-07, + "loss": 1.6619, + "step": 1910500 + }, + { + "epoch": 0.96, + "learning_rate": 7.92435174468714e-07, + "loss": 1.6772, + "step": 1911000 + }, + { + "epoch": 0.96, + "learning_rate": 7.874096473034782e-07, + "loss": 1.6809, + "step": 1911500 + }, + { + "epoch": 0.96, + "learning_rate": 7.823841201382424e-07, + "loss": 1.6735, + "step": 1912000 + }, + { + "epoch": 0.96, + "learning_rate": 7.773585929730064e-07, + "loss": 1.6577, + "step": 1912500 + }, + { + "epoch": 0.96, + "learning_rate": 7.723330658077706e-07, + "loss": 1.6641, + "step": 1913000 + }, + { + "epoch": 0.96, + "learning_rate": 7.673075386425348e-07, + "loss": 1.6578, + "step": 1913500 + }, + { + "epoch": 0.96, + "learning_rate": 7.62282011477299e-07, + "loss": 1.686, + "step": 1914000 + }, + { + "epoch": 0.96, + "learning_rate": 7.572564843120632e-07, + "loss": 1.6724, + "step": 1914500 + }, + { + "epoch": 0.96, + "learning_rate": 7.522309571468274e-07, + "loss": 1.6863, + "step": 1915000 + }, + { + "epoch": 0.96, + "learning_rate": 7.472054299815916e-07, + "loss": 1.6942, + "step": 1915500 + }, + { + "epoch": 0.96, + "learning_rate": 7.421799028163558e-07, + "loss": 1.6737, + "step": 1916000 + }, + { + "epoch": 0.96, + "learning_rate": 7.371543756511198e-07, + "loss": 1.6737, + "step": 1916500 + }, + { + "epoch": 0.96, + "learning_rate": 7.32128848485884e-07, + "loss": 1.6661, + "step": 1917000 + }, + { + "epoch": 0.96, + "learning_rate": 7.271033213206482e-07, + "loss": 1.6774, + "step": 1917500 + }, + { + "epoch": 0.96, + "learning_rate": 7.220777941554125e-07, + "loss": 1.6862, + "step": 1918000 + }, + { + "epoch": 0.96, + "learning_rate": 7.170522669901767e-07, + "loss": 1.6744, + "step": 1918500 + }, + { + "epoch": 0.96, + "learning_rate": 7.120267398249409e-07, + "loss": 1.6824, + "step": 1919000 + }, + { + "epoch": 0.96, + "learning_rate": 7.070012126597051e-07, + "loss": 1.6898, + "step": 1919500 + }, + { + "epoch": 0.96, + "learning_rate": 7.019756854944693e-07, + "loss": 1.6923, + "step": 1920000 + }, + { + "epoch": 0.97, + "learning_rate": 6.969501583292334e-07, + "loss": 1.6766, + "step": 1920500 + }, + { + "epoch": 0.97, + "learning_rate": 6.919246311639975e-07, + "loss": 1.6794, + "step": 1921000 + }, + { + "epoch": 0.97, + "learning_rate": 6.868991039987617e-07, + "loss": 1.6953, + "step": 1921500 + }, + { + "epoch": 0.97, + "learning_rate": 6.818735768335259e-07, + "loss": 1.6866, + "step": 1922000 + }, + { + "epoch": 0.97, + "learning_rate": 6.768480496682901e-07, + "loss": 1.6915, + "step": 1922500 + }, + { + "epoch": 0.97, + "learning_rate": 6.718225225030543e-07, + "loss": 1.6916, + "step": 1923000 + }, + { + "epoch": 0.97, + "learning_rate": 6.667969953378186e-07, + "loss": 1.6846, + "step": 1923500 + }, + { + "epoch": 0.97, + "learning_rate": 6.617714681725828e-07, + "loss": 1.6863, + "step": 1924000 + }, + { + "epoch": 0.97, + "learning_rate": 6.567459410073469e-07, + "loss": 1.6706, + "step": 1924500 + }, + { + "epoch": 0.97, + "learning_rate": 6.51720413842111e-07, + "loss": 1.6684, + "step": 1925000 + }, + { + "epoch": 0.97, + "learning_rate": 6.466948866768752e-07, + "loss": 1.6607, + "step": 1925500 + }, + { + "epoch": 0.97, + "learning_rate": 6.416693595116394e-07, + "loss": 1.6796, + "step": 1926000 + }, + { + "epoch": 0.97, + "learning_rate": 6.366438323464036e-07, + "loss": 1.6779, + "step": 1926500 + }, + { + "epoch": 0.97, + "learning_rate": 6.316183051811678e-07, + "loss": 1.6844, + "step": 1927000 + }, + { + "epoch": 0.97, + "learning_rate": 6.26592778015932e-07, + "loss": 1.7068, + "step": 1927500 + }, + { + "epoch": 0.97, + "learning_rate": 6.215672508506962e-07, + "loss": 1.6942, + "step": 1928000 + }, + { + "epoch": 0.97, + "learning_rate": 6.165417236854603e-07, + "loss": 1.6785, + "step": 1928500 + }, + { + "epoch": 0.97, + "learning_rate": 6.115161965202245e-07, + "loss": 1.6815, + "step": 1929000 + }, + { + "epoch": 0.97, + "learning_rate": 6.064906693549887e-07, + "loss": 1.6738, + "step": 1929500 + }, + { + "epoch": 0.97, + "learning_rate": 6.014651421897529e-07, + "loss": 1.6704, + "step": 1930000 + }, + { + "epoch": 0.97, + "learning_rate": 5.964396150245171e-07, + "loss": 1.6856, + "step": 1930500 + }, + { + "epoch": 0.97, + "learning_rate": 5.914140878592813e-07, + "loss": 1.7018, + "step": 1931000 + }, + { + "epoch": 0.97, + "learning_rate": 5.863885606940455e-07, + "loss": 1.6716, + "step": 1931500 + }, + { + "epoch": 0.97, + "learning_rate": 5.813630335288097e-07, + "loss": 1.6772, + "step": 1932000 + }, + { + "epoch": 0.97, + "learning_rate": 5.763375063635737e-07, + "loss": 1.6984, + "step": 1932500 + }, + { + "epoch": 0.97, + "learning_rate": 5.71311979198338e-07, + "loss": 1.6923, + "step": 1933000 + }, + { + "epoch": 0.97, + "learning_rate": 5.662864520331022e-07, + "loss": 1.6772, + "step": 1933500 + }, + { + "epoch": 0.97, + "learning_rate": 5.612609248678664e-07, + "loss": 1.6748, + "step": 1934000 + }, + { + "epoch": 0.97, + "learning_rate": 5.562353977026306e-07, + "loss": 1.6777, + "step": 1934500 + }, + { + "epoch": 0.97, + "learning_rate": 5.512098705373947e-07, + "loss": 1.6702, + "step": 1935000 + }, + { + "epoch": 0.97, + "learning_rate": 5.461843433721589e-07, + "loss": 1.6623, + "step": 1935500 + }, + { + "epoch": 0.97, + "learning_rate": 5.411588162069232e-07, + "loss": 1.6805, + "step": 1936000 + }, + { + "epoch": 0.97, + "learning_rate": 5.361332890416874e-07, + "loss": 1.6877, + "step": 1936500 + }, + { + "epoch": 0.97, + "learning_rate": 5.311077618764514e-07, + "loss": 1.6942, + "step": 1937000 + }, + { + "epoch": 0.97, + "learning_rate": 5.260822347112156e-07, + "loss": 1.6905, + "step": 1937500 + }, + { + "epoch": 0.97, + "learning_rate": 5.210567075459798e-07, + "loss": 1.7077, + "step": 1938000 + }, + { + "epoch": 0.97, + "learning_rate": 5.160311803807441e-07, + "loss": 1.6845, + "step": 1938500 + }, + { + "epoch": 0.97, + "learning_rate": 5.110056532155082e-07, + "loss": 1.6582, + "step": 1939000 + }, + { + "epoch": 0.97, + "learning_rate": 5.059801260502724e-07, + "loss": 1.6899, + "step": 1939500 + }, + { + "epoch": 0.97, + "learning_rate": 5.009545988850366e-07, + "loss": 1.6976, + "step": 1940000 + }, + { + "epoch": 0.98, + "learning_rate": 4.959290717198007e-07, + "loss": 1.6805, + "step": 1940500 + }, + { + "epoch": 0.98, + "learning_rate": 4.909035445545649e-07, + "loss": 1.6877, + "step": 1941000 + }, + { + "epoch": 0.98, + "learning_rate": 4.858780173893291e-07, + "loss": 1.6853, + "step": 1941500 + }, + { + "epoch": 0.98, + "learning_rate": 4.808524902240933e-07, + "loss": 1.6921, + "step": 1942000 + }, + { + "epoch": 0.98, + "learning_rate": 4.758269630588575e-07, + "loss": 1.6696, + "step": 1942500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7080143589362174e-07, + "loss": 1.6818, + "step": 1943000 + }, + { + "epoch": 0.98, + "learning_rate": 4.657759087283858e-07, + "loss": 1.6918, + "step": 1943500 + }, + { + "epoch": 0.98, + "learning_rate": 4.6075038156315007e-07, + "loss": 1.6612, + "step": 1944000 + }, + { + "epoch": 0.98, + "learning_rate": 4.5572485439791425e-07, + "loss": 1.6725, + "step": 1944500 + }, + { + "epoch": 0.98, + "learning_rate": 4.5069932723267844e-07, + "loss": 1.6919, + "step": 1945000 + }, + { + "epoch": 0.98, + "learning_rate": 4.456738000674426e-07, + "loss": 1.6915, + "step": 1945500 + }, + { + "epoch": 0.98, + "learning_rate": 4.4064827290220676e-07, + "loss": 1.6836, + "step": 1946000 + }, + { + "epoch": 0.98, + "learning_rate": 4.35622745736971e-07, + "loss": 1.6769, + "step": 1946500 + }, + { + "epoch": 0.98, + "learning_rate": 4.305972185717352e-07, + "loss": 1.6694, + "step": 1947000 + }, + { + "epoch": 0.98, + "learning_rate": 4.2557169140649933e-07, + "loss": 1.6809, + "step": 1947500 + }, + { + "epoch": 0.98, + "learning_rate": 4.205461642412635e-07, + "loss": 1.6709, + "step": 1948000 + }, + { + "epoch": 0.98, + "learning_rate": 4.155206370760277e-07, + "loss": 1.6682, + "step": 1948500 + }, + { + "epoch": 0.98, + "learning_rate": 4.104951099107919e-07, + "loss": 1.6691, + "step": 1949000 + }, + { + "epoch": 0.98, + "learning_rate": 4.0546958274555613e-07, + "loss": 1.6797, + "step": 1949500 + }, + { + "epoch": 0.98, + "learning_rate": 4.0044405558032027e-07, + "loss": 1.6735, + "step": 1950000 + }, + { + "epoch": 0.98, + "learning_rate": 3.9541852841508446e-07, + "loss": 1.6829, + "step": 1950500 + }, + { + "epoch": 0.98, + "learning_rate": 3.9039300124984864e-07, + "loss": 1.6838, + "step": 1951000 + }, + { + "epoch": 0.98, + "learning_rate": 3.8536747408461283e-07, + "loss": 1.699, + "step": 1951500 + }, + { + "epoch": 0.98, + "learning_rate": 3.8034194691937697e-07, + "loss": 1.6807, + "step": 1952000 + }, + { + "epoch": 0.98, + "learning_rate": 3.7531641975414116e-07, + "loss": 1.6841, + "step": 1952500 + }, + { + "epoch": 0.98, + "learning_rate": 3.702908925889054e-07, + "loss": 1.6859, + "step": 1953000 + }, + { + "epoch": 0.98, + "learning_rate": 3.652653654236696e-07, + "loss": 1.665, + "step": 1953500 + }, + { + "epoch": 0.98, + "learning_rate": 3.602398382584337e-07, + "loss": 1.6829, + "step": 1954000 + }, + { + "epoch": 0.98, + "learning_rate": 3.552143110931979e-07, + "loss": 1.6883, + "step": 1954500 + }, + { + "epoch": 0.98, + "learning_rate": 3.501887839279621e-07, + "loss": 1.6835, + "step": 1955000 + }, + { + "epoch": 0.98, + "learning_rate": 3.4516325676272634e-07, + "loss": 1.6895, + "step": 1955500 + }, + { + "epoch": 0.98, + "learning_rate": 3.401377295974905e-07, + "loss": 1.6909, + "step": 1956000 + }, + { + "epoch": 0.98, + "learning_rate": 3.3511220243225466e-07, + "loss": 1.6698, + "step": 1956500 + }, + { + "epoch": 0.98, + "learning_rate": 3.3008667526701885e-07, + "loss": 1.6784, + "step": 1957000 + }, + { + "epoch": 0.98, + "learning_rate": 3.2506114810178303e-07, + "loss": 1.6916, + "step": 1957500 + }, + { + "epoch": 0.98, + "learning_rate": 3.200356209365472e-07, + "loss": 1.6786, + "step": 1958000 + }, + { + "epoch": 0.98, + "learning_rate": 3.1501009377131136e-07, + "loss": 1.675, + "step": 1958500 + }, + { + "epoch": 0.98, + "learning_rate": 3.099845666060756e-07, + "loss": 1.6965, + "step": 1959000 + }, + { + "epoch": 0.98, + "learning_rate": 3.049590394408398e-07, + "loss": 1.6861, + "step": 1959500 + }, + { + "epoch": 0.99, + "learning_rate": 2.999335122756039e-07, + "loss": 1.6723, + "step": 1960000 + }, + { + "epoch": 0.99, + "learning_rate": 2.9490798511036816e-07, + "loss": 1.6726, + "step": 1960500 + }, + { + "epoch": 0.99, + "learning_rate": 2.898824579451323e-07, + "loss": 1.6783, + "step": 1961000 + }, + { + "epoch": 0.99, + "learning_rate": 2.8485693077989654e-07, + "loss": 1.6768, + "step": 1961500 + }, + { + "epoch": 0.99, + "learning_rate": 2.798314036146607e-07, + "loss": 1.6825, + "step": 1962000 + }, + { + "epoch": 0.99, + "learning_rate": 2.7480587644942486e-07, + "loss": 1.6731, + "step": 1962500 + }, + { + "epoch": 0.99, + "learning_rate": 2.6978034928418905e-07, + "loss": 1.6837, + "step": 1963000 + }, + { + "epoch": 0.99, + "learning_rate": 2.6475482211895324e-07, + "loss": 1.6671, + "step": 1963500 + }, + { + "epoch": 0.99, + "learning_rate": 2.597292949537174e-07, + "loss": 1.6628, + "step": 1964000 + }, + { + "epoch": 0.99, + "learning_rate": 2.547037677884816e-07, + "loss": 1.6721, + "step": 1964500 + }, + { + "epoch": 0.99, + "learning_rate": 2.496782406232458e-07, + "loss": 1.6692, + "step": 1965000 + }, + { + "epoch": 0.99, + "learning_rate": 2.4465271345801e-07, + "loss": 1.66, + "step": 1965500 + }, + { + "epoch": 0.99, + "learning_rate": 2.396271862927742e-07, + "loss": 1.7002, + "step": 1966000 + }, + { + "epoch": 0.99, + "learning_rate": 2.3460165912753834e-07, + "loss": 1.6806, + "step": 1966500 + }, + { + "epoch": 0.99, + "learning_rate": 2.2957613196230255e-07, + "loss": 1.6981, + "step": 1967000 + }, + { + "epoch": 0.99, + "learning_rate": 2.2455060479706671e-07, + "loss": 1.6896, + "step": 1967500 + }, + { + "epoch": 0.99, + "learning_rate": 2.195250776318309e-07, + "loss": 1.6839, + "step": 1968000 + }, + { + "epoch": 0.99, + "learning_rate": 2.1449955046659506e-07, + "loss": 1.6783, + "step": 1968500 + }, + { + "epoch": 0.99, + "learning_rate": 2.0947402330135928e-07, + "loss": 1.6715, + "step": 1969000 + }, + { + "epoch": 0.99, + "learning_rate": 2.0444849613612344e-07, + "loss": 1.6628, + "step": 1969500 + }, + { + "epoch": 0.99, + "learning_rate": 1.9942296897088765e-07, + "loss": 1.6962, + "step": 1970000 + }, + { + "epoch": 0.99, + "learning_rate": 1.9439744180565182e-07, + "loss": 1.6911, + "step": 1970500 + }, + { + "epoch": 0.99, + "learning_rate": 1.89371914640416e-07, + "loss": 1.6791, + "step": 1971000 + }, + { + "epoch": 0.99, + "learning_rate": 1.8434638747518022e-07, + "loss": 1.6779, + "step": 1971500 + }, + { + "epoch": 0.99, + "learning_rate": 1.7932086030994438e-07, + "loss": 1.6708, + "step": 1972000 + }, + { + "epoch": 0.99, + "learning_rate": 1.7429533314470857e-07, + "loss": 1.6801, + "step": 1972500 + }, + { + "epoch": 0.99, + "learning_rate": 1.6926980597947273e-07, + "loss": 1.6754, + "step": 1973000 + }, + { + "epoch": 0.99, + "learning_rate": 1.6424427881423694e-07, + "loss": 1.6658, + "step": 1973500 + }, + { + "epoch": 0.99, + "learning_rate": 1.592187516490011e-07, + "loss": 1.6754, + "step": 1974000 + }, + { + "epoch": 0.99, + "learning_rate": 1.541932244837653e-07, + "loss": 1.6789, + "step": 1974500 + }, + { + "epoch": 0.99, + "learning_rate": 1.4916769731852948e-07, + "loss": 1.6647, + "step": 1975000 + }, + { + "epoch": 0.99, + "learning_rate": 1.4414217015329367e-07, + "loss": 1.6966, + "step": 1975500 + }, + { + "epoch": 0.99, + "learning_rate": 1.3911664298805786e-07, + "loss": 1.6757, + "step": 1976000 + }, + { + "epoch": 0.99, + "learning_rate": 1.3409111582282205e-07, + "loss": 1.6641, + "step": 1976500 + }, + { + "epoch": 0.99, + "learning_rate": 1.2906558865758623e-07, + "loss": 1.6953, + "step": 1977000 + }, + { + "epoch": 0.99, + "learning_rate": 1.240400614923504e-07, + "loss": 1.6674, + "step": 1977500 + }, + { + "epoch": 0.99, + "learning_rate": 1.190145343271146e-07, + "loss": 1.683, + "step": 1978000 + }, + { + "epoch": 0.99, + "learning_rate": 1.1398900716187877e-07, + "loss": 1.6875, + "step": 1978500 + }, + { + "epoch": 0.99, + "learning_rate": 1.0896347999664296e-07, + "loss": 1.6845, + "step": 1979000 + }, + { + "epoch": 0.99, + "learning_rate": 1.0393795283140713e-07, + "loss": 1.6881, + "step": 1979500 + }, + { + "epoch": 1.0, + "learning_rate": 9.891242566617132e-08, + "loss": 1.6706, + "step": 1980000 + }, + { + "epoch": 1.0, + "learning_rate": 9.388689850093551e-08, + "loss": 1.6829, + "step": 1980500 + }, + { + "epoch": 1.0, + "learning_rate": 8.886137133569968e-08, + "loss": 1.6691, + "step": 1981000 + }, + { + "epoch": 1.0, + "learning_rate": 8.383584417046387e-08, + "loss": 1.6824, + "step": 1981500 + }, + { + "epoch": 1.0, + "learning_rate": 7.881031700522806e-08, + "loss": 1.6752, + "step": 1982000 + }, + { + "epoch": 1.0, + "learning_rate": 7.378478983999225e-08, + "loss": 1.6912, + "step": 1982500 + }, + { + "epoch": 1.0, + "learning_rate": 6.875926267475644e-08, + "loss": 1.6698, + "step": 1983000 + }, + { + "epoch": 1.0, + "learning_rate": 6.373373550952061e-08, + "loss": 1.6762, + "step": 1983500 + }, + { + "epoch": 1.0, + "learning_rate": 5.87082083442848e-08, + "loss": 1.6821, + "step": 1984000 + }, + { + "epoch": 1.0, + "learning_rate": 5.368268117904899e-08, + "loss": 1.6798, + "step": 1984500 + }, + { + "epoch": 1.0, + "learning_rate": 4.865715401381317e-08, + "loss": 1.6822, + "step": 1985000 + }, + { + "epoch": 1.0, + "learning_rate": 4.3631626848577356e-08, + "loss": 1.6883, + "step": 1985500 + }, + { + "epoch": 1.0, + "learning_rate": 3.860609968334154e-08, + "loss": 1.6712, + "step": 1986000 + }, + { + "epoch": 1.0, + "learning_rate": 3.358057251810572e-08, + "loss": 1.6905, + "step": 1986500 + }, + { + "epoch": 1.0, + "learning_rate": 2.8555045352869903e-08, + "loss": 1.6558, + "step": 1987000 + }, + { + "epoch": 1.0, + "learning_rate": 2.352951818763409e-08, + "loss": 1.6894, + "step": 1987500 + }, + { + "epoch": 1.0, + "learning_rate": 1.8503991022398273e-08, + "loss": 1.6832, + "step": 1988000 + }, + { + "epoch": 1.0, + "learning_rate": 1.3478463857162457e-08, + "loss": 1.6851, + "step": 1988500 + }, + { + "epoch": 1.0, + "learning_rate": 8.452936691926642e-09, + "loss": 1.7012, + "step": 1989000 + }, + { + "epoch": 1.0, + "learning_rate": 3.427409526690826e-09, + "loss": 1.6849, + "step": 1989500 + }, + { + "epoch": 1.0, + "step": 1989841, + "total_flos": 1413004618874880.0, + "train_runtime": 1073185.0771, + "train_samples_per_second": 1.854 + } + ], + "max_steps": 1989841, + "num_train_epochs": 1, + "total_flos": 1413004618874880.0, + "trial_name": null, + "trial_params": null +}