diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,15085 +1,2317 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 6.0, - "global_step": 1254162, + "epoch": 0.9999973749767029, + "global_step": 190474, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.1, + "epoch": 0.27, "learning_rate": 1.0000000000000002e-06, - "loss": 11.2242, + "loss": 3.5718, "step": 500 }, { - "epoch": 0.2, + "epoch": 0.54, "learning_rate": 2.0000000000000003e-06, - "loss": 10.4452, + "loss": 3.5525, "step": 1000 }, { - "epoch": 0.3, + "epoch": 0.81, "learning_rate": 3e-06, - "loss": 10.0268, + "loss": 3.5047, "step": 1500 }, { - "epoch": 0.4, + "epoch": 1.08, "learning_rate": 4.000000000000001e-06, - "loss": 9.5717, + "loss": 3.4853, "step": 2000 }, { - "epoch": 0.5, + "epoch": 1.35, "learning_rate": 5e-06, - "loss": 9.1643, + "loss": 3.4635, "step": 2500 }, { - "epoch": 0.6, + "epoch": 1.61, "learning_rate": 6e-06, - "loss": 8.8893, + "loss": 3.4339, "step": 3000 }, { - "epoch": 0.7, + "epoch": 1.88, "learning_rate": 7e-06, - "loss": 8.7069, + "loss": 3.4143, "step": 3500 }, { - "epoch": 0.8, + "epoch": 2.15, "learning_rate": 8.000000000000001e-06, - "loss": 8.5915, + "loss": 3.3797, "step": 4000 }, { - "epoch": 0.9, + "epoch": 2.42, "learning_rate": 9e-06, - "loss": 8.5111, + "loss": 3.368, "step": 4500 }, { - "epoch": 1.0, + "epoch": 2.69, "learning_rate": 1e-05, - "loss": 8.4616, + "loss": 3.3519, "step": 5000 }, { - "epoch": 1.1, - "learning_rate": 9.666844349680172e-06, - "loss": 8.4, + "epoch": 2.96, + "learning_rate": 7.944078947368422e-06, + "loss": 3.3406, "step": 5500 }, { - "epoch": 1.2, - "learning_rate": 9.333688699360341e-06, - "loss": 8.3691, + "epoch": 3.23, + "learning_rate": 5.888157894736842e-06, + "loss": 3.2885, "step": 6000 }, { - "epoch": 1.3, - "learning_rate": 9.000533049040512e-06, - "loss": 8.3255, + "epoch": 3.5, + "learning_rate": 3.832236842105263e-06, + "loss": 3.262, "step": 6500 }, { - "epoch": 1.4, - "learning_rate": 8.667377398720683e-06, - "loss": 8.2952, + "epoch": 3.77, + "learning_rate": 1.7763157894736844e-06, + "loss": 3.2664, "step": 7000 }, { - "epoch": 1.5, - "learning_rate": 8.334221748400854e-06, - "loss": 8.272, + "epoch": 4.0, + "step": 7432, + "total_flos": 32244153748534272, + "train_runtime": 4035.7935, + "train_samples_per_second": 1.842 + }, + { + "epoch": 4.04, + "learning_rate": 1.36e-07, + "loss": 3.2257, "step": 7500 }, { - "epoch": 1.6, - "learning_rate": 8.001066098081024e-06, - "loss": 8.2567, + "epoch": 4.31, + "learning_rate": 1.1360000000000002e-06, + "loss": 3.2579, "step": 8000 }, { - "epoch": 1.7, - "learning_rate": 7.667910447761195e-06, - "loss": 8.2145, + "epoch": 4.57, + "learning_rate": 2.1360000000000004e-06, + "loss": 3.2497, "step": 8500 }, { - "epoch": 1.8, - "learning_rate": 7.334754797441366e-06, - "loss": 8.2102, + "epoch": 4.84, + "learning_rate": 3.136e-06, + "loss": 3.2399, "step": 9000 }, { - "epoch": 1.9, - "learning_rate": 7.001599147121536e-06, - "loss": 8.172, + "epoch": 5.11, + "learning_rate": 4.136000000000001e-06, + "loss": 3.2354, "step": 9500 }, { - "epoch": 2.0, - "learning_rate": 6.668443496801706e-06, - "loss": 8.1503, + "epoch": 5.38, + "learning_rate": 5.136e-06, + "loss": 3.2227, "step": 10000 }, { - "epoch": 2.1, - "learning_rate": 6.3352878464818765e-06, - "loss": 8.1389, + "epoch": 5.65, + "learning_rate": 6.136000000000001e-06, + "loss": 3.2309, "step": 10500 }, { - "epoch": 2.2, - "learning_rate": 6.002132196162047e-06, - "loss": 8.13, + "epoch": 5.92, + "learning_rate": 7.136000000000001e-06, + "loss": 3.237, "step": 11000 }, { - "epoch": 2.3, - "learning_rate": 5.668976545842218e-06, - "loss": 8.1228, + "epoch": 6.0, + "step": 11148, + "total_flos": 48368948381409792, + "train_runtime": 2016.9377, + "train_samples_per_second": 5.527 + }, + { + "epoch": 0.06, + "learning_rate": 7.040000000000001e-07, + "loss": 3.1385, "step": 11500 }, { - "epoch": 2.4, - "learning_rate": 5.335820895522389e-06, - "loss": 8.1019, + "epoch": 0.06, + "learning_rate": 1.7040000000000001e-06, + "loss": 3.1219, "step": 12000 }, { - "epoch": 2.5, - "learning_rate": 5.002665245202559e-06, - "loss": 8.0855, + "epoch": 0.07, + "learning_rate": 2.704e-06, + "loss": 3.0988, "step": 12500 }, { - "epoch": 2.6, - "learning_rate": 4.669509594882729e-06, - "loss": 8.0685, + "epoch": 0.07, + "learning_rate": 3.7040000000000005e-06, + "loss": 3.0695, "step": 13000 }, { - "epoch": 2.7, - "learning_rate": 4.3363539445629e-06, - "loss": 8.0593, + "epoch": 0.07, + "learning_rate": 4.704e-06, + "loss": 3.0795, "step": 13500 }, { - "epoch": 2.8, - "learning_rate": 4.0031982942430705e-06, - "loss": 8.0628, + "epoch": 0.07, + "learning_rate": 5.704000000000001e-06, + "loss": 3.0666, "step": 14000 }, { - "epoch": 2.9, - "learning_rate": 3.670042643923241e-06, - "loss": 8.051, + "epoch": 0.08, + "learning_rate": 6.7040000000000005e-06, + "loss": 3.0593, "step": 14500 }, { - "epoch": 3.0, - "learning_rate": 3.336886993603412e-06, - "loss": 8.0213, + "epoch": 0.08, + "learning_rate": 7.704000000000001e-06, + "loss": 3.0466, "step": 15000 }, { - "epoch": 3.1, - "learning_rate": 3.0037313432835824e-06, - "loss": 8.0247, + "epoch": 0.08, + "learning_rate": 8.704e-06, + "loss": 3.0547, "step": 15500 }, { - "epoch": 3.2, - "learning_rate": 2.6705756929637526e-06, - "loss": 8.0126, + "epoch": 0.08, + "learning_rate": 9.704e-06, + "loss": 3.0598, "step": 16000 }, { - "epoch": 3.3, - "learning_rate": 2.3374200426439233e-06, - "loss": 8.0008, + "epoch": 0.09, + "learning_rate": 9.981021598714645e-06, + "loss": 3.0721, "step": 16500 }, { - "epoch": 3.4, - "learning_rate": 2.004264392324094e-06, - "loss": 7.9928, + "epoch": 0.09, + "learning_rate": 9.954063642343403e-06, + "loss": 3.0614, "step": 17000 }, { - "epoch": 3.5, - "learning_rate": 1.6711087420042645e-06, - "loss": 8.0083, + "epoch": 0.09, + "learning_rate": 9.927105685972159e-06, + "loss": 3.0434, "step": 17500 }, { - "epoch": 3.6, - "learning_rate": 1.3379530916844352e-06, - "loss": 8.0019, + "epoch": 0.09, + "learning_rate": 9.900147729600915e-06, + "loss": 3.0653, "step": 18000 }, { - "epoch": 3.7, - "learning_rate": 1.0047974413646058e-06, - "loss": 7.9937, + "epoch": 0.1, + "learning_rate": 9.873189773229673e-06, + "loss": 3.0618, "step": 18500 }, { - "epoch": 3.8, - "learning_rate": 6.716417910447762e-07, - "loss": 7.9982, + "epoch": 0.1, + "learning_rate": 9.846231816858427e-06, + "loss": 3.0674, "step": 19000 }, { - "epoch": 3.9, - "learning_rate": 3.3848614072494674e-07, - "loss": 7.998, + "epoch": 0.1, + "learning_rate": 9.819273860487185e-06, + "loss": 3.0515, "step": 19500 }, { - "epoch": 4.0, - "learning_rate": 5.330490405117271e-09, - "loss": 7.9901, + "epoch": 0.11, + "learning_rate": 9.792315904115941e-06, + "loss": 3.0559, "step": 20000 }, { - "epoch": 4.0, - "step": 20008, - "total_flos": 109772945935441920, - "train_runtime": 13759.1079, - "train_samples_per_second": 1.454 - }, - { - "epoch": 0.09, - "learning_rate": 9.84e-07, - "loss": 8.0645, + "epoch": 0.11, + "learning_rate": 9.765357947744698e-06, + "loss": 3.0658, "step": 20500 }, { - "epoch": 0.09, - "learning_rate": 1.984e-06, - "loss": 8.0449, + "epoch": 0.11, + "learning_rate": 9.738399991373455e-06, + "loss": 3.0525, "step": 21000 }, { - "epoch": 0.09, - "learning_rate": 2.984e-06, - "loss": 8.0578, + "epoch": 0.11, + "learning_rate": 9.711442035002212e-06, + "loss": 3.0425, "step": 21500 }, { - "epoch": 0.1, - "learning_rate": 3.984e-06, - "loss": 8.0419, + "epoch": 0.12, + "learning_rate": 9.684484078630968e-06, + "loss": 3.0293, "step": 22000 }, { - "epoch": 0.1, - "learning_rate": 4.984000000000001e-06, - "loss": 8.0566, + "epoch": 0.12, + "learning_rate": 9.657526122259726e-06, + "loss": 3.05, "step": 22500 }, { - "epoch": 0.1, - "learning_rate": 5.984000000000001e-06, - "loss": 8.0375, + "epoch": 0.12, + "learning_rate": 9.63056816588848e-06, + "loss": 3.053, "step": 23000 }, { - "epoch": 0.1, - "learning_rate": 6.984e-06, - "loss": 8.035, + "epoch": 0.12, + "learning_rate": 9.603610209517238e-06, + "loss": 3.0306, "step": 23500 }, { - "epoch": 0.11, - "learning_rate": 7.984e-06, - "loss": 8.0378, + "epoch": 0.13, + "learning_rate": 9.576652253145994e-06, + "loss": 3.0416, "step": 24000 }, { - "epoch": 0.11, - "learning_rate": 8.984e-06, - "loss": 8.0114, + "epoch": 0.13, + "learning_rate": 9.54969429677475e-06, + "loss": 3.0292, "step": 24500 }, { - "epoch": 0.11, - "learning_rate": 9.984e-06, - "loss": 8.0101, + "epoch": 0.13, + "learning_rate": 9.522736340403508e-06, + "loss": 3.0431, "step": 25000 }, { - "epoch": 0.11, - "learning_rate": 9.994587673095457e-06, - "loss": 7.998, + "epoch": 0.13, + "learning_rate": 9.495778384032264e-06, + "loss": 3.0429, "step": 25500 }, { - "epoch": 0.11, - "learning_rate": 9.989087340875389e-06, - "loss": 7.9806, + "epoch": 0.14, + "learning_rate": 9.46882042766102e-06, + "loss": 3.0305, "step": 26000 }, { - "epoch": 0.12, - "learning_rate": 9.983587008655323e-06, - "loss": 7.9583, + "epoch": 0.14, + "learning_rate": 9.441862471289779e-06, + "loss": 3.0355, "step": 26500 }, { - "epoch": 0.12, - "learning_rate": 9.978086676435259e-06, - "loss": 7.9416, + "epoch": 0.14, + "learning_rate": 9.414904514918533e-06, + "loss": 3.0251, "step": 27000 }, { - "epoch": 0.12, - "learning_rate": 9.972586344215191e-06, - "loss": 7.933, + "epoch": 0.14, + "learning_rate": 9.387946558547291e-06, + "loss": 3.0323, "step": 27500 }, { - "epoch": 0.12, - "learning_rate": 9.967086011995125e-06, - "loss": 7.9444, + "epoch": 0.15, + "learning_rate": 9.360988602176047e-06, + "loss": 3.0118, "step": 28000 }, { - "epoch": 0.12, - "learning_rate": 9.961585679775059e-06, - "loss": 7.9236, + "epoch": 0.15, + "learning_rate": 9.334030645804803e-06, + "loss": 3.0138, "step": 28500 }, { - "epoch": 0.13, - "learning_rate": 9.956085347554993e-06, - "loss": 7.8992, + "epoch": 0.15, + "learning_rate": 9.307072689433561e-06, + "loss": 3.0217, "step": 29000 }, { - "epoch": 0.13, - "learning_rate": 9.950585015334927e-06, - "loss": 7.9036, + "epoch": 0.15, + "learning_rate": 9.280114733062316e-06, + "loss": 3.0138, "step": 29500 }, { - "epoch": 0.13, - "learning_rate": 9.94508468311486e-06, - "loss": 7.8702, + "epoch": 0.16, + "learning_rate": 9.253156776691074e-06, + "loss": 3.0402, "step": 30000 }, { - "epoch": 0.13, - "learning_rate": 9.939584350894796e-06, - "loss": 7.897, + "epoch": 0.16, + "learning_rate": 9.22619882031983e-06, + "loss": 3.0297, "step": 30500 }, { - "epoch": 0.14, - "learning_rate": 9.934084018674728e-06, - "loss": 7.8663, + "epoch": 0.16, + "learning_rate": 9.199240863948586e-06, + "loss": 3.0147, "step": 31000 }, { - "epoch": 0.14, - "learning_rate": 9.928583686454662e-06, - "loss": 7.8587, + "epoch": 0.17, + "learning_rate": 9.172282907577344e-06, + "loss": 3.0317, "step": 31500 }, { - "epoch": 0.14, - "learning_rate": 9.923083354234596e-06, - "loss": 7.8413, + "epoch": 0.17, + "learning_rate": 9.1453249512061e-06, + "loss": 3.021, "step": 32000 }, { - "epoch": 0.14, - "learning_rate": 9.91758302201453e-06, - "loss": 7.8537, + "epoch": 0.17, + "learning_rate": 9.118366994834856e-06, + "loss": 3.0284, "step": 32500 }, { - "epoch": 0.14, - "learning_rate": 9.912082689794464e-06, - "loss": 7.8288, + "epoch": 0.17, + "learning_rate": 9.091409038463614e-06, + "loss": 3.0274, "step": 33000 }, { - "epoch": 0.15, - "learning_rate": 9.906582357574398e-06, - "loss": 7.8265, + "epoch": 0.18, + "learning_rate": 9.064451082092369e-06, + "loss": 3.0163, "step": 33500 }, { - "epoch": 0.15, - "learning_rate": 9.901082025354332e-06, - "loss": 7.828, + "epoch": 0.18, + "learning_rate": 9.037493125721126e-06, + "loss": 3.0173, "step": 34000 }, { - "epoch": 0.15, - "learning_rate": 9.895581693134266e-06, - "loss": 7.8077, + "epoch": 0.18, + "learning_rate": 9.010535169349883e-06, + "loss": 3.033, "step": 34500 }, { - "epoch": 0.15, - "learning_rate": 9.8900813609142e-06, - "loss": 7.8088, + "epoch": 0.18, + "learning_rate": 8.983577212978639e-06, + "loss": 3.0174, "step": 35000 }, { - "epoch": 0.16, - "learning_rate": 9.884581028694134e-06, - "loss": 7.7975, + "epoch": 0.19, + "learning_rate": 8.956619256607397e-06, + "loss": 3.0274, "step": 35500 }, { - "epoch": 0.16, - "learning_rate": 9.879080696474068e-06, - "loss": 7.7805, + "epoch": 0.19, + "learning_rate": 8.929661300236153e-06, + "loss": 3.0003, "step": 36000 }, { - "epoch": 0.16, - "learning_rate": 9.873580364254002e-06, - "loss": 7.7865, + "epoch": 0.19, + "learning_rate": 8.902703343864909e-06, + "loss": 3.0088, "step": 36500 }, { - "epoch": 0.16, - "learning_rate": 9.868080032033936e-06, - "loss": 7.7577, + "epoch": 0.19, + "learning_rate": 8.875745387493667e-06, + "loss": 3.0126, "step": 37000 }, { - "epoch": 0.16, - "learning_rate": 9.86257969981387e-06, - "loss": 7.7753, + "epoch": 0.2, + "learning_rate": 8.848787431122421e-06, + "loss": 2.9996, "step": 37500 }, { - "epoch": 0.17, - "learning_rate": 9.857079367593803e-06, - "loss": 7.7512, + "epoch": 0.2, + "learning_rate": 8.82182947475118e-06, + "loss": 3.0292, "step": 38000 }, { - "epoch": 0.17, - "learning_rate": 9.851579035373737e-06, - "loss": 7.7535, + "epoch": 0.2, + "learning_rate": 8.794871518379935e-06, + "loss": 3.008, "step": 38500 }, { - "epoch": 0.17, - "learning_rate": 9.846078703153671e-06, - "loss": 7.762, + "epoch": 0.2, + "learning_rate": 8.767913562008692e-06, + "loss": 3.0032, "step": 39000 }, { - "epoch": 0.17, - "learning_rate": 9.840578370933605e-06, - "loss": 7.7484, + "epoch": 0.21, + "learning_rate": 8.74095560563745e-06, + "loss": 2.9903, "step": 39500 }, { - "epoch": 0.18, - "learning_rate": 9.835078038713539e-06, - "loss": 7.7356, + "epoch": 0.21, + "learning_rate": 8.713997649266204e-06, + "loss": 3.0021, "step": 40000 }, { - "epoch": 0.18, - "learning_rate": 9.829577706493473e-06, - "loss": 7.7407, + "epoch": 0.21, + "learning_rate": 8.687039692894962e-06, + "loss": 3.0082, "step": 40500 }, { - "epoch": 0.18, - "learning_rate": 9.824077374273407e-06, - "loss": 7.7426, + "epoch": 0.22, + "learning_rate": 8.660081736523718e-06, + "loss": 2.9998, "step": 41000 }, { - "epoch": 0.18, - "learning_rate": 9.818577042053341e-06, - "loss": 7.7142, + "epoch": 0.22, + "learning_rate": 8.633123780152474e-06, + "loss": 2.9956, "step": 41500 }, { - "epoch": 0.18, - "learning_rate": 9.813076709833275e-06, - "loss": 7.7211, + "epoch": 0.22, + "learning_rate": 8.606165823781232e-06, + "loss": 3.0029, "step": 42000 }, { - "epoch": 0.19, - "learning_rate": 9.807576377613209e-06, - "loss": 7.7111, + "epoch": 0.22, + "learning_rate": 8.579207867409988e-06, + "loss": 2.9969, "step": 42500 }, { - "epoch": 0.19, - "learning_rate": 9.802076045393143e-06, - "loss": 7.7011, + "epoch": 0.23, + "learning_rate": 8.552249911038744e-06, + "loss": 2.9917, "step": 43000 }, { - "epoch": 0.19, - "learning_rate": 9.796575713173077e-06, - "loss": 7.7001, + "epoch": 0.23, + "learning_rate": 8.525291954667502e-06, + "loss": 2.9977, "step": 43500 }, { - "epoch": 0.19, - "learning_rate": 9.79107538095301e-06, - "loss": 7.6904, + "epoch": 0.23, + "learning_rate": 8.498333998296257e-06, + "loss": 3.0194, "step": 44000 }, { - "epoch": 0.19, - "learning_rate": 9.785575048732944e-06, - "loss": 7.6927, + "epoch": 0.23, + "learning_rate": 8.471376041925015e-06, + "loss": 2.9922, "step": 44500 }, { - "epoch": 0.2, - "learning_rate": 9.780074716512878e-06, - "loss": 7.697, + "epoch": 0.24, + "learning_rate": 8.444418085553771e-06, + "loss": 3.0042, "step": 45000 }, { - "epoch": 0.2, - "learning_rate": 9.774574384292812e-06, - "loss": 7.6826, + "epoch": 0.24, + "learning_rate": 8.417460129182527e-06, + "loss": 3.0048, "step": 45500 }, { - "epoch": 0.2, - "learning_rate": 9.769074052072746e-06, - "loss": 7.6948, + "epoch": 0.24, + "learning_rate": 8.390502172811285e-06, + "loss": 2.9955, "step": 46000 }, { - "epoch": 0.2, - "learning_rate": 9.76357371985268e-06, - "loss": 7.6657, + "epoch": 0.24, + "learning_rate": 8.363544216440041e-06, + "loss": 3.0094, "step": 46500 }, { - "epoch": 0.21, - "learning_rate": 9.758073387632614e-06, - "loss": 7.6656, + "epoch": 0.25, + "learning_rate": 8.336586260068797e-06, + "loss": 2.9846, "step": 47000 }, { - "epoch": 0.21, - "learning_rate": 9.752573055412548e-06, - "loss": 7.6701, + "epoch": 0.25, + "learning_rate": 8.309628303697555e-06, + "loss": 3.0013, "step": 47500 }, { - "epoch": 0.21, - "learning_rate": 9.74707272319248e-06, - "loss": 7.6587, + "epoch": 0.25, + "learning_rate": 8.28267034732631e-06, + "loss": 2.9834, "step": 48000 }, { - "epoch": 0.21, - "learning_rate": 9.741572390972416e-06, - "loss": 7.6808, + "epoch": 0.25, + "learning_rate": 8.255712390955068e-06, + "loss": 3.0037, "step": 48500 }, { - "epoch": 0.21, - "learning_rate": 9.73607205875235e-06, - "loss": 7.6528, + "epoch": 0.26, + "learning_rate": 8.228754434583824e-06, + "loss": 2.9997, "step": 49000 }, { - "epoch": 0.22, - "learning_rate": 9.730571726532284e-06, - "loss": 7.6578, + "epoch": 0.26, + "learning_rate": 8.20179647821258e-06, + "loss": 2.9993, "step": 49500 }, { - "epoch": 0.22, - "learning_rate": 9.725071394312218e-06, - "loss": 7.6404, + "epoch": 0.26, + "learning_rate": 8.174838521841338e-06, + "loss": 2.979, "step": 50000 }, { - "epoch": 0.22, - "learning_rate": 9.719571062092152e-06, - "loss": 7.6389, + "epoch": 0.27, + "learning_rate": 8.147880565470094e-06, + "loss": 2.9908, "step": 50500 }, { - "epoch": 0.22, - "learning_rate": 9.714070729872086e-06, - "loss": 7.6365, + "epoch": 0.27, + "learning_rate": 8.12092260909885e-06, + "loss": 2.9872, "step": 51000 }, { - "epoch": 0.23, - "learning_rate": 9.708570397652018e-06, - "loss": 7.6291, + "epoch": 0.27, + "learning_rate": 8.093964652727606e-06, + "loss": 2.9869, "step": 51500 }, { - "epoch": 0.23, - "learning_rate": 9.703070065431953e-06, - "loss": 7.6347, + "epoch": 0.27, + "learning_rate": 8.067006696356363e-06, + "loss": 2.9975, "step": 52000 }, { - "epoch": 0.23, - "learning_rate": 9.697569733211887e-06, - "loss": 7.6025, + "epoch": 0.28, + "learning_rate": 8.04004873998512e-06, + "loss": 2.9916, "step": 52500 }, { - "epoch": 0.23, - "learning_rate": 9.69206940099182e-06, - "loss": 7.6127, + "epoch": 0.28, + "learning_rate": 8.013090783613877e-06, + "loss": 2.9964, "step": 53000 }, { - "epoch": 0.23, - "learning_rate": 9.686569068771755e-06, - "loss": 7.6166, + "epoch": 0.28, + "learning_rate": 7.986132827242633e-06, + "loss": 2.9879, "step": 53500 }, { - "epoch": 0.24, - "learning_rate": 9.681068736551689e-06, - "loss": 7.6208, + "epoch": 0.28, + "learning_rate": 7.95917487087139e-06, + "loss": 2.9832, "step": 54000 }, { - "epoch": 0.24, - "learning_rate": 9.675568404331623e-06, - "loss": 7.6046, + "epoch": 0.29, + "learning_rate": 7.932216914500145e-06, + "loss": 2.9927, "step": 54500 }, { - "epoch": 0.24, - "learning_rate": 9.670068072111555e-06, - "loss": 7.6301, + "epoch": 0.29, + "learning_rate": 7.905258958128903e-06, + "loss": 2.981, "step": 55000 }, { - "epoch": 0.24, - "learning_rate": 9.664567739891491e-06, - "loss": 7.6091, + "epoch": 0.29, + "learning_rate": 7.87830100175766e-06, + "loss": 2.9894, "step": 55500 }, { - "epoch": 0.25, - "learning_rate": 9.659067407671425e-06, - "loss": 7.5908, + "epoch": 0.29, + "learning_rate": 7.851343045386415e-06, + "loss": 2.987, "step": 56000 }, { - "epoch": 0.25, - "learning_rate": 9.653567075451357e-06, - "loss": 7.6034, + "epoch": 0.3, + "learning_rate": 7.824385089015173e-06, + "loss": 2.9632, "step": 56500 }, { - "epoch": 0.25, - "learning_rate": 9.648066743231293e-06, - "loss": 7.6109, + "epoch": 0.3, + "learning_rate": 7.79742713264393e-06, + "loss": 2.9863, "step": 57000 }, { - "epoch": 0.25, - "learning_rate": 9.642566411011227e-06, - "loss": 7.5909, + "epoch": 0.3, + "learning_rate": 7.770469176272686e-06, + "loss": 2.9848, "step": 57500 }, { - "epoch": 0.25, - "learning_rate": 9.637066078791159e-06, - "loss": 7.5836, + "epoch": 0.3, + "learning_rate": 7.743511219901444e-06, + "loss": 2.987, "step": 58000 }, { - "epoch": 0.26, - "learning_rate": 9.631565746571093e-06, - "loss": 7.5926, + "epoch": 0.31, + "learning_rate": 7.716553263530198e-06, + "loss": 2.9731, "step": 58500 }, { - "epoch": 0.26, - "learning_rate": 9.626065414351028e-06, - "loss": 7.5807, + "epoch": 0.31, + "learning_rate": 7.689595307158956e-06, + "loss": 2.9886, "step": 59000 }, { - "epoch": 0.26, - "learning_rate": 9.620565082130962e-06, - "loss": 7.5775, + "epoch": 0.31, + "learning_rate": 7.662637350787712e-06, + "loss": 2.9826, "step": 59500 }, { - "epoch": 0.26, - "learning_rate": 9.615064749910895e-06, - "loss": 7.5696, + "epoch": 0.32, + "learning_rate": 7.635679394416468e-06, + "loss": 2.9773, "step": 60000 }, { - "epoch": 0.26, - "learning_rate": 9.60956441769083e-06, - "loss": 7.5757, + "epoch": 0.32, + "learning_rate": 7.608721438045225e-06, + "loss": 2.958, "step": 60500 }, { - "epoch": 0.27, - "learning_rate": 9.604064085470764e-06, - "loss": 7.5625, + "epoch": 0.32, + "learning_rate": 7.5817634816739815e-06, + "loss": 2.9887, "step": 61000 }, { - "epoch": 0.27, - "learning_rate": 9.598563753250696e-06, - "loss": 7.5729, + "epoch": 0.32, + "learning_rate": 7.5548055253027386e-06, + "loss": 2.9685, "step": 61500 }, { - "epoch": 0.27, - "learning_rate": 9.59306342103063e-06, - "loss": 7.5756, + "epoch": 0.33, + "learning_rate": 7.527847568931496e-06, + "loss": 2.9632, "step": 62000 }, { - "epoch": 0.27, - "learning_rate": 9.587563088810566e-06, - "loss": 7.5439, + "epoch": 0.33, + "learning_rate": 7.500889612560252e-06, + "loss": 2.9816, "step": 62500 }, { - "epoch": 0.28, - "learning_rate": 9.582062756590498e-06, - "loss": 7.5754, + "epoch": 0.33, + "learning_rate": 7.473931656189009e-06, + "loss": 2.9726, "step": 63000 }, { - "epoch": 0.28, - "learning_rate": 9.576562424370432e-06, - "loss": 7.5438, + "epoch": 0.33, + "learning_rate": 7.446973699817764e-06, + "loss": 2.9753, "step": 63500 }, { - "epoch": 0.28, - "learning_rate": 9.571062092150368e-06, - "loss": 7.5685, + "epoch": 0.34, + "learning_rate": 7.420015743446521e-06, + "loss": 2.9785, "step": 64000 }, { - "epoch": 0.28, - "learning_rate": 9.565561759930302e-06, - "loss": 7.5329, + "epoch": 0.34, + "learning_rate": 7.393057787075278e-06, + "loss": 2.9749, "step": 64500 }, { - "epoch": 0.28, - "learning_rate": 9.560061427710234e-06, - "loss": 7.5443, + "epoch": 0.34, + "learning_rate": 7.366099830704034e-06, + "loss": 2.9722, "step": 65000 }, { - "epoch": 0.29, - "learning_rate": 9.554561095490168e-06, - "loss": 7.5507, + "epoch": 0.34, + "learning_rate": 7.3391418743327914e-06, + "loss": 2.9636, "step": 65500 }, { - "epoch": 0.29, - "learning_rate": 9.549060763270103e-06, - "loss": 7.5399, + "epoch": 0.35, + "learning_rate": 7.312183917961548e-06, + "loss": 2.9849, "step": 66000 }, { - "epoch": 0.29, - "learning_rate": 9.543560431050036e-06, - "loss": 7.5313, + "epoch": 0.35, + "learning_rate": 7.285225961590305e-06, + "loss": 2.9696, "step": 66500 }, { - "epoch": 0.29, - "learning_rate": 9.53806009882997e-06, - "loss": 7.5484, + "epoch": 0.35, + "learning_rate": 7.258268005219062e-06, + "loss": 2.9667, "step": 67000 }, { - "epoch": 0.3, - "learning_rate": 9.532559766609905e-06, - "loss": 7.5436, + "epoch": 0.35, + "learning_rate": 7.231310048847817e-06, + "loss": 2.9635, "step": 67500 }, { - "epoch": 0.3, - "learning_rate": 9.527059434389837e-06, - "loss": 7.5258, + "epoch": 0.36, + "learning_rate": 7.204352092476574e-06, + "loss": 2.9669, "step": 68000 }, { - "epoch": 0.3, - "learning_rate": 9.521559102169771e-06, - "loss": 7.5261, + "epoch": 0.36, + "learning_rate": 7.177394136105331e-06, + "loss": 2.9778, "step": 68500 }, { - "epoch": 0.3, - "learning_rate": 9.516058769949705e-06, - "loss": 7.5312, + "epoch": 0.36, + "learning_rate": 7.150436179734087e-06, + "loss": 2.9681, "step": 69000 }, { - "epoch": 0.3, - "learning_rate": 9.51055843772964e-06, - "loss": 7.5295, + "epoch": 0.36, + "learning_rate": 7.123478223362844e-06, + "loss": 2.9739, "step": 69500 }, { - "epoch": 0.31, - "learning_rate": 9.505058105509573e-06, - "loss": 7.5164, + "epoch": 0.37, + "learning_rate": 7.0965202669916e-06, + "loss": 2.9737, "step": 70000 }, { - "epoch": 0.31, - "learning_rate": 9.499557773289507e-06, - "loss": 7.5121, + "epoch": 0.37, + "learning_rate": 7.069562310620357e-06, + "loss": 2.9659, "step": 70500 }, { - "epoch": 0.31, - "learning_rate": 9.494057441069441e-06, - "loss": 7.5284, + "epoch": 0.37, + "learning_rate": 7.042604354249114e-06, + "loss": 2.9698, "step": 71000 }, { - "epoch": 0.31, - "learning_rate": 9.488557108849375e-06, - "loss": 7.5277, + "epoch": 0.38, + "learning_rate": 7.01564639787787e-06, + "loss": 2.9675, "step": 71500 }, { - "epoch": 0.32, - "learning_rate": 9.483056776629309e-06, - "loss": 7.5442, + "epoch": 0.38, + "learning_rate": 6.988688441506627e-06, + "loss": 2.9718, "step": 72000 }, { - "epoch": 0.32, - "learning_rate": 9.477556444409243e-06, - "loss": 7.5118, + "epoch": 0.38, + "learning_rate": 6.961730485135384e-06, + "loss": 2.9665, "step": 72500 }, { - "epoch": 0.32, - "learning_rate": 9.472056112189177e-06, - "loss": 7.5094, + "epoch": 0.38, + "learning_rate": 6.93477252876414e-06, + "loss": 2.9883, "step": 73000 }, { - "epoch": 0.32, - "learning_rate": 9.46655577996911e-06, - "loss": 7.49, + "epoch": 0.39, + "learning_rate": 6.907814572392897e-06, + "loss": 2.9842, "step": 73500 }, { - "epoch": 0.32, - "learning_rate": 9.461055447749045e-06, - "loss": 7.5142, + "epoch": 0.39, + "learning_rate": 6.8808566160216525e-06, + "loss": 2.9607, "step": 74000 }, { - "epoch": 0.33, - "learning_rate": 9.455555115528978e-06, - "loss": 7.4973, + "epoch": 0.39, + "learning_rate": 6.8538986596504095e-06, + "loss": 2.9776, "step": 74500 }, { - "epoch": 0.33, - "learning_rate": 9.450054783308912e-06, - "loss": 7.5101, + "epoch": 0.39, + "learning_rate": 6.8269407032791666e-06, + "loss": 2.963, "step": 75000 }, { - "epoch": 0.33, - "learning_rate": 9.444554451088846e-06, - "loss": 7.5007, + "epoch": 0.4, + "learning_rate": 6.799982746907923e-06, + "loss": 2.9571, "step": 75500 }, { - "epoch": 0.33, - "learning_rate": 9.43905411886878e-06, - "loss": 7.5064, + "epoch": 0.4, + "learning_rate": 6.77302479053668e-06, + "loss": 2.9495, "step": 76000 }, { - "epoch": 0.33, - "learning_rate": 9.433553786648714e-06, - "loss": 7.5037, + "epoch": 0.4, + "learning_rate": 6.746066834165436e-06, + "loss": 2.9601, "step": 76500 }, { - "epoch": 0.34, - "learning_rate": 9.428053454428648e-06, - "loss": 7.4904, + "epoch": 0.4, + "learning_rate": 6.719108877794193e-06, + "loss": 2.9645, "step": 77000 }, { - "epoch": 0.34, - "learning_rate": 9.422553122208582e-06, - "loss": 7.5029, + "epoch": 0.41, + "learning_rate": 6.69215092142295e-06, + "loss": 2.9559, "step": 77500 }, { - "epoch": 0.34, - "learning_rate": 9.417052789988516e-06, - "loss": 7.4924, + "epoch": 0.41, + "learning_rate": 6.665192965051705e-06, + "loss": 2.9583, "step": 78000 }, { - "epoch": 0.34, - "learning_rate": 9.41155245776845e-06, - "loss": 7.4783, + "epoch": 0.41, + "learning_rate": 6.638235008680462e-06, + "loss": 2.9543, "step": 78500 }, { - "epoch": 0.35, - "learning_rate": 9.406052125548384e-06, - "loss": 7.4917, + "epoch": 0.41, + "learning_rate": 6.6112770523092194e-06, + "loss": 2.9585, "step": 79000 }, { - "epoch": 0.35, - "learning_rate": 9.400551793328318e-06, - "loss": 7.4844, + "epoch": 0.42, + "learning_rate": 6.584319095937976e-06, + "loss": 2.9437, "step": 79500 }, { - "epoch": 0.35, - "learning_rate": 9.395051461108252e-06, - "loss": 7.4634, + "epoch": 0.42, + "learning_rate": 6.557361139566733e-06, + "loss": 2.9573, "step": 80000 }, { - "epoch": 0.35, - "learning_rate": 9.389551128888186e-06, - "loss": 7.4885, + "epoch": 0.42, + "learning_rate": 6.530403183195488e-06, + "loss": 2.9595, "step": 80500 }, { - "epoch": 0.35, - "learning_rate": 9.38405079666812e-06, - "loss": 7.4879, + "epoch": 0.43, + "learning_rate": 6.503445226824245e-06, + "loss": 2.9693, "step": 81000 }, { - "epoch": 0.36, - "learning_rate": 9.378550464448053e-06, - "loss": 7.4821, + "epoch": 0.43, + "learning_rate": 6.476487270453002e-06, + "loss": 2.956, "step": 81500 }, { - "epoch": 0.36, - "learning_rate": 9.373050132227987e-06, - "loss": 7.4794, + "epoch": 0.43, + "learning_rate": 6.449529314081758e-06, + "loss": 2.9568, "step": 82000 }, { - "epoch": 0.36, - "learning_rate": 9.367549800007921e-06, - "loss": 7.4733, + "epoch": 0.43, + "learning_rate": 6.422571357710515e-06, + "loss": 2.9555, "step": 82500 }, { - "epoch": 0.36, - "learning_rate": 9.362049467787855e-06, - "loss": 7.4652, + "epoch": 0.44, + "learning_rate": 6.395613401339272e-06, + "loss": 2.9467, "step": 83000 }, { - "epoch": 0.37, - "learning_rate": 9.35654913556779e-06, - "loss": 7.4794, + "epoch": 0.44, + "learning_rate": 6.3686554449680285e-06, + "loss": 2.9605, "step": 83500 }, { - "epoch": 0.37, - "learning_rate": 9.351048803347723e-06, - "loss": 7.4743, + "epoch": 0.44, + "learning_rate": 6.3416974885967855e-06, + "loss": 2.9549, "step": 84000 }, { - "epoch": 0.37, - "learning_rate": 9.345548471127657e-06, - "loss": 7.4543, + "epoch": 0.44, + "learning_rate": 6.314739532225541e-06, + "loss": 2.9449, "step": 84500 }, { - "epoch": 0.37, - "learning_rate": 9.340048138907591e-06, - "loss": 7.4596, + "epoch": 0.45, + "learning_rate": 6.287781575854298e-06, + "loss": 2.9564, "step": 85000 }, { - "epoch": 0.37, - "learning_rate": 9.334547806687525e-06, - "loss": 7.4688, + "epoch": 0.45, + "learning_rate": 6.260823619483055e-06, + "loss": 2.9448, "step": 85500 }, { - "epoch": 0.38, - "learning_rate": 9.329047474467459e-06, - "loss": 7.4658, + "epoch": 0.45, + "learning_rate": 6.233865663111811e-06, + "loss": 2.9518, "step": 86000 }, { - "epoch": 0.38, - "learning_rate": 9.323547142247393e-06, - "loss": 7.4542, + "epoch": 0.45, + "learning_rate": 6.206907706740568e-06, + "loss": 2.9597, "step": 86500 }, { - "epoch": 0.38, - "learning_rate": 9.318046810027327e-06, - "loss": 7.4514, + "epoch": 0.46, + "learning_rate": 6.179949750369324e-06, + "loss": 2.9664, "step": 87000 }, { - "epoch": 0.38, - "learning_rate": 9.31254647780726e-06, - "loss": 7.4532, + "epoch": 0.46, + "learning_rate": 6.152991793998081e-06, + "loss": 2.9503, "step": 87500 }, { - "epoch": 0.39, - "learning_rate": 9.307046145587195e-06, - "loss": 7.4519, + "epoch": 0.46, + "learning_rate": 6.126033837626838e-06, + "loss": 2.931, "step": 88000 }, { - "epoch": 0.39, - "learning_rate": 9.301545813367129e-06, - "loss": 7.456, + "epoch": 0.46, + "learning_rate": 6.099075881255594e-06, + "loss": 2.9675, "step": 88500 }, { - "epoch": 0.39, - "learning_rate": 9.296045481147062e-06, - "loss": 7.4514, + "epoch": 0.47, + "learning_rate": 6.072117924884351e-06, + "loss": 2.9647, "step": 89000 }, { - "epoch": 0.39, - "learning_rate": 9.290545148926996e-06, - "loss": 7.4514, + "epoch": 0.47, + "learning_rate": 6.045159968513108e-06, + "loss": 2.9345, "step": 89500 }, { - "epoch": 0.39, - "learning_rate": 9.28504481670693e-06, - "loss": 7.4499, + "epoch": 0.47, + "learning_rate": 6.018202012141864e-06, + "loss": 2.9479, "step": 90000 }, { - "epoch": 0.4, - "learning_rate": 9.279544484486864e-06, - "loss": 7.4462, + "epoch": 0.48, + "learning_rate": 5.991244055770621e-06, + "loss": 2.947, "step": 90500 }, { - "epoch": 0.4, - "learning_rate": 9.274044152266798e-06, - "loss": 7.4492, + "epoch": 0.48, + "learning_rate": 5.964286099399376e-06, + "loss": 2.9549, "step": 91000 }, { - "epoch": 0.4, - "learning_rate": 9.268543820046732e-06, - "loss": 7.4339, + "epoch": 0.48, + "learning_rate": 5.937328143028133e-06, + "loss": 2.9594, "step": 91500 }, { - "epoch": 0.4, - "learning_rate": 9.263043487826664e-06, - "loss": 7.4386, + "epoch": 0.48, + "learning_rate": 5.91037018665689e-06, + "loss": 2.9543, "step": 92000 }, { - "epoch": 0.4, - "learning_rate": 9.2575431556066e-06, - "loss": 7.43, + "epoch": 0.49, + "learning_rate": 5.883412230285647e-06, + "loss": 2.9481, "step": 92500 }, { - "epoch": 0.41, - "learning_rate": 9.252042823386534e-06, - "loss": 7.422, + "epoch": 0.49, + "learning_rate": 5.856454273914404e-06, + "loss": 2.9593, "step": 93000 }, { - "epoch": 0.41, - "learning_rate": 9.246542491166468e-06, - "loss": 7.4264, + "epoch": 0.49, + "learning_rate": 5.829496317543161e-06, + "loss": 2.94, "step": 93500 }, { - "epoch": 0.41, - "learning_rate": 9.241042158946402e-06, - "loss": 7.4333, + "epoch": 0.49, + "learning_rate": 5.802538361171917e-06, + "loss": 2.9484, "step": 94000 }, { - "epoch": 0.41, - "learning_rate": 9.235541826726336e-06, - "loss": 7.4206, + "epoch": 0.5, + "learning_rate": 5.775580404800674e-06, + "loss": 2.9431, "step": 94500 }, { - "epoch": 0.42, - "learning_rate": 9.23004149450627e-06, - "loss": 7.4295, + "epoch": 0.5, + "learning_rate": 5.748622448429429e-06, + "loss": 2.9449, "step": 95000 }, { - "epoch": 0.42, - "learning_rate": 9.224541162286202e-06, - "loss": 7.4215, + "epoch": 0.5, + "learning_rate": 5.721664492058186e-06, + "loss": 2.9369, "step": 95500 }, { - "epoch": 0.42, - "learning_rate": 9.219040830066137e-06, - "loss": 7.4242, + "epoch": 0.5, + "learning_rate": 5.694706535686943e-06, + "loss": 2.9542, "step": 96000 }, { - "epoch": 0.42, - "learning_rate": 9.213540497846071e-06, - "loss": 7.4249, + "epoch": 0.51, + "learning_rate": 5.6677485793156994e-06, + "loss": 2.9468, "step": 96500 }, { - "epoch": 0.42, - "learning_rate": 9.208040165626004e-06, - "loss": 7.417, + "epoch": 0.51, + "learning_rate": 5.6407906229444565e-06, + "loss": 2.9551, "step": 97000 }, { - "epoch": 0.43, - "learning_rate": 9.202539833405938e-06, - "loss": 7.4293, + "epoch": 0.51, + "learning_rate": 5.613832666573213e-06, + "loss": 2.9282, "step": 97500 }, { - "epoch": 0.43, - "learning_rate": 9.197039501185873e-06, - "loss": 7.4275, + "epoch": 0.51, + "learning_rate": 5.58687471020197e-06, + "loss": 2.9475, "step": 98000 }, { - "epoch": 0.43, - "learning_rate": 9.191539168965805e-06, - "loss": 7.4298, + "epoch": 0.52, + "learning_rate": 5.559916753830727e-06, + "loss": 2.9546, "step": 98500 }, { - "epoch": 0.43, - "learning_rate": 9.18603883674574e-06, - "loss": 7.4207, + "epoch": 0.52, + "learning_rate": 5.532958797459482e-06, + "loss": 2.9335, "step": 99000 }, { - "epoch": 0.44, - "learning_rate": 9.180538504525675e-06, - "loss": 7.4235, + "epoch": 0.52, + "learning_rate": 5.506000841088239e-06, + "loss": 2.9486, "step": 99500 }, { - "epoch": 0.44, - "learning_rate": 9.175038172305609e-06, - "loss": 7.4082, + "epoch": 0.53, + "learning_rate": 5.479042884716996e-06, + "loss": 2.9595, "step": 100000 }, { - "epoch": 0.44, - "learning_rate": 9.169537840085541e-06, - "loss": 7.4255, + "epoch": 0.53, + "learning_rate": 5.452084928345752e-06, + "loss": 2.9223, "step": 100500 }, { - "epoch": 0.44, - "learning_rate": 9.164037507865475e-06, - "loss": 7.4198, + "epoch": 0.53, + "learning_rate": 5.425126971974509e-06, + "loss": 2.9461, "step": 101000 }, { - "epoch": 0.44, - "learning_rate": 9.15853717564541e-06, - "loss": 7.4257, + "epoch": 0.53, + "learning_rate": 5.3981690156032655e-06, + "loss": 2.9471, "step": 101500 }, { - "epoch": 0.45, - "learning_rate": 9.153036843425343e-06, - "loss": 7.4116, + "epoch": 0.54, + "learning_rate": 5.371211059232022e-06, + "loss": 2.9323, "step": 102000 }, { - "epoch": 0.45, - "learning_rate": 9.147536511205277e-06, - "loss": 7.411, + "epoch": 0.54, + "learning_rate": 5.344253102860779e-06, + "loss": 2.9288, "step": 102500 }, { - "epoch": 0.45, - "learning_rate": 9.142036178985212e-06, - "loss": 7.3992, + "epoch": 0.54, + "learning_rate": 5.317295146489535e-06, + "loss": 2.9378, "step": 103000 }, { - "epoch": 0.45, - "learning_rate": 9.136535846765145e-06, - "loss": 7.4208, + "epoch": 0.54, + "learning_rate": 5.290337190118292e-06, + "loss": 2.9221, "step": 103500 }, { - "epoch": 0.46, - "learning_rate": 9.131035514545079e-06, - "loss": 7.3927, + "epoch": 0.55, + "learning_rate": 5.263379233747049e-06, + "loss": 2.9339, "step": 104000 }, { - "epoch": 0.46, - "learning_rate": 9.125535182325013e-06, - "loss": 7.3963, + "epoch": 0.55, + "learning_rate": 5.236421277375805e-06, + "loss": 2.9353, "step": 104500 }, { - "epoch": 0.46, - "learning_rate": 9.120034850104948e-06, - "loss": 7.3989, + "epoch": 0.55, + "learning_rate": 5.209463321004562e-06, + "loss": 2.9405, "step": 105000 }, { - "epoch": 0.46, - "learning_rate": 9.11453451788488e-06, - "loss": 7.3897, + "epoch": 0.55, + "learning_rate": 5.1825053646333175e-06, + "loss": 2.9514, "step": 105500 }, { - "epoch": 0.46, - "learning_rate": 9.109034185664814e-06, - "loss": 7.3833, + "epoch": 0.56, + "learning_rate": 5.155547408262075e-06, + "loss": 2.9346, "step": 106000 }, { - "epoch": 0.47, - "learning_rate": 9.10353385344475e-06, - "loss": 7.3838, + "epoch": 0.56, + "learning_rate": 5.128589451890832e-06, + "loss": 2.9421, "step": 106500 }, { - "epoch": 0.47, - "learning_rate": 9.098033521224682e-06, - "loss": 7.3721, + "epoch": 0.56, + "learning_rate": 5.101631495519588e-06, + "loss": 2.9353, "step": 107000 }, { - "epoch": 0.47, - "learning_rate": 9.092533189004616e-06, - "loss": 7.3441, + "epoch": 0.56, + "learning_rate": 5.074673539148345e-06, + "loss": 2.933, "step": 107500 }, { - "epoch": 0.47, - "learning_rate": 9.08703285678455e-06, - "loss": 7.3448, + "epoch": 0.57, + "learning_rate": 5.047715582777101e-06, + "loss": 2.9221, "step": 108000 }, { - "epoch": 0.47, - "learning_rate": 9.081532524564484e-06, - "loss": 7.3308, + "epoch": 0.57, + "learning_rate": 5.020757626405858e-06, + "loss": 2.9621, "step": 108500 }, { - "epoch": 0.48, - "learning_rate": 9.076032192344418e-06, - "loss": 7.3492, + "epoch": 0.57, + "learning_rate": 4.993799670034614e-06, + "loss": 2.9304, "step": 109000 }, { - "epoch": 0.48, - "learning_rate": 9.070531860124352e-06, - "loss": 7.3236, + "epoch": 0.57, + "learning_rate": 4.966841713663371e-06, + "loss": 2.9428, "step": 109500 }, { - "epoch": 0.48, - "learning_rate": 9.065031527904287e-06, - "loss": 7.3321, + "epoch": 0.58, + "learning_rate": 4.9398837572921274e-06, + "loss": 2.9215, "step": 110000 }, { - "epoch": 0.48, - "learning_rate": 9.05953119568422e-06, - "loss": 7.329, + "epoch": 0.58, + "learning_rate": 4.9129258009208845e-06, + "loss": 2.948, "step": 110500 }, { - "epoch": 0.49, - "learning_rate": 9.054030863464154e-06, - "loss": 7.3076, + "epoch": 0.58, + "learning_rate": 4.885967844549641e-06, + "loss": 2.9427, "step": 111000 }, { - "epoch": 0.49, - "learning_rate": 9.048530531244088e-06, - "loss": 7.2991, + "epoch": 0.59, + "learning_rate": 4.859009888178398e-06, + "loss": 2.9462, "step": 111500 }, { - "epoch": 0.49, - "learning_rate": 9.043030199024021e-06, - "loss": 7.2965, + "epoch": 0.59, + "learning_rate": 4.832051931807154e-06, + "loss": 2.9374, "step": 112000 }, { - "epoch": 0.49, - "learning_rate": 9.037529866803955e-06, - "loss": 7.2772, + "epoch": 0.59, + "learning_rate": 4.805093975435911e-06, + "loss": 2.9145, "step": 112500 }, { - "epoch": 0.49, - "learning_rate": 9.03202953458389e-06, - "loss": 7.2735, + "epoch": 0.59, + "learning_rate": 4.778136019064667e-06, + "loss": 2.9384, "step": 113000 }, { - "epoch": 0.5, - "learning_rate": 9.026529202363823e-06, - "loss": 7.2715, + "epoch": 0.6, + "learning_rate": 4.751178062693424e-06, + "loss": 2.9349, "step": 113500 }, { - "epoch": 0.5, - "learning_rate": 9.021028870143757e-06, - "loss": 7.2794, + "epoch": 0.6, + "learning_rate": 4.72422010632218e-06, + "loss": 2.9303, "step": 114000 }, { - "epoch": 0.5, - "learning_rate": 9.015528537923691e-06, - "loss": 7.2733, + "epoch": 0.6, + "learning_rate": 4.6972621499509365e-06, + "loss": 2.9306, "step": 114500 }, { - "epoch": 0.5, - "learning_rate": 9.010028205703625e-06, - "loss": 7.2622, + "epoch": 0.6, + "learning_rate": 4.6703041935796935e-06, + "loss": 2.921, "step": 115000 }, { - "epoch": 0.51, - "learning_rate": 9.004527873483559e-06, - "loss": 7.2666, + "epoch": 0.61, + "learning_rate": 4.6433462372084506e-06, + "loss": 2.928, "step": 115500 }, { - "epoch": 0.51, - "learning_rate": 8.999027541263493e-06, - "loss": 7.2396, + "epoch": 0.61, + "learning_rate": 4.616388280837207e-06, + "loss": 2.9289, "step": 116000 }, { - "epoch": 0.51, - "learning_rate": 8.993527209043427e-06, - "loss": 7.2583, + "epoch": 0.61, + "learning_rate": 4.589430324465963e-06, + "loss": 2.9219, "step": 116500 }, { - "epoch": 0.51, - "learning_rate": 8.98802687682336e-06, - "loss": 7.2522, + "epoch": 0.61, + "learning_rate": 4.56247236809472e-06, + "loss": 2.914, "step": 117000 }, { - "epoch": 0.51, - "learning_rate": 8.982526544603295e-06, - "loss": 7.2489, + "epoch": 0.62, + "learning_rate": 4.535514411723476e-06, + "loss": 2.9339, "step": 117500 }, { - "epoch": 0.52, - "learning_rate": 8.977026212383229e-06, - "loss": 7.2362, + "epoch": 0.62, + "learning_rate": 4.508556455352233e-06, + "loss": 2.9264, "step": 118000 }, { - "epoch": 0.52, - "learning_rate": 8.971525880163163e-06, - "loss": 7.2387, + "epoch": 0.62, + "learning_rate": 4.481598498980989e-06, + "loss": 2.9312, "step": 118500 }, { - "epoch": 0.52, - "learning_rate": 8.966025547943096e-06, - "loss": 7.2258, + "epoch": 0.62, + "learning_rate": 4.454640542609746e-06, + "loss": 2.9132, "step": 119000 }, { - "epoch": 0.52, - "learning_rate": 8.96052521572303e-06, - "loss": 7.2387, + "epoch": 0.63, + "learning_rate": 4.427682586238503e-06, + "loss": 2.9201, "step": 119500 }, { - "epoch": 0.53, - "learning_rate": 8.955024883502964e-06, - "loss": 7.2345, + "epoch": 0.63, + "learning_rate": 4.40072462986726e-06, + "loss": 2.9214, "step": 120000 }, { - "epoch": 0.53, - "learning_rate": 8.949524551282898e-06, - "loss": 7.2113, + "epoch": 0.63, + "learning_rate": 4.373766673496016e-06, + "loss": 2.9197, "step": 120500 }, { - "epoch": 0.53, - "learning_rate": 8.944024219062832e-06, - "loss": 7.2161, + "epoch": 0.64, + "learning_rate": 4.346808717124773e-06, + "loss": 2.9238, "step": 121000 }, { - "epoch": 0.53, - "learning_rate": 8.938523886842766e-06, - "loss": 7.2371, + "epoch": 0.64, + "learning_rate": 4.319850760753529e-06, + "loss": 2.9114, "step": 121500 }, { - "epoch": 0.53, - "learning_rate": 8.9330235546227e-06, - "loss": 7.2099, + "epoch": 0.64, + "learning_rate": 4.292892804382286e-06, + "loss": 2.9142, "step": 122000 }, { - "epoch": 0.54, - "learning_rate": 8.927523222402634e-06, - "loss": 7.2023, + "epoch": 0.64, + "learning_rate": 4.265934848011042e-06, + "loss": 2.9223, "step": 122500 }, { - "epoch": 0.54, - "learning_rate": 8.922022890182568e-06, - "loss": 7.1956, + "epoch": 0.65, + "learning_rate": 4.238976891639799e-06, + "loss": 2.9169, "step": 123000 }, { - "epoch": 0.54, - "learning_rate": 8.916522557962502e-06, - "loss": 7.196, + "epoch": 0.65, + "learning_rate": 4.2120189352685554e-06, + "loss": 2.9305, "step": 123500 }, { - "epoch": 0.54, - "learning_rate": 8.911022225742436e-06, - "loss": 7.1946, + "epoch": 0.65, + "learning_rate": 4.1850609788973125e-06, + "loss": 2.9298, "step": 124000 }, { - "epoch": 0.54, - "learning_rate": 8.90552189352237e-06, - "loss": 7.1853, + "epoch": 0.65, + "learning_rate": 4.158103022526069e-06, + "loss": 2.9412, "step": 124500 }, { - "epoch": 0.55, - "learning_rate": 8.900021561302304e-06, - "loss": 7.1787, + "epoch": 0.66, + "learning_rate": 4.131145066154825e-06, + "loss": 2.9149, "step": 125000 }, { - "epoch": 0.55, - "learning_rate": 8.894521229082238e-06, - "loss": 7.1835, + "epoch": 0.66, + "learning_rate": 4.104187109783582e-06, + "loss": 2.9191, "step": 125500 }, { - "epoch": 0.55, - "learning_rate": 8.889020896862171e-06, - "loss": 7.2015, + "epoch": 0.66, + "learning_rate": 4.077229153412339e-06, + "loss": 2.9188, "step": 126000 }, { - "epoch": 0.55, - "learning_rate": 8.883520564642105e-06, - "loss": 7.1835, + "epoch": 0.66, + "learning_rate": 4.050271197041095e-06, + "loss": 2.9163, "step": 126500 }, { - "epoch": 0.56, - "learning_rate": 8.87802023242204e-06, - "loss": 7.1753, + "epoch": 0.67, + "learning_rate": 4.023313240669851e-06, + "loss": 2.9082, "step": 127000 }, { - "epoch": 0.56, - "learning_rate": 8.872519900201972e-06, - "loss": 7.1731, + "epoch": 0.67, + "learning_rate": 3.996355284298608e-06, + "loss": 2.9169, "step": 127500 }, { - "epoch": 0.56, - "learning_rate": 8.867019567981907e-06, - "loss": 7.1458, + "epoch": 0.67, + "learning_rate": 3.9693973279273645e-06, + "loss": 2.9091, "step": 128000 }, { - "epoch": 0.56, - "learning_rate": 8.861519235761841e-06, - "loss": 7.1542, + "epoch": 0.67, + "learning_rate": 3.9424393715561215e-06, + "loss": 2.9196, "step": 128500 }, { - "epoch": 0.56, - "learning_rate": 8.856018903541775e-06, - "loss": 7.1435, + "epoch": 0.68, + "learning_rate": 3.915481415184878e-06, + "loss": 2.9287, "step": 129000 }, { - "epoch": 0.57, - "learning_rate": 8.850518571321709e-06, - "loss": 7.1592, + "epoch": 0.68, + "learning_rate": 3.888523458813635e-06, + "loss": 2.8979, "step": 129500 }, { - "epoch": 0.57, - "learning_rate": 8.845018239101643e-06, - "loss": 7.1575, + "epoch": 0.68, + "learning_rate": 3.861565502442391e-06, + "loss": 2.9169, "step": 130000 }, { - "epoch": 0.57, - "learning_rate": 8.839517906881577e-06, - "loss": 7.1637, + "epoch": 0.69, + "learning_rate": 3.834607546071148e-06, + "loss": 2.9144, "step": 130500 }, { - "epoch": 0.57, - "learning_rate": 8.834017574661509e-06, - "loss": 7.1407, + "epoch": 0.69, + "learning_rate": 3.8076495896999046e-06, + "loss": 2.9094, "step": 131000 }, { - "epoch": 0.58, - "learning_rate": 8.828517242441445e-06, - "loss": 7.1479, + "epoch": 0.69, + "learning_rate": 3.7806916333286608e-06, + "loss": 2.9113, "step": 131500 }, { - "epoch": 0.58, - "learning_rate": 8.823016910221379e-06, - "loss": 7.1383, + "epoch": 0.69, + "learning_rate": 3.7537336769574174e-06, + "loss": 2.9179, "step": 132000 }, { - "epoch": 0.58, - "learning_rate": 8.817516578001311e-06, - "loss": 7.1254, + "epoch": 0.7, + "learning_rate": 3.7267757205861744e-06, + "loss": 2.9321, "step": 132500 }, { - "epoch": 0.58, - "learning_rate": 8.812016245781246e-06, - "loss": 7.1358, + "epoch": 0.7, + "learning_rate": 3.6998177642149306e-06, + "loss": 2.9284, "step": 133000 }, { - "epoch": 0.58, - "learning_rate": 8.80651591356118e-06, - "loss": 7.1213, + "epoch": 0.7, + "learning_rate": 3.672859807843687e-06, + "loss": 2.9231, "step": 133500 }, { - "epoch": 0.59, - "learning_rate": 8.801015581341114e-06, - "loss": 7.1027, + "epoch": 0.7, + "learning_rate": 3.645901851472444e-06, + "loss": 2.9114, "step": 134000 }, { - "epoch": 0.59, - "learning_rate": 8.795515249121047e-06, - "loss": 7.1114, + "epoch": 0.71, + "learning_rate": 3.618943895101201e-06, + "loss": 2.9173, "step": 134500 }, { - "epoch": 0.59, - "learning_rate": 8.790014916900982e-06, - "loss": 7.12, + "epoch": 0.71, + "learning_rate": 3.591985938729957e-06, + "loss": 2.9145, "step": 135000 }, { - "epoch": 0.59, - "learning_rate": 8.784514584680916e-06, - "loss": 7.1124, + "epoch": 0.71, + "learning_rate": 3.5650279823587136e-06, + "loss": 2.9127, "step": 135500 }, { - "epoch": 0.6, - "learning_rate": 8.779014252460848e-06, - "loss": 7.1064, + "epoch": 0.71, + "learning_rate": 3.5380700259874702e-06, + "loss": 2.9149, "step": 136000 }, { - "epoch": 0.6, - "learning_rate": 8.773513920240784e-06, - "loss": 7.116, + "epoch": 0.72, + "learning_rate": 3.5111120696162273e-06, + "loss": 2.926, "step": 136500 }, { - "epoch": 0.6, - "learning_rate": 8.768013588020718e-06, - "loss": 7.092, + "epoch": 0.72, + "learning_rate": 3.4841541132449834e-06, + "loss": 2.9061, "step": 137000 }, { - "epoch": 0.6, - "learning_rate": 8.76251325580065e-06, - "loss": 7.0904, + "epoch": 0.72, + "learning_rate": 3.45719615687374e-06, + "loss": 2.9266, "step": 137500 }, { - "epoch": 0.6, - "learning_rate": 8.757012923580584e-06, - "loss": 7.0863, + "epoch": 0.72, + "learning_rate": 3.4302382005024967e-06, + "loss": 2.9092, "step": 138000 }, { - "epoch": 0.61, - "learning_rate": 8.75151259136052e-06, - "loss": 7.0805, + "epoch": 0.73, + "learning_rate": 3.403280244131253e-06, + "loss": 2.9268, "step": 138500 }, { - "epoch": 0.61, - "learning_rate": 8.746012259140454e-06, - "loss": 7.0823, + "epoch": 0.73, + "learning_rate": 3.37632228776001e-06, + "loss": 2.9088, "step": 139000 }, { - "epoch": 0.61, - "learning_rate": 8.740511926920386e-06, - "loss": 7.0707, + "epoch": 0.73, + "learning_rate": 3.3493643313887665e-06, + "loss": 2.8918, "step": 139500 }, { - "epoch": 0.61, - "learning_rate": 8.735011594700321e-06, - "loss": 7.0688, + "epoch": 0.74, + "learning_rate": 3.3224063750175227e-06, + "loss": 2.9174, "step": 140000 }, { - "epoch": 0.61, - "learning_rate": 8.729511262480255e-06, - "loss": 7.0627, + "epoch": 0.74, + "learning_rate": 3.2954484186462793e-06, + "loss": 2.9104, "step": 140500 }, { - "epoch": 0.62, - "learning_rate": 8.724010930260188e-06, - "loss": 7.0688, + "epoch": 0.74, + "learning_rate": 3.2684904622750363e-06, + "loss": 2.9042, "step": 141000 }, { - "epoch": 0.62, - "learning_rate": 8.718510598040122e-06, - "loss": 7.0622, + "epoch": 0.74, + "learning_rate": 3.241532505903793e-06, + "loss": 2.9035, "step": 141500 }, { - "epoch": 0.62, - "learning_rate": 8.713010265820057e-06, - "loss": 7.0455, + "epoch": 0.75, + "learning_rate": 3.214574549532549e-06, + "loss": 2.8953, "step": 142000 }, { - "epoch": 0.62, - "learning_rate": 8.70750993359999e-06, - "loss": 7.0413, + "epoch": 0.75, + "learning_rate": 3.1876165931613057e-06, + "loss": 2.9143, "step": 142500 }, { - "epoch": 0.63, - "learning_rate": 8.702009601379923e-06, - "loss": 7.0441, + "epoch": 0.75, + "learning_rate": 3.1606586367900627e-06, + "loss": 2.9151, "step": 143000 }, { - "epoch": 0.63, - "learning_rate": 8.696509269159859e-06, - "loss": 7.0476, + "epoch": 0.75, + "learning_rate": 3.1337006804188193e-06, + "loss": 2.9104, "step": 143500 }, { - "epoch": 0.63, - "learning_rate": 8.691008936939793e-06, - "loss": 7.0418, + "epoch": 0.76, + "learning_rate": 3.1067427240475755e-06, + "loss": 2.9164, "step": 144000 }, { - "epoch": 0.63, - "learning_rate": 8.685508604719725e-06, - "loss": 7.0182, + "epoch": 0.76, + "learning_rate": 3.079784767676332e-06, + "loss": 2.9027, "step": 144500 }, { - "epoch": 0.63, - "learning_rate": 8.680008272499659e-06, - "loss": 7.0226, + "epoch": 0.76, + "learning_rate": 3.052826811305089e-06, + "loss": 2.9007, "step": 145000 }, { - "epoch": 0.64, - "learning_rate": 8.674507940279595e-06, - "loss": 7.014, + "epoch": 0.76, + "learning_rate": 3.0258688549338454e-06, + "loss": 2.9028, "step": 145500 }, { - "epoch": 0.64, - "learning_rate": 8.669007608059527e-06, - "loss": 7.0208, + "epoch": 0.77, + "learning_rate": 2.998910898562602e-06, + "loss": 2.9095, "step": 146000 }, { - "epoch": 0.64, - "learning_rate": 8.663507275839461e-06, - "loss": 7.0294, + "epoch": 0.77, + "learning_rate": 2.9719529421913586e-06, + "loss": 2.8995, "step": 146500 }, { - "epoch": 0.64, - "learning_rate": 8.658006943619396e-06, - "loss": 7.0099, + "epoch": 0.77, + "learning_rate": 2.9449949858201156e-06, + "loss": 2.9016, "step": 147000 }, { - "epoch": 0.65, - "learning_rate": 8.652506611399329e-06, - "loss": 7.0196, + "epoch": 0.77, + "learning_rate": 2.9180370294488718e-06, + "loss": 2.9078, "step": 147500 }, { - "epoch": 0.65, - "learning_rate": 8.647006279179263e-06, - "loss": 7.005, + "epoch": 0.78, + "learning_rate": 2.8910790730776284e-06, + "loss": 2.9225, "step": 148000 }, { - "epoch": 0.65, - "learning_rate": 8.641505946959197e-06, - "loss": 6.9846, + "epoch": 0.78, + "learning_rate": 2.864121116706385e-06, + "loss": 2.8951, "step": 148500 }, { - "epoch": 0.65, - "learning_rate": 8.636005614739132e-06, - "loss": 6.9799, + "epoch": 0.78, + "learning_rate": 2.837163160335141e-06, + "loss": 2.8937, "step": 149000 }, { - "epoch": 0.65, - "learning_rate": 8.630505282519064e-06, - "loss": 6.9991, + "epoch": 0.78, + "learning_rate": 2.8102052039638982e-06, + "loss": 2.9105, "step": 149500 }, { - "epoch": 0.66, - "learning_rate": 8.625004950298998e-06, - "loss": 6.9868, + "epoch": 0.79, + "learning_rate": 2.783247247592655e-06, + "loss": 2.9172, "step": 150000 }, { - "epoch": 0.66, - "learning_rate": 8.619504618078932e-06, - "loss": 6.9632, + "epoch": 0.79, + "learning_rate": 2.756289291221411e-06, + "loss": 2.8935, "step": 150500 }, { - "epoch": 0.66, - "learning_rate": 8.614004285858866e-06, - "loss": 6.9719, + "epoch": 0.79, + "learning_rate": 2.7293313348501676e-06, + "loss": 2.8989, "step": 151000 }, { - "epoch": 0.66, - "learning_rate": 8.6085039536388e-06, - "loss": 6.9815, + "epoch": 0.8, + "learning_rate": 2.7023733784789247e-06, + "loss": 2.9197, "step": 151500 }, { - "epoch": 0.67, - "learning_rate": 8.603003621418734e-06, - "loss": 6.9849, + "epoch": 0.8, + "learning_rate": 2.6754154221076813e-06, + "loss": 2.9014, "step": 152000 }, { - "epoch": 0.67, - "learning_rate": 8.597503289198668e-06, - "loss": 6.9632, + "epoch": 0.8, + "learning_rate": 2.6484574657364374e-06, + "loss": 2.917, "step": 152500 }, { - "epoch": 0.67, - "learning_rate": 8.592002956978602e-06, - "loss": 6.9641, + "epoch": 0.8, + "learning_rate": 2.621499509365194e-06, + "loss": 2.9114, "step": 153000 }, { - "epoch": 0.67, - "learning_rate": 8.586502624758536e-06, - "loss": 6.9708, + "epoch": 0.81, + "learning_rate": 2.594541552993951e-06, + "loss": 2.8914, "step": 153500 }, { - "epoch": 0.67, - "learning_rate": 8.58100229253847e-06, - "loss": 6.9457, + "epoch": 0.81, + "learning_rate": 2.5675835966227077e-06, + "loss": 2.8933, "step": 154000 }, { - "epoch": 0.68, - "learning_rate": 8.575501960318404e-06, - "loss": 6.9505, + "epoch": 0.81, + "learning_rate": 2.540625640251464e-06, + "loss": 2.9082, "step": 154500 }, { - "epoch": 0.68, - "learning_rate": 8.570001628098338e-06, - "loss": 6.9442, + "epoch": 0.81, + "learning_rate": 2.5136676838802205e-06, + "loss": 2.9024, "step": 155000 }, { - "epoch": 0.68, - "learning_rate": 8.564501295878272e-06, - "loss": 6.9395, + "epoch": 0.82, + "learning_rate": 2.486709727508977e-06, + "loss": 2.8803, "step": 155500 }, { - "epoch": 0.68, - "learning_rate": 8.559000963658206e-06, - "loss": 6.9363, + "epoch": 0.82, + "learning_rate": 2.4597517711377337e-06, + "loss": 2.9039, "step": 156000 }, { - "epoch": 0.68, - "learning_rate": 8.55350063143814e-06, - "loss": 6.9424, + "epoch": 0.82, + "learning_rate": 2.4327938147664903e-06, + "loss": 2.9101, "step": 156500 }, { - "epoch": 0.69, - "learning_rate": 8.548000299218073e-06, - "loss": 6.9474, + "epoch": 0.82, + "learning_rate": 2.405835858395247e-06, + "loss": 2.8997, "step": 157000 }, { - "epoch": 0.69, - "learning_rate": 8.542499966998007e-06, - "loss": 6.9172, + "epoch": 0.83, + "learning_rate": 2.3788779020240035e-06, + "loss": 2.9082, "step": 157500 }, { - "epoch": 0.69, - "learning_rate": 8.536999634777941e-06, - "loss": 6.9108, + "epoch": 0.83, + "learning_rate": 2.35191994565276e-06, + "loss": 2.8857, "step": 158000 }, { - "epoch": 0.69, - "learning_rate": 8.531499302557875e-06, - "loss": 6.8984, + "epoch": 0.83, + "learning_rate": 2.3249619892815167e-06, + "loss": 2.9077, "step": 158500 }, { - "epoch": 0.7, - "learning_rate": 8.525998970337809e-06, - "loss": 6.9112, + "epoch": 0.83, + "learning_rate": 2.2980040329102734e-06, + "loss": 2.8974, "step": 159000 }, { - "epoch": 0.7, - "learning_rate": 8.520498638117743e-06, - "loss": 6.9091, + "epoch": 0.84, + "learning_rate": 2.27104607653903e-06, + "loss": 2.8819, "step": 159500 }, { - "epoch": 0.7, - "learning_rate": 8.514998305897677e-06, - "loss": 6.918, + "epoch": 0.84, + "learning_rate": 2.2440881201677866e-06, + "loss": 2.9061, "step": 160000 }, { - "epoch": 0.7, - "learning_rate": 8.509497973677611e-06, - "loss": 6.8966, + "epoch": 0.84, + "learning_rate": 2.217130163796543e-06, + "loss": 2.9103, "step": 160500 }, { - "epoch": 0.7, - "learning_rate": 8.503997641457545e-06, - "loss": 6.8854, + "epoch": 0.85, + "learning_rate": 2.1901722074252994e-06, + "loss": 2.9046, "step": 161000 }, { - "epoch": 0.71, - "learning_rate": 8.498497309237479e-06, - "loss": 6.8797, + "epoch": 0.85, + "learning_rate": 2.1632142510540564e-06, + "loss": 2.8976, "step": 161500 }, { - "epoch": 0.71, - "learning_rate": 8.492996977017413e-06, - "loss": 6.8944, + "epoch": 0.85, + "learning_rate": 2.1362562946828126e-06, + "loss": 2.8751, "step": 162000 }, { - "epoch": 0.71, - "learning_rate": 8.487496644797347e-06, - "loss": 6.8815, + "epoch": 0.85, + "learning_rate": 2.1092983383115696e-06, + "loss": 2.9034, "step": 162500 }, { - "epoch": 0.71, - "learning_rate": 8.48199631257728e-06, - "loss": 6.871, + "epoch": 0.86, + "learning_rate": 2.082340381940326e-06, + "loss": 2.9018, "step": 163000 }, { - "epoch": 0.72, - "learning_rate": 8.476495980357214e-06, - "loss": 6.8839, + "epoch": 0.86, + "learning_rate": 2.055382425569083e-06, + "loss": 2.8938, "step": 163500 }, { - "epoch": 0.72, - "learning_rate": 8.470995648137148e-06, - "loss": 6.844, + "epoch": 0.86, + "learning_rate": 2.028424469197839e-06, + "loss": 2.9216, "step": 164000 }, { - "epoch": 0.72, - "learning_rate": 8.465495315917082e-06, - "loss": 6.8761, + "epoch": 0.86, + "learning_rate": 2.001466512826596e-06, + "loss": 2.8841, "step": 164500 }, { - "epoch": 0.72, - "learning_rate": 8.459994983697016e-06, - "loss": 6.8561, + "epoch": 0.87, + "learning_rate": 1.9745085564553522e-06, + "loss": 2.893, "step": 165000 }, { - "epoch": 0.72, - "learning_rate": 8.45449465147695e-06, - "loss": 6.8393, + "epoch": 0.87, + "learning_rate": 1.9475506000841093e-06, + "loss": 2.9049, "step": 165500 }, { - "epoch": 0.73, - "learning_rate": 8.448994319256884e-06, - "loss": 6.8438, + "epoch": 0.87, + "learning_rate": 1.9205926437128654e-06, + "loss": 2.8978, "step": 166000 }, { - "epoch": 0.73, - "learning_rate": 8.443493987036818e-06, - "loss": 6.8317, + "epoch": 0.87, + "learning_rate": 1.8936346873416223e-06, + "loss": 2.8871, "step": 166500 }, { - "epoch": 0.73, - "learning_rate": 8.437993654816752e-06, - "loss": 6.8367, + "epoch": 0.88, + "learning_rate": 1.8666767309703787e-06, + "loss": 2.8934, "step": 167000 }, { - "epoch": 0.73, - "learning_rate": 8.432493322596686e-06, - "loss": 6.8271, + "epoch": 0.88, + "learning_rate": 1.8397187745991355e-06, + "loss": 2.9071, "step": 167500 }, { - "epoch": 0.74, - "learning_rate": 8.42699299037662e-06, - "loss": 6.817, + "epoch": 0.88, + "learning_rate": 1.8127608182278919e-06, + "loss": 2.8993, "step": 168000 }, { - "epoch": 0.74, - "learning_rate": 8.421492658156554e-06, - "loss": 6.8253, + "epoch": 0.88, + "learning_rate": 1.7858028618566487e-06, + "loss": 2.8957, "step": 168500 }, { - "epoch": 0.74, - "learning_rate": 8.415992325936488e-06, - "loss": 6.8266, + "epoch": 0.89, + "learning_rate": 1.758844905485405e-06, + "loss": 2.9015, "step": 169000 }, { - "epoch": 0.74, - "learning_rate": 8.410491993716422e-06, - "loss": 6.8241, + "epoch": 0.89, + "learning_rate": 1.731886949114162e-06, + "loss": 2.8916, "step": 169500 }, { - "epoch": 0.74, - "learning_rate": 8.404991661496356e-06, - "loss": 6.7968, + "epoch": 0.89, + "learning_rate": 1.7049289927429183e-06, + "loss": 2.9013, "step": 170000 }, { - "epoch": 0.75, - "learning_rate": 8.39949132927629e-06, - "loss": 6.8109, + "epoch": 0.9, + "learning_rate": 1.6779710363716747e-06, + "loss": 2.8928, "step": 170500 }, { - "epoch": 0.75, - "learning_rate": 8.393990997056223e-06, - "loss": 6.7955, + "epoch": 0.9, + "learning_rate": 1.6510130800004315e-06, + "loss": 2.8813, "step": 171000 }, { - "epoch": 0.75, - "learning_rate": 8.388490664836156e-06, - "loss": 6.7989, + "epoch": 0.9, + "learning_rate": 1.624055123629188e-06, + "loss": 2.8782, "step": 171500 }, { - "epoch": 0.75, - "learning_rate": 8.382990332616091e-06, - "loss": 6.7945, + "epoch": 0.9, + "learning_rate": 1.5970971672579447e-06, + "loss": 2.8838, "step": 172000 }, { - "epoch": 0.75, - "learning_rate": 8.377490000396025e-06, - "loss": 6.7752, + "epoch": 0.91, + "learning_rate": 1.5701392108867011e-06, + "loss": 2.8859, "step": 172500 }, { - "epoch": 0.76, - "learning_rate": 8.371989668175959e-06, - "loss": 6.7703, + "epoch": 0.91, + "learning_rate": 1.543181254515458e-06, + "loss": 2.8822, "step": 173000 }, { - "epoch": 0.76, - "learning_rate": 8.366489335955893e-06, - "loss": 6.7813, + "epoch": 0.91, + "learning_rate": 1.5162232981442144e-06, + "loss": 2.8794, "step": 173500 }, { - "epoch": 0.76, - "learning_rate": 8.360989003735827e-06, - "loss": 6.769, + "epoch": 0.91, + "learning_rate": 1.489265341772971e-06, + "loss": 2.894, "step": 174000 }, { - "epoch": 0.76, - "learning_rate": 8.355488671515761e-06, - "loss": 6.7649, + "epoch": 0.92, + "learning_rate": 1.4623073854017276e-06, + "loss": 2.8849, "step": 174500 }, { - "epoch": 0.77, - "learning_rate": 8.349988339295693e-06, - "loss": 6.7667, + "epoch": 0.92, + "learning_rate": 1.4353494290304842e-06, + "loss": 2.9072, "step": 175000 }, { - "epoch": 0.77, - "learning_rate": 8.344488007075629e-06, - "loss": 6.7566, + "epoch": 0.92, + "learning_rate": 1.4083914726592408e-06, + "loss": 2.8869, "step": 175500 }, { - "epoch": 0.77, - "learning_rate": 8.338987674855563e-06, - "loss": 6.7585, + "epoch": 0.92, + "learning_rate": 1.3814335162879974e-06, + "loss": 2.8873, "step": 176000 }, { - "epoch": 0.77, - "learning_rate": 8.333487342635495e-06, - "loss": 6.7401, + "epoch": 0.93, + "learning_rate": 1.3544755599167538e-06, + "loss": 2.9072, "step": 176500 }, { - "epoch": 0.77, - "learning_rate": 8.327987010415429e-06, - "loss": 6.7236, + "epoch": 0.93, + "learning_rate": 1.3275176035455106e-06, + "loss": 2.889, "step": 177000 }, { - "epoch": 0.78, - "learning_rate": 8.322486678195364e-06, - "loss": 6.732, + "epoch": 0.93, + "learning_rate": 1.300559647174267e-06, + "loss": 2.8639, "step": 177500 }, { - "epoch": 0.78, - "learning_rate": 8.316986345975298e-06, - "loss": 6.7241, + "epoch": 0.93, + "learning_rate": 1.2736016908030238e-06, + "loss": 2.8928, "step": 178000 }, { - "epoch": 0.78, - "learning_rate": 8.31148601375523e-06, - "loss": 6.7206, + "epoch": 0.94, + "learning_rate": 1.2466437344317802e-06, + "loss": 2.8948, "step": 178500 }, { - "epoch": 0.78, - "learning_rate": 8.305985681535166e-06, - "loss": 6.7128, + "epoch": 0.94, + "learning_rate": 1.2196857780605368e-06, + "loss": 2.8877, "step": 179000 }, { - "epoch": 0.79, - "learning_rate": 8.3004853493151e-06, - "loss": 6.7026, + "epoch": 0.94, + "learning_rate": 1.1927278216892934e-06, + "loss": 2.8897, "step": 179500 }, { - "epoch": 0.79, - "learning_rate": 8.294985017095032e-06, - "loss": 6.7112, + "epoch": 0.95, + "learning_rate": 1.16576986531805e-06, + "loss": 2.8959, "step": 180000 }, { - "epoch": 0.79, - "learning_rate": 8.289484684874966e-06, - "loss": 6.7032, + "epoch": 0.95, + "learning_rate": 1.1388119089468067e-06, + "loss": 2.8988, "step": 180500 }, { - "epoch": 0.79, - "learning_rate": 8.283984352654902e-06, - "loss": 6.6976, + "epoch": 0.95, + "learning_rate": 1.1118539525755633e-06, + "loss": 2.8774, "step": 181000 }, { - "epoch": 0.79, - "learning_rate": 8.278484020434834e-06, - "loss": 6.6837, + "epoch": 0.95, + "learning_rate": 1.0848959962043199e-06, + "loss": 2.8977, "step": 181500 }, { - "epoch": 0.8, - "learning_rate": 8.272983688214768e-06, - "loss": 6.6787, + "epoch": 0.96, + "learning_rate": 1.0579380398330765e-06, + "loss": 2.8776, "step": 182000 }, { - "epoch": 0.8, - "learning_rate": 8.267483355994704e-06, - "loss": 6.6798, + "epoch": 0.96, + "learning_rate": 1.030980083461833e-06, + "loss": 2.889, "step": 182500 }, { - "epoch": 0.8, - "learning_rate": 8.261983023774636e-06, - "loss": 6.6519, + "epoch": 0.96, + "learning_rate": 1.0040221270905897e-06, + "loss": 2.8759, "step": 183000 }, { - "epoch": 0.8, - "learning_rate": 8.25648269155457e-06, - "loss": 6.646, + "epoch": 0.96, + "learning_rate": 9.770641707193463e-07, + "loss": 2.8768, "step": 183500 }, { - "epoch": 0.81, - "learning_rate": 8.250982359334504e-06, - "loss": 6.6308, + "epoch": 0.97, + "learning_rate": 9.501062143481028e-07, + "loss": 2.8877, "step": 184000 }, { - "epoch": 0.81, - "learning_rate": 8.24548202711444e-06, - "loss": 6.6452, + "epoch": 0.97, + "learning_rate": 9.231482579768594e-07, + "loss": 2.8839, "step": 184500 }, { - "epoch": 0.81, - "learning_rate": 8.239981694894372e-06, - "loss": 6.6276, + "epoch": 0.97, + "learning_rate": 8.96190301605616e-07, + "loss": 2.8889, "step": 185000 }, { - "epoch": 0.81, - "learning_rate": 8.234481362674306e-06, - "loss": 6.6282, + "epoch": 0.97, + "learning_rate": 8.692323452343726e-07, + "loss": 2.8838, "step": 185500 }, { - "epoch": 0.81, - "learning_rate": 8.228981030454241e-06, - "loss": 6.6154, + "epoch": 0.98, + "learning_rate": 8.42274388863129e-07, + "loss": 2.8698, "step": 186000 }, { - "epoch": 0.82, - "learning_rate": 8.223480698234173e-06, - "loss": 6.6159, + "epoch": 0.98, + "learning_rate": 8.153164324918856e-07, + "loss": 2.8761, "step": 186500 }, { - "epoch": 0.82, - "learning_rate": 8.217980366014107e-06, - "loss": 6.6133, + "epoch": 0.98, + "learning_rate": 7.883584761206422e-07, + "loss": 2.8769, "step": 187000 }, { - "epoch": 0.82, - "learning_rate": 8.212480033794041e-06, - "loss": 6.6075, + "epoch": 0.98, + "learning_rate": 7.614005197493989e-07, + "loss": 2.8713, "step": 187500 }, { - "epoch": 0.82, - "learning_rate": 8.206979701573975e-06, - "loss": 6.5915, + "epoch": 0.99, + "learning_rate": 7.344425633781555e-07, + "loss": 2.8764, "step": 188000 }, { - "epoch": 0.82, - "learning_rate": 8.20147936935391e-06, - "loss": 6.5642, + "epoch": 0.99, + "learning_rate": 7.074846070069121e-07, + "loss": 2.8854, "step": 188500 }, { - "epoch": 0.83, - "learning_rate": 8.195979037133843e-06, - "loss": 6.5548, + "epoch": 0.99, + "learning_rate": 6.805266506356687e-07, + "loss": 2.8777, "step": 189000 }, { - "epoch": 0.83, - "learning_rate": 8.190478704913779e-06, - "loss": 6.5503, + "epoch": 0.99, + "learning_rate": 6.535686942644253e-07, + "loss": 2.8941, "step": 189500 }, { - "epoch": 0.83, - "learning_rate": 8.184978372693711e-06, - "loss": 6.5392, + "epoch": 1.0, + "learning_rate": 6.266107378931818e-07, + "loss": 2.8917, "step": 190000 }, - { - "epoch": 0.83, - "learning_rate": 8.179478040473645e-06, - "loss": 6.5297, - "step": 190500 - }, - { - "epoch": 0.84, - "learning_rate": 8.173977708253579e-06, - "loss": 6.5266, - "step": 191000 - }, - { - "epoch": 0.84, - "learning_rate": 8.168477376033513e-06, - "loss": 6.5272, - "step": 191500 - }, - { - "epoch": 0.84, - "learning_rate": 8.162977043813447e-06, - "loss": 6.5002, - "step": 192000 - }, - { - "epoch": 0.84, - "learning_rate": 8.15747671159338e-06, - "loss": 6.4881, - "step": 192500 - }, - { - "epoch": 0.84, - "learning_rate": 8.151976379373315e-06, - "loss": 6.4877, - "step": 193000 - }, - { - "epoch": 0.85, - "learning_rate": 8.146476047153248e-06, - "loss": 6.4956, - "step": 193500 - }, - { - "epoch": 0.85, - "learning_rate": 8.140975714933182e-06, - "loss": 6.4729, - "step": 194000 - }, - { - "epoch": 0.85, - "learning_rate": 8.135475382713116e-06, - "loss": 6.4722, - "step": 194500 - }, - { - "epoch": 0.85, - "learning_rate": 8.12997505049305e-06, - "loss": 6.4524, - "step": 195000 - }, - { - "epoch": 0.86, - "learning_rate": 8.124474718272984e-06, - "loss": 6.4595, - "step": 195500 - }, - { - "epoch": 0.86, - "learning_rate": 8.118974386052918e-06, - "loss": 6.4414, - "step": 196000 - }, - { - "epoch": 0.86, - "learning_rate": 8.113474053832852e-06, - "loss": 6.4218, - "step": 196500 - }, - { - "epoch": 0.86, - "learning_rate": 8.107973721612786e-06, - "loss": 6.4155, - "step": 197000 - }, - { - "epoch": 0.86, - "learning_rate": 8.10247338939272e-06, - "loss": 6.4034, - "step": 197500 - }, - { - "epoch": 0.87, - "learning_rate": 8.096973057172654e-06, - "loss": 6.4217, - "step": 198000 - }, - { - "epoch": 0.87, - "learning_rate": 8.091472724952588e-06, - "loss": 6.3908, - "step": 198500 - }, - { - "epoch": 0.87, - "learning_rate": 8.085972392732522e-06, - "loss": 6.3917, - "step": 199000 - }, - { - "epoch": 0.87, - "learning_rate": 8.080472060512456e-06, - "loss": 6.3961, - "step": 199500 - }, - { - "epoch": 0.88, - "learning_rate": 8.07497172829239e-06, - "loss": 6.3781, - "step": 200000 - }, - { - "epoch": 0.88, - "learning_rate": 8.069471396072323e-06, - "loss": 6.3586, - "step": 200500 - }, - { - "epoch": 0.88, - "learning_rate": 8.063971063852257e-06, - "loss": 6.3485, - "step": 201000 - }, - { - "epoch": 0.88, - "learning_rate": 8.058470731632191e-06, - "loss": 6.3428, - "step": 201500 - }, - { - "epoch": 0.88, - "learning_rate": 8.052970399412125e-06, - "loss": 6.34, - "step": 202000 - }, - { - "epoch": 0.89, - "learning_rate": 8.04747006719206e-06, - "loss": 6.3431, - "step": 202500 - }, - { - "epoch": 0.89, - "learning_rate": 8.041969734971993e-06, - "loss": 6.3253, - "step": 203000 - }, - { - "epoch": 0.89, - "learning_rate": 8.036469402751927e-06, - "loss": 6.3391, - "step": 203500 - }, - { - "epoch": 0.89, - "learning_rate": 8.030969070531861e-06, - "loss": 6.3018, - "step": 204000 - }, - { - "epoch": 0.89, - "learning_rate": 8.025468738311795e-06, - "loss": 6.296, - "step": 204500 - }, - { - "epoch": 0.9, - "learning_rate": 8.019968406091729e-06, - "loss": 6.3219, - "step": 205000 - }, - { - "epoch": 0.9, - "learning_rate": 8.014468073871663e-06, - "loss": 6.2961, - "step": 205500 - }, - { - "epoch": 0.9, - "learning_rate": 8.008967741651597e-06, - "loss": 6.2849, - "step": 206000 - }, - { - "epoch": 0.9, - "learning_rate": 8.00346740943153e-06, - "loss": 6.3006, - "step": 206500 - }, - { - "epoch": 0.91, - "learning_rate": 7.997967077211465e-06, - "loss": 6.268, - "step": 207000 - }, - { - "epoch": 0.91, - "learning_rate": 7.992466744991398e-06, - "loss": 6.2677, - "step": 207500 - }, - { - "epoch": 0.91, - "learning_rate": 7.986966412771332e-06, - "loss": 6.2703, - "step": 208000 - }, - { - "epoch": 0.91, - "learning_rate": 7.981466080551266e-06, - "loss": 6.2545, - "step": 208500 - }, - { - "epoch": 0.91, - "learning_rate": 7.9759657483312e-06, - "loss": 6.251, - "step": 209000 - }, - { - "epoch": 0.92, - "learning_rate": 7.970465416111134e-06, - "loss": 6.245, - "step": 209500 - }, - { - "epoch": 0.92, - "learning_rate": 7.964965083891068e-06, - "loss": 6.2186, - "step": 210000 - }, - { - "epoch": 0.92, - "learning_rate": 7.959464751671e-06, - "loss": 6.2041, - "step": 210500 - }, - { - "epoch": 0.92, - "learning_rate": 7.953964419450936e-06, - "loss": 6.203, - "step": 211000 - }, - { - "epoch": 0.93, - "learning_rate": 7.94846408723087e-06, - "loss": 6.2025, - "step": 211500 - }, - { - "epoch": 0.93, - "learning_rate": 7.942963755010802e-06, - "loss": 6.2099, - "step": 212000 - }, - { - "epoch": 0.93, - "learning_rate": 7.937463422790738e-06, - "loss": 6.1776, - "step": 212500 - }, - { - "epoch": 0.93, - "learning_rate": 7.931963090570672e-06, - "loss": 6.1721, - "step": 213000 - }, - { - "epoch": 0.93, - "learning_rate": 7.926462758350606e-06, - "loss": 6.1524, - "step": 213500 - }, - { - "epoch": 0.94, - "learning_rate": 7.920962426130538e-06, - "loss": 6.1663, - "step": 214000 - }, - { - "epoch": 0.94, - "learning_rate": 7.915462093910473e-06, - "loss": 6.1561, - "step": 214500 - }, - { - "epoch": 0.94, - "learning_rate": 7.909961761690407e-06, - "loss": 6.1268, - "step": 215000 - }, - { - "epoch": 0.94, - "learning_rate": 7.90446142947034e-06, - "loss": 6.1254, - "step": 215500 - }, - { - "epoch": 0.95, - "learning_rate": 7.898961097250275e-06, - "loss": 6.1022, - "step": 216000 - }, - { - "epoch": 0.95, - "learning_rate": 7.89346076503021e-06, - "loss": 6.1058, - "step": 216500 - }, - { - "epoch": 0.95, - "learning_rate": 7.887960432810141e-06, - "loss": 6.0854, - "step": 217000 - }, - { - "epoch": 0.95, - "learning_rate": 7.882460100590075e-06, - "loss": 6.0492, - "step": 217500 - }, - { - "epoch": 0.95, - "learning_rate": 7.876959768370011e-06, - "loss": 6.0561, - "step": 218000 - }, - { - "epoch": 0.96, - "learning_rate": 7.871459436149945e-06, - "loss": 6.0556, - "step": 218500 - }, - { - "epoch": 0.96, - "learning_rate": 7.865959103929877e-06, - "loss": 6.0144, - "step": 219000 - }, - { - "epoch": 0.96, - "learning_rate": 7.860458771709813e-06, - "loss": 5.9812, - "step": 219500 - }, - { - "epoch": 0.96, - "learning_rate": 7.854958439489747e-06, - "loss": 5.9995, - "step": 220000 - }, - { - "epoch": 0.96, - "learning_rate": 7.849458107269679e-06, - "loss": 5.963, - "step": 220500 - }, - { - "epoch": 0.97, - "learning_rate": 7.843957775049613e-06, - "loss": 5.963, - "step": 221000 - }, - { - "epoch": 0.97, - "learning_rate": 7.838457442829548e-06, - "loss": 5.9367, - "step": 221500 - }, - { - "epoch": 0.97, - "learning_rate": 7.83295711060948e-06, - "loss": 5.9004, - "step": 222000 - }, - { - "epoch": 0.97, - "learning_rate": 7.827456778389415e-06, - "loss": 5.8854, - "step": 222500 - }, - { - "epoch": 0.98, - "learning_rate": 7.82195644616935e-06, - "loss": 5.8579, - "step": 223000 - }, - { - "epoch": 0.98, - "learning_rate": 7.816456113949284e-06, - "loss": 5.8545, - "step": 223500 - }, - { - "epoch": 0.98, - "learning_rate": 7.810955781729216e-06, - "loss": 5.8517, - "step": 224000 - }, - { - "epoch": 0.98, - "learning_rate": 7.80545544950915e-06, - "loss": 5.8351, - "step": 224500 - }, - { - "epoch": 0.98, - "learning_rate": 7.799955117289086e-06, - "loss": 5.821, - "step": 225000 - }, - { - "epoch": 0.99, - "learning_rate": 7.794454785069018e-06, - "loss": 5.7983, - "step": 225500 - }, - { - "epoch": 0.99, - "learning_rate": 7.788954452848952e-06, - "loss": 5.7687, - "step": 226000 - }, - { - "epoch": 0.99, - "learning_rate": 7.783454120628888e-06, - "loss": 5.7821, - "step": 226500 - }, - { - "epoch": 0.99, - "learning_rate": 7.77795378840882e-06, - "loss": 5.7513, - "step": 227000 - }, - { - "epoch": 1.0, - "learning_rate": 7.772453456188754e-06, - "loss": 5.7364, - "step": 227500 - }, - { - "epoch": 1.0, - "learning_rate": 7.766953123968688e-06, - "loss": 5.7413, - "step": 228000 - }, - { - "epoch": 1.0, - "learning_rate": 7.761452791748623e-06, - "loss": 5.7348, - "step": 228500 - }, - { - "epoch": 1.0, - "learning_rate": 7.755952459528556e-06, - "loss": 5.6842, - "step": 229000 - }, { "epoch": 1.0, - "learning_rate": 7.75045212730849e-06, - "loss": 5.696, - "step": 229500 - }, - { - "epoch": 1.01, - "learning_rate": 7.744951795088424e-06, - "loss": 5.6709, - "step": 230000 - }, - { - "epoch": 1.01, - "learning_rate": 7.739451462868358e-06, - "loss": 5.6695, - "step": 230500 - }, - { - "epoch": 1.01, - "learning_rate": 7.733951130648291e-06, - "loss": 5.672, - "step": 231000 - }, - { - "epoch": 1.01, - "learning_rate": 7.728450798428225e-06, - "loss": 5.6584, - "step": 231500 - }, - { - "epoch": 1.02, - "learning_rate": 7.72295046620816e-06, - "loss": 5.6375, - "step": 232000 - }, - { - "epoch": 1.02, - "learning_rate": 7.717450133988093e-06, - "loss": 5.629, - "step": 232500 - }, - { - "epoch": 1.02, - "learning_rate": 7.711949801768027e-06, - "loss": 5.5977, - "step": 233000 - }, - { - "epoch": 1.02, - "learning_rate": 7.706449469547961e-06, - "loss": 5.6079, - "step": 233500 - }, - { - "epoch": 1.02, - "learning_rate": 7.700949137327895e-06, - "loss": 5.5887, - "step": 234000 - }, - { - "epoch": 1.03, - "learning_rate": 7.695448805107829e-06, - "loss": 5.5741, - "step": 234500 - }, - { - "epoch": 1.03, - "learning_rate": 7.689948472887763e-06, - "loss": 5.5803, - "step": 235000 - }, - { - "epoch": 1.03, - "learning_rate": 7.684448140667697e-06, - "loss": 5.583, - "step": 235500 - }, - { - "epoch": 1.03, - "learning_rate": 7.67894780844763e-06, - "loss": 5.5438, - "step": 236000 - }, - { - "epoch": 1.03, - "learning_rate": 7.673447476227565e-06, - "loss": 5.5482, - "step": 236500 - }, - { - "epoch": 1.04, - "learning_rate": 7.667947144007499e-06, - "loss": 5.5311, - "step": 237000 - }, - { - "epoch": 1.04, - "learning_rate": 7.662446811787433e-06, - "loss": 5.5251, - "step": 237500 - }, - { - "epoch": 1.04, - "learning_rate": 7.656946479567366e-06, - "loss": 5.4892, - "step": 238000 - }, - { - "epoch": 1.04, - "learning_rate": 7.6514461473473e-06, - "loss": 5.5004, - "step": 238500 - }, - { - "epoch": 1.05, - "learning_rate": 7.645945815127234e-06, - "loss": 5.4966, - "step": 239000 - }, - { - "epoch": 1.05, - "learning_rate": 7.640445482907168e-06, - "loss": 5.4785, - "step": 239500 - }, - { - "epoch": 1.05, - "learning_rate": 7.634945150687102e-06, - "loss": 5.466, - "step": 240000 - }, - { - "epoch": 1.05, - "learning_rate": 7.629444818467036e-06, - "loss": 5.4547, - "step": 240500 - }, - { - "epoch": 1.05, - "learning_rate": 7.62394448624697e-06, - "loss": 5.4556, - "step": 241000 - }, - { - "epoch": 1.06, - "learning_rate": 7.618444154026904e-06, - "loss": 5.4539, - "step": 241500 - }, - { - "epoch": 1.06, - "learning_rate": 7.612943821806837e-06, - "loss": 5.4244, - "step": 242000 - }, - { - "epoch": 1.06, - "learning_rate": 7.607443489586772e-06, - "loss": 5.4353, - "step": 242500 - }, - { - "epoch": 1.06, - "learning_rate": 7.601943157366706e-06, - "loss": 5.4283, - "step": 243000 - }, - { - "epoch": 1.07, - "learning_rate": 7.59644282514664e-06, - "loss": 5.4089, - "step": 243500 - }, - { - "epoch": 1.07, - "learning_rate": 7.590942492926573e-06, - "loss": 5.4123, - "step": 244000 - }, - { - "epoch": 1.07, - "learning_rate": 7.5854421607065075e-06, - "loss": 5.3944, - "step": 244500 - }, - { - "epoch": 1.07, - "learning_rate": 7.5799418284864414e-06, - "loss": 5.3855, - "step": 245000 - }, - { - "epoch": 1.07, - "learning_rate": 7.5744414962663745e-06, - "loss": 5.3802, - "step": 245500 - }, - { - "epoch": 1.08, - "learning_rate": 7.568941164046309e-06, - "loss": 5.3804, - "step": 246000 - }, - { - "epoch": 1.08, - "learning_rate": 7.563440831826243e-06, - "loss": 5.3597, - "step": 246500 - }, - { - "epoch": 1.08, - "learning_rate": 7.557940499606176e-06, - "loss": 5.3678, - "step": 247000 - }, - { - "epoch": 1.08, - "learning_rate": 7.55244016738611e-06, - "loss": 5.3537, - "step": 247500 - }, - { - "epoch": 1.09, - "learning_rate": 7.546939835166045e-06, - "loss": 5.3499, - "step": 248000 - }, - { - "epoch": 1.09, - "learning_rate": 7.541439502945979e-06, - "loss": 5.3228, - "step": 248500 - }, - { - "epoch": 1.09, - "learning_rate": 7.535939170725912e-06, - "loss": 5.3057, - "step": 249000 - }, - { - "epoch": 1.09, - "learning_rate": 7.530438838505847e-06, - "loss": 5.3239, - "step": 249500 - }, - { - "epoch": 1.09, - "learning_rate": 7.524938506285781e-06, - "loss": 5.3015, - "step": 250000 - }, - { - "epoch": 1.1, - "learning_rate": 7.519438174065714e-06, - "loss": 5.307, - "step": 250500 - }, - { - "epoch": 1.1, - "learning_rate": 7.513937841845648e-06, - "loss": 5.2976, - "step": 251000 - }, - { - "epoch": 1.1, - "learning_rate": 7.5084375096255825e-06, - "loss": 5.2911, - "step": 251500 - }, - { - "epoch": 1.1, - "learning_rate": 7.502937177405516e-06, - "loss": 5.2935, - "step": 252000 - }, - { - "epoch": 1.1, - "learning_rate": 7.4974368451854495e-06, - "loss": 5.2853, - "step": 252500 - }, - { - "epoch": 1.11, - "learning_rate": 7.491936512965384e-06, - "loss": 5.269, - "step": 253000 - }, - { - "epoch": 1.11, - "learning_rate": 7.486436180745318e-06, - "loss": 5.2803, - "step": 253500 - }, - { - "epoch": 1.11, - "learning_rate": 7.480935848525251e-06, - "loss": 5.2618, - "step": 254000 - }, - { - "epoch": 1.11, - "learning_rate": 7.475435516305185e-06, - "loss": 5.2511, - "step": 254500 - }, - { - "epoch": 1.12, - "learning_rate": 7.46993518408512e-06, - "loss": 5.2339, - "step": 255000 - }, - { - "epoch": 1.12, - "learning_rate": 7.464434851865053e-06, - "loss": 5.2311, - "step": 255500 - }, - { - "epoch": 1.12, - "learning_rate": 7.458934519644987e-06, - "loss": 5.2512, - "step": 256000 - }, - { - "epoch": 1.12, - "learning_rate": 7.45343418742492e-06, - "loss": 5.1977, - "step": 256500 - }, - { - "epoch": 1.12, - "learning_rate": 7.447933855204855e-06, - "loss": 5.23, - "step": 257000 - }, - { - "epoch": 1.13, - "learning_rate": 7.442433522984789e-06, - "loss": 5.1894, - "step": 257500 - }, - { - "epoch": 1.13, - "learning_rate": 7.436933190764723e-06, - "loss": 5.2134, - "step": 258000 - }, - { - "epoch": 1.13, - "learning_rate": 7.4314328585446575e-06, - "loss": 5.1867, - "step": 258500 - }, - { - "epoch": 1.13, - "learning_rate": 7.425932526324591e-06, - "loss": 5.2069, - "step": 259000 - }, - { - "epoch": 1.14, - "learning_rate": 7.4204321941045245e-06, - "loss": 5.1914, - "step": 259500 - }, - { - "epoch": 1.14, - "learning_rate": 7.414931861884458e-06, - "loss": 5.1679, - "step": 260000 - }, - { - "epoch": 1.14, - "learning_rate": 7.409431529664392e-06, - "loss": 5.1823, - "step": 260500 - }, - { - "epoch": 1.14, - "learning_rate": 7.403931197444326e-06, - "loss": 5.1772, - "step": 261000 - }, - { - "epoch": 1.14, - "learning_rate": 7.398430865224259e-06, - "loss": 5.1581, - "step": 261500 - }, - { - "epoch": 1.15, - "learning_rate": 7.392930533004194e-06, - "loss": 5.153, - "step": 262000 - }, - { - "epoch": 1.15, - "learning_rate": 7.387430200784128e-06, - "loss": 5.1411, - "step": 262500 - }, - { - "epoch": 1.15, - "learning_rate": 7.381929868564062e-06, - "loss": 5.1442, - "step": 263000 - }, - { - "epoch": 1.15, - "learning_rate": 7.376429536343995e-06, - "loss": 5.1217, - "step": 263500 - }, - { - "epoch": 1.16, - "learning_rate": 7.37092920412393e-06, - "loss": 5.1317, - "step": 264000 - }, - { - "epoch": 1.16, - "learning_rate": 7.365428871903864e-06, - "loss": 5.1199, - "step": 264500 - }, - { - "epoch": 1.16, - "learning_rate": 7.359928539683797e-06, - "loss": 5.1086, - "step": 265000 - }, - { - "epoch": 1.16, - "learning_rate": 7.354428207463732e-06, - "loss": 5.1164, - "step": 265500 - }, - { - "epoch": 1.16, - "learning_rate": 7.348927875243666e-06, - "loss": 5.0998, - "step": 266000 - }, - { - "epoch": 1.17, - "learning_rate": 7.343427543023599e-06, - "loss": 5.0962, - "step": 266500 - }, - { - "epoch": 1.17, - "learning_rate": 7.337927210803533e-06, - "loss": 5.0752, - "step": 267000 - }, - { - "epoch": 1.17, - "learning_rate": 7.332426878583467e-06, - "loss": 5.0783, - "step": 267500 - }, - { - "epoch": 1.17, - "learning_rate": 7.326926546363401e-06, - "loss": 5.0755, - "step": 268000 - }, - { - "epoch": 1.18, - "learning_rate": 7.321426214143334e-06, - "loss": 5.0896, - "step": 268500 - }, - { - "epoch": 1.18, - "learning_rate": 7.315925881923269e-06, - "loss": 5.0868, - "step": 269000 - }, - { - "epoch": 1.18, - "learning_rate": 7.310425549703203e-06, - "loss": 5.0485, - "step": 269500 - }, - { - "epoch": 1.18, - "learning_rate": 7.304925217483136e-06, - "loss": 5.0642, - "step": 270000 - }, - { - "epoch": 1.18, - "learning_rate": 7.29942488526307e-06, - "loss": 5.0527, - "step": 270500 - }, - { - "epoch": 1.19, - "learning_rate": 7.293924553043005e-06, - "loss": 5.0557, - "step": 271000 - }, - { - "epoch": 1.19, - "learning_rate": 7.288424220822938e-06, - "loss": 5.0394, - "step": 271500 - }, - { - "epoch": 1.19, - "learning_rate": 7.282923888602872e-06, - "loss": 5.0433, - "step": 272000 - }, - { - "epoch": 1.19, - "learning_rate": 7.277423556382807e-06, - "loss": 5.0491, - "step": 272500 - }, - { - "epoch": 1.19, - "learning_rate": 7.271923224162741e-06, - "loss": 5.0097, - "step": 273000 - }, - { - "epoch": 1.2, - "learning_rate": 7.266422891942674e-06, - "loss": 5.0197, - "step": 273500 - }, - { - "epoch": 1.2, - "learning_rate": 7.260922559722608e-06, - "loss": 5.0195, - "step": 274000 - }, - { - "epoch": 1.2, - "learning_rate": 7.255422227502542e-06, - "loss": 5.0133, - "step": 274500 - }, - { - "epoch": 1.2, - "learning_rate": 7.2499218952824755e-06, - "loss": 5.0104, - "step": 275000 - }, - { - "epoch": 1.21, - "learning_rate": 7.244421563062409e-06, - "loss": 5.0117, - "step": 275500 - }, - { - "epoch": 1.21, - "learning_rate": 7.238921230842344e-06, - "loss": 4.9886, - "step": 276000 - }, - { - "epoch": 1.21, - "learning_rate": 7.233420898622277e-06, - "loss": 4.9858, - "step": 276500 - }, - { - "epoch": 1.21, - "learning_rate": 7.227920566402211e-06, - "loss": 4.9469, - "step": 277000 - }, - { - "epoch": 1.21, - "learning_rate": 7.222420234182145e-06, - "loss": 4.9754, - "step": 277500 - }, - { - "epoch": 1.22, - "learning_rate": 7.21691990196208e-06, - "loss": 4.9809, - "step": 278000 - }, - { - "epoch": 1.22, - "learning_rate": 7.211419569742013e-06, - "loss": 4.9723, - "step": 278500 - }, - { - "epoch": 1.22, - "learning_rate": 7.205919237521947e-06, - "loss": 4.9464, - "step": 279000 - }, - { - "epoch": 1.22, - "learning_rate": 7.200418905301882e-06, - "loss": 4.9646, - "step": 279500 - }, - { - "epoch": 1.23, - "learning_rate": 7.194918573081815e-06, - "loss": 4.9474, - "step": 280000 - }, - { - "epoch": 1.23, - "learning_rate": 7.189418240861749e-06, - "loss": 4.9361, - "step": 280500 - }, - { - "epoch": 1.23, - "learning_rate": 7.183917908641682e-06, - "loss": 4.9507, - "step": 281000 - }, - { - "epoch": 1.23, - "learning_rate": 7.1784175764216166e-06, - "loss": 4.9352, - "step": 281500 - }, - { - "epoch": 1.23, - "learning_rate": 7.1729172442015505e-06, - "loss": 4.9218, - "step": 282000 - }, - { - "epoch": 1.24, - "learning_rate": 7.167416911981484e-06, - "loss": 4.9262, - "step": 282500 - }, - { - "epoch": 1.24, - "learning_rate": 7.1619165797614175e-06, - "loss": 4.927, - "step": 283000 - }, - { - "epoch": 1.24, - "learning_rate": 7.156416247541352e-06, - "loss": 4.9142, - "step": 283500 - }, - { - "epoch": 1.24, - "learning_rate": 7.150915915321286e-06, - "loss": 4.925, - "step": 284000 - }, - { - "epoch": 1.25, - "learning_rate": 7.145415583101219e-06, - "loss": 4.903, - "step": 284500 - }, - { - "epoch": 1.25, - "learning_rate": 7.139915250881154e-06, - "loss": 4.9167, - "step": 285000 - }, - { - "epoch": 1.25, - "learning_rate": 7.134414918661088e-06, - "loss": 4.8977, - "step": 285500 - }, - { - "epoch": 1.25, - "learning_rate": 7.128914586441021e-06, - "loss": 4.8975, - "step": 286000 - }, - { - "epoch": 1.25, - "learning_rate": 7.123414254220955e-06, - "loss": 4.9021, - "step": 286500 - }, - { - "epoch": 1.26, - "learning_rate": 7.11791392200089e-06, - "loss": 4.8819, - "step": 287000 - }, - { - "epoch": 1.26, - "learning_rate": 7.112413589780824e-06, - "loss": 4.8766, - "step": 287500 - }, - { - "epoch": 1.26, - "learning_rate": 7.106913257560757e-06, - "loss": 4.8668, - "step": 288000 - }, - { - "epoch": 1.26, - "learning_rate": 7.1014129253406916e-06, - "loss": 4.883, - "step": 288500 - }, - { - "epoch": 1.26, - "learning_rate": 7.0959125931206255e-06, - "loss": 4.8537, - "step": 289000 - }, - { - "epoch": 1.27, - "learning_rate": 7.0904122609005586e-06, - "loss": 4.8567, - "step": 289500 - }, - { - "epoch": 1.27, - "learning_rate": 7.0849119286804925e-06, - "loss": 4.862, - "step": 290000 - }, - { - "epoch": 1.27, - "learning_rate": 7.079411596460427e-06, - "loss": 4.8569, - "step": 290500 - }, - { - "epoch": 1.27, - "learning_rate": 7.07391126424036e-06, - "loss": 4.8463, - "step": 291000 - }, - { - "epoch": 1.28, - "learning_rate": 7.068410932020294e-06, - "loss": 4.8474, - "step": 291500 - }, - { - "epoch": 1.28, - "learning_rate": 7.062910599800229e-06, - "loss": 4.8524, - "step": 292000 - }, - { - "epoch": 1.28, - "learning_rate": 7.057410267580163e-06, - "loss": 4.8418, - "step": 292500 - }, - { - "epoch": 1.28, - "learning_rate": 7.051909935360096e-06, - "loss": 4.8377, - "step": 293000 - }, - { - "epoch": 1.28, - "learning_rate": 7.04640960314003e-06, - "loss": 4.8286, - "step": 293500 - }, - { - "epoch": 1.29, - "learning_rate": 7.040909270919965e-06, - "loss": 4.8111, - "step": 294000 - }, - { - "epoch": 1.29, - "learning_rate": 7.035408938699898e-06, - "loss": 4.827, - "step": 294500 - }, - { - "epoch": 1.29, - "learning_rate": 7.029908606479832e-06, - "loss": 4.8078, - "step": 295000 - }, - { - "epoch": 1.29, - "learning_rate": 7.0244082742597666e-06, - "loss": 4.7961, - "step": 295500 - }, - { - "epoch": 1.3, - "learning_rate": 7.0189079420397e-06, - "loss": 4.7923, - "step": 296000 - }, - { - "epoch": 1.3, - "learning_rate": 7.0134076098196336e-06, - "loss": 4.7849, - "step": 296500 - }, - { - "epoch": 1.3, - "learning_rate": 7.0079072775995675e-06, - "loss": 4.7861, - "step": 297000 - }, - { - "epoch": 1.3, - "learning_rate": 7.0024069453795014e-06, - "loss": 4.7816, - "step": 297500 - }, - { - "epoch": 1.3, - "learning_rate": 6.996906613159435e-06, - "loss": 4.7775, - "step": 298000 - }, - { - "epoch": 1.31, - "learning_rate": 6.991406280939369e-06, - "loss": 4.7626, - "step": 298500 - }, - { - "epoch": 1.31, - "learning_rate": 6.985905948719304e-06, - "loss": 4.7622, - "step": 299000 - }, - { - "epoch": 1.31, - "learning_rate": 6.980405616499237e-06, - "loss": 4.7791, - "step": 299500 - }, - { - "epoch": 1.31, - "learning_rate": 6.974905284279171e-06, - "loss": 4.7738, - "step": 300000 - }, - { - "epoch": 1.32, - "learning_rate": 6.969404952059104e-06, - "loss": 4.7706, - "step": 300500 - }, - { - "epoch": 1.32, - "learning_rate": 6.963904619839039e-06, - "loss": 4.7621, - "step": 301000 - }, - { - "epoch": 1.32, - "learning_rate": 6.958404287618973e-06, - "loss": 4.7464, - "step": 301500 - }, - { - "epoch": 1.32, - "learning_rate": 6.952903955398907e-06, - "loss": 4.7444, - "step": 302000 - }, - { - "epoch": 1.32, - "learning_rate": 6.947403623178841e-06, - "loss": 4.7462, - "step": 302500 - }, - { - "epoch": 1.33, - "learning_rate": 6.941903290958775e-06, - "loss": 4.743, - "step": 303000 - }, - { - "epoch": 1.33, - "learning_rate": 6.9364029587387086e-06, - "loss": 4.733, - "step": 303500 - }, - { - "epoch": 1.33, - "learning_rate": 6.930902626518642e-06, - "loss": 4.7404, - "step": 304000 - }, - { - "epoch": 1.33, - "learning_rate": 6.9254022942985764e-06, - "loss": 4.7168, - "step": 304500 - }, - { - "epoch": 1.33, - "learning_rate": 6.91990196207851e-06, - "loss": 4.725, - "step": 305000 - }, - { - "epoch": 1.34, - "learning_rate": 6.9144016298584434e-06, - "loss": 4.7023, - "step": 305500 - }, - { - "epoch": 1.34, - "learning_rate": 6.908901297638378e-06, - "loss": 4.724, - "step": 306000 - }, - { - "epoch": 1.34, - "learning_rate": 6.903400965418312e-06, - "loss": 4.6999, - "step": 306500 - }, - { - "epoch": 1.34, - "learning_rate": 6.897900633198246e-06, - "loss": 4.7117, - "step": 307000 - }, - { - "epoch": 1.35, - "learning_rate": 6.892400300978179e-06, - "loss": 4.6932, - "step": 307500 - }, - { - "epoch": 1.35, - "learning_rate": 6.886899968758114e-06, - "loss": 4.7022, - "step": 308000 - }, - { - "epoch": 1.35, - "learning_rate": 6.881399636538048e-06, - "loss": 4.6799, - "step": 308500 - }, - { - "epoch": 1.35, - "learning_rate": 6.875899304317981e-06, - "loss": 4.6824, - "step": 309000 - }, - { - "epoch": 1.35, - "learning_rate": 6.870398972097915e-06, - "loss": 4.6786, - "step": 309500 - }, - { - "epoch": 1.36, - "learning_rate": 6.86489863987785e-06, - "loss": 4.6678, - "step": 310000 - }, - { - "epoch": 1.36, - "learning_rate": 6.859398307657783e-06, - "loss": 4.6949, - "step": 310500 - }, - { - "epoch": 1.36, - "learning_rate": 6.853897975437717e-06, - "loss": 4.6843, - "step": 311000 - }, - { - "epoch": 1.36, - "learning_rate": 6.8483976432176514e-06, - "loss": 4.6726, - "step": 311500 - }, - { - "epoch": 1.37, - "learning_rate": 6.8428973109975845e-06, - "loss": 4.6734, - "step": 312000 - }, - { - "epoch": 1.37, - "learning_rate": 6.8373969787775184e-06, - "loss": 4.6867, - "step": 312500 - }, - { - "epoch": 1.37, - "learning_rate": 6.831896646557452e-06, - "loss": 4.6817, - "step": 313000 - }, - { - "epoch": 1.37, - "learning_rate": 6.826396314337387e-06, - "loss": 4.6662, - "step": 313500 - }, - { - "epoch": 1.37, - "learning_rate": 6.82089598211732e-06, - "loss": 4.6444, - "step": 314000 - }, - { - "epoch": 1.38, - "learning_rate": 6.815395649897254e-06, - "loss": 4.6522, - "step": 314500 - }, - { - "epoch": 1.38, - "learning_rate": 6.809895317677189e-06, - "loss": 4.6509, - "step": 315000 - }, - { - "epoch": 1.38, - "learning_rate": 6.804394985457122e-06, - "loss": 4.656, - "step": 315500 - }, - { - "epoch": 1.38, - "learning_rate": 6.798894653237056e-06, - "loss": 4.6498, - "step": 316000 - }, - { - "epoch": 1.39, - "learning_rate": 6.79339432101699e-06, - "loss": 4.6359, - "step": 316500 - }, - { - "epoch": 1.39, - "learning_rate": 6.787893988796924e-06, - "loss": 4.6461, - "step": 317000 - }, - { - "epoch": 1.39, - "learning_rate": 6.782393656576858e-06, - "loss": 4.6279, - "step": 317500 - }, - { - "epoch": 1.39, - "learning_rate": 6.776893324356792e-06, - "loss": 4.6282, - "step": 318000 - }, - { - "epoch": 1.39, - "learning_rate": 6.7713929921367264e-06, - "loss": 4.6195, - "step": 318500 - }, - { - "epoch": 1.4, - "learning_rate": 6.7658926599166595e-06, - "loss": 4.6142, - "step": 319000 - }, - { - "epoch": 1.4, - "learning_rate": 6.7603923276965934e-06, - "loss": 4.6072, - "step": 319500 - }, - { - "epoch": 1.4, - "learning_rate": 6.7548919954765265e-06, - "loss": 4.6094, - "step": 320000 - }, - { - "epoch": 1.4, - "learning_rate": 6.749391663256461e-06, - "loss": 4.5979, - "step": 320500 - }, - { - "epoch": 1.4, - "learning_rate": 6.743891331036395e-06, - "loss": 4.6058, - "step": 321000 - }, - { - "epoch": 1.41, - "learning_rate": 6.738390998816329e-06, - "loss": 4.6106, - "step": 321500 - }, - { - "epoch": 1.41, - "learning_rate": 6.732890666596263e-06, - "loss": 4.5854, - "step": 322000 - }, - { - "epoch": 1.41, - "learning_rate": 6.727390334376197e-06, - "loss": 4.5927, - "step": 322500 - }, - { - "epoch": 1.41, - "learning_rate": 6.721890002156131e-06, - "loss": 4.5851, - "step": 323000 - }, - { - "epoch": 1.42, - "learning_rate": 6.716389669936064e-06, - "loss": 4.6067, - "step": 323500 - }, - { - "epoch": 1.42, - "learning_rate": 6.710889337715999e-06, - "loss": 4.5957, - "step": 324000 - }, - { - "epoch": 1.42, - "learning_rate": 6.705389005495933e-06, - "loss": 4.5881, - "step": 324500 - }, - { - "epoch": 1.42, - "learning_rate": 6.699888673275866e-06, - "loss": 4.5913, - "step": 325000 - }, - { - "epoch": 1.42, - "learning_rate": 6.694388341055801e-06, - "loss": 4.5669, - "step": 325500 - }, - { - "epoch": 1.43, - "learning_rate": 6.6888880088357345e-06, - "loss": 4.5722, - "step": 326000 - }, - { - "epoch": 1.43, - "learning_rate": 6.683387676615668e-06, - "loss": 4.5706, - "step": 326500 - }, - { - "epoch": 1.43, - "learning_rate": 6.6778873443956015e-06, - "loss": 4.5651, - "step": 327000 - }, - { - "epoch": 1.43, - "learning_rate": 6.672387012175536e-06, - "loss": 4.5625, - "step": 327500 - }, - { - "epoch": 1.44, - "learning_rate": 6.66688667995547e-06, - "loss": 4.5714, - "step": 328000 - }, - { - "epoch": 1.44, - "learning_rate": 6.661386347735403e-06, - "loss": 4.5521, - "step": 328500 - }, - { - "epoch": 1.44, - "learning_rate": 6.655886015515338e-06, - "loss": 4.5595, - "step": 329000 - }, - { - "epoch": 1.44, - "learning_rate": 6.650385683295272e-06, - "loss": 4.5633, - "step": 329500 - }, - { - "epoch": 1.44, - "learning_rate": 6.644885351075205e-06, - "loss": 4.5584, - "step": 330000 - }, - { - "epoch": 1.45, - "learning_rate": 6.639385018855139e-06, - "loss": 4.5715, - "step": 330500 - }, - { - "epoch": 1.45, - "learning_rate": 6.633884686635074e-06, - "loss": 4.5522, - "step": 331000 - }, - { - "epoch": 1.45, - "learning_rate": 6.628384354415007e-06, - "loss": 4.5238, - "step": 331500 - }, - { - "epoch": 1.45, - "learning_rate": 6.622884022194941e-06, - "loss": 4.5435, - "step": 332000 - }, - { - "epoch": 1.46, - "learning_rate": 6.617383689974876e-06, - "loss": 4.5146, - "step": 332500 - }, - { - "epoch": 1.46, - "learning_rate": 6.6118833577548095e-06, - "loss": 4.5555, - "step": 333000 - }, - { - "epoch": 1.46, - "learning_rate": 6.606383025534743e-06, - "loss": 4.5224, - "step": 333500 - }, - { - "epoch": 1.46, - "learning_rate": 6.6008826933146765e-06, - "loss": 4.5254, - "step": 334000 - }, - { - "epoch": 1.46, - "learning_rate": 6.595382361094611e-06, - "loss": 4.5067, - "step": 334500 - }, - { - "epoch": 1.47, - "learning_rate": 6.589882028874544e-06, - "loss": 4.5127, - "step": 335000 - }, - { - "epoch": 1.47, - "learning_rate": 6.584381696654478e-06, - "loss": 4.5133, - "step": 335500 - }, - { - "epoch": 1.47, - "learning_rate": 6.578881364434412e-06, - "loss": 4.5143, - "step": 336000 - }, - { - "epoch": 1.47, - "learning_rate": 6.573381032214346e-06, - "loss": 4.4947, - "step": 336500 - }, - { - "epoch": 1.47, - "learning_rate": 6.56788069999428e-06, - "loss": 4.4982, - "step": 337000 - }, - { - "epoch": 1.48, - "learning_rate": 6.562380367774214e-06, - "loss": 4.5174, - "step": 337500 - }, - { - "epoch": 1.48, - "learning_rate": 6.556880035554149e-06, - "loss": 4.4794, - "step": 338000 - }, - { - "epoch": 1.48, - "learning_rate": 6.551379703334082e-06, - "loss": 4.5072, - "step": 338500 - }, - { - "epoch": 1.48, - "learning_rate": 6.545879371114016e-06, - "loss": 4.486, - "step": 339000 - }, - { - "epoch": 1.49, - "learning_rate": 6.540379038893949e-06, - "loss": 4.4656, - "step": 339500 - }, - { - "epoch": 1.49, - "learning_rate": 6.534878706673884e-06, - "loss": 4.5007, - "step": 340000 - }, - { - "epoch": 1.49, - "learning_rate": 6.529378374453818e-06, - "loss": 4.4965, - "step": 340500 - }, - { - "epoch": 1.49, - "learning_rate": 6.523878042233751e-06, - "loss": 4.4591, - "step": 341000 - }, - { - "epoch": 1.49, - "learning_rate": 6.5183777100136855e-06, - "loss": 4.4768, - "step": 341500 - }, - { - "epoch": 1.5, - "learning_rate": 6.512877377793619e-06, - "loss": 4.477, - "step": 342000 - }, - { - "epoch": 1.5, - "learning_rate": 6.507377045573553e-06, - "loss": 4.4734, - "step": 342500 - }, - { - "epoch": 1.5, - "learning_rate": 6.501876713353486e-06, - "loss": 4.4715, - "step": 343000 - }, - { - "epoch": 1.5, - "learning_rate": 6.496376381133421e-06, - "loss": 4.4744, - "step": 343500 - }, - { - "epoch": 1.51, - "learning_rate": 6.490876048913355e-06, - "loss": 4.4673, - "step": 344000 - }, - { - "epoch": 1.51, - "learning_rate": 6.485375716693288e-06, - "loss": 4.4569, - "step": 344500 - }, - { - "epoch": 1.51, - "learning_rate": 6.479875384473223e-06, - "loss": 4.4386, - "step": 345000 - }, - { - "epoch": 1.51, - "learning_rate": 6.474375052253157e-06, - "loss": 4.46, - "step": 345500 - }, - { - "epoch": 1.51, - "learning_rate": 6.46887472003309e-06, - "loss": 4.4314, - "step": 346000 - }, - { - "epoch": 1.52, - "learning_rate": 6.463374387813024e-06, - "loss": 4.4401, - "step": 346500 - }, - { - "epoch": 1.52, - "learning_rate": 6.457874055592959e-06, - "loss": 4.4454, - "step": 347000 - }, - { - "epoch": 1.52, - "learning_rate": 6.452373723372893e-06, - "loss": 4.4322, - "step": 347500 - }, - { - "epoch": 1.52, - "learning_rate": 6.446873391152826e-06, - "loss": 4.4496, - "step": 348000 - }, - { - "epoch": 1.53, - "learning_rate": 6.4413730589327605e-06, - "loss": 4.4398, - "step": 348500 - }, - { - "epoch": 1.53, - "learning_rate": 6.435872726712694e-06, - "loss": 4.4377, - "step": 349000 - }, - { - "epoch": 1.53, - "learning_rate": 6.4303723944926275e-06, - "loss": 4.4474, - "step": 349500 - }, - { - "epoch": 1.53, - "learning_rate": 6.424872062272561e-06, - "loss": 4.4255, - "step": 350000 - }, - { - "epoch": 1.53, - "learning_rate": 6.419371730052496e-06, - "loss": 4.4139, - "step": 350500 - }, - { - "epoch": 1.54, - "learning_rate": 6.413871397832429e-06, - "loss": 4.4063, - "step": 351000 - }, - { - "epoch": 1.54, - "learning_rate": 6.408371065612363e-06, - "loss": 4.4205, - "step": 351500 - }, - { - "epoch": 1.54, - "learning_rate": 6.402870733392298e-06, - "loss": 4.4158, - "step": 352000 - }, - { - "epoch": 1.54, - "learning_rate": 6.397370401172232e-06, - "loss": 4.4186, - "step": 352500 - }, - { - "epoch": 1.54, - "learning_rate": 6.391870068952165e-06, - "loss": 4.4078, - "step": 353000 - }, - { - "epoch": 1.55, - "learning_rate": 6.386369736732099e-06, - "loss": 4.3984, - "step": 353500 - }, - { - "epoch": 1.55, - "learning_rate": 6.380869404512034e-06, - "loss": 4.4146, - "step": 354000 - }, - { - "epoch": 1.55, - "learning_rate": 6.375369072291967e-06, - "loss": 4.4225, - "step": 354500 - }, - { - "epoch": 1.55, - "learning_rate": 6.369868740071901e-06, - "loss": 4.4101, - "step": 355000 - }, - { - "epoch": 1.56, - "learning_rate": 6.3643684078518355e-06, - "loss": 4.3992, - "step": 355500 - }, - { - "epoch": 1.56, - "learning_rate": 6.3588680756317686e-06, - "loss": 4.4008, - "step": 356000 - }, - { - "epoch": 1.56, - "learning_rate": 6.3533677434117025e-06, - "loss": 4.4077, - "step": 356500 - }, - { - "epoch": 1.56, - "learning_rate": 6.347867411191636e-06, - "loss": 4.3789, - "step": 357000 - }, - { - "epoch": 1.56, - "learning_rate": 6.342367078971571e-06, - "loss": 4.3914, - "step": 357500 - }, - { - "epoch": 1.57, - "learning_rate": 6.336866746751504e-06, - "loss": 4.3788, - "step": 358000 - }, - { - "epoch": 1.57, - "learning_rate": 6.331366414531438e-06, - "loss": 4.3638, - "step": 358500 - }, - { - "epoch": 1.57, - "learning_rate": 6.325866082311373e-06, - "loss": 4.3766, - "step": 359000 - }, - { - "epoch": 1.57, - "learning_rate": 6.320365750091306e-06, - "loss": 4.3919, - "step": 359500 - }, - { - "epoch": 1.58, - "learning_rate": 6.31486541787124e-06, - "loss": 4.3809, - "step": 360000 - }, - { - "epoch": 1.58, - "learning_rate": 6.309365085651173e-06, - "loss": 4.3673, - "step": 360500 - }, - { - "epoch": 1.58, - "learning_rate": 6.303864753431108e-06, - "loss": 4.3655, - "step": 361000 - }, - { - "epoch": 1.58, - "learning_rate": 6.298364421211042e-06, - "loss": 4.3688, - "step": 361500 - }, - { - "epoch": 1.58, - "learning_rate": 6.292864088990976e-06, - "loss": 4.3605, - "step": 362000 - }, - { - "epoch": 1.59, - "learning_rate": 6.287363756770909e-06, - "loss": 4.3698, - "step": 362500 - }, - { - "epoch": 1.59, - "learning_rate": 6.2818634245508436e-06, - "loss": 4.3801, - "step": 363000 - }, - { - "epoch": 1.59, - "learning_rate": 6.2763630923307775e-06, - "loss": 4.35, - "step": 363500 - }, - { - "epoch": 1.59, - "learning_rate": 6.2708627601107106e-06, - "loss": 4.3594, - "step": 364000 - }, - { - "epoch": 1.6, - "learning_rate": 6.265362427890645e-06, - "loss": 4.3559, - "step": 364500 - }, - { - "epoch": 1.6, - "learning_rate": 6.259862095670579e-06, - "loss": 4.3507, - "step": 365000 - }, - { - "epoch": 1.6, - "learning_rate": 6.254361763450512e-06, - "loss": 4.3555, - "step": 365500 - }, - { - "epoch": 1.6, - "learning_rate": 6.248861431230446e-06, - "loss": 4.3418, - "step": 366000 - }, - { - "epoch": 1.6, - "learning_rate": 6.243361099010381e-06, - "loss": 4.354, - "step": 366500 - }, - { - "epoch": 1.61, - "learning_rate": 6.237860766790315e-06, - "loss": 4.3442, - "step": 367000 - }, - { - "epoch": 1.61, - "learning_rate": 6.232360434570248e-06, - "loss": 4.3484, - "step": 367500 - }, - { - "epoch": 1.61, - "learning_rate": 6.226860102350183e-06, - "loss": 4.3402, - "step": 368000 - }, - { - "epoch": 1.61, - "learning_rate": 6.221359770130117e-06, - "loss": 4.3284, - "step": 368500 - }, - { - "epoch": 1.61, - "learning_rate": 6.21585943791005e-06, - "loss": 4.3331, - "step": 369000 - }, - { - "epoch": 1.62, - "learning_rate": 6.210359105689984e-06, - "loss": 4.3404, - "step": 369500 - }, - { - "epoch": 1.62, - "learning_rate": 6.2048587734699186e-06, - "loss": 4.3379, - "step": 370000 - }, - { - "epoch": 1.62, - "learning_rate": 6.199358441249852e-06, - "loss": 4.3199, - "step": 370500 - }, - { - "epoch": 1.62, - "learning_rate": 6.1938581090297856e-06, - "loss": 4.3336, - "step": 371000 - }, - { - "epoch": 1.63, - "learning_rate": 6.18835777680972e-06, - "loss": 4.3294, - "step": 371500 - }, - { - "epoch": 1.63, - "learning_rate": 6.182857444589654e-06, - "loss": 4.3154, - "step": 372000 - }, - { - "epoch": 1.63, - "learning_rate": 6.177357112369587e-06, - "loss": 4.3203, - "step": 372500 - }, - { - "epoch": 1.63, - "learning_rate": 6.171856780149521e-06, - "loss": 4.3223, - "step": 373000 - }, - { - "epoch": 1.63, - "learning_rate": 6.166356447929456e-06, - "loss": 4.3287, - "step": 373500 - }, - { - "epoch": 1.64, - "learning_rate": 6.160856115709389e-06, - "loss": 4.3136, - "step": 374000 - }, - { - "epoch": 1.64, - "learning_rate": 6.155355783489323e-06, - "loss": 4.3073, - "step": 374500 - }, - { - "epoch": 1.64, - "learning_rate": 6.149855451269258e-06, - "loss": 4.3099, - "step": 375000 - }, - { - "epoch": 1.64, - "learning_rate": 6.144355119049191e-06, - "loss": 4.3075, - "step": 375500 - }, - { - "epoch": 1.65, - "learning_rate": 6.138854786829125e-06, - "loss": 4.315, - "step": 376000 - }, - { - "epoch": 1.65, - "learning_rate": 6.133354454609059e-06, - "loss": 4.3127, - "step": 376500 - }, - { - "epoch": 1.65, - "learning_rate": 6.127854122388993e-06, - "loss": 4.2933, - "step": 377000 - }, - { - "epoch": 1.65, - "learning_rate": 6.122353790168927e-06, - "loss": 4.2924, - "step": 377500 - }, - { - "epoch": 1.65, - "learning_rate": 6.116853457948861e-06, - "loss": 4.2817, - "step": 378000 - }, - { - "epoch": 1.66, - "learning_rate": 6.111353125728795e-06, - "loss": 4.3183, - "step": 378500 - }, - { - "epoch": 1.66, - "learning_rate": 6.1058527935087284e-06, - "loss": 4.3031, - "step": 379000 - }, - { - "epoch": 1.66, - "learning_rate": 6.100352461288662e-06, - "loss": 4.2815, - "step": 379500 - }, - { - "epoch": 1.66, - "learning_rate": 6.0948521290685954e-06, - "loss": 4.2801, - "step": 380000 - }, - { - "epoch": 1.67, - "learning_rate": 6.08935179684853e-06, - "loss": 4.2856, - "step": 380500 - }, - { - "epoch": 1.67, - "learning_rate": 6.083851464628464e-06, - "loss": 4.2868, - "step": 381000 - }, - { - "epoch": 1.67, - "learning_rate": 6.078351132408398e-06, - "loss": 4.2638, - "step": 381500 - }, - { - "epoch": 1.67, - "learning_rate": 6.072850800188332e-06, - "loss": 4.2782, - "step": 382000 - }, - { - "epoch": 1.67, - "learning_rate": 6.067350467968266e-06, - "loss": 4.2894, - "step": 382500 - }, - { - "epoch": 1.68, - "learning_rate": 6.0618501357482e-06, - "loss": 4.256, - "step": 383000 - }, - { - "epoch": 1.68, - "learning_rate": 6.056349803528133e-06, - "loss": 4.2703, - "step": 383500 - }, - { - "epoch": 1.68, - "learning_rate": 6.050849471308068e-06, - "loss": 4.2549, - "step": 384000 - }, - { - "epoch": 1.68, - "learning_rate": 6.045349139088002e-06, - "loss": 4.2678, - "step": 384500 - }, - { - "epoch": 1.68, - "learning_rate": 6.039848806867935e-06, - "loss": 4.2549, - "step": 385000 - }, - { - "epoch": 1.69, - "learning_rate": 6.0343484746478695e-06, - "loss": 4.2614, - "step": 385500 - }, - { - "epoch": 1.69, - "learning_rate": 6.0288481424278034e-06, - "loss": 4.2745, - "step": 386000 - }, - { - "epoch": 1.69, - "learning_rate": 6.023347810207737e-06, - "loss": 4.2644, - "step": 386500 - }, - { - "epoch": 1.69, - "learning_rate": 6.0178474779876705e-06, - "loss": 4.2588, - "step": 387000 - }, - { - "epoch": 1.7, - "learning_rate": 6.012347145767605e-06, - "loss": 4.2519, - "step": 387500 - }, - { - "epoch": 1.7, - "learning_rate": 6.006846813547539e-06, - "loss": 4.2536, - "step": 388000 - }, - { - "epoch": 1.7, - "learning_rate": 6.001346481327472e-06, - "loss": 4.2446, - "step": 388500 - }, - { - "epoch": 1.7, - "learning_rate": 5.995846149107406e-06, - "loss": 4.2396, - "step": 389000 - }, - { - "epoch": 1.7, - "learning_rate": 5.990345816887341e-06, - "loss": 4.2419, - "step": 389500 - }, - { - "epoch": 1.71, - "learning_rate": 5.984845484667274e-06, - "loss": 4.2453, - "step": 390000 - }, - { - "epoch": 1.71, - "learning_rate": 5.979345152447208e-06, - "loss": 4.2507, - "step": 390500 - }, - { - "epoch": 1.71, - "learning_rate": 5.973844820227143e-06, - "loss": 4.2294, - "step": 391000 - }, - { - "epoch": 1.71, - "learning_rate": 5.968344488007076e-06, - "loss": 4.2284, - "step": 391500 - }, - { - "epoch": 1.72, - "learning_rate": 5.96284415578701e-06, - "loss": 4.231, - "step": 392000 - }, - { - "epoch": 1.72, - "learning_rate": 5.957343823566944e-06, - "loss": 4.2395, - "step": 392500 - }, - { - "epoch": 1.72, - "learning_rate": 5.9518434913468784e-06, - "loss": 4.2113, - "step": 393000 - }, - { - "epoch": 1.72, - "learning_rate": 5.9463431591268115e-06, - "loss": 4.2103, - "step": 393500 - }, - { - "epoch": 1.72, - "learning_rate": 5.9408428269067455e-06, - "loss": 4.2229, - "step": 394000 - }, - { - "epoch": 1.73, - "learning_rate": 5.93534249468668e-06, - "loss": 4.222, - "step": 394500 - }, - { - "epoch": 1.73, - "learning_rate": 5.929842162466613e-06, - "loss": 4.2092, - "step": 395000 - }, - { - "epoch": 1.73, - "learning_rate": 5.924341830246547e-06, - "loss": 4.2056, - "step": 395500 - }, - { - "epoch": 1.73, - "learning_rate": 5.918841498026481e-06, - "loss": 4.2295, - "step": 396000 - }, - { - "epoch": 1.74, - "learning_rate": 5.913341165806415e-06, - "loss": 4.2, - "step": 396500 - }, - { - "epoch": 1.74, - "learning_rate": 5.907840833586349e-06, - "loss": 4.2339, - "step": 397000 - }, - { - "epoch": 1.74, - "learning_rate": 5.902340501366283e-06, - "loss": 4.2158, - "step": 397500 - }, - { - "epoch": 1.74, - "learning_rate": 5.896840169146218e-06, - "loss": 4.2161, - "step": 398000 - }, - { - "epoch": 1.74, - "learning_rate": 5.891339836926151e-06, - "loss": 4.2005, - "step": 398500 - }, - { - "epoch": 1.75, - "learning_rate": 5.885839504706085e-06, - "loss": 4.2143, - "step": 399000 - }, - { - "epoch": 1.75, - "learning_rate": 5.880339172486018e-06, - "loss": 4.2276, - "step": 399500 - }, - { - "epoch": 1.75, - "learning_rate": 5.874838840265953e-06, - "loss": 4.2186, - "step": 400000 - }, - { - "epoch": 1.75, - "learning_rate": 5.8693385080458865e-06, - "loss": 4.1812, - "step": 400500 - }, - { - "epoch": 1.75, - "learning_rate": 5.8638381758258205e-06, - "loss": 4.2175, - "step": 401000 - }, - { - "epoch": 1.76, - "learning_rate": 5.858337843605754e-06, - "loss": 4.1985, - "step": 401500 - }, - { - "epoch": 1.76, - "learning_rate": 5.852837511385688e-06, - "loss": 4.1935, - "step": 402000 - }, - { - "epoch": 1.76, - "learning_rate": 5.847337179165622e-06, - "loss": 4.1974, - "step": 402500 - }, - { - "epoch": 1.76, - "learning_rate": 5.841836846945555e-06, - "loss": 4.2105, - "step": 403000 - }, - { - "epoch": 1.77, - "learning_rate": 5.83633651472549e-06, - "loss": 4.2008, - "step": 403500 - }, - { - "epoch": 1.77, - "learning_rate": 5.830836182505424e-06, - "loss": 4.1886, - "step": 404000 - }, - { - "epoch": 1.77, - "learning_rate": 5.825335850285357e-06, - "loss": 4.1874, - "step": 404500 - }, - { - "epoch": 1.77, - "learning_rate": 5.819835518065292e-06, - "loss": 4.1874, - "step": 405000 - }, - { - "epoch": 1.77, - "learning_rate": 5.814335185845226e-06, - "loss": 4.1816, - "step": 405500 - }, - { - "epoch": 1.78, - "learning_rate": 5.808834853625159e-06, - "loss": 4.1745, - "step": 406000 - }, - { - "epoch": 1.78, - "learning_rate": 5.803334521405093e-06, - "loss": 4.1802, - "step": 406500 - }, - { - "epoch": 1.78, - "learning_rate": 5.797834189185028e-06, - "loss": 4.1832, - "step": 407000 - }, - { - "epoch": 1.78, - "learning_rate": 5.7923338569649615e-06, - "loss": 4.1693, - "step": 407500 - }, - { - "epoch": 1.79, - "learning_rate": 5.786833524744895e-06, - "loss": 4.1977, - "step": 408000 - }, - { - "epoch": 1.79, - "learning_rate": 5.781333192524829e-06, - "loss": 4.1706, - "step": 408500 - }, - { - "epoch": 1.79, - "learning_rate": 5.775832860304763e-06, - "loss": 4.1828, - "step": 409000 - }, - { - "epoch": 1.79, - "learning_rate": 5.770332528084696e-06, - "loss": 4.1706, - "step": 409500 - }, - { - "epoch": 1.79, - "learning_rate": 5.76483219586463e-06, - "loss": 4.154, - "step": 410000 - }, - { - "epoch": 1.8, - "learning_rate": 5.759331863644565e-06, - "loss": 4.1507, - "step": 410500 - }, - { - "epoch": 1.8, - "learning_rate": 5.753831531424498e-06, - "loss": 4.1611, - "step": 411000 - }, - { - "epoch": 1.8, - "learning_rate": 5.748331199204432e-06, - "loss": 4.1719, - "step": 411500 - }, - { - "epoch": 1.8, - "learning_rate": 5.742830866984367e-06, - "loss": 4.1487, - "step": 412000 - }, - { - "epoch": 1.81, - "learning_rate": 5.737330534764301e-06, - "loss": 4.1684, - "step": 412500 - }, - { - "epoch": 1.81, - "learning_rate": 5.731830202544234e-06, - "loss": 4.1549, - "step": 413000 - }, - { - "epoch": 1.81, - "learning_rate": 5.726329870324168e-06, - "loss": 4.1765, - "step": 413500 - }, - { - "epoch": 1.81, - "learning_rate": 5.720829538104103e-06, - "loss": 4.1499, - "step": 414000 - }, - { - "epoch": 1.81, - "learning_rate": 5.715329205884036e-06, - "loss": 4.1573, - "step": 414500 - }, - { - "epoch": 1.82, - "learning_rate": 5.70982887366397e-06, - "loss": 4.1644, - "step": 415000 - }, - { - "epoch": 1.82, - "learning_rate": 5.7043285414439035e-06, - "loss": 4.1414, - "step": 415500 - }, - { - "epoch": 1.82, - "learning_rate": 5.6988282092238375e-06, - "loss": 4.1282, - "step": 416000 - }, - { - "epoch": 1.82, - "learning_rate": 5.693327877003771e-06, - "loss": 4.1548, - "step": 416500 - }, - { - "epoch": 1.82, - "learning_rate": 5.687827544783705e-06, - "loss": 4.1391, - "step": 417000 - }, - { - "epoch": 1.83, - "learning_rate": 5.68232721256364e-06, - "loss": 4.1278, - "step": 417500 - }, - { - "epoch": 1.83, - "learning_rate": 5.676826880343573e-06, - "loss": 4.1243, - "step": 418000 - }, - { - "epoch": 1.83, - "learning_rate": 5.671326548123507e-06, - "loss": 4.1298, - "step": 418500 - }, - { - "epoch": 1.83, - "learning_rate": 5.66582621590344e-06, - "loss": 4.1463, - "step": 419000 - }, - { - "epoch": 1.84, - "learning_rate": 5.660325883683375e-06, - "loss": 4.1488, - "step": 419500 - }, - { - "epoch": 1.84, - "learning_rate": 5.654825551463309e-06, - "loss": 4.1529, - "step": 420000 - }, - { - "epoch": 1.84, - "learning_rate": 5.649325219243242e-06, - "loss": 4.1406, - "step": 420500 - }, - { - "epoch": 1.84, - "learning_rate": 5.643824887023177e-06, - "loss": 4.1463, - "step": 421000 - }, - { - "epoch": 1.84, - "learning_rate": 5.638324554803111e-06, - "loss": 4.1317, - "step": 421500 - }, - { - "epoch": 1.85, - "learning_rate": 5.632824222583045e-06, - "loss": 4.1392, - "step": 422000 - }, - { - "epoch": 1.85, - "learning_rate": 5.627323890362978e-06, - "loss": 4.1185, - "step": 422500 - }, - { - "epoch": 1.85, - "learning_rate": 5.6218235581429125e-06, - "loss": 4.1239, - "step": 423000 - }, - { - "epoch": 1.85, - "learning_rate": 5.616323225922846e-06, - "loss": 4.128, - "step": 423500 - }, - { - "epoch": 1.86, - "learning_rate": 5.6108228937027795e-06, - "loss": 4.1302, - "step": 424000 - }, - { - "epoch": 1.86, - "learning_rate": 5.605322561482714e-06, - "loss": 4.1128, - "step": 424500 - }, - { - "epoch": 1.86, - "learning_rate": 5.599822229262648e-06, - "loss": 4.1167, - "step": 425000 - }, - { - "epoch": 1.86, - "learning_rate": 5.594321897042581e-06, - "loss": 4.1224, - "step": 425500 - }, - { - "epoch": 1.86, - "learning_rate": 5.588821564822515e-06, - "loss": 4.1151, - "step": 426000 - }, - { - "epoch": 1.87, - "learning_rate": 5.58332123260245e-06, - "loss": 4.1033, - "step": 426500 - }, - { - "epoch": 1.87, - "learning_rate": 5.577820900382384e-06, - "loss": 4.1025, - "step": 427000 - }, - { - "epoch": 1.87, - "learning_rate": 5.572320568162317e-06, - "loss": 4.1059, - "step": 427500 - }, - { - "epoch": 1.87, - "learning_rate": 5.566820235942252e-06, - "loss": 4.1141, - "step": 428000 - }, - { - "epoch": 1.88, - "learning_rate": 5.561319903722186e-06, - "loss": 4.1059, - "step": 428500 - }, - { - "epoch": 1.88, - "learning_rate": 5.555819571502119e-06, - "loss": 4.1238, - "step": 429000 - }, - { - "epoch": 1.88, - "learning_rate": 5.550319239282053e-06, - "loss": 4.1249, - "step": 429500 - }, - { - "epoch": 1.88, - "learning_rate": 5.5448189070619875e-06, - "loss": 4.0975, - "step": 430000 - }, - { - "epoch": 1.88, - "learning_rate": 5.5393185748419206e-06, - "loss": 4.0973, - "step": 430500 - }, - { - "epoch": 1.89, - "learning_rate": 5.5338182426218545e-06, - "loss": 4.1075, - "step": 431000 - }, - { - "epoch": 1.89, - "learning_rate": 5.528317910401789e-06, - "loss": 4.0997, - "step": 431500 - }, - { - "epoch": 1.89, - "learning_rate": 5.522817578181723e-06, - "loss": 4.1091, - "step": 432000 - }, - { - "epoch": 1.89, - "learning_rate": 5.517317245961656e-06, - "loss": 4.1006, - "step": 432500 - }, - { - "epoch": 1.89, - "learning_rate": 5.51181691374159e-06, - "loss": 4.0814, - "step": 433000 - }, - { - "epoch": 1.9, - "learning_rate": 5.506316581521525e-06, - "loss": 4.0982, - "step": 433500 - }, - { - "epoch": 1.9, - "learning_rate": 5.500816249301458e-06, - "loss": 4.1054, - "step": 434000 - }, - { - "epoch": 1.9, - "learning_rate": 5.495315917081392e-06, - "loss": 4.1086, - "step": 434500 - }, - { - "epoch": 1.9, - "learning_rate": 5.489815584861327e-06, - "loss": 4.0927, - "step": 435000 - }, - { - "epoch": 1.91, - "learning_rate": 5.48431525264126e-06, - "loss": 4.0906, - "step": 435500 - }, - { - "epoch": 1.91, - "learning_rate": 5.478814920421194e-06, - "loss": 4.083, - "step": 436000 - }, - { - "epoch": 1.91, - "learning_rate": 5.473314588201128e-06, - "loss": 4.0806, - "step": 436500 - }, - { - "epoch": 1.91, - "learning_rate": 5.4678142559810625e-06, - "loss": 4.0919, - "step": 437000 - }, - { - "epoch": 1.91, - "learning_rate": 5.4623139237609956e-06, - "loss": 4.0932, - "step": 437500 - }, - { - "epoch": 1.92, - "learning_rate": 5.4568135915409295e-06, - "loss": 4.0956, - "step": 438000 - }, - { - "epoch": 1.92, - "learning_rate": 5.451313259320864e-06, - "loss": 4.0834, - "step": 438500 - }, - { - "epoch": 1.92, - "learning_rate": 5.445812927100797e-06, - "loss": 4.0945, - "step": 439000 - }, - { - "epoch": 1.92, - "learning_rate": 5.440312594880731e-06, - "loss": 4.0671, - "step": 439500 - }, - { - "epoch": 1.93, - "learning_rate": 5.434812262660664e-06, - "loss": 4.0632, - "step": 440000 - }, - { - "epoch": 1.93, - "learning_rate": 5.429311930440599e-06, - "loss": 4.0858, - "step": 440500 - }, - { - "epoch": 1.93, - "learning_rate": 5.423811598220533e-06, - "loss": 4.0743, - "step": 441000 - }, - { - "epoch": 1.93, - "learning_rate": 5.418311266000467e-06, - "loss": 4.0862, - "step": 441500 - }, - { - "epoch": 1.93, - "learning_rate": 5.4128109337804e-06, - "loss": 4.067, - "step": 442000 - }, - { - "epoch": 1.94, - "learning_rate": 5.407310601560335e-06, - "loss": 4.0933, - "step": 442500 - }, - { - "epoch": 1.94, - "learning_rate": 5.401810269340269e-06, - "loss": 4.0772, - "step": 443000 - }, - { - "epoch": 1.94, - "learning_rate": 5.396309937120202e-06, - "loss": 4.0608, - "step": 443500 - }, - { - "epoch": 1.94, - "learning_rate": 5.390809604900137e-06, - "loss": 4.0552, - "step": 444000 - }, - { - "epoch": 1.95, - "learning_rate": 5.3853092726800706e-06, - "loss": 4.0621, - "step": 444500 - }, - { - "epoch": 1.95, - "learning_rate": 5.379808940460004e-06, - "loss": 4.0642, - "step": 445000 - }, - { - "epoch": 1.95, - "learning_rate": 5.374308608239938e-06, - "loss": 4.0664, - "step": 445500 - }, - { - "epoch": 1.95, - "learning_rate": 5.368808276019872e-06, - "loss": 4.066, - "step": 446000 - }, - { - "epoch": 1.95, - "learning_rate": 5.363307943799806e-06, - "loss": 4.0523, - "step": 446500 - }, - { - "epoch": 1.96, - "learning_rate": 5.357807611579739e-06, - "loss": 4.0584, - "step": 447000 - }, - { - "epoch": 1.96, - "learning_rate": 5.352307279359674e-06, - "loss": 4.0603, - "step": 447500 - }, - { - "epoch": 1.96, - "learning_rate": 5.346806947139608e-06, - "loss": 4.0674, - "step": 448000 - }, - { - "epoch": 1.96, - "learning_rate": 5.341306614919541e-06, - "loss": 4.049, - "step": 448500 - }, - { - "epoch": 1.96, - "learning_rate": 5.335806282699475e-06, - "loss": 4.0774, - "step": 449000 - }, - { - "epoch": 1.97, - "learning_rate": 5.33030595047941e-06, - "loss": 4.0627, - "step": 449500 - }, - { - "epoch": 1.97, - "learning_rate": 5.324805618259343e-06, - "loss": 4.0462, - "step": 450000 - }, - { - "epoch": 1.97, - "learning_rate": 5.319305286039277e-06, - "loss": 4.0567, - "step": 450500 - }, - { - "epoch": 1.97, - "learning_rate": 5.313804953819212e-06, - "loss": 4.0407, - "step": 451000 - }, - { - "epoch": 1.98, - "learning_rate": 5.3083046215991456e-06, - "loss": 4.0465, - "step": 451500 - }, - { - "epoch": 1.98, - "learning_rate": 5.302804289379079e-06, - "loss": 4.0469, - "step": 452000 - }, - { - "epoch": 1.98, - "learning_rate": 5.297303957159013e-06, - "loss": 4.0454, - "step": 452500 - }, - { - "epoch": 1.98, - "learning_rate": 5.291803624938947e-06, - "loss": 4.0364, - "step": 453000 - }, - { - "epoch": 1.98, - "learning_rate": 5.2863032927188804e-06, - "loss": 4.0388, - "step": 453500 - }, - { - "epoch": 1.99, - "learning_rate": 5.280802960498814e-06, - "loss": 4.0361, - "step": 454000 - }, - { - "epoch": 1.99, - "learning_rate": 5.275302628278749e-06, - "loss": 4.0377, - "step": 454500 - }, - { - "epoch": 1.99, - "learning_rate": 5.269802296058682e-06, - "loss": 4.0349, - "step": 455000 - }, - { - "epoch": 1.99, - "learning_rate": 5.264301963838616e-06, - "loss": 4.0458, - "step": 455500 - }, - { - "epoch": 2.0, - "learning_rate": 5.25880163161855e-06, - "loss": 4.0532, - "step": 456000 - }, - { - "epoch": 2.0, - "learning_rate": 5.253301299398484e-06, - "loss": 4.0337, - "step": 456500 - }, - { - "epoch": 2.0, - "learning_rate": 5.247800967178418e-06, - "loss": 4.0266, - "step": 457000 - }, - { - "epoch": 2.0, - "learning_rate": 5.242300634958352e-06, - "loss": 4.012, - "step": 457500 - }, - { - "epoch": 2.0, - "learning_rate": 5.236800302738287e-06, - "loss": 4.0138, - "step": 458000 - }, - { - "epoch": 2.01, - "learning_rate": 5.23129997051822e-06, - "loss": 4.037, - "step": 458500 - }, - { - "epoch": 2.01, - "learning_rate": 5.225799638298154e-06, - "loss": 4.0339, - "step": 459000 - }, - { - "epoch": 2.01, - "learning_rate": 5.220299306078087e-06, - "loss": 4.0195, - "step": 459500 - }, - { - "epoch": 2.01, - "learning_rate": 5.2147989738580215e-06, - "loss": 4.0101, - "step": 460000 - }, - { - "epoch": 2.02, - "learning_rate": 5.2092986416379554e-06, - "loss": 4.021, - "step": 460500 - }, - { - "epoch": 2.02, - "learning_rate": 5.203798309417889e-06, - "loss": 4.0386, - "step": 461000 - }, - { - "epoch": 2.02, - "learning_rate": 5.198297977197823e-06, - "loss": 4.0007, - "step": 461500 - }, - { - "epoch": 2.02, - "learning_rate": 5.192797644977757e-06, - "loss": 4.0172, - "step": 462000 - }, - { - "epoch": 2.02, - "learning_rate": 5.187297312757691e-06, - "loss": 4.017, - "step": 462500 - }, - { - "epoch": 2.03, - "learning_rate": 5.181796980537624e-06, - "loss": 4.0018, - "step": 463000 - }, - { - "epoch": 2.03, - "learning_rate": 5.176296648317559e-06, - "loss": 4.017, - "step": 463500 - }, - { - "epoch": 2.03, - "learning_rate": 5.170796316097493e-06, - "loss": 4.0322, - "step": 464000 - }, - { - "epoch": 2.03, - "learning_rate": 5.165295983877426e-06, - "loss": 4.0057, - "step": 464500 - }, - { - "epoch": 2.03, - "learning_rate": 5.159795651657361e-06, - "loss": 4.017, - "step": 465000 - }, - { - "epoch": 2.04, - "learning_rate": 5.154295319437295e-06, - "loss": 4.0169, - "step": 465500 - }, - { - "epoch": 2.04, - "learning_rate": 5.148794987217229e-06, - "loss": 4.0138, - "step": 466000 - }, - { - "epoch": 2.04, - "learning_rate": 5.143294654997162e-06, - "loss": 4.0021, - "step": 466500 - }, - { - "epoch": 2.04, - "learning_rate": 5.1377943227770965e-06, - "loss": 4.0079, - "step": 467000 - }, - { - "epoch": 2.05, - "learning_rate": 5.1322939905570304e-06, - "loss": 3.9932, - "step": 467500 - }, - { - "epoch": 2.05, - "learning_rate": 5.1267936583369635e-06, - "loss": 4.0081, - "step": 468000 - }, - { - "epoch": 2.05, - "learning_rate": 5.1212933261168975e-06, - "loss": 3.9981, - "step": 468500 - }, - { - "epoch": 2.05, - "learning_rate": 5.115792993896832e-06, - "loss": 4.0078, - "step": 469000 - }, - { - "epoch": 2.05, - "learning_rate": 5.110292661676765e-06, - "loss": 3.9841, - "step": 469500 - }, - { - "epoch": 2.06, - "learning_rate": 5.104792329456699e-06, - "loss": 3.9979, - "step": 470000 - }, - { - "epoch": 2.06, - "learning_rate": 5.099291997236634e-06, - "loss": 3.9957, - "step": 470500 - }, - { - "epoch": 2.06, - "learning_rate": 5.093791665016567e-06, - "loss": 4.0029, - "step": 471000 - }, - { - "epoch": 2.06, - "learning_rate": 5.088291332796501e-06, - "loss": 3.9814, - "step": 471500 - }, - { - "epoch": 2.07, - "learning_rate": 5.082791000576435e-06, - "loss": 3.9898, - "step": 472000 - }, - { - "epoch": 2.07, - "learning_rate": 5.07729066835637e-06, - "loss": 3.9957, - "step": 472500 - }, - { - "epoch": 2.07, - "learning_rate": 5.071790336136303e-06, - "loss": 3.9726, - "step": 473000 - }, - { - "epoch": 2.07, - "learning_rate": 5.066290003916237e-06, - "loss": 4.0125, - "step": 473500 - }, - { - "epoch": 2.07, - "learning_rate": 5.0607896716961715e-06, - "loss": 3.9859, - "step": 474000 - }, - { - "epoch": 2.08, - "learning_rate": 5.055289339476105e-06, - "loss": 3.9817, - "step": 474500 - }, - { - "epoch": 2.08, - "learning_rate": 5.0497890072560385e-06, - "loss": 3.9765, - "step": 475000 - }, - { - "epoch": 2.08, - "learning_rate": 5.0442886750359725e-06, - "loss": 3.9675, - "step": 475500 - }, - { - "epoch": 2.08, - "learning_rate": 5.038788342815906e-06, - "loss": 3.9764, - "step": 476000 - }, - { - "epoch": 2.09, - "learning_rate": 5.03328801059584e-06, - "loss": 3.9743, - "step": 476500 - }, - { - "epoch": 2.09, - "learning_rate": 5.027787678375774e-06, - "loss": 3.9681, - "step": 477000 - }, - { - "epoch": 2.09, - "learning_rate": 5.022287346155709e-06, - "loss": 3.9895, - "step": 477500 - }, - { - "epoch": 2.09, - "learning_rate": 5.016787013935642e-06, - "loss": 3.9755, - "step": 478000 - }, - { - "epoch": 2.09, - "learning_rate": 5.011286681715576e-06, - "loss": 3.9612, - "step": 478500 - }, - { - "epoch": 2.1, - "learning_rate": 5.005786349495509e-06, - "loss": 3.9687, - "step": 479000 - }, - { - "epoch": 2.1, - "learning_rate": 5.000286017275444e-06, - "loss": 3.9685, - "step": 479500 - }, - { - "epoch": 2.1, - "learning_rate": 4.994785685055378e-06, - "loss": 3.9742, - "step": 480000 - }, - { - "epoch": 2.1, - "learning_rate": 4.989285352835312e-06, - "loss": 3.9636, - "step": 480500 - }, - { - "epoch": 2.1, - "learning_rate": 4.983785020615246e-06, - "loss": 3.9729, - "step": 481000 - }, - { - "epoch": 2.11, - "learning_rate": 4.97828468839518e-06, - "loss": 3.9758, - "step": 481500 - }, - { - "epoch": 2.11, - "learning_rate": 4.9727843561751135e-06, - "loss": 3.9642, - "step": 482000 - }, - { - "epoch": 2.11, - "learning_rate": 4.9672840239550475e-06, - "loss": 3.9607, - "step": 482500 - }, - { - "epoch": 2.11, - "learning_rate": 4.961783691734981e-06, - "loss": 3.9643, - "step": 483000 - }, - { - "epoch": 2.12, - "learning_rate": 4.956283359514915e-06, - "loss": 3.9637, - "step": 483500 - }, - { - "epoch": 2.12, - "learning_rate": 4.950783027294849e-06, - "loss": 3.9623, - "step": 484000 - }, - { - "epoch": 2.12, - "learning_rate": 4.945282695074783e-06, - "loss": 3.9623, - "step": 484500 - }, - { - "epoch": 2.12, - "learning_rate": 4.939782362854717e-06, - "loss": 3.9709, - "step": 485000 - }, - { - "epoch": 2.12, - "learning_rate": 4.93428203063465e-06, - "loss": 3.9395, - "step": 485500 - }, - { - "epoch": 2.13, - "learning_rate": 4.928781698414585e-06, - "loss": 3.9428, - "step": 486000 - }, - { - "epoch": 2.13, - "learning_rate": 4.923281366194518e-06, - "loss": 4.0036, - "step": 486500 - }, - { - "epoch": 2.13, - "learning_rate": 4.917781033974453e-06, - "loss": 3.9457, - "step": 487000 - }, - { - "epoch": 2.13, - "learning_rate": 4.912280701754386e-06, - "loss": 3.9635, - "step": 487500 - }, - { - "epoch": 2.14, - "learning_rate": 4.90678036953432e-06, - "loss": 3.954, - "step": 488000 - }, - { - "epoch": 2.14, - "learning_rate": 4.901280037314255e-06, - "loss": 3.9367, - "step": 488500 - }, - { - "epoch": 2.14, - "learning_rate": 4.895779705094188e-06, - "loss": 3.9686, - "step": 489000 - }, - { - "epoch": 2.14, - "learning_rate": 4.8902793728741225e-06, - "loss": 3.9572, - "step": 489500 - }, - { - "epoch": 2.14, - "learning_rate": 4.8847790406540556e-06, - "loss": 3.9387, - "step": 490000 - }, - { - "epoch": 2.15, - "learning_rate": 4.8792787084339895e-06, - "loss": 3.9297, - "step": 490500 - }, - { - "epoch": 2.15, - "learning_rate": 4.873778376213923e-06, - "loss": 3.9491, - "step": 491000 - }, - { - "epoch": 2.15, - "learning_rate": 4.868278043993857e-06, - "loss": 3.9532, - "step": 491500 - }, - { - "epoch": 2.15, - "learning_rate": 4.862777711773792e-06, - "loss": 3.9393, - "step": 492000 - }, - { - "epoch": 2.16, - "learning_rate": 4.857277379553725e-06, - "loss": 3.9418, - "step": 492500 - }, - { - "epoch": 2.16, - "learning_rate": 4.851777047333659e-06, - "loss": 3.9304, - "step": 493000 - }, - { - "epoch": 2.16, - "learning_rate": 4.846276715113593e-06, - "loss": 3.9539, - "step": 493500 - }, - { - "epoch": 2.16, - "learning_rate": 4.840776382893527e-06, - "loss": 3.9458, - "step": 494000 - }, - { - "epoch": 2.16, - "learning_rate": 4.835276050673461e-06, - "loss": 3.9491, - "step": 494500 - }, - { - "epoch": 2.17, - "learning_rate": 4.829775718453395e-06, - "loss": 3.9425, - "step": 495000 - }, - { - "epoch": 2.17, - "learning_rate": 4.824275386233329e-06, - "loss": 3.9529, - "step": 495500 - }, - { - "epoch": 2.17, - "learning_rate": 4.818775054013263e-06, - "loss": 3.9418, - "step": 496000 - }, - { - "epoch": 2.17, - "learning_rate": 4.813274721793197e-06, - "loss": 3.945, - "step": 496500 - }, - { - "epoch": 2.17, - "learning_rate": 4.8077743895731306e-06, - "loss": 3.9581, - "step": 497000 - }, - { - "epoch": 2.18, - "learning_rate": 4.8022740573530645e-06, - "loss": 3.9464, - "step": 497500 - }, - { - "epoch": 2.18, - "learning_rate": 4.796773725132998e-06, - "loss": 3.9287, - "step": 498000 - }, - { - "epoch": 2.18, - "learning_rate": 4.791273392912932e-06, - "loss": 3.9462, - "step": 498500 - }, - { - "epoch": 2.18, - "learning_rate": 4.785773060692866e-06, - "loss": 3.9331, - "step": 499000 - }, - { - "epoch": 2.19, - "learning_rate": 4.7802727284728e-06, - "loss": 3.9289, - "step": 499500 - }, - { - "epoch": 2.19, - "learning_rate": 4.774772396252734e-06, - "loss": 3.9348, - "step": 500000 - }, - { - "epoch": 2.19, - "learning_rate": 4.769272064032668e-06, - "loss": 3.929, - "step": 500500 - }, - { - "epoch": 2.19, - "learning_rate": 4.763771731812602e-06, - "loss": 3.9286, - "step": 501000 - }, - { - "epoch": 2.19, - "learning_rate": 4.758271399592536e-06, - "loss": 3.9327, - "step": 501500 - }, - { - "epoch": 2.2, - "learning_rate": 4.75277106737247e-06, - "loss": 3.9178, - "step": 502000 - }, - { - "epoch": 2.2, - "learning_rate": 4.747270735152403e-06, - "loss": 3.9321, - "step": 502500 - }, - { - "epoch": 2.2, - "learning_rate": 4.741770402932338e-06, - "loss": 3.9239, - "step": 503000 - }, - { - "epoch": 2.2, - "learning_rate": 4.736270070712272e-06, - "loss": 3.9168, - "step": 503500 - }, - { - "epoch": 2.21, - "learning_rate": 4.7307697384922056e-06, - "loss": 3.9304, - "step": 504000 - }, - { - "epoch": 2.21, - "learning_rate": 4.7252694062721395e-06, - "loss": 3.9262, - "step": 504500 - }, - { - "epoch": 2.21, - "learning_rate": 4.7197690740520726e-06, - "loss": 3.918, - "step": 505000 - }, - { - "epoch": 2.21, - "learning_rate": 4.714268741832007e-06, - "loss": 3.9114, - "step": 505500 - }, - { - "epoch": 2.21, - "learning_rate": 4.7087684096119404e-06, - "loss": 3.9143, - "step": 506000 - }, - { - "epoch": 2.22, - "learning_rate": 4.703268077391875e-06, - "loss": 3.9256, - "step": 506500 - }, - { - "epoch": 2.22, - "learning_rate": 4.697767745171809e-06, - "loss": 3.9179, - "step": 507000 - }, - { - "epoch": 2.22, - "learning_rate": 4.692267412951742e-06, - "loss": 3.9251, - "step": 507500 - }, - { - "epoch": 2.22, - "learning_rate": 4.686767080731677e-06, - "loss": 3.9256, - "step": 508000 - }, - { - "epoch": 2.23, - "learning_rate": 4.68126674851161e-06, - "loss": 3.908, - "step": 508500 - }, - { - "epoch": 2.23, - "learning_rate": 4.675766416291545e-06, - "loss": 3.9167, - "step": 509000 - }, - { - "epoch": 2.23, - "learning_rate": 4.670266084071478e-06, - "loss": 3.9118, - "step": 509500 - }, - { - "epoch": 2.23, - "learning_rate": 4.664765751851412e-06, - "loss": 3.9192, - "step": 510000 - }, - { - "epoch": 2.23, - "learning_rate": 4.659265419631347e-06, - "loss": 3.9013, - "step": 510500 - }, - { - "epoch": 2.24, - "learning_rate": 4.65376508741128e-06, - "loss": 3.9102, - "step": 511000 - }, - { - "epoch": 2.24, - "learning_rate": 4.6482647551912145e-06, - "loss": 3.9228, - "step": 511500 - }, - { - "epoch": 2.24, - "learning_rate": 4.6427644229711476e-06, - "loss": 3.9117, - "step": 512000 - }, - { - "epoch": 2.24, - "learning_rate": 4.6372640907510815e-06, - "loss": 3.9039, - "step": 512500 - }, - { - "epoch": 2.24, - "learning_rate": 4.6317637585310154e-06, - "loss": 3.9006, - "step": 513000 - }, - { - "epoch": 2.25, - "learning_rate": 4.626263426310949e-06, - "loss": 3.888, - "step": 513500 - }, - { - "epoch": 2.25, - "learning_rate": 4.620763094090883e-06, - "loss": 3.9163, - "step": 514000 - }, - { - "epoch": 2.25, - "learning_rate": 4.615262761870817e-06, - "loss": 3.8956, - "step": 514500 - }, - { - "epoch": 2.25, - "learning_rate": 4.609762429650751e-06, - "loss": 3.9047, - "step": 515000 - }, - { - "epoch": 2.26, - "learning_rate": 4.604262097430685e-06, - "loss": 3.8887, - "step": 515500 - }, - { - "epoch": 2.26, - "learning_rate": 4.598761765210619e-06, - "loss": 3.8938, - "step": 516000 - }, - { - "epoch": 2.26, - "learning_rate": 4.593261432990553e-06, - "loss": 3.8859, - "step": 516500 - }, - { - "epoch": 2.26, - "learning_rate": 4.587761100770487e-06, - "loss": 3.8827, - "step": 517000 - }, - { - "epoch": 2.26, - "learning_rate": 4.582260768550421e-06, - "loss": 3.9168, - "step": 517500 - }, - { - "epoch": 2.27, - "learning_rate": 4.576760436330355e-06, - "loss": 3.8968, - "step": 518000 - }, - { - "epoch": 2.27, - "learning_rate": 4.571260104110289e-06, - "loss": 3.8848, - "step": 518500 - }, - { - "epoch": 2.27, - "learning_rate": 4.5657597718902226e-06, - "loss": 3.8846, - "step": 519000 - }, - { - "epoch": 2.27, - "learning_rate": 4.5602594396701565e-06, - "loss": 3.8849, - "step": 519500 - }, - { - "epoch": 2.28, - "learning_rate": 4.5547591074500904e-06, - "loss": 3.8856, - "step": 520000 - }, - { - "epoch": 2.28, - "learning_rate": 4.549258775230024e-06, - "loss": 3.9032, - "step": 520500 - }, - { - "epoch": 2.28, - "learning_rate": 4.543758443009958e-06, - "loss": 3.886, - "step": 521000 - }, - { - "epoch": 2.28, - "learning_rate": 4.538258110789892e-06, - "loss": 3.894, - "step": 521500 - }, - { - "epoch": 2.28, - "learning_rate": 4.532757778569826e-06, - "loss": 3.8737, - "step": 522000 - }, - { - "epoch": 2.29, - "learning_rate": 4.52725744634976e-06, - "loss": 3.8734, - "step": 522500 - }, - { - "epoch": 2.29, - "learning_rate": 4.521757114129694e-06, - "loss": 3.8829, - "step": 523000 - }, - { - "epoch": 2.29, - "learning_rate": 4.516256781909628e-06, - "loss": 3.8842, - "step": 523500 - }, - { - "epoch": 2.29, - "learning_rate": 4.510756449689562e-06, - "loss": 3.8766, - "step": 524000 - }, - { - "epoch": 2.3, - "learning_rate": 4.505256117469495e-06, - "loss": 3.8956, - "step": 524500 - }, - { - "epoch": 2.3, - "learning_rate": 4.49975578524943e-06, - "loss": 3.8952, - "step": 525000 - }, - { - "epoch": 2.3, - "learning_rate": 4.494255453029364e-06, - "loss": 3.8863, - "step": 525500 - }, - { - "epoch": 2.3, - "learning_rate": 4.488755120809298e-06, - "loss": 3.8746, - "step": 526000 - }, - { - "epoch": 2.3, - "learning_rate": 4.4832547885892315e-06, - "loss": 3.8856, - "step": 526500 - }, - { - "epoch": 2.31, - "learning_rate": 4.477754456369165e-06, - "loss": 3.9011, - "step": 527000 - }, - { - "epoch": 2.31, - "learning_rate": 4.472254124149099e-06, - "loss": 3.8708, - "step": 527500 - }, - { - "epoch": 2.31, - "learning_rate": 4.4667537919290324e-06, - "loss": 3.8729, - "step": 528000 - }, - { - "epoch": 2.31, - "learning_rate": 4.461253459708967e-06, - "loss": 3.8778, - "step": 528500 - }, - { - "epoch": 2.32, - "learning_rate": 4.4557531274889e-06, - "loss": 3.8994, - "step": 529000 - }, - { - "epoch": 2.32, - "learning_rate": 4.450252795268834e-06, - "loss": 3.8571, - "step": 529500 - }, - { - "epoch": 2.32, - "learning_rate": 4.444752463048769e-06, - "loss": 3.8749, - "step": 530000 - }, - { - "epoch": 2.32, - "learning_rate": 4.439252130828702e-06, - "loss": 3.8686, - "step": 530500 - }, - { - "epoch": 2.32, - "learning_rate": 4.433751798608637e-06, - "loss": 3.8556, - "step": 531000 - }, - { - "epoch": 2.33, - "learning_rate": 4.42825146638857e-06, - "loss": 3.8639, - "step": 531500 - }, - { - "epoch": 2.33, - "learning_rate": 4.422751134168504e-06, - "loss": 3.8651, - "step": 532000 - }, - { - "epoch": 2.33, - "learning_rate": 4.417250801948438e-06, - "loss": 3.8659, - "step": 532500 - }, - { - "epoch": 2.33, - "learning_rate": 4.411750469728372e-06, - "loss": 3.8609, - "step": 533000 - }, - { - "epoch": 2.33, - "learning_rate": 4.4062501375083065e-06, - "loss": 3.8801, - "step": 533500 - }, - { - "epoch": 2.34, - "learning_rate": 4.40074980528824e-06, - "loss": 3.857, - "step": 534000 - }, - { - "epoch": 2.34, - "learning_rate": 4.3952494730681735e-06, - "loss": 3.8627, - "step": 534500 - }, - { - "epoch": 2.34, - "learning_rate": 4.3897491408481074e-06, - "loss": 3.8653, - "step": 535000 - }, - { - "epoch": 2.34, - "learning_rate": 4.384248808628041e-06, - "loss": 3.8654, - "step": 535500 - }, - { - "epoch": 2.35, - "learning_rate": 4.378748476407975e-06, - "loss": 3.8717, - "step": 536000 - }, - { - "epoch": 2.35, - "learning_rate": 4.373248144187909e-06, - "loss": 3.8482, - "step": 536500 - }, - { - "epoch": 2.35, - "learning_rate": 4.367747811967843e-06, - "loss": 3.8707, - "step": 537000 - }, - { - "epoch": 2.35, - "learning_rate": 4.362247479747777e-06, - "loss": 3.8523, - "step": 537500 - }, - { - "epoch": 2.35, - "learning_rate": 4.356747147527711e-06, - "loss": 3.8629, - "step": 538000 - }, - { - "epoch": 2.36, - "learning_rate": 4.351246815307645e-06, - "loss": 3.8712, - "step": 538500 - }, - { - "epoch": 2.36, - "learning_rate": 4.345746483087579e-06, - "loss": 3.8486, - "step": 539000 - }, - { - "epoch": 2.36, - "learning_rate": 4.340246150867513e-06, - "loss": 3.8586, - "step": 539500 - }, - { - "epoch": 2.36, - "learning_rate": 4.334745818647447e-06, - "loss": 3.8724, - "step": 540000 - }, - { - "epoch": 2.37, - "learning_rate": 4.329245486427381e-06, - "loss": 3.8568, - "step": 540500 - }, - { - "epoch": 2.37, - "learning_rate": 4.323745154207315e-06, - "loss": 3.8522, - "step": 541000 - }, - { - "epoch": 2.37, - "learning_rate": 4.3182448219872485e-06, - "loss": 3.8657, - "step": 541500 - }, - { - "epoch": 2.37, - "learning_rate": 4.3127444897671825e-06, - "loss": 3.8592, - "step": 542000 - }, - { - "epoch": 2.37, - "learning_rate": 4.307244157547116e-06, - "loss": 3.8423, - "step": 542500 - }, - { - "epoch": 2.38, - "learning_rate": 4.30174382532705e-06, - "loss": 3.8413, - "step": 543000 - }, - { - "epoch": 2.38, - "learning_rate": 4.296243493106984e-06, - "loss": 3.8402, - "step": 543500 - }, - { - "epoch": 2.38, - "learning_rate": 4.290743160886917e-06, - "loss": 3.8523, - "step": 544000 - }, - { - "epoch": 2.38, - "learning_rate": 4.285242828666852e-06, - "loss": 3.8484, - "step": 544500 - }, - { - "epoch": 2.39, - "learning_rate": 4.279742496446786e-06, - "loss": 3.8438, - "step": 545000 - }, - { - "epoch": 2.39, - "learning_rate": 4.27424216422672e-06, - "loss": 3.8523, - "step": 545500 - }, - { - "epoch": 2.39, - "learning_rate": 4.268741832006654e-06, - "loss": 3.851, - "step": 546000 - }, - { - "epoch": 2.39, - "learning_rate": 4.263241499786587e-06, - "loss": 3.8666, - "step": 546500 - }, - { - "epoch": 2.39, - "learning_rate": 4.257741167566522e-06, - "loss": 3.8476, - "step": 547000 - }, - { - "epoch": 2.4, - "learning_rate": 4.252240835346455e-06, - "loss": 3.8318, - "step": 547500 - }, - { - "epoch": 2.4, - "learning_rate": 4.24674050312639e-06, - "loss": 3.8342, - "step": 548000 - }, - { - "epoch": 2.4, - "learning_rate": 4.2412401709063235e-06, - "loss": 3.8342, - "step": 548500 - }, - { - "epoch": 2.4, - "learning_rate": 4.235739838686257e-06, - "loss": 3.8513, - "step": 549000 - }, - { - "epoch": 2.4, - "learning_rate": 4.230239506466191e-06, - "loss": 3.846, - "step": 549500 - }, - { - "epoch": 2.41, - "learning_rate": 4.2247391742461245e-06, - "loss": 3.8363, - "step": 550000 - }, - { - "epoch": 2.41, - "learning_rate": 4.219238842026059e-06, - "loss": 3.8365, - "step": 550500 - }, - { - "epoch": 2.41, - "learning_rate": 4.213738509805992e-06, - "loss": 3.8335, - "step": 551000 - }, - { - "epoch": 2.41, - "learning_rate": 4.208238177585926e-06, - "loss": 3.8421, - "step": 551500 - }, - { - "epoch": 2.42, - "learning_rate": 4.202737845365861e-06, - "loss": 3.859, - "step": 552000 - }, - { - "epoch": 2.42, - "learning_rate": 4.197237513145794e-06, - "loss": 3.8283, - "step": 552500 - }, - { - "epoch": 2.42, - "learning_rate": 4.191737180925728e-06, - "loss": 3.8288, - "step": 553000 - }, - { - "epoch": 2.42, - "learning_rate": 4.186236848705662e-06, - "loss": 3.8239, - "step": 553500 - }, - { - "epoch": 2.42, - "learning_rate": 4.180736516485596e-06, - "loss": 3.834, - "step": 554000 - }, - { - "epoch": 2.43, - "learning_rate": 4.17523618426553e-06, - "loss": 3.8265, - "step": 554500 - }, - { - "epoch": 2.43, - "learning_rate": 4.169735852045464e-06, - "loss": 3.8312, - "step": 555000 - }, - { - "epoch": 2.43, - "learning_rate": 4.164235519825398e-06, - "loss": 3.8317, - "step": 555500 - }, - { - "epoch": 2.43, - "learning_rate": 4.158735187605332e-06, - "loss": 3.8383, - "step": 556000 - }, - { - "epoch": 2.44, - "learning_rate": 4.1532348553852655e-06, - "loss": 3.8163, - "step": 556500 - }, - { - "epoch": 2.44, - "learning_rate": 4.1477345231651995e-06, - "loss": 3.8049, - "step": 557000 - }, - { - "epoch": 2.44, - "learning_rate": 4.142234190945133e-06, - "loss": 3.8479, - "step": 557500 - }, - { - "epoch": 2.44, - "learning_rate": 4.136733858725067e-06, - "loss": 3.8328, - "step": 558000 - }, - { - "epoch": 2.44, - "learning_rate": 4.131233526505001e-06, - "loss": 3.8151, - "step": 558500 - }, - { - "epoch": 2.45, - "learning_rate": 4.125733194284935e-06, - "loss": 3.8271, - "step": 559000 - }, - { - "epoch": 2.45, - "learning_rate": 4.120232862064869e-06, - "loss": 3.8143, - "step": 559500 - }, - { - "epoch": 2.45, - "learning_rate": 4.114732529844803e-06, - "loss": 3.8264, - "step": 560000 - }, - { - "epoch": 2.45, - "learning_rate": 4.109232197624737e-06, - "loss": 3.8173, - "step": 560500 - }, - { - "epoch": 2.46, - "learning_rate": 4.103731865404671e-06, - "loss": 3.814, - "step": 561000 - }, - { - "epoch": 2.46, - "learning_rate": 4.098231533184605e-06, - "loss": 3.8111, - "step": 561500 - }, - { - "epoch": 2.46, - "learning_rate": 4.092731200964539e-06, - "loss": 3.8192, - "step": 562000 - }, - { - "epoch": 2.46, - "learning_rate": 4.087230868744473e-06, - "loss": 3.8212, - "step": 562500 - }, - { - "epoch": 2.46, - "learning_rate": 4.081730536524407e-06, - "loss": 3.8073, - "step": 563000 - }, - { - "epoch": 2.47, - "learning_rate": 4.0762302043043405e-06, - "loss": 3.8258, - "step": 563500 - }, - { - "epoch": 2.47, - "learning_rate": 4.0707298720842745e-06, - "loss": 3.8391, - "step": 564000 - }, - { - "epoch": 2.47, - "learning_rate": 4.065229539864208e-06, - "loss": 3.8065, - "step": 564500 - }, - { - "epoch": 2.47, - "learning_rate": 4.059729207644142e-06, - "loss": 3.8148, - "step": 565000 - }, - { - "epoch": 2.47, - "learning_rate": 4.054228875424076e-06, - "loss": 3.8089, - "step": 565500 - }, - { - "epoch": 2.48, - "learning_rate": 4.048728543204009e-06, - "loss": 3.8037, - "step": 566000 - }, - { - "epoch": 2.48, - "learning_rate": 4.043228210983944e-06, - "loss": 3.8184, - "step": 566500 - }, - { - "epoch": 2.48, - "learning_rate": 4.037727878763877e-06, - "loss": 3.8412, - "step": 567000 - }, - { - "epoch": 2.48, - "learning_rate": 4.032227546543811e-06, - "loss": 3.8171, - "step": 567500 - }, - { - "epoch": 2.49, - "learning_rate": 4.026727214323746e-06, - "loss": 3.8175, - "step": 568000 - }, - { - "epoch": 2.49, - "learning_rate": 4.021226882103679e-06, - "loss": 3.8184, - "step": 568500 - }, - { - "epoch": 2.49, - "learning_rate": 4.015726549883614e-06, - "loss": 3.8238, - "step": 569000 - }, - { - "epoch": 2.49, - "learning_rate": 4.010226217663547e-06, - "loss": 3.807, - "step": 569500 - }, - { - "epoch": 2.49, - "learning_rate": 4.004725885443481e-06, - "loss": 3.7998, - "step": 570000 - }, - { - "epoch": 2.5, - "learning_rate": 3.999225553223415e-06, - "loss": 3.815, - "step": 570500 - }, - { - "epoch": 2.5, - "learning_rate": 3.993725221003349e-06, - "loss": 3.8179, - "step": 571000 - }, - { - "epoch": 2.5, - "learning_rate": 3.988224888783283e-06, - "loss": 3.803, - "step": 571500 - }, - { - "epoch": 2.5, - "learning_rate": 3.9827245565632165e-06, - "loss": 3.8154, - "step": 572000 - }, - { - "epoch": 2.51, - "learning_rate": 3.97722422434315e-06, - "loss": 3.7961, - "step": 572500 - }, - { - "epoch": 2.51, - "learning_rate": 3.971723892123084e-06, - "loss": 3.8028, - "step": 573000 - }, - { - "epoch": 2.51, - "learning_rate": 3.966223559903018e-06, - "loss": 3.8072, - "step": 573500 - }, - { - "epoch": 2.51, - "learning_rate": 3.960723227682952e-06, - "loss": 3.7901, - "step": 574000 - }, - { - "epoch": 2.51, - "learning_rate": 3.955222895462886e-06, - "loss": 3.7932, - "step": 574500 - }, - { - "epoch": 2.52, - "learning_rate": 3.94972256324282e-06, - "loss": 3.789, - "step": 575000 - }, - { - "epoch": 2.52, - "learning_rate": 3.944222231022754e-06, - "loss": 3.8177, - "step": 575500 - }, - { - "epoch": 2.52, - "learning_rate": 3.938721898802688e-06, - "loss": 3.7971, - "step": 576000 - }, - { - "epoch": 2.52, - "learning_rate": 3.933221566582622e-06, - "loss": 3.802, - "step": 576500 - }, - { - "epoch": 2.53, - "learning_rate": 3.927721234362556e-06, - "loss": 3.8384, - "step": 577000 - }, - { - "epoch": 2.53, - "learning_rate": 3.92222090214249e-06, - "loss": 3.7789, - "step": 577500 - }, - { - "epoch": 2.53, - "learning_rate": 3.916720569922424e-06, - "loss": 3.8068, - "step": 578000 - }, - { - "epoch": 2.53, - "learning_rate": 3.9112202377023576e-06, - "loss": 3.7959, - "step": 578500 - }, - { - "epoch": 2.53, - "learning_rate": 3.9057199054822915e-06, - "loss": 3.7924, - "step": 579000 - }, - { - "epoch": 2.54, - "learning_rate": 3.900219573262225e-06, - "loss": 3.7729, - "step": 579500 - }, - { - "epoch": 2.54, - "learning_rate": 3.894719241042159e-06, - "loss": 3.7925, - "step": 580000 - }, - { - "epoch": 2.54, - "learning_rate": 3.889218908822093e-06, - "loss": 3.7963, - "step": 580500 - }, - { - "epoch": 2.54, - "learning_rate": 3.883718576602027e-06, - "loss": 3.7818, - "step": 581000 - }, - { - "epoch": 2.54, - "learning_rate": 3.878218244381961e-06, - "loss": 3.7931, - "step": 581500 - }, - { - "epoch": 2.55, - "learning_rate": 3.872717912161894e-06, - "loss": 3.7912, - "step": 582000 - }, - { - "epoch": 2.55, - "learning_rate": 3.867217579941829e-06, - "loss": 3.7882, - "step": 582500 - }, - { - "epoch": 2.55, - "learning_rate": 3.861717247721763e-06, - "loss": 3.797, - "step": 583000 - }, - { - "epoch": 2.55, - "learning_rate": 3.856216915501697e-06, - "loss": 3.7891, - "step": 583500 - }, - { - "epoch": 2.56, - "learning_rate": 3.850716583281631e-06, - "loss": 3.7888, - "step": 584000 - }, - { - "epoch": 2.56, - "learning_rate": 3.845216251061564e-06, - "loss": 3.7929, - "step": 584500 - }, - { - "epoch": 2.56, - "learning_rate": 3.839715918841499e-06, - "loss": 3.7851, - "step": 585000 - }, - { - "epoch": 2.56, - "learning_rate": 3.834215586621432e-06, - "loss": 3.803, - "step": 585500 - }, - { - "epoch": 2.56, - "learning_rate": 3.8287152544013665e-06, - "loss": 3.7712, - "step": 586000 - }, - { - "epoch": 2.57, - "learning_rate": 3.8232149221813e-06, - "loss": 3.7852, - "step": 586500 - }, - { - "epoch": 2.57, - "learning_rate": 3.8177145899612335e-06, - "loss": 3.7847, - "step": 587000 - }, - { - "epoch": 2.57, - "learning_rate": 3.812214257741168e-06, - "loss": 3.7644, - "step": 587500 - }, - { - "epoch": 2.57, - "learning_rate": 3.8067139255211018e-06, - "loss": 3.7816, - "step": 588000 - }, - { - "epoch": 2.58, - "learning_rate": 3.8012135933010357e-06, - "loss": 3.8016, - "step": 588500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7957132610809692e-06, - "loss": 3.779, - "step": 589000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7902129288609036e-06, - "loss": 3.7893, - "step": 589500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7847125966408375e-06, - "loss": 3.7595, - "step": 590000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7792122644207714e-06, - "loss": 3.7787, - "step": 590500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7737119322007054e-06, - "loss": 3.7649, - "step": 591000 - }, - { - "epoch": 2.59, - "learning_rate": 3.768211599980639e-06, - "loss": 3.7634, - "step": 591500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7627112677605732e-06, - "loss": 3.7862, - "step": 592000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7572109355405067e-06, - "loss": 3.768, - "step": 592500 - }, - { - "epoch": 2.6, - "learning_rate": 3.751710603320441e-06, - "loss": 3.77, - "step": 593000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7462102711003746e-06, - "loss": 3.7923, - "step": 593500 - }, - { - "epoch": 2.6, - "learning_rate": 3.7407099388803085e-06, - "loss": 3.7903, - "step": 594000 - }, - { - "epoch": 2.6, - "learning_rate": 3.735209606660243e-06, - "loss": 3.7742, - "step": 594500 - }, - { - "epoch": 2.6, - "learning_rate": 3.7297092744401764e-06, - "loss": 3.7723, - "step": 595000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7242089422201107e-06, - "loss": 3.7747, - "step": 595500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7187086100000442e-06, - "loss": 3.7572, - "step": 596000 - }, - { - "epoch": 2.61, - "learning_rate": 3.713208277779978e-06, - "loss": 3.7676, - "step": 596500 - }, - { - "epoch": 2.61, - "learning_rate": 3.707707945559912e-06, - "loss": 3.7739, - "step": 597000 - }, - { - "epoch": 2.61, - "learning_rate": 3.702207613339846e-06, - "loss": 3.7719, - "step": 597500 - }, - { - "epoch": 2.62, - "learning_rate": 3.6967072811197804e-06, - "loss": 3.7698, - "step": 598000 - }, - { - "epoch": 2.62, - "learning_rate": 3.691206948899714e-06, - "loss": 3.7597, - "step": 598500 - }, - { - "epoch": 2.62, - "learning_rate": 3.685706616679648e-06, - "loss": 3.7637, - "step": 599000 - }, - { - "epoch": 2.62, - "learning_rate": 3.6802062844595813e-06, - "loss": 3.7903, - "step": 599500 - }, - { - "epoch": 2.63, - "learning_rate": 3.6747059522395157e-06, - "loss": 3.7736, - "step": 600000 - }, - { - "epoch": 2.63, - "learning_rate": 3.669205620019449e-06, - "loss": 3.7649, - "step": 600500 - }, - { - "epoch": 2.63, - "learning_rate": 3.6637052877993835e-06, - "loss": 3.7754, - "step": 601000 - }, - { - "epoch": 2.63, - "learning_rate": 3.6582049555793174e-06, - "loss": 3.7583, - "step": 601500 - }, - { - "epoch": 2.63, - "learning_rate": 3.652704623359251e-06, - "loss": 3.7565, - "step": 602000 - }, - { - "epoch": 2.64, - "learning_rate": 3.6472042911391853e-06, - "loss": 3.7819, - "step": 602500 - }, - { - "epoch": 2.64, - "learning_rate": 3.641703958919119e-06, - "loss": 3.7609, - "step": 603000 - }, - { - "epoch": 2.64, - "learning_rate": 3.636203626699053e-06, - "loss": 3.783, - "step": 603500 - }, - { - "epoch": 2.64, - "learning_rate": 3.6307032944789867e-06, - "loss": 3.7643, - "step": 604000 - }, - { - "epoch": 2.65, - "learning_rate": 3.6252029622589206e-06, - "loss": 3.7631, - "step": 604500 - }, - { - "epoch": 2.65, - "learning_rate": 3.619702630038855e-06, - "loss": 3.7547, - "step": 605000 - }, - { - "epoch": 2.65, - "learning_rate": 3.6142022978187884e-06, - "loss": 3.7683, - "step": 605500 - }, - { - "epoch": 2.65, - "learning_rate": 3.608701965598723e-06, - "loss": 3.7608, - "step": 606000 - }, - { - "epoch": 2.65, - "learning_rate": 3.6032016333786563e-06, - "loss": 3.7394, - "step": 606500 - }, - { - "epoch": 2.66, - "learning_rate": 3.5977013011585902e-06, - "loss": 3.759, - "step": 607000 - }, - { - "epoch": 2.66, - "learning_rate": 3.592200968938524e-06, - "loss": 3.7483, - "step": 607500 - }, - { - "epoch": 2.66, - "learning_rate": 3.586700636718458e-06, - "loss": 3.7504, - "step": 608000 - }, - { - "epoch": 2.66, - "learning_rate": 3.5812003044983916e-06, - "loss": 3.7799, - "step": 608500 - }, - { - "epoch": 2.67, - "learning_rate": 3.575699972278326e-06, - "loss": 3.7455, - "step": 609000 - }, - { - "epoch": 2.67, - "learning_rate": 3.57019964005826e-06, - "loss": 3.7645, - "step": 609500 - }, - { - "epoch": 2.67, - "learning_rate": 3.564699307838194e-06, - "loss": 3.7539, - "step": 610000 - }, - { - "epoch": 2.67, - "learning_rate": 3.5591989756181277e-06, - "loss": 3.7454, - "step": 610500 - }, - { - "epoch": 2.67, - "learning_rate": 3.5536986433980612e-06, - "loss": 3.7622, - "step": 611000 - }, - { - "epoch": 2.68, - "learning_rate": 3.5481983111779956e-06, - "loss": 3.7541, - "step": 611500 - }, - { - "epoch": 2.68, - "learning_rate": 3.542697978957929e-06, - "loss": 3.7643, - "step": 612000 - }, - { - "epoch": 2.68, - "learning_rate": 3.5371976467378635e-06, - "loss": 3.7736, - "step": 612500 - }, - { - "epoch": 2.68, - "learning_rate": 3.5316973145177974e-06, - "loss": 3.7541, - "step": 613000 - }, - { - "epoch": 2.68, - "learning_rate": 3.526196982297731e-06, - "loss": 3.7547, - "step": 613500 - }, - { - "epoch": 2.69, - "learning_rate": 3.5206966500776652e-06, - "loss": 3.7471, - "step": 614000 - }, - { - "epoch": 2.69, - "learning_rate": 3.5151963178575987e-06, - "loss": 3.7696, - "step": 614500 - }, - { - "epoch": 2.69, - "learning_rate": 3.509695985637533e-06, - "loss": 3.7523, - "step": 615000 - }, - { - "epoch": 2.69, - "learning_rate": 3.5041956534174666e-06, - "loss": 3.7594, - "step": 615500 - }, - { - "epoch": 2.7, - "learning_rate": 3.4986953211974005e-06, - "loss": 3.7387, - "step": 616000 - }, - { - "epoch": 2.7, - "learning_rate": 3.493194988977335e-06, - "loss": 3.7519, - "step": 616500 - }, - { - "epoch": 2.7, - "learning_rate": 3.4876946567572684e-06, - "loss": 3.7412, - "step": 617000 - }, - { - "epoch": 2.7, - "learning_rate": 3.4821943245372023e-06, - "loss": 3.7378, - "step": 617500 - }, - { - "epoch": 2.7, - "learning_rate": 3.4766939923171362e-06, - "loss": 3.7344, - "step": 618000 - }, - { - "epoch": 2.71, - "learning_rate": 3.47119366009707e-06, - "loss": 3.7464, - "step": 618500 - }, - { - "epoch": 2.71, - "learning_rate": 3.4656933278770037e-06, - "loss": 3.7491, - "step": 619000 - }, - { - "epoch": 2.71, - "learning_rate": 3.460192995656938e-06, - "loss": 3.7542, - "step": 619500 - }, - { - "epoch": 2.71, - "learning_rate": 3.4546926634368715e-06, - "loss": 3.743, - "step": 620000 - }, - { - "epoch": 2.72, - "learning_rate": 3.449192331216806e-06, - "loss": 3.7351, - "step": 620500 - }, - { - "epoch": 2.72, - "learning_rate": 3.44369199899674e-06, - "loss": 3.7342, - "step": 621000 - }, - { - "epoch": 2.72, - "learning_rate": 3.4381916667766733e-06, - "loss": 3.7416, - "step": 621500 - }, - { - "epoch": 2.72, - "learning_rate": 3.4326913345566077e-06, - "loss": 3.7543, - "step": 622000 - }, - { - "epoch": 2.72, - "learning_rate": 3.427191002336541e-06, - "loss": 3.7338, - "step": 622500 - }, - { - "epoch": 2.73, - "learning_rate": 3.4216906701164755e-06, - "loss": 3.7325, - "step": 623000 - }, - { - "epoch": 2.73, - "learning_rate": 3.416190337896409e-06, - "loss": 3.7203, - "step": 623500 - }, - { - "epoch": 2.73, - "learning_rate": 3.410690005676343e-06, - "loss": 3.7379, - "step": 624000 - }, - { - "epoch": 2.73, - "learning_rate": 3.4051896734562773e-06, - "loss": 3.7296, - "step": 624500 - }, - { - "epoch": 2.74, - "learning_rate": 3.399689341236211e-06, - "loss": 3.7402, - "step": 625000 - }, - { - "epoch": 2.74, - "learning_rate": 3.394189009016145e-06, - "loss": 3.7209, - "step": 625500 - }, - { - "epoch": 2.74, - "learning_rate": 3.3886886767960787e-06, - "loss": 3.7284, - "step": 626000 - }, - { - "epoch": 2.74, - "learning_rate": 3.3831883445760126e-06, - "loss": 3.7234, - "step": 626500 - }, - { - "epoch": 2.74, - "learning_rate": 3.3776880123559465e-06, - "loss": 3.7357, - "step": 627000 - }, - { - "epoch": 2.75, - "learning_rate": 3.3721876801358805e-06, - "loss": 3.7248, - "step": 627500 - }, - { - "epoch": 2.75, - "learning_rate": 3.366687347915815e-06, - "loss": 3.7242, - "step": 628000 - }, - { - "epoch": 2.75, - "learning_rate": 3.3611870156957483e-06, - "loss": 3.7506, - "step": 628500 - }, - { - "epoch": 2.75, - "learning_rate": 3.3556866834756823e-06, - "loss": 3.7226, - "step": 629000 - }, - { - "epoch": 2.75, - "learning_rate": 3.350186351255616e-06, - "loss": 3.7389, - "step": 629500 - }, - { - "epoch": 2.76, - "learning_rate": 3.34468601903555e-06, - "loss": 3.737, - "step": 630000 - }, - { - "epoch": 2.76, - "learning_rate": 3.3391856868154836e-06, - "loss": 3.7277, - "step": 630500 - }, - { - "epoch": 2.76, - "learning_rate": 3.333685354595418e-06, - "loss": 3.7262, - "step": 631000 - }, - { - "epoch": 2.76, - "learning_rate": 3.328185022375352e-06, - "loss": 3.7151, - "step": 631500 - }, - { - "epoch": 2.77, - "learning_rate": 3.3226846901552854e-06, - "loss": 3.7277, - "step": 632000 - }, - { - "epoch": 2.77, - "learning_rate": 3.3171843579352198e-06, - "loss": 3.753, - "step": 632500 - }, - { - "epoch": 2.77, - "learning_rate": 3.3116840257151533e-06, - "loss": 3.7222, - "step": 633000 - }, - { - "epoch": 2.77, - "learning_rate": 3.3061836934950876e-06, - "loss": 3.7188, - "step": 633500 - }, - { - "epoch": 2.77, - "learning_rate": 3.300683361275021e-06, - "loss": 3.722, - "step": 634000 - }, - { - "epoch": 2.78, - "learning_rate": 3.295183029054955e-06, - "loss": 3.7498, - "step": 634500 - }, - { - "epoch": 2.78, - "learning_rate": 3.289682696834889e-06, - "loss": 3.7312, - "step": 635000 - }, - { - "epoch": 2.78, - "learning_rate": 3.284182364614823e-06, - "loss": 3.7182, - "step": 635500 - }, - { - "epoch": 2.78, - "learning_rate": 3.2786820323947573e-06, - "loss": 3.7401, - "step": 636000 - }, - { - "epoch": 2.79, - "learning_rate": 3.2731817001746908e-06, - "loss": 3.7245, - "step": 636500 - }, - { - "epoch": 2.79, - "learning_rate": 3.2676813679546247e-06, - "loss": 3.7145, - "step": 637000 - }, - { - "epoch": 2.79, - "learning_rate": 3.2621810357345586e-06, - "loss": 3.729, - "step": 637500 - }, - { - "epoch": 2.79, - "learning_rate": 3.2566807035144925e-06, - "loss": 3.7488, - "step": 638000 - }, - { - "epoch": 2.79, - "learning_rate": 3.251180371294426e-06, - "loss": 3.7329, - "step": 638500 - }, - { - "epoch": 2.8, - "learning_rate": 3.2456800390743604e-06, - "loss": 3.7121, - "step": 639000 - }, - { - "epoch": 2.8, - "learning_rate": 3.2401797068542943e-06, - "loss": 3.719, - "step": 639500 - }, - { - "epoch": 2.8, - "learning_rate": 3.2346793746342283e-06, - "loss": 3.7277, - "step": 640000 - }, - { - "epoch": 2.8, - "learning_rate": 3.229179042414162e-06, - "loss": 3.7167, - "step": 640500 - }, - { - "epoch": 2.81, - "learning_rate": 3.2236787101940957e-06, - "loss": 3.7119, - "step": 641000 - }, - { - "epoch": 2.81, - "learning_rate": 3.21817837797403e-06, - "loss": 3.7478, - "step": 641500 - }, - { - "epoch": 2.81, - "learning_rate": 3.2126780457539636e-06, - "loss": 3.7177, - "step": 642000 - }, - { - "epoch": 2.81, - "learning_rate": 3.207177713533898e-06, - "loss": 3.7032, - "step": 642500 - }, - { - "epoch": 2.81, - "learning_rate": 3.201677381313832e-06, - "loss": 3.7161, - "step": 643000 - }, - { - "epoch": 2.82, - "learning_rate": 3.1961770490937653e-06, - "loss": 3.7241, - "step": 643500 - }, - { - "epoch": 2.82, - "learning_rate": 3.1906767168736997e-06, - "loss": 3.7244, - "step": 644000 - }, - { - "epoch": 2.82, - "learning_rate": 3.185176384653633e-06, - "loss": 3.7248, - "step": 644500 - }, - { - "epoch": 2.82, - "learning_rate": 3.1796760524335676e-06, - "loss": 3.7437, - "step": 645000 - }, - { - "epoch": 2.82, - "learning_rate": 3.174175720213501e-06, - "loss": 3.7532, - "step": 645500 - }, - { - "epoch": 2.83, - "learning_rate": 3.168675387993435e-06, - "loss": 3.7164, - "step": 646000 - }, - { - "epoch": 2.83, - "learning_rate": 3.1631750557733685e-06, - "loss": 3.7146, - "step": 646500 - }, - { - "epoch": 2.83, - "learning_rate": 3.157674723553303e-06, - "loss": 3.7108, - "step": 647000 - }, - { - "epoch": 2.83, - "learning_rate": 3.152174391333237e-06, - "loss": 3.6925, - "step": 647500 - }, - { - "epoch": 2.84, - "learning_rate": 3.1466740591131707e-06, - "loss": 3.7028, - "step": 648000 - }, - { - "epoch": 2.84, - "learning_rate": 3.1411737268931046e-06, - "loss": 3.721, - "step": 648500 - }, - { - "epoch": 2.84, - "learning_rate": 3.135673394673038e-06, - "loss": 3.7131, - "step": 649000 - }, - { - "epoch": 2.84, - "learning_rate": 3.1301730624529725e-06, - "loss": 3.7004, - "step": 649500 - }, - { - "epoch": 2.84, - "learning_rate": 3.124672730232906e-06, - "loss": 3.7099, - "step": 650000 - }, - { - "epoch": 2.85, - "learning_rate": 3.1191723980128403e-06, - "loss": 3.7139, - "step": 650500 - }, - { - "epoch": 2.85, - "learning_rate": 3.1136720657927743e-06, - "loss": 3.7192, - "step": 651000 - }, - { - "epoch": 2.85, - "learning_rate": 3.1081717335727078e-06, - "loss": 3.7072, - "step": 651500 - }, - { - "epoch": 2.85, - "learning_rate": 3.102671401352642e-06, - "loss": 3.7057, - "step": 652000 - }, - { - "epoch": 2.86, - "learning_rate": 3.0971710691325756e-06, - "loss": 3.7074, - "step": 652500 - }, - { - "epoch": 2.86, - "learning_rate": 3.09167073691251e-06, - "loss": 3.6901, - "step": 653000 - }, - { - "epoch": 2.86, - "learning_rate": 3.0861704046924435e-06, - "loss": 3.7143, - "step": 653500 - }, - { - "epoch": 2.86, - "learning_rate": 3.0806700724723774e-06, - "loss": 3.7008, - "step": 654000 - }, - { - "epoch": 2.86, - "learning_rate": 3.0751697402523118e-06, - "loss": 3.7157, - "step": 654500 - }, - { - "epoch": 2.87, - "learning_rate": 3.0696694080322453e-06, - "loss": 3.7042, - "step": 655000 - }, - { - "epoch": 2.87, - "learning_rate": 3.0641690758121796e-06, - "loss": 3.7275, - "step": 655500 - }, - { - "epoch": 2.87, - "learning_rate": 3.058668743592113e-06, - "loss": 3.7057, - "step": 656000 - }, - { - "epoch": 2.87, - "learning_rate": 3.053168411372047e-06, - "loss": 3.6993, - "step": 656500 - }, - { - "epoch": 2.88, - "learning_rate": 3.047668079151981e-06, - "loss": 3.6943, - "step": 657000 - }, - { - "epoch": 2.88, - "learning_rate": 3.042167746931915e-06, - "loss": 3.7124, - "step": 657500 - }, - { - "epoch": 2.88, - "learning_rate": 3.0366674147118493e-06, - "loss": 3.7149, - "step": 658000 - }, - { - "epoch": 2.88, - "learning_rate": 3.0311670824917828e-06, - "loss": 3.7134, - "step": 658500 - }, - { - "epoch": 2.88, - "learning_rate": 3.0256667502717167e-06, - "loss": 3.7186, - "step": 659000 - }, - { - "epoch": 2.89, - "learning_rate": 3.0201664180516506e-06, - "loss": 3.698, - "step": 659500 - }, - { - "epoch": 2.89, - "learning_rate": 3.0146660858315846e-06, - "loss": 3.7098, - "step": 660000 - }, - { - "epoch": 2.89, - "learning_rate": 3.009165753611518e-06, - "loss": 3.6881, - "step": 660500 - }, - { - "epoch": 2.89, - "learning_rate": 3.0036654213914524e-06, - "loss": 3.697, - "step": 661000 - }, - { - "epoch": 2.89, - "learning_rate": 2.998165089171386e-06, - "loss": 3.6947, - "step": 661500 - }, - { - "epoch": 2.9, - "learning_rate": 2.9926647569513203e-06, - "loss": 3.7003, - "step": 662000 - }, - { - "epoch": 2.9, - "learning_rate": 2.9871644247312542e-06, - "loss": 3.6983, - "step": 662500 - }, - { - "epoch": 2.9, - "learning_rate": 2.9816640925111877e-06, - "loss": 3.695, - "step": 663000 - }, - { - "epoch": 2.9, - "learning_rate": 2.976163760291122e-06, - "loss": 3.6907, - "step": 663500 - }, - { - "epoch": 2.91, - "learning_rate": 2.9706634280710556e-06, - "loss": 3.7034, - "step": 664000 - }, - { - "epoch": 2.91, - "learning_rate": 2.9651630958509895e-06, - "loss": 3.6857, - "step": 664500 - }, - { - "epoch": 2.91, - "learning_rate": 2.9596627636309234e-06, - "loss": 3.7105, - "step": 665000 - }, - { - "epoch": 2.91, - "learning_rate": 2.9541624314108574e-06, - "loss": 3.7165, - "step": 665500 - }, - { - "epoch": 2.91, - "learning_rate": 2.9486620991907917e-06, - "loss": 3.6927, - "step": 666000 - }, - { - "epoch": 2.92, - "learning_rate": 2.9431617669707252e-06, - "loss": 3.7015, - "step": 666500 - }, - { - "epoch": 2.92, - "learning_rate": 2.937661434750659e-06, - "loss": 3.6991, - "step": 667000 - }, - { - "epoch": 2.92, - "learning_rate": 2.932161102530593e-06, - "loss": 3.6869, - "step": 667500 - }, - { - "epoch": 2.92, - "learning_rate": 2.926660770310527e-06, - "loss": 3.684, - "step": 668000 - }, - { - "epoch": 2.93, - "learning_rate": 2.9211604380904605e-06, - "loss": 3.7013, - "step": 668500 - }, - { - "epoch": 2.93, - "learning_rate": 2.915660105870395e-06, - "loss": 3.6768, - "step": 669000 - }, - { - "epoch": 2.93, - "learning_rate": 2.910159773650329e-06, - "loss": 3.7044, - "step": 669500 - }, - { - "epoch": 2.93, - "learning_rate": 2.9046594414302627e-06, - "loss": 3.7081, - "step": 670000 - }, - { - "epoch": 2.93, - "learning_rate": 2.8991591092101967e-06, - "loss": 3.702, - "step": 670500 - }, - { - "epoch": 2.94, - "learning_rate": 2.89365877699013e-06, - "loss": 3.6981, - "step": 671000 - }, - { - "epoch": 2.94, - "learning_rate": 2.8881584447700645e-06, - "loss": 3.6725, - "step": 671500 - }, - { - "epoch": 2.94, - "learning_rate": 2.882658112549998e-06, - "loss": 3.6838, - "step": 672000 - }, - { - "epoch": 2.94, - "learning_rate": 2.8771577803299324e-06, - "loss": 3.7048, - "step": 672500 - }, - { - "epoch": 2.95, - "learning_rate": 2.871657448109866e-06, - "loss": 3.6937, - "step": 673000 - }, - { - "epoch": 2.95, - "learning_rate": 2.8661571158898e-06, - "loss": 3.6923, - "step": 673500 - }, - { - "epoch": 2.95, - "learning_rate": 2.860656783669734e-06, - "loss": 3.6854, - "step": 674000 - }, - { - "epoch": 2.95, - "learning_rate": 2.8551564514496677e-06, - "loss": 3.6848, - "step": 674500 - }, - { - "epoch": 2.95, - "learning_rate": 2.849656119229602e-06, - "loss": 3.6736, - "step": 675000 - }, - { - "epoch": 2.96, - "learning_rate": 2.8441557870095355e-06, - "loss": 3.6957, - "step": 675500 - }, - { - "epoch": 2.96, - "learning_rate": 2.8386554547894694e-06, - "loss": 3.6843, - "step": 676000 - }, - { - "epoch": 2.96, - "learning_rate": 2.8331551225694034e-06, - "loss": 3.6824, - "step": 676500 - }, - { - "epoch": 2.96, - "learning_rate": 2.8276547903493373e-06, - "loss": 3.6942, - "step": 677000 - }, - { - "epoch": 2.96, - "learning_rate": 2.8221544581292717e-06, - "loss": 3.6742, - "step": 677500 - }, - { - "epoch": 2.97, - "learning_rate": 2.816654125909205e-06, - "loss": 3.6756, - "step": 678000 - }, - { - "epoch": 2.97, - "learning_rate": 2.811153793689139e-06, - "loss": 3.6754, - "step": 678500 - }, - { - "epoch": 2.97, - "learning_rate": 2.8056534614690726e-06, - "loss": 3.6676, - "step": 679000 - }, - { - "epoch": 2.97, - "learning_rate": 2.800153129249007e-06, - "loss": 3.6645, - "step": 679500 - }, - { - "epoch": 2.98, - "learning_rate": 2.7946527970289405e-06, - "loss": 3.6895, - "step": 680000 - }, - { - "epoch": 2.98, - "learning_rate": 2.789152464808875e-06, - "loss": 3.6723, - "step": 680500 - }, - { - "epoch": 2.98, - "learning_rate": 2.7836521325888087e-06, - "loss": 3.6766, - "step": 681000 - }, - { - "epoch": 2.98, - "learning_rate": 2.7781518003687422e-06, - "loss": 3.6747, - "step": 681500 - }, - { - "epoch": 2.98, - "learning_rate": 2.7726514681486766e-06, - "loss": 3.6776, - "step": 682000 - }, - { - "epoch": 2.99, - "learning_rate": 2.76715113592861e-06, - "loss": 3.6917, - "step": 682500 - }, - { - "epoch": 2.99, - "learning_rate": 2.7616508037085444e-06, - "loss": 3.6874, - "step": 683000 - }, - { - "epoch": 2.99, - "learning_rate": 2.756150471488478e-06, - "loss": 3.6866, - "step": 683500 - }, - { - "epoch": 2.99, - "learning_rate": 2.750650139268412e-06, - "loss": 3.6723, - "step": 684000 - }, - { - "epoch": 3.0, - "learning_rate": 2.7451498070483462e-06, - "loss": 3.6642, - "step": 684500 - }, - { - "epoch": 3.0, - "learning_rate": 2.7396494748282797e-06, - "loss": 3.6796, - "step": 685000 - }, - { - "epoch": 3.0, - "learning_rate": 2.734149142608214e-06, - "loss": 3.6819, - "step": 685500 - }, - { - "epoch": 3.0, - "learning_rate": 2.7286488103881476e-06, - "loss": 3.6769, - "step": 686000 - }, - { - "epoch": 3.0, - "learning_rate": 2.7231484781680815e-06, - "loss": 3.6762, - "step": 686500 - }, - { - "epoch": 3.01, - "learning_rate": 2.7176481459480155e-06, - "loss": 3.6525, - "step": 687000 - }, - { - "epoch": 3.01, - "learning_rate": 2.7121478137279494e-06, - "loss": 3.6849, - "step": 687500 - }, - { - "epoch": 3.01, - "learning_rate": 2.706647481507883e-06, - "loss": 3.659, - "step": 688000 - }, - { - "epoch": 3.01, - "learning_rate": 2.7011471492878172e-06, - "loss": 3.6637, - "step": 688500 - }, - { - "epoch": 3.02, - "learning_rate": 2.695646817067751e-06, - "loss": 3.6884, - "step": 689000 - }, - { - "epoch": 3.02, - "learning_rate": 2.690146484847685e-06, - "loss": 3.6698, - "step": 689500 - }, - { - "epoch": 3.02, - "learning_rate": 2.684646152627619e-06, - "loss": 3.665, - "step": 690000 - }, - { - "epoch": 3.02, - "learning_rate": 2.6791458204075525e-06, - "loss": 3.6518, - "step": 690500 - }, - { - "epoch": 3.02, - "learning_rate": 2.673645488187487e-06, - "loss": 3.6717, - "step": 691000 - }, - { - "epoch": 3.03, - "learning_rate": 2.6681451559674204e-06, - "loss": 3.6727, - "step": 691500 - }, - { - "epoch": 3.03, - "learning_rate": 2.6626448237473547e-06, - "loss": 3.6653, - "step": 692000 - }, - { - "epoch": 3.03, - "learning_rate": 2.6571444915272887e-06, - "loss": 3.6758, - "step": 692500 - }, - { - "epoch": 3.03, - "learning_rate": 2.651644159307222e-06, - "loss": 3.6462, - "step": 693000 - }, - { - "epoch": 3.03, - "learning_rate": 2.6461438270871565e-06, - "loss": 3.666, - "step": 693500 - }, - { - "epoch": 3.04, - "learning_rate": 2.64064349486709e-06, - "loss": 3.6677, - "step": 694000 - }, - { - "epoch": 3.04, - "learning_rate": 2.6351431626470244e-06, - "loss": 3.6593, - "step": 694500 - }, - { - "epoch": 3.04, - "learning_rate": 2.629642830426958e-06, - "loss": 3.6507, - "step": 695000 - }, - { - "epoch": 3.04, - "learning_rate": 2.624142498206892e-06, - "loss": 3.682, - "step": 695500 - }, - { - "epoch": 3.05, - "learning_rate": 2.618642165986826e-06, - "loss": 3.6746, - "step": 696000 - }, - { - "epoch": 3.05, - "learning_rate": 2.6131418337667597e-06, - "loss": 3.6697, - "step": 696500 - }, - { - "epoch": 3.05, - "learning_rate": 2.607641501546694e-06, - "loss": 3.6692, - "step": 697000 - }, - { - "epoch": 3.05, - "learning_rate": 2.6021411693266275e-06, - "loss": 3.6647, - "step": 697500 - }, - { - "epoch": 3.05, - "learning_rate": 2.5966408371065615e-06, - "loss": 3.6514, - "step": 698000 - }, - { - "epoch": 3.06, - "learning_rate": 2.591140504886495e-06, - "loss": 3.6857, - "step": 698500 - }, - { - "epoch": 3.06, - "learning_rate": 2.5856401726664293e-06, - "loss": 3.642, - "step": 699000 - }, - { - "epoch": 3.06, - "learning_rate": 2.580139840446363e-06, - "loss": 3.6616, - "step": 699500 - }, - { - "epoch": 3.06, - "learning_rate": 2.574639508226297e-06, - "loss": 3.6542, - "step": 700000 - }, - { - "epoch": 3.07, - "learning_rate": 2.569139176006231e-06, - "loss": 3.6593, - "step": 700500 - }, - { - "epoch": 3.07, - "learning_rate": 2.5636388437861646e-06, - "loss": 3.6587, - "step": 701000 - }, - { - "epoch": 3.07, - "learning_rate": 2.558138511566099e-06, - "loss": 3.6669, - "step": 701500 - }, - { - "epoch": 3.07, - "learning_rate": 2.5526381793460325e-06, - "loss": 3.6729, - "step": 702000 - }, - { - "epoch": 3.07, - "learning_rate": 2.547137847125967e-06, - "loss": 3.6637, - "step": 702500 - }, - { - "epoch": 3.08, - "learning_rate": 2.5416375149059003e-06, - "loss": 3.6729, - "step": 703000 - }, - { - "epoch": 3.08, - "learning_rate": 2.5361371826858343e-06, - "loss": 3.6586, - "step": 703500 - }, - { - "epoch": 3.08, - "learning_rate": 2.5306368504657686e-06, - "loss": 3.6552, - "step": 704000 - }, - { - "epoch": 3.08, - "learning_rate": 2.525136518245702e-06, - "loss": 3.6681, - "step": 704500 - }, - { - "epoch": 3.09, - "learning_rate": 2.5196361860256365e-06, - "loss": 3.6506, - "step": 705000 - }, - { - "epoch": 3.09, - "learning_rate": 2.51413585380557e-06, - "loss": 3.6672, - "step": 705500 - }, - { - "epoch": 3.09, - "learning_rate": 2.508635521585504e-06, - "loss": 3.6543, - "step": 706000 - }, - { - "epoch": 3.09, - "learning_rate": 2.503135189365438e-06, - "loss": 3.6552, - "step": 706500 - }, - { - "epoch": 3.09, - "learning_rate": 2.4976348571453718e-06, - "loss": 3.66, - "step": 707000 - }, - { - "epoch": 3.1, - "learning_rate": 2.4921345249253057e-06, - "loss": 3.6742, - "step": 707500 - }, - { - "epoch": 3.1, - "learning_rate": 2.4866341927052396e-06, - "loss": 3.6776, - "step": 708000 - }, - { - "epoch": 3.1, - "learning_rate": 2.4811338604851735e-06, - "loss": 3.6334, - "step": 708500 - }, - { - "epoch": 3.1, - "learning_rate": 2.4756335282651075e-06, - "loss": 3.6503, - "step": 709000 - }, - { - "epoch": 3.1, - "learning_rate": 2.4701331960450414e-06, - "loss": 3.6462, - "step": 709500 - }, - { - "epoch": 3.11, - "learning_rate": 2.4646328638249753e-06, - "loss": 3.6595, - "step": 710000 - }, - { - "epoch": 3.11, - "learning_rate": 2.4591325316049093e-06, - "loss": 3.6479, - "step": 710500 - }, - { - "epoch": 3.11, - "learning_rate": 2.453632199384843e-06, - "loss": 3.6565, - "step": 711000 - }, - { - "epoch": 3.11, - "learning_rate": 2.448131867164777e-06, - "loss": 3.6369, - "step": 711500 - }, - { - "epoch": 3.12, - "learning_rate": 2.4426315349447106e-06, - "loss": 3.6472, - "step": 712000 - }, - { - "epoch": 3.12, - "learning_rate": 2.4371312027246446e-06, - "loss": 3.6586, - "step": 712500 - }, - { - "epoch": 3.12, - "learning_rate": 2.431630870504579e-06, - "loss": 3.6407, - "step": 713000 - }, - { - "epoch": 3.12, - "learning_rate": 2.426130538284513e-06, - "loss": 3.6419, - "step": 713500 - }, - { - "epoch": 3.12, - "learning_rate": 2.4206302060644463e-06, - "loss": 3.656, - "step": 714000 - }, - { - "epoch": 3.13, - "learning_rate": 2.4151298738443803e-06, - "loss": 3.6574, - "step": 714500 - }, - { - "epoch": 3.13, - "learning_rate": 2.409629541624314e-06, - "loss": 3.6627, - "step": 715000 - }, - { - "epoch": 3.13, - "learning_rate": 2.404129209404248e-06, - "loss": 3.6533, - "step": 715500 - }, - { - "epoch": 3.13, - "learning_rate": 2.398628877184182e-06, - "loss": 3.6598, - "step": 716000 - }, - { - "epoch": 3.14, - "learning_rate": 2.393128544964116e-06, - "loss": 3.6383, - "step": 716500 - }, - { - "epoch": 3.14, - "learning_rate": 2.38762821274405e-06, - "loss": 3.6495, - "step": 717000 - }, - { - "epoch": 3.14, - "learning_rate": 2.382127880523984e-06, - "loss": 3.6403, - "step": 717500 - }, - { - "epoch": 3.14, - "learning_rate": 2.3766275483039178e-06, - "loss": 3.6252, - "step": 718000 - }, - { - "epoch": 3.14, - "learning_rate": 2.3711272160838517e-06, - "loss": 3.6393, - "step": 718500 - }, - { - "epoch": 3.15, - "learning_rate": 2.3656268838637856e-06, - "loss": 3.6445, - "step": 719000 - }, - { - "epoch": 3.15, - "learning_rate": 2.3601265516437196e-06, - "loss": 3.6524, - "step": 719500 - }, - { - "epoch": 3.15, - "learning_rate": 2.354626219423653e-06, - "loss": 3.6464, - "step": 720000 - }, - { - "epoch": 3.15, - "learning_rate": 2.3491258872035874e-06, - "loss": 3.6339, - "step": 720500 - }, - { - "epoch": 3.16, - "learning_rate": 2.3436255549835213e-06, - "loss": 3.6574, - "step": 721000 - }, - { - "epoch": 3.16, - "learning_rate": 2.3381252227634553e-06, - "loss": 3.6531, - "step": 721500 - }, - { - "epoch": 3.16, - "learning_rate": 2.332624890543389e-06, - "loss": 3.6396, - "step": 722000 - }, - { - "epoch": 3.16, - "learning_rate": 2.3271245583233227e-06, - "loss": 3.6356, - "step": 722500 - }, - { - "epoch": 3.16, - "learning_rate": 2.3216242261032566e-06, - "loss": 3.657, - "step": 723000 - }, - { - "epoch": 3.17, - "learning_rate": 2.3161238938831906e-06, - "loss": 3.6506, - "step": 723500 - }, - { - "epoch": 3.17, - "learning_rate": 2.3106235616631245e-06, - "loss": 3.6428, - "step": 724000 - }, - { - "epoch": 3.17, - "learning_rate": 2.305123229443059e-06, - "loss": 3.6326, - "step": 724500 - }, - { - "epoch": 3.17, - "learning_rate": 2.2996228972229924e-06, - "loss": 3.6471, - "step": 725000 - }, - { - "epoch": 3.17, - "learning_rate": 2.2941225650029263e-06, - "loss": 3.6425, - "step": 725500 - }, - { - "epoch": 3.18, - "learning_rate": 2.28862223278286e-06, - "loss": 3.6469, - "step": 726000 - }, - { - "epoch": 3.18, - "learning_rate": 2.283121900562794e-06, - "loss": 3.624, - "step": 726500 - }, - { - "epoch": 3.18, - "learning_rate": 2.277621568342728e-06, - "loss": 3.6305, - "step": 727000 - }, - { - "epoch": 3.18, - "learning_rate": 2.272121236122662e-06, - "loss": 3.6303, - "step": 727500 - }, - { - "epoch": 3.19, - "learning_rate": 2.266620903902596e-06, - "loss": 3.6412, - "step": 728000 - }, - { - "epoch": 3.19, - "learning_rate": 2.26112057168253e-06, - "loss": 3.6278, - "step": 728500 - }, - { - "epoch": 3.19, - "learning_rate": 2.2556202394624638e-06, - "loss": 3.6592, - "step": 729000 - }, - { - "epoch": 3.19, - "learning_rate": 2.2501199072423977e-06, - "loss": 3.6277, - "step": 729500 - }, - { - "epoch": 3.19, - "learning_rate": 2.2446195750223316e-06, - "loss": 3.6473, - "step": 730000 - }, - { - "epoch": 3.2, - "learning_rate": 2.2391192428022656e-06, - "loss": 3.6472, - "step": 730500 - }, - { - "epoch": 3.2, - "learning_rate": 2.233618910582199e-06, - "loss": 3.6361, - "step": 731000 - }, - { - "epoch": 3.2, - "learning_rate": 2.228118578362133e-06, - "loss": 3.6418, - "step": 731500 - }, - { - "epoch": 3.2, - "learning_rate": 2.2226182461420674e-06, - "loss": 3.6541, - "step": 732000 - }, - { - "epoch": 3.21, - "learning_rate": 2.2171179139220013e-06, - "loss": 3.6382, - "step": 732500 - }, - { - "epoch": 3.21, - "learning_rate": 2.211617581701935e-06, - "loss": 3.6565, - "step": 733000 - }, - { - "epoch": 3.21, - "learning_rate": 2.2061172494818687e-06, - "loss": 3.6152, - "step": 733500 - }, - { - "epoch": 3.21, - "learning_rate": 2.2006169172618026e-06, - "loss": 3.6281, - "step": 734000 - }, - { - "epoch": 3.21, - "learning_rate": 2.1951165850417366e-06, - "loss": 3.6262, - "step": 734500 - }, - { - "epoch": 3.22, - "learning_rate": 2.1896162528216705e-06, - "loss": 3.6217, - "step": 735000 - }, - { - "epoch": 3.22, - "learning_rate": 2.1841159206016044e-06, - "loss": 3.6567, - "step": 735500 - }, - { - "epoch": 3.22, - "learning_rate": 2.1786155883815384e-06, - "loss": 3.6587, - "step": 736000 - }, - { - "epoch": 3.22, - "learning_rate": 2.1731152561614723e-06, - "loss": 3.6168, - "step": 736500 - }, - { - "epoch": 3.23, - "learning_rate": 2.1676149239414062e-06, - "loss": 3.6255, - "step": 737000 - }, - { - "epoch": 3.23, - "learning_rate": 2.16211459172134e-06, - "loss": 3.6383, - "step": 737500 - }, - { - "epoch": 3.23, - "learning_rate": 2.156614259501274e-06, - "loss": 3.6205, - "step": 738000 - }, - { - "epoch": 3.23, - "learning_rate": 2.151113927281208e-06, - "loss": 3.6206, - "step": 738500 - }, - { - "epoch": 3.23, - "learning_rate": 2.145613595061142e-06, - "loss": 3.6552, - "step": 739000 - }, - { - "epoch": 3.24, - "learning_rate": 2.140113262841076e-06, - "loss": 3.6309, - "step": 739500 - }, - { - "epoch": 3.24, - "learning_rate": 2.13461293062101e-06, - "loss": 3.6272, - "step": 740000 - }, - { - "epoch": 3.24, - "learning_rate": 2.1291125984009437e-06, - "loss": 3.6102, - "step": 740500 - }, - { - "epoch": 3.24, - "learning_rate": 2.1236122661808776e-06, - "loss": 3.6113, - "step": 741000 - }, - { - "epoch": 3.24, - "learning_rate": 2.1181119339608116e-06, - "loss": 3.6356, - "step": 741500 - }, - { - "epoch": 3.25, - "learning_rate": 2.112611601740745e-06, - "loss": 3.6216, - "step": 742000 - }, - { - "epoch": 3.25, - "learning_rate": 2.107111269520679e-06, - "loss": 3.6297, - "step": 742500 - }, - { - "epoch": 3.25, - "learning_rate": 2.101610937300613e-06, - "loss": 3.6411, - "step": 743000 - }, - { - "epoch": 3.25, - "learning_rate": 2.0961106050805473e-06, - "loss": 3.6339, - "step": 743500 - }, - { - "epoch": 3.26, - "learning_rate": 2.0906102728604812e-06, - "loss": 3.6366, - "step": 744000 - }, - { - "epoch": 3.26, - "learning_rate": 2.0851099406404147e-06, - "loss": 3.6237, - "step": 744500 - }, - { - "epoch": 3.26, - "learning_rate": 2.0796096084203487e-06, - "loss": 3.6409, - "step": 745000 - }, - { - "epoch": 3.26, - "learning_rate": 2.0741092762002826e-06, - "loss": 3.6242, - "step": 745500 - }, - { - "epoch": 3.26, - "learning_rate": 2.0686089439802165e-06, - "loss": 3.6334, - "step": 746000 - }, - { - "epoch": 3.27, - "learning_rate": 2.0631086117601504e-06, - "loss": 3.6293, - "step": 746500 - }, - { - "epoch": 3.27, - "learning_rate": 2.0576082795400844e-06, - "loss": 3.621, - "step": 747000 - }, - { - "epoch": 3.27, - "learning_rate": 2.0521079473200183e-06, - "loss": 3.6453, - "step": 747500 - }, - { - "epoch": 3.27, - "learning_rate": 2.0466076150999522e-06, - "loss": 3.6199, - "step": 748000 - }, - { - "epoch": 3.28, - "learning_rate": 2.041107282879886e-06, - "loss": 3.622, - "step": 748500 - }, - { - "epoch": 3.28, - "learning_rate": 2.03560695065982e-06, - "loss": 3.6239, - "step": 749000 - }, - { - "epoch": 3.28, - "learning_rate": 2.030106618439754e-06, - "loss": 3.6204, - "step": 749500 - }, - { - "epoch": 3.28, - "learning_rate": 2.024606286219688e-06, - "loss": 3.6277, - "step": 750000 - }, - { - "epoch": 3.28, - "learning_rate": 2.0191059539996214e-06, - "loss": 3.6097, - "step": 750500 - }, - { - "epoch": 3.29, - "learning_rate": 2.013605621779556e-06, - "loss": 3.623, - "step": 751000 - }, - { - "epoch": 3.29, - "learning_rate": 2.0081052895594897e-06, - "loss": 3.6062, - "step": 751500 - }, - { - "epoch": 3.29, - "learning_rate": 2.0026049573394237e-06, - "loss": 3.6213, - "step": 752000 - }, - { - "epoch": 3.29, - "learning_rate": 1.9971046251193576e-06, - "loss": 3.6332, - "step": 752500 - }, - { - "epoch": 3.3, - "learning_rate": 1.991604292899291e-06, - "loss": 3.6231, - "step": 753000 - }, - { - "epoch": 3.3, - "learning_rate": 1.986103960679225e-06, - "loss": 3.6362, - "step": 753500 - }, - { - "epoch": 3.3, - "learning_rate": 1.980603628459159e-06, - "loss": 3.6255, - "step": 754000 - }, - { - "epoch": 3.3, - "learning_rate": 1.9751032962390933e-06, - "loss": 3.6273, - "step": 754500 - }, - { - "epoch": 3.3, - "learning_rate": 1.969602964019027e-06, - "loss": 3.6085, - "step": 755000 - }, - { - "epoch": 3.31, - "learning_rate": 1.9641026317989607e-06, - "loss": 3.6228, - "step": 755500 - }, - { - "epoch": 3.31, - "learning_rate": 1.9586022995788947e-06, - "loss": 3.6276, - "step": 756000 - }, - { - "epoch": 3.31, - "learning_rate": 1.9531019673588286e-06, - "loss": 3.6219, - "step": 756500 - }, - { - "epoch": 3.31, - "learning_rate": 1.9476016351387625e-06, - "loss": 3.6357, - "step": 757000 - }, - { - "epoch": 3.31, - "learning_rate": 1.9421013029186965e-06, - "loss": 3.6168, - "step": 757500 - }, - { - "epoch": 3.32, - "learning_rate": 1.9366009706986304e-06, - "loss": 3.6224, - "step": 758000 - }, - { - "epoch": 3.32, - "learning_rate": 1.9311006384785643e-06, - "loss": 3.6105, - "step": 758500 - }, - { - "epoch": 3.32, - "learning_rate": 1.9256003062584982e-06, - "loss": 3.6348, - "step": 759000 - }, - { - "epoch": 3.32, - "learning_rate": 1.920099974038432e-06, - "loss": 3.6333, - "step": 759500 - }, - { - "epoch": 3.33, - "learning_rate": 1.914599641818366e-06, - "loss": 3.6262, - "step": 760000 - }, - { - "epoch": 3.33, - "learning_rate": 1.9090993095983e-06, - "loss": 3.6144, - "step": 760500 - }, - { - "epoch": 3.33, - "learning_rate": 1.9035989773782337e-06, - "loss": 3.5976, - "step": 761000 - }, - { - "epoch": 3.33, - "learning_rate": 1.8980986451581677e-06, - "loss": 3.6118, - "step": 761500 - }, - { - "epoch": 3.33, - "learning_rate": 1.8925983129381014e-06, - "loss": 3.6193, - "step": 762000 - }, - { - "epoch": 3.34, - "learning_rate": 1.8870979807180355e-06, - "loss": 3.6339, - "step": 762500 - }, - { - "epoch": 3.34, - "learning_rate": 1.8815976484979695e-06, - "loss": 3.6026, - "step": 763000 - }, - { - "epoch": 3.34, - "learning_rate": 1.8760973162779034e-06, - "loss": 3.6367, - "step": 763500 - }, - { - "epoch": 3.34, - "learning_rate": 1.8705969840578373e-06, - "loss": 3.6108, - "step": 764000 - }, - { - "epoch": 3.35, - "learning_rate": 1.865096651837771e-06, - "loss": 3.5938, - "step": 764500 - }, - { - "epoch": 3.35, - "learning_rate": 1.859596319617705e-06, - "loss": 3.6145, - "step": 765000 - }, - { - "epoch": 3.35, - "learning_rate": 1.8540959873976389e-06, - "loss": 3.6213, - "step": 765500 - }, - { - "epoch": 3.35, - "learning_rate": 1.848595655177573e-06, - "loss": 3.6156, - "step": 766000 - }, - { - "epoch": 3.35, - "learning_rate": 1.843095322957507e-06, - "loss": 3.6202, - "step": 766500 - }, - { - "epoch": 3.36, - "learning_rate": 1.8375949907374407e-06, - "loss": 3.6116, - "step": 767000 - }, - { - "epoch": 3.36, - "learning_rate": 1.8320946585173746e-06, - "loss": 3.6031, - "step": 767500 - }, - { - "epoch": 3.36, - "learning_rate": 1.8265943262973085e-06, - "loss": 3.6284, - "step": 768000 - }, - { - "epoch": 3.36, - "learning_rate": 1.8210939940772423e-06, - "loss": 3.6116, - "step": 768500 - }, - { - "epoch": 3.37, - "learning_rate": 1.8155936618571762e-06, - "loss": 3.6036, - "step": 769000 - }, - { - "epoch": 3.37, - "learning_rate": 1.8100933296371101e-06, - "loss": 3.6161, - "step": 769500 - }, - { - "epoch": 3.37, - "learning_rate": 1.8045929974170442e-06, - "loss": 3.6137, - "step": 770000 - }, - { - "epoch": 3.37, - "learning_rate": 1.7990926651969782e-06, - "loss": 3.6098, - "step": 770500 - }, - { - "epoch": 3.37, - "learning_rate": 1.793592332976912e-06, - "loss": 3.6203, - "step": 771000 - }, - { - "epoch": 3.38, - "learning_rate": 1.7880920007568458e-06, - "loss": 3.6222, - "step": 771500 - }, - { - "epoch": 3.38, - "learning_rate": 1.7825916685367798e-06, - "loss": 3.6052, - "step": 772000 - }, - { - "epoch": 3.38, - "learning_rate": 1.7770913363167137e-06, - "loss": 3.6241, - "step": 772500 - }, - { - "epoch": 3.38, - "learning_rate": 1.7715910040966474e-06, - "loss": 3.6213, - "step": 773000 - }, - { - "epoch": 3.38, - "learning_rate": 1.7660906718765815e-06, - "loss": 3.5962, - "step": 773500 - }, - { - "epoch": 3.39, - "learning_rate": 1.7605903396565155e-06, - "loss": 3.6085, - "step": 774000 - }, - { - "epoch": 3.39, - "learning_rate": 1.7550900074364494e-06, - "loss": 3.59, - "step": 774500 - }, - { - "epoch": 3.39, - "learning_rate": 1.7495896752163833e-06, - "loss": 3.6154, - "step": 775000 - }, - { - "epoch": 3.39, - "learning_rate": 1.744089342996317e-06, - "loss": 3.5961, - "step": 775500 - }, - { - "epoch": 3.4, - "learning_rate": 1.738589010776251e-06, - "loss": 3.619, - "step": 776000 - }, - { - "epoch": 3.4, - "learning_rate": 1.733088678556185e-06, - "loss": 3.6108, - "step": 776500 - }, - { - "epoch": 3.4, - "learning_rate": 1.7275883463361186e-06, - "loss": 3.6124, - "step": 777000 - }, - { - "epoch": 3.4, - "learning_rate": 1.722088014116053e-06, - "loss": 3.6054, - "step": 777500 - }, - { - "epoch": 3.4, - "learning_rate": 1.7165876818959867e-06, - "loss": 3.6057, - "step": 778000 - }, - { - "epoch": 3.41, - "learning_rate": 1.7110873496759206e-06, - "loss": 3.5989, - "step": 778500 - }, - { - "epoch": 3.41, - "learning_rate": 1.7055870174558545e-06, - "loss": 3.6196, - "step": 779000 - }, - { - "epoch": 3.41, - "learning_rate": 1.7000866852357883e-06, - "loss": 3.6141, - "step": 779500 - }, - { - "epoch": 3.41, - "learning_rate": 1.6945863530157222e-06, - "loss": 3.6201, - "step": 780000 - }, - { - "epoch": 3.42, - "learning_rate": 1.6890860207956561e-06, - "loss": 3.6144, - "step": 780500 - }, - { - "epoch": 3.42, - "learning_rate": 1.6835856885755903e-06, - "loss": 3.6094, - "step": 781000 - }, - { - "epoch": 3.42, - "learning_rate": 1.6780853563555242e-06, - "loss": 3.5901, - "step": 781500 - }, - { - "epoch": 3.42, - "learning_rate": 1.672585024135458e-06, - "loss": 3.5945, - "step": 782000 - }, - { - "epoch": 3.42, - "learning_rate": 1.6670846919153918e-06, - "loss": 3.6066, - "step": 782500 - }, - { - "epoch": 3.43, - "learning_rate": 1.6615843596953258e-06, - "loss": 3.6179, - "step": 783000 - }, - { - "epoch": 3.43, - "learning_rate": 1.6560840274752597e-06, - "loss": 3.6198, - "step": 783500 - }, - { - "epoch": 3.43, - "learning_rate": 1.6505836952551934e-06, - "loss": 3.6033, - "step": 784000 - }, - { - "epoch": 3.43, - "learning_rate": 1.6450833630351273e-06, - "loss": 3.6041, - "step": 784500 - }, - { - "epoch": 3.44, - "learning_rate": 1.6395830308150615e-06, - "loss": 3.6131, - "step": 785000 - }, - { - "epoch": 3.44, - "learning_rate": 1.6340826985949954e-06, - "loss": 3.6208, - "step": 785500 - }, - { - "epoch": 3.44, - "learning_rate": 1.6285823663749291e-06, - "loss": 3.6218, - "step": 786000 - }, - { - "epoch": 3.44, - "learning_rate": 1.623082034154863e-06, - "loss": 3.6059, - "step": 786500 - }, - { - "epoch": 3.44, - "learning_rate": 1.617581701934797e-06, - "loss": 3.6209, - "step": 787000 - }, - { - "epoch": 3.45, - "learning_rate": 1.612081369714731e-06, - "loss": 3.5962, - "step": 787500 - }, - { - "epoch": 3.45, - "learning_rate": 1.6065810374946646e-06, - "loss": 3.6046, - "step": 788000 - }, - { - "epoch": 3.45, - "learning_rate": 1.6010807052745986e-06, - "loss": 3.6158, - "step": 788500 - }, - { - "epoch": 3.45, - "learning_rate": 1.5955803730545327e-06, - "loss": 3.5979, - "step": 789000 - }, - { - "epoch": 3.46, - "learning_rate": 1.5900800408344666e-06, - "loss": 3.6153, - "step": 789500 - }, - { - "epoch": 3.46, - "learning_rate": 1.5845797086144006e-06, - "loss": 3.5999, - "step": 790000 - }, - { - "epoch": 3.46, - "learning_rate": 1.5790793763943343e-06, - "loss": 3.6041, - "step": 790500 - }, - { - "epoch": 3.46, - "learning_rate": 1.5735790441742682e-06, - "loss": 3.6019, - "step": 791000 - }, - { - "epoch": 3.46, - "learning_rate": 1.5680787119542021e-06, - "loss": 3.5991, - "step": 791500 - }, - { - "epoch": 3.47, - "learning_rate": 1.562578379734136e-06, - "loss": 3.5955, - "step": 792000 - }, - { - "epoch": 3.47, - "learning_rate": 1.5570780475140702e-06, - "loss": 3.6217, - "step": 792500 - }, - { - "epoch": 3.47, - "learning_rate": 1.551577715294004e-06, - "loss": 3.6082, - "step": 793000 - }, - { - "epoch": 3.47, - "learning_rate": 1.5460773830739378e-06, - "loss": 3.6068, - "step": 793500 - }, - { - "epoch": 3.47, - "learning_rate": 1.5405770508538718e-06, - "loss": 3.6073, - "step": 794000 - }, - { - "epoch": 3.48, - "learning_rate": 1.5350767186338055e-06, - "loss": 3.61, - "step": 794500 - }, - { - "epoch": 3.48, - "learning_rate": 1.5295763864137394e-06, - "loss": 3.5978, - "step": 795000 - }, - { - "epoch": 3.48, - "learning_rate": 1.5240760541936733e-06, - "loss": 3.594, - "step": 795500 - }, - { - "epoch": 3.48, - "learning_rate": 1.5185757219736073e-06, - "loss": 3.5951, - "step": 796000 - }, - { - "epoch": 3.49, - "learning_rate": 1.5130753897535414e-06, - "loss": 3.5942, - "step": 796500 - }, - { - "epoch": 3.49, - "learning_rate": 1.5075750575334751e-06, - "loss": 3.578, - "step": 797000 - }, - { - "epoch": 3.49, - "learning_rate": 1.502074725313409e-06, - "loss": 3.5983, - "step": 797500 - }, - { - "epoch": 3.49, - "learning_rate": 1.496574393093343e-06, - "loss": 3.5995, - "step": 798000 - }, - { - "epoch": 3.49, - "learning_rate": 1.491074060873277e-06, - "loss": 3.5942, - "step": 798500 - }, - { - "epoch": 3.5, - "learning_rate": 1.4855737286532106e-06, - "loss": 3.6105, - "step": 799000 - }, - { - "epoch": 3.5, - "learning_rate": 1.4800733964331446e-06, - "loss": 3.6005, - "step": 799500 - }, - { - "epoch": 3.5, - "learning_rate": 1.4745730642130787e-06, - "loss": 3.5992, - "step": 800000 - }, - { - "epoch": 3.5, - "learning_rate": 1.4690727319930126e-06, - "loss": 3.6003, - "step": 800500 - }, - { - "epoch": 3.51, - "learning_rate": 1.4635723997729466e-06, - "loss": 3.5961, - "step": 801000 - }, - { - "epoch": 3.51, - "learning_rate": 1.4580720675528803e-06, - "loss": 3.5923, - "step": 801500 - }, - { - "epoch": 3.51, - "learning_rate": 1.4525717353328142e-06, - "loss": 3.5917, - "step": 802000 - }, - { - "epoch": 3.51, - "learning_rate": 1.4470714031127481e-06, - "loss": 3.5949, - "step": 802500 - }, - { - "epoch": 3.51, - "learning_rate": 1.4415710708926819e-06, - "loss": 3.5836, - "step": 803000 - }, - { - "epoch": 3.52, - "learning_rate": 1.4360707386726158e-06, - "loss": 3.6094, - "step": 803500 - }, - { - "epoch": 3.52, - "learning_rate": 1.43057040645255e-06, - "loss": 3.6073, - "step": 804000 - }, - { - "epoch": 3.52, - "learning_rate": 1.4250700742324839e-06, - "loss": 3.5858, - "step": 804500 - }, - { - "epoch": 3.52, - "learning_rate": 1.4195697420124178e-06, - "loss": 3.6001, - "step": 805000 - }, - { - "epoch": 3.53, - "learning_rate": 1.4140694097923515e-06, - "loss": 3.5929, - "step": 805500 - }, - { - "epoch": 3.53, - "learning_rate": 1.4085690775722854e-06, - "loss": 3.5922, - "step": 806000 - }, - { - "epoch": 3.53, - "learning_rate": 1.4030687453522194e-06, - "loss": 3.6161, - "step": 806500 - }, - { - "epoch": 3.53, - "learning_rate": 1.3975684131321533e-06, - "loss": 3.6097, - "step": 807000 - }, - { - "epoch": 3.53, - "learning_rate": 1.3920680809120874e-06, - "loss": 3.6172, - "step": 807500 - }, - { - "epoch": 3.54, - "learning_rate": 1.3865677486920211e-06, - "loss": 3.5946, - "step": 808000 - }, - { - "epoch": 3.54, - "learning_rate": 1.381067416471955e-06, - "loss": 3.6015, - "step": 808500 - }, - { - "epoch": 3.54, - "learning_rate": 1.375567084251889e-06, - "loss": 3.6125, - "step": 809000 - }, - { - "epoch": 3.54, - "learning_rate": 1.3700667520318227e-06, - "loss": 3.6061, - "step": 809500 - }, - { - "epoch": 3.54, - "learning_rate": 1.3645664198117566e-06, - "loss": 3.5902, - "step": 810000 - }, - { - "epoch": 3.55, - "learning_rate": 1.3590660875916906e-06, - "loss": 3.597, - "step": 810500 - }, - { - "epoch": 3.55, - "learning_rate": 1.3535657553716245e-06, - "loss": 3.6164, - "step": 811000 - }, - { - "epoch": 3.55, - "learning_rate": 1.3480654231515586e-06, - "loss": 3.6002, - "step": 811500 - }, - { - "epoch": 3.55, - "learning_rate": 1.3425650909314924e-06, - "loss": 3.6012, - "step": 812000 - }, - { - "epoch": 3.56, - "learning_rate": 1.3370647587114263e-06, - "loss": 3.5951, - "step": 812500 - }, - { - "epoch": 3.56, - "learning_rate": 1.3315644264913602e-06, - "loss": 3.5944, - "step": 813000 - }, - { - "epoch": 3.56, - "learning_rate": 1.3260640942712941e-06, - "loss": 3.6024, - "step": 813500 - }, - { - "epoch": 3.56, - "learning_rate": 1.3205637620512279e-06, - "loss": 3.5891, - "step": 814000 - }, - { - "epoch": 3.56, - "learning_rate": 1.3150634298311618e-06, - "loss": 3.5926, - "step": 814500 - }, - { - "epoch": 3.57, - "learning_rate": 1.3095630976110957e-06, - "loss": 3.5937, - "step": 815000 - }, - { - "epoch": 3.57, - "learning_rate": 1.3040627653910299e-06, - "loss": 3.5772, - "step": 815500 - }, - { - "epoch": 3.57, - "learning_rate": 1.2985624331709638e-06, - "loss": 3.5793, - "step": 816000 - }, - { - "epoch": 3.57, - "learning_rate": 1.2930621009508975e-06, - "loss": 3.5919, - "step": 816500 - }, - { - "epoch": 3.58, - "learning_rate": 1.2875617687308314e-06, - "loss": 3.6159, - "step": 817000 - }, - { - "epoch": 3.58, - "learning_rate": 1.2820614365107654e-06, - "loss": 3.5919, - "step": 817500 - }, - { - "epoch": 3.58, - "learning_rate": 1.276561104290699e-06, - "loss": 3.6001, - "step": 818000 - }, - { - "epoch": 3.58, - "learning_rate": 1.271060772070633e-06, - "loss": 3.613, - "step": 818500 - }, - { - "epoch": 3.58, - "learning_rate": 1.2655604398505672e-06, - "loss": 3.5816, - "step": 819000 - }, - { - "epoch": 3.59, - "learning_rate": 1.260060107630501e-06, - "loss": 3.6099, - "step": 819500 - }, - { - "epoch": 3.59, - "learning_rate": 1.254559775410435e-06, - "loss": 3.5974, - "step": 820000 - }, - { - "epoch": 3.59, - "learning_rate": 1.2490594431903687e-06, - "loss": 3.5986, - "step": 820500 - }, - { - "epoch": 3.59, - "learning_rate": 1.2435591109703027e-06, - "loss": 3.5901, - "step": 821000 - }, - { - "epoch": 3.6, - "learning_rate": 1.2380587787502366e-06, - "loss": 3.5968, - "step": 821500 - }, - { - "epoch": 3.6, - "learning_rate": 1.2325584465301705e-06, - "loss": 3.6025, - "step": 822000 - }, - { - "epoch": 3.6, - "learning_rate": 1.2270581143101044e-06, - "loss": 3.5785, - "step": 822500 - }, - { - "epoch": 3.6, - "learning_rate": 1.2215577820900384e-06, - "loss": 3.5906, - "step": 823000 - }, - { - "epoch": 3.6, - "learning_rate": 1.216057449869972e-06, - "loss": 3.5827, - "step": 823500 - }, - { - "epoch": 3.61, - "learning_rate": 1.2105571176499062e-06, - "loss": 3.5897, - "step": 824000 - }, - { - "epoch": 3.61, - "learning_rate": 1.2050567854298402e-06, - "loss": 3.5881, - "step": 824500 - }, - { - "epoch": 3.61, - "learning_rate": 1.1995564532097739e-06, - "loss": 3.5894, - "step": 825000 - }, - { - "epoch": 3.61, - "learning_rate": 1.194056120989708e-06, - "loss": 3.5914, - "step": 825500 - }, - { - "epoch": 3.61, - "learning_rate": 1.1885557887696417e-06, - "loss": 3.5652, - "step": 826000 - }, - { - "epoch": 3.62, - "learning_rate": 1.1830554565495757e-06, - "loss": 3.5747, - "step": 826500 - }, - { - "epoch": 3.62, - "learning_rate": 1.1775551243295096e-06, - "loss": 3.594, - "step": 827000 - }, - { - "epoch": 3.62, - "learning_rate": 1.1720547921094435e-06, - "loss": 3.5865, - "step": 827500 - }, - { - "epoch": 3.62, - "learning_rate": 1.1665544598893774e-06, - "loss": 3.5854, - "step": 828000 - }, - { - "epoch": 3.63, - "learning_rate": 1.1610541276693114e-06, - "loss": 3.5782, - "step": 828500 - }, - { - "epoch": 3.63, - "learning_rate": 1.155553795449245e-06, - "loss": 3.6062, - "step": 829000 - }, - { - "epoch": 3.63, - "learning_rate": 1.1500534632291792e-06, - "loss": 3.5711, - "step": 829500 - }, - { - "epoch": 3.63, - "learning_rate": 1.144553131009113e-06, - "loss": 3.5878, - "step": 830000 - }, - { - "epoch": 3.63, - "learning_rate": 1.1390527987890469e-06, - "loss": 3.593, - "step": 830500 - }, - { - "epoch": 3.64, - "learning_rate": 1.1335524665689808e-06, - "loss": 3.584, - "step": 831000 - }, - { - "epoch": 3.64, - "learning_rate": 1.1280521343489147e-06, - "loss": 3.5956, - "step": 831500 - }, - { - "epoch": 3.64, - "learning_rate": 1.1225518021288487e-06, - "loss": 3.5882, - "step": 832000 - }, - { - "epoch": 3.64, - "learning_rate": 1.1170514699087826e-06, - "loss": 3.594, - "step": 832500 - }, - { - "epoch": 3.65, - "learning_rate": 1.1115511376887165e-06, - "loss": 3.5969, - "step": 833000 - }, - { - "epoch": 3.65, - "learning_rate": 1.1060508054686505e-06, - "loss": 3.5945, - "step": 833500 - }, - { - "epoch": 3.65, - "learning_rate": 1.1005504732485844e-06, - "loss": 3.5813, - "step": 834000 - }, - { - "epoch": 3.65, - "learning_rate": 1.095050141028518e-06, - "loss": 3.6051, - "step": 834500 - }, - { - "epoch": 3.65, - "learning_rate": 1.0895498088084522e-06, - "loss": 3.5854, - "step": 835000 - }, - { - "epoch": 3.66, - "learning_rate": 1.084049476588386e-06, - "loss": 3.5739, - "step": 835500 - }, - { - "epoch": 3.66, - "learning_rate": 1.0785491443683199e-06, - "loss": 3.5921, - "step": 836000 - }, - { - "epoch": 3.66, - "learning_rate": 1.0730488121482538e-06, - "loss": 3.587, - "step": 836500 - }, - { - "epoch": 3.66, - "learning_rate": 1.0675484799281877e-06, - "loss": 3.5739, - "step": 837000 - }, - { - "epoch": 3.67, - "learning_rate": 1.0620481477081217e-06, - "loss": 3.5844, - "step": 837500 - }, - { - "epoch": 3.67, - "learning_rate": 1.0565478154880556e-06, - "loss": 3.5814, - "step": 838000 - }, - { - "epoch": 3.67, - "learning_rate": 1.0510474832679893e-06, - "loss": 3.5723, - "step": 838500 - }, - { - "epoch": 3.67, - "learning_rate": 1.0455471510479235e-06, - "loss": 3.587, - "step": 839000 - }, - { - "epoch": 3.67, - "learning_rate": 1.0400468188278574e-06, - "loss": 3.5839, - "step": 839500 - }, - { - "epoch": 3.68, - "learning_rate": 1.0345464866077911e-06, - "loss": 3.5731, - "step": 840000 - }, - { - "epoch": 3.68, - "learning_rate": 1.029046154387725e-06, - "loss": 3.575, - "step": 840500 - }, - { - "epoch": 3.68, - "learning_rate": 1.023545822167659e-06, - "loss": 3.5902, - "step": 841000 - }, - { - "epoch": 3.68, - "learning_rate": 1.0180454899475929e-06, - "loss": 3.6101, - "step": 841500 - }, - { - "epoch": 3.68, - "learning_rate": 1.0125451577275268e-06, - "loss": 3.578, - "step": 842000 - }, - { - "epoch": 3.69, - "learning_rate": 1.0070448255074608e-06, - "loss": 3.5796, - "step": 842500 - }, - { - "epoch": 3.69, - "learning_rate": 1.0015444932873947e-06, - "loss": 3.5879, - "step": 843000 - }, - { - "epoch": 3.69, - "learning_rate": 9.960441610673286e-07, - "loss": 3.5764, - "step": 843500 - }, - { - "epoch": 3.69, - "learning_rate": 9.905438288472623e-07, - "loss": 3.5936, - "step": 844000 - }, - { - "epoch": 3.7, - "learning_rate": 9.850434966271965e-07, - "loss": 3.5776, - "step": 844500 - }, - { - "epoch": 3.7, - "learning_rate": 9.795431644071304e-07, - "loss": 3.5755, - "step": 845000 - }, - { - "epoch": 3.7, - "learning_rate": 9.740428321870641e-07, - "loss": 3.5725, - "step": 845500 - }, - { - "epoch": 3.7, - "learning_rate": 9.68542499966998e-07, - "loss": 3.5699, - "step": 846000 - }, - { - "epoch": 3.7, - "learning_rate": 9.63042167746932e-07, - "loss": 3.5751, - "step": 846500 - }, - { - "epoch": 3.71, - "learning_rate": 9.57541835526866e-07, - "loss": 3.5946, - "step": 847000 - }, - { - "epoch": 3.71, - "learning_rate": 9.520415033067998e-07, - "loss": 3.5818, - "step": 847500 - }, - { - "epoch": 3.71, - "learning_rate": 9.465411710867336e-07, - "loss": 3.5993, - "step": 848000 - }, - { - "epoch": 3.71, - "learning_rate": 9.410408388666677e-07, - "loss": 3.5642, - "step": 848500 - }, - { - "epoch": 3.72, - "learning_rate": 9.355405066466015e-07, - "loss": 3.5975, - "step": 849000 - }, - { - "epoch": 3.72, - "learning_rate": 9.300401744265354e-07, - "loss": 3.5909, - "step": 849500 - }, - { - "epoch": 3.72, - "learning_rate": 9.245398422064693e-07, - "loss": 3.5627, - "step": 850000 - }, - { - "epoch": 3.72, - "learning_rate": 9.190395099864033e-07, - "loss": 3.5729, - "step": 850500 - }, - { - "epoch": 3.72, - "learning_rate": 9.135391777663371e-07, - "loss": 3.5828, - "step": 851000 - }, - { - "epoch": 3.73, - "learning_rate": 9.08038845546271e-07, - "loss": 3.5692, - "step": 851500 - }, - { - "epoch": 3.73, - "learning_rate": 9.025385133262051e-07, - "loss": 3.5645, - "step": 852000 - }, - { - "epoch": 3.73, - "learning_rate": 8.970381811061389e-07, - "loss": 3.5867, - "step": 852500 - }, - { - "epoch": 3.73, - "learning_rate": 8.915378488860728e-07, - "loss": 3.5761, - "step": 853000 - }, - { - "epoch": 3.74, - "learning_rate": 8.860375166660067e-07, - "loss": 3.5734, - "step": 853500 - }, - { - "epoch": 3.74, - "learning_rate": 8.805371844459407e-07, - "loss": 3.5915, - "step": 854000 - }, - { - "epoch": 3.74, - "learning_rate": 8.750368522258745e-07, - "loss": 3.5812, - "step": 854500 - }, - { - "epoch": 3.74, - "learning_rate": 8.695365200058084e-07, - "loss": 3.5731, - "step": 855000 - }, - { - "epoch": 3.74, - "learning_rate": 8.640361877857423e-07, - "loss": 3.577, - "step": 855500 - }, - { - "epoch": 3.75, - "learning_rate": 8.585358555656763e-07, - "loss": 3.5891, - "step": 856000 - }, - { - "epoch": 3.75, - "learning_rate": 8.530355233456101e-07, - "loss": 3.5745, - "step": 856500 - }, - { - "epoch": 3.75, - "learning_rate": 8.47535191125544e-07, - "loss": 3.5749, - "step": 857000 - }, - { - "epoch": 3.75, - "learning_rate": 8.420348589054779e-07, - "loss": 3.5853, - "step": 857500 - }, - { - "epoch": 3.75, - "learning_rate": 8.365345266854119e-07, - "loss": 3.5863, - "step": 858000 - }, - { - "epoch": 3.76, - "learning_rate": 8.310341944653457e-07, - "loss": 3.562, - "step": 858500 - }, - { - "epoch": 3.76, - "learning_rate": 8.255338622452797e-07, - "loss": 3.5744, - "step": 859000 - }, - { - "epoch": 3.76, - "learning_rate": 8.200335300252137e-07, - "loss": 3.5943, - "step": 859500 - }, - { - "epoch": 3.76, - "learning_rate": 8.145331978051475e-07, - "loss": 3.5715, - "step": 860000 - }, - { - "epoch": 3.77, - "learning_rate": 8.090328655850814e-07, - "loss": 3.574, - "step": 860500 - }, - { - "epoch": 3.77, - "learning_rate": 8.035325333650153e-07, - "loss": 3.5707, - "step": 861000 - }, - { - "epoch": 3.77, - "learning_rate": 7.980322011449493e-07, - "loss": 3.5604, - "step": 861500 - }, - { - "epoch": 3.77, - "learning_rate": 7.925318689248831e-07, - "loss": 3.5664, - "step": 862000 - }, - { - "epoch": 3.77, - "learning_rate": 7.870315367048171e-07, - "loss": 3.5709, - "step": 862500 - }, - { - "epoch": 3.78, - "learning_rate": 7.815312044847509e-07, - "loss": 3.5779, - "step": 863000 - }, - { - "epoch": 3.78, - "learning_rate": 7.760308722646849e-07, - "loss": 3.5612, - "step": 863500 - }, - { - "epoch": 3.78, - "learning_rate": 7.705305400446187e-07, - "loss": 3.5783, - "step": 864000 - }, - { - "epoch": 3.78, - "learning_rate": 7.650302078245527e-07, - "loss": 3.5779, - "step": 864500 - }, - { - "epoch": 3.79, - "learning_rate": 7.595298756044865e-07, - "loss": 3.5689, - "step": 865000 - }, - { - "epoch": 3.79, - "learning_rate": 7.540295433844205e-07, - "loss": 3.5759, - "step": 865500 - }, - { - "epoch": 3.79, - "learning_rate": 7.485292111643543e-07, - "loss": 3.5804, - "step": 866000 - }, - { - "epoch": 3.79, - "learning_rate": 7.430288789442883e-07, - "loss": 3.5765, - "step": 866500 - }, - { - "epoch": 3.79, - "learning_rate": 7.375285467242221e-07, - "loss": 3.5839, - "step": 867000 - }, - { - "epoch": 3.8, - "learning_rate": 7.320282145041561e-07, - "loss": 3.5736, - "step": 867500 - }, - { - "epoch": 3.8, - "learning_rate": 7.265278822840901e-07, - "loss": 3.5739, - "step": 868000 - }, - { - "epoch": 3.8, - "learning_rate": 7.210275500640239e-07, - "loss": 3.5856, - "step": 868500 - }, - { - "epoch": 3.8, - "learning_rate": 7.155272178439579e-07, - "loss": 3.5765, - "step": 869000 - }, - { - "epoch": 3.81, - "learning_rate": 7.100268856238917e-07, - "loss": 3.5569, - "step": 869500 - }, - { - "epoch": 3.81, - "learning_rate": 7.045265534038257e-07, - "loss": 3.5749, - "step": 870000 - }, - { - "epoch": 3.81, - "learning_rate": 6.990262211837595e-07, - "loss": 3.5789, - "step": 870500 - }, - { - "epoch": 3.81, - "learning_rate": 6.935258889636935e-07, - "loss": 3.5751, - "step": 871000 - }, - { - "epoch": 3.81, - "learning_rate": 6.880255567436274e-07, - "loss": 3.5877, - "step": 871500 - }, - { - "epoch": 3.82, - "learning_rate": 6.825252245235613e-07, - "loss": 3.5813, - "step": 872000 - }, - { - "epoch": 3.82, - "learning_rate": 6.770248923034951e-07, - "loss": 3.5859, - "step": 872500 - }, - { - "epoch": 3.82, - "learning_rate": 6.715245600834291e-07, - "loss": 3.5547, - "step": 873000 - }, - { - "epoch": 3.82, - "learning_rate": 6.660242278633631e-07, - "loss": 3.5746, - "step": 873500 - }, - { - "epoch": 3.82, - "learning_rate": 6.605238956432969e-07, - "loss": 3.5859, - "step": 874000 - }, - { - "epoch": 3.83, - "learning_rate": 6.550235634232307e-07, - "loss": 3.5723, - "step": 874500 - }, - { - "epoch": 3.83, - "learning_rate": 6.495232312031647e-07, - "loss": 3.5854, - "step": 875000 - }, - { - "epoch": 3.83, - "learning_rate": 6.440228989830987e-07, - "loss": 3.5609, - "step": 875500 - }, - { - "epoch": 3.83, - "learning_rate": 6.385225667630325e-07, - "loss": 3.5816, - "step": 876000 - }, - { - "epoch": 3.84, - "learning_rate": 6.330222345429664e-07, - "loss": 3.5917, - "step": 876500 - }, - { - "epoch": 3.84, - "learning_rate": 6.275219023229004e-07, - "loss": 3.565, - "step": 877000 - }, - { - "epoch": 3.84, - "learning_rate": 6.220215701028343e-07, - "loss": 3.582, - "step": 877500 - }, - { - "epoch": 3.84, - "learning_rate": 6.165212378827682e-07, - "loss": 3.5734, - "step": 878000 - }, - { - "epoch": 3.84, - "learning_rate": 6.11020905662702e-07, - "loss": 3.5583, - "step": 878500 - }, - { - "epoch": 3.85, - "learning_rate": 6.05520573442636e-07, - "loss": 3.5628, - "step": 879000 - }, - { - "epoch": 3.85, - "learning_rate": 6.000202412225699e-07, - "loss": 3.5575, - "step": 879500 - }, - { - "epoch": 3.85, - "learning_rate": 5.945199090025038e-07, - "loss": 3.5824, - "step": 880000 - }, - { - "epoch": 3.85, - "learning_rate": 5.890195767824376e-07, - "loss": 3.5685, - "step": 880500 - }, - { - "epoch": 3.86, - "learning_rate": 5.835192445623717e-07, - "loss": 3.5838, - "step": 881000 - }, - { - "epoch": 3.86, - "learning_rate": 5.780189123423055e-07, - "loss": 3.5711, - "step": 881500 - }, - { - "epoch": 3.86, - "learning_rate": 5.725185801222394e-07, - "loss": 3.5761, - "step": 882000 - }, - { - "epoch": 3.86, - "learning_rate": 5.670182479021734e-07, - "loss": 3.5635, - "step": 882500 - }, - { - "epoch": 3.86, - "learning_rate": 5.615179156821073e-07, - "loss": 3.5609, - "step": 883000 - }, - { - "epoch": 3.87, - "learning_rate": 5.560175834620411e-07, - "loss": 3.5916, - "step": 883500 - }, - { - "epoch": 3.87, - "learning_rate": 5.50517251241975e-07, - "loss": 3.5857, - "step": 884000 - }, - { - "epoch": 3.87, - "learning_rate": 5.45016919021909e-07, - "loss": 3.5733, - "step": 884500 - }, - { - "epoch": 3.87, - "learning_rate": 5.395165868018429e-07, - "loss": 3.5576, - "step": 885000 - }, - { - "epoch": 3.88, - "learning_rate": 5.340162545817767e-07, - "loss": 3.5641, - "step": 885500 - }, - { - "epoch": 3.88, - "learning_rate": 5.285159223617107e-07, - "loss": 3.5612, - "step": 886000 - }, - { - "epoch": 3.88, - "learning_rate": 5.230155901416446e-07, - "loss": 3.5757, - "step": 886500 - }, - { - "epoch": 3.88, - "learning_rate": 5.175152579215785e-07, - "loss": 3.5792, - "step": 887000 - }, - { - "epoch": 3.88, - "learning_rate": 5.120149257015124e-07, - "loss": 3.5916, - "step": 887500 - }, - { - "epoch": 3.89, - "learning_rate": 5.065145934814463e-07, - "loss": 3.5646, - "step": 888000 - }, - { - "epoch": 3.89, - "learning_rate": 5.010142612613803e-07, - "loss": 3.5706, - "step": 888500 - }, - { - "epoch": 3.89, - "learning_rate": 4.955139290413141e-07, - "loss": 3.5896, - "step": 889000 - }, - { - "epoch": 3.89, - "learning_rate": 4.90013596821248e-07, - "loss": 3.5664, - "step": 889500 - }, - { - "epoch": 3.89, - "learning_rate": 4.84513264601182e-07, - "loss": 3.5767, - "step": 890000 - }, - { - "epoch": 3.9, - "learning_rate": 4.790129323811159e-07, - "loss": 3.5711, - "step": 890500 - }, - { - "epoch": 3.9, - "learning_rate": 4.7351260016104973e-07, - "loss": 3.5919, - "step": 891000 - }, - { - "epoch": 3.9, - "learning_rate": 4.680122679409837e-07, - "loss": 3.562, - "step": 891500 - }, - { - "epoch": 3.9, - "learning_rate": 4.6251193572091753e-07, - "loss": 3.562, - "step": 892000 - }, - { - "epoch": 3.91, - "learning_rate": 4.570116035008515e-07, - "loss": 3.5696, - "step": 892500 - }, - { - "epoch": 3.91, - "learning_rate": 4.515112712807854e-07, - "loss": 3.5657, - "step": 893000 - }, - { - "epoch": 3.91, - "learning_rate": 4.460109390607193e-07, - "loss": 3.555, - "step": 893500 - }, - { - "epoch": 3.91, - "learning_rate": 4.405106068406532e-07, - "loss": 3.586, - "step": 894000 - }, - { - "epoch": 3.91, - "learning_rate": 4.350102746205871e-07, - "loss": 3.5581, - "step": 894500 - }, - { - "epoch": 3.92, - "learning_rate": 4.2950994240052105e-07, - "loss": 3.5658, - "step": 895000 - }, - { - "epoch": 3.92, - "learning_rate": 4.2400961018045493e-07, - "loss": 3.5601, - "step": 895500 - }, - { - "epoch": 3.92, - "learning_rate": 4.1850927796038886e-07, - "loss": 3.5856, - "step": 896000 - }, - { - "epoch": 3.92, - "learning_rate": 4.1300894574032273e-07, - "loss": 3.5489, - "step": 896500 - }, - { - "epoch": 3.93, - "learning_rate": 4.0750861352025666e-07, - "loss": 3.5652, - "step": 897000 - }, - { - "epoch": 3.93, - "learning_rate": 4.0200828130019054e-07, - "loss": 3.548, - "step": 897500 - }, - { - "epoch": 3.93, - "learning_rate": 3.965079490801245e-07, - "loss": 3.5772, - "step": 898000 - }, - { - "epoch": 3.93, - "learning_rate": 3.9100761686005834e-07, - "loss": 3.5762, - "step": 898500 - }, - { - "epoch": 3.93, - "learning_rate": 3.855072846399923e-07, - "loss": 3.5521, - "step": 899000 - }, - { - "epoch": 3.94, - "learning_rate": 3.800069524199262e-07, - "loss": 3.5695, - "step": 899500 - }, - { - "epoch": 3.94, - "learning_rate": 3.7450662019986013e-07, - "loss": 3.5733, - "step": 900000 - }, - { - "epoch": 3.94, - "learning_rate": 3.69006287979794e-07, - "loss": 3.5832, - "step": 900500 - }, - { - "epoch": 3.94, - "learning_rate": 3.6350595575972793e-07, - "loss": 3.5668, - "step": 901000 - }, - { - "epoch": 3.95, - "learning_rate": 3.580056235396618e-07, - "loss": 3.5794, - "step": 901500 - }, - { - "epoch": 3.95, - "learning_rate": 3.5250529131959574e-07, - "loss": 3.5633, - "step": 902000 - }, - { - "epoch": 3.95, - "learning_rate": 3.470049590995296e-07, - "loss": 3.5713, - "step": 902500 - }, - { - "epoch": 3.95, - "learning_rate": 3.4150462687946354e-07, - "loss": 3.5666, - "step": 903000 - }, - { - "epoch": 3.95, - "learning_rate": 3.3600429465939747e-07, - "loss": 3.5673, - "step": 903500 - }, - { - "epoch": 3.96, - "learning_rate": 3.3050396243933135e-07, - "loss": 3.5563, - "step": 904000 - }, - { - "epoch": 3.96, - "learning_rate": 3.250036302192653e-07, - "loss": 3.5795, - "step": 904500 - }, - { - "epoch": 3.96, - "learning_rate": 3.1950329799919915e-07, - "loss": 3.5727, - "step": 905000 - }, - { - "epoch": 3.96, - "learning_rate": 3.1400296577913313e-07, - "loss": 3.5572, - "step": 905500 - }, - { - "epoch": 3.96, - "learning_rate": 3.08502633559067e-07, - "loss": 3.5539, - "step": 906000 - }, - { - "epoch": 3.97, - "learning_rate": 3.030023013390009e-07, - "loss": 3.5655, - "step": 906500 - }, - { - "epoch": 3.97, - "learning_rate": 2.975019691189348e-07, - "loss": 3.5589, - "step": 907000 - }, - { - "epoch": 3.97, - "learning_rate": 2.920016368988687e-07, - "loss": 3.5711, - "step": 907500 - }, - { - "epoch": 3.97, - "learning_rate": 2.865013046788026e-07, - "loss": 3.5594, - "step": 908000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8100097245873655e-07, - "loss": 3.5661, - "step": 908500 - }, - { - "epoch": 3.98, - "learning_rate": 2.755006402386705e-07, - "loss": 3.5827, - "step": 909000 - }, - { - "epoch": 3.98, - "learning_rate": 2.7000030801860435e-07, - "loss": 3.5596, - "step": 909500 - }, - { - "epoch": 3.98, - "learning_rate": 2.644999757985383e-07, - "loss": 3.579, - "step": 910000 - }, - { - "epoch": 3.98, - "learning_rate": 2.5899964357847216e-07, - "loss": 3.5746, - "step": 910500 - }, - { - "epoch": 3.99, - "learning_rate": 2.534993113584061e-07, - "loss": 3.5646, - "step": 911000 - }, - { - "epoch": 3.99, - "learning_rate": 2.4799897913833996e-07, - "loss": 3.5753, - "step": 911500 - }, - { - "epoch": 3.99, - "learning_rate": 2.424986469182739e-07, - "loss": 3.5575, - "step": 912000 - }, - { - "epoch": 3.99, - "learning_rate": 2.369983146982078e-07, - "loss": 3.547, - "step": 912500 - }, - { - "epoch": 4.0, - "learning_rate": 2.314979824781417e-07, - "loss": 3.5592, - "step": 913000 - }, - { - "epoch": 4.0, - "learning_rate": 2.259976502580756e-07, - "loss": 3.5682, - "step": 913500 - }, - { - "epoch": 4.0, - "learning_rate": 2.204973180380095e-07, - "loss": 3.5759, - "step": 914000 - }, - { - "epoch": 4.0, - "step": 914036, - "total_flos": 4010446533477751296, - "train_runtime": 482483.0917, - "train_samples_per_second": 1.894 - }, - { - "epoch": 4.38, - "learning_rate": 9.28e-07, - "loss": 3.74, - "step": 914500 - }, - { - "epoch": 4.38, - "learning_rate": 1.928e-06, - "loss": 3.6837, - "step": 915000 - }, - { - "epoch": 4.38, - "learning_rate": 2.928e-06, - "loss": 3.6548, - "step": 915500 - }, - { - "epoch": 4.38, - "learning_rate": 3.928e-06, - "loss": 3.6495, - "step": 916000 - }, - { - "epoch": 4.38, - "learning_rate": 4.928000000000001e-06, - "loss": 3.6499, - "step": 916500 - }, - { - "epoch": 4.39, - "learning_rate": 5.928000000000001e-06, - "loss": 3.6435, - "step": 917000 - }, - { - "epoch": 4.39, - "learning_rate": 6.928e-06, - "loss": 3.6555, - "step": 917500 - }, - { - "epoch": 4.39, - "learning_rate": 7.928e-06, - "loss": 3.6449, - "step": 918000 - }, - { - "epoch": 4.39, - "learning_rate": 8.928000000000002e-06, - "loss": 3.645, - "step": 918500 - }, - { - "epoch": 4.4, - "learning_rate": 9.928e-06, - "loss": 3.6527, - "step": 919000 - }, - { - "epoch": 4.4, - "learning_rate": 9.996285509805775e-06, - "loss": 3.6712, - "step": 919500 - }, - { - "epoch": 4.4, - "learning_rate": 9.992282826406824e-06, - "loss": 3.6637, - "step": 920000 - }, - { - "epoch": 4.4, - "learning_rate": 9.988280143007873e-06, - "loss": 3.6703, - "step": 920500 - }, - { - "epoch": 4.41, - "learning_rate": 9.984277459608923e-06, - "loss": 3.6596, - "step": 921000 - }, - { - "epoch": 4.41, - "learning_rate": 9.980274776209972e-06, - "loss": 3.6731, - "step": 921500 - }, - { - "epoch": 4.41, - "learning_rate": 9.976272092811021e-06, - "loss": 3.6479, - "step": 922000 - }, - { - "epoch": 4.41, - "learning_rate": 9.97226940941207e-06, - "loss": 3.6645, - "step": 922500 - }, - { - "epoch": 4.42, - "learning_rate": 9.968266726013121e-06, - "loss": 3.6548, - "step": 923000 - }, - { - "epoch": 4.42, - "learning_rate": 9.964264042614169e-06, - "loss": 3.666, - "step": 923500 - }, - { - "epoch": 4.42, - "learning_rate": 9.960261359215218e-06, - "loss": 3.6458, - "step": 924000 - }, - { - "epoch": 4.42, - "learning_rate": 9.956258675816269e-06, - "loss": 3.6544, - "step": 924500 - }, - { - "epoch": 4.43, - "learning_rate": 9.952255992417318e-06, - "loss": 3.6303, - "step": 925000 - }, - { - "epoch": 4.43, - "learning_rate": 9.948253309018367e-06, - "loss": 3.6393, - "step": 925500 - }, - { - "epoch": 4.43, - "learning_rate": 9.944250625619416e-06, - "loss": 3.644, - "step": 926000 - }, - { - "epoch": 4.43, - "learning_rate": 9.940247942220466e-06, - "loss": 3.6509, - "step": 926500 - }, - { - "epoch": 4.43, - "learning_rate": 9.936245258821515e-06, - "loss": 3.6363, - "step": 927000 - }, - { - "epoch": 4.44, - "learning_rate": 9.932242575422564e-06, - "loss": 3.649, - "step": 927500 - }, - { - "epoch": 4.44, - "learning_rate": 9.928239892023613e-06, - "loss": 3.6387, - "step": 928000 - }, - { - "epoch": 4.44, - "learning_rate": 9.924237208624662e-06, - "loss": 3.6365, - "step": 928500 - }, - { - "epoch": 4.44, - "learning_rate": 9.920234525225713e-06, - "loss": 3.6407, - "step": 929000 - }, - { - "epoch": 4.45, - "learning_rate": 9.91623184182676e-06, - "loss": 3.6452, - "step": 929500 - }, - { - "epoch": 4.45, - "learning_rate": 9.91222915842781e-06, - "loss": 3.6411, - "step": 930000 - }, - { - "epoch": 4.45, - "learning_rate": 9.90822647502886e-06, - "loss": 3.6548, - "step": 930500 - }, - { - "epoch": 4.45, - "learning_rate": 9.90422379162991e-06, - "loss": 3.6502, - "step": 931000 - }, - { - "epoch": 4.46, - "learning_rate": 9.90022110823096e-06, - "loss": 3.6401, - "step": 931500 - }, - { - "epoch": 4.46, - "learning_rate": 9.896218424832008e-06, - "loss": 3.6336, - "step": 932000 - }, - { - "epoch": 4.46, - "learning_rate": 9.892215741433058e-06, - "loss": 3.6401, - "step": 932500 - }, - { - "epoch": 4.46, - "learning_rate": 9.888213058034107e-06, - "loss": 3.6192, - "step": 933000 - }, - { - "epoch": 4.47, - "learning_rate": 9.884210374635156e-06, - "loss": 3.6232, - "step": 933500 - }, - { - "epoch": 4.47, - "learning_rate": 9.880207691236205e-06, - "loss": 3.6416, - "step": 934000 - }, - { - "epoch": 4.47, - "learning_rate": 9.876205007837254e-06, - "loss": 3.6332, - "step": 934500 - }, - { - "epoch": 4.47, - "learning_rate": 9.872202324438305e-06, - "loss": 3.6184, - "step": 935000 - }, - { - "epoch": 4.48, - "learning_rate": 9.868199641039353e-06, - "loss": 3.6325, - "step": 935500 - }, - { - "epoch": 4.48, - "learning_rate": 9.864196957640402e-06, - "loss": 3.632, - "step": 936000 - }, - { - "epoch": 4.48, - "learning_rate": 9.860194274241453e-06, - "loss": 3.6197, - "step": 936500 - }, - { - "epoch": 4.48, - "learning_rate": 9.856191590842502e-06, - "loss": 3.6193, - "step": 937000 - }, - { - "epoch": 4.49, - "learning_rate": 9.852188907443551e-06, - "loss": 3.6253, - "step": 937500 - }, - { - "epoch": 4.49, - "learning_rate": 9.8481862240446e-06, - "loss": 3.6216, - "step": 938000 - }, - { - "epoch": 4.49, - "learning_rate": 9.84418354064565e-06, - "loss": 3.6113, - "step": 938500 - }, - { - "epoch": 4.49, - "learning_rate": 9.840180857246699e-06, - "loss": 3.6246, - "step": 939000 - }, - { - "epoch": 4.49, - "learning_rate": 9.836178173847748e-06, - "loss": 3.6344, - "step": 939500 - }, - { - "epoch": 4.5, - "learning_rate": 9.832175490448797e-06, - "loss": 3.6139, - "step": 940000 - }, - { - "epoch": 4.5, - "learning_rate": 9.828172807049847e-06, - "loss": 3.6269, - "step": 940500 - }, - { - "epoch": 4.5, - "learning_rate": 9.824170123650897e-06, - "loss": 3.6017, - "step": 941000 - }, - { - "epoch": 4.5, - "learning_rate": 9.820167440251945e-06, - "loss": 3.6059, - "step": 941500 - }, - { - "epoch": 4.51, - "learning_rate": 9.816164756852994e-06, - "loss": 3.5937, - "step": 942000 - }, - { - "epoch": 4.51, - "learning_rate": 9.812162073454045e-06, - "loss": 3.6126, - "step": 942500 - }, - { - "epoch": 4.51, - "learning_rate": 9.808159390055094e-06, - "loss": 3.6252, - "step": 943000 - }, - { - "epoch": 4.51, - "learning_rate": 9.804156706656143e-06, - "loss": 3.6047, - "step": 943500 - }, - { - "epoch": 4.52, - "learning_rate": 9.800154023257193e-06, - "loss": 3.6086, - "step": 944000 - }, - { - "epoch": 4.52, - "learning_rate": 9.796151339858242e-06, - "loss": 3.6162, - "step": 944500 - }, - { - "epoch": 4.52, - "learning_rate": 9.792148656459291e-06, - "loss": 3.6092, - "step": 945000 - }, - { - "epoch": 4.52, - "learning_rate": 9.78814597306034e-06, - "loss": 3.6027, - "step": 945500 - }, - { - "epoch": 4.53, - "learning_rate": 9.78414328966139e-06, - "loss": 3.6049, - "step": 946000 - }, - { - "epoch": 4.53, - "learning_rate": 9.780140606262439e-06, - "loss": 3.5871, - "step": 946500 - }, - { - "epoch": 4.53, - "learning_rate": 9.77613792286349e-06, - "loss": 3.612, - "step": 947000 - }, - { - "epoch": 4.53, - "learning_rate": 9.772135239464537e-06, - "loss": 3.6203, - "step": 947500 - }, - { - "epoch": 4.54, - "learning_rate": 9.768132556065586e-06, - "loss": 3.6045, - "step": 948000 - }, - { - "epoch": 4.54, - "learning_rate": 9.764129872666637e-06, - "loss": 3.6093, - "step": 948500 - }, - { - "epoch": 4.54, - "learning_rate": 9.760127189267686e-06, - "loss": 3.5939, - "step": 949000 - }, - { - "epoch": 4.54, - "learning_rate": 9.756124505868736e-06, - "loss": 3.5818, - "step": 949500 - }, - { - "epoch": 4.54, - "learning_rate": 9.752121822469785e-06, - "loss": 3.5917, - "step": 950000 - }, - { - "epoch": 4.55, - "learning_rate": 9.748119139070834e-06, - "loss": 3.5907, - "step": 950500 - }, - { - "epoch": 4.55, - "learning_rate": 9.744116455671883e-06, - "loss": 3.5795, - "step": 951000 - }, - { - "epoch": 4.55, - "learning_rate": 9.740113772272932e-06, - "loss": 3.5883, - "step": 951500 - }, - { - "epoch": 4.55, - "learning_rate": 9.736111088873982e-06, - "loss": 3.5787, - "step": 952000 - }, - { - "epoch": 4.56, - "learning_rate": 9.73210840547503e-06, - "loss": 3.5857, - "step": 952500 - }, - { - "epoch": 4.56, - "learning_rate": 9.728105722076082e-06, - "loss": 3.5732, - "step": 953000 - }, - { - "epoch": 4.56, - "learning_rate": 9.72410303867713e-06, - "loss": 3.5931, - "step": 953500 - }, - { - "epoch": 4.56, - "learning_rate": 9.720100355278178e-06, - "loss": 3.5892, - "step": 954000 - }, - { - "epoch": 4.57, - "learning_rate": 9.71609767187923e-06, - "loss": 3.5789, - "step": 954500 - }, - { - "epoch": 4.57, - "learning_rate": 9.712094988480278e-06, - "loss": 3.5734, - "step": 955000 - }, - { - "epoch": 4.57, - "learning_rate": 9.708092305081328e-06, - "loss": 3.577, - "step": 955500 - }, - { - "epoch": 4.57, - "learning_rate": 9.704089621682377e-06, - "loss": 3.5898, - "step": 956000 - }, - { - "epoch": 4.58, - "learning_rate": 9.700086938283426e-06, - "loss": 3.5609, - "step": 956500 - }, - { - "epoch": 4.58, - "learning_rate": 9.696084254884475e-06, - "loss": 3.5817, - "step": 957000 - }, - { - "epoch": 4.58, - "learning_rate": 9.692081571485524e-06, - "loss": 3.571, - "step": 957500 - }, - { - "epoch": 4.58, - "learning_rate": 9.688078888086574e-06, - "loss": 3.5702, - "step": 958000 - }, - { - "epoch": 4.59, - "learning_rate": 9.684076204687623e-06, - "loss": 3.5672, - "step": 958500 - }, - { - "epoch": 4.59, - "learning_rate": 9.680073521288674e-06, - "loss": 3.5786, - "step": 959000 - }, - { - "epoch": 4.59, - "learning_rate": 9.676070837889721e-06, - "loss": 3.5617, - "step": 959500 - }, - { - "epoch": 4.59, - "learning_rate": 9.67206815449077e-06, - "loss": 3.5776, - "step": 960000 - }, - { - "epoch": 4.6, - "learning_rate": 9.668065471091821e-06, - "loss": 3.5625, - "step": 960500 - }, - { - "epoch": 4.6, - "learning_rate": 9.66406278769287e-06, - "loss": 3.5623, - "step": 961000 - }, - { - "epoch": 4.6, - "learning_rate": 9.66006010429392e-06, - "loss": 3.5592, - "step": 961500 - }, - { - "epoch": 4.6, - "learning_rate": 9.656057420894969e-06, - "loss": 3.5509, - "step": 962000 - }, - { - "epoch": 4.6, - "learning_rate": 9.652054737496018e-06, - "loss": 3.5683, - "step": 962500 - }, - { - "epoch": 4.61, - "learning_rate": 9.648052054097067e-06, - "loss": 3.5564, - "step": 963000 - }, - { - "epoch": 4.61, - "learning_rate": 9.644049370698117e-06, - "loss": 3.5604, - "step": 963500 - }, - { - "epoch": 4.61, - "learning_rate": 9.640046687299166e-06, - "loss": 3.5741, - "step": 964000 - }, - { - "epoch": 4.61, - "learning_rate": 9.636044003900215e-06, - "loss": 3.5541, - "step": 964500 - }, - { - "epoch": 4.62, - "learning_rate": 9.632041320501266e-06, - "loss": 3.5703, - "step": 965000 - }, - { - "epoch": 4.62, - "learning_rate": 9.628038637102313e-06, - "loss": 3.5455, - "step": 965500 - }, - { - "epoch": 4.62, - "learning_rate": 9.624035953703363e-06, - "loss": 3.5427, - "step": 966000 - }, - { - "epoch": 4.62, - "learning_rate": 9.620033270304413e-06, - "loss": 3.5595, - "step": 966500 - }, - { - "epoch": 4.63, - "learning_rate": 9.616030586905463e-06, - "loss": 3.5436, - "step": 967000 - }, - { - "epoch": 4.63, - "learning_rate": 9.612027903506512e-06, - "loss": 3.551, - "step": 967500 - }, - { - "epoch": 4.63, - "learning_rate": 9.608025220107561e-06, - "loss": 3.5516, - "step": 968000 - }, - { - "epoch": 4.63, - "learning_rate": 9.60402253670861e-06, - "loss": 3.5552, - "step": 968500 - }, - { - "epoch": 4.64, - "learning_rate": 9.60001985330966e-06, - "loss": 3.5368, - "step": 969000 - }, - { - "epoch": 4.64, - "learning_rate": 9.596017169910709e-06, - "loss": 3.5329, - "step": 969500 - }, - { - "epoch": 4.64, - "learning_rate": 9.592014486511758e-06, - "loss": 3.5442, - "step": 970000 - }, - { - "epoch": 4.64, - "learning_rate": 9.588011803112807e-06, - "loss": 3.5538, - "step": 970500 - }, - { - "epoch": 4.65, - "learning_rate": 9.584009119713858e-06, - "loss": 3.5464, - "step": 971000 - }, - { - "epoch": 4.65, - "learning_rate": 9.580006436314905e-06, - "loss": 3.5492, - "step": 971500 - }, - { - "epoch": 4.65, - "learning_rate": 9.576003752915955e-06, - "loss": 3.5393, - "step": 972000 - }, - { - "epoch": 4.65, - "learning_rate": 9.572001069517006e-06, - "loss": 3.549, - "step": 972500 - }, - { - "epoch": 4.65, - "learning_rate": 9.567998386118055e-06, - "loss": 3.5289, - "step": 973000 - }, - { - "epoch": 4.66, - "learning_rate": 9.563995702719104e-06, - "loss": 3.5552, - "step": 973500 - }, - { - "epoch": 4.66, - "learning_rate": 9.559993019320153e-06, - "loss": 3.545, - "step": 974000 - }, - { - "epoch": 4.66, - "learning_rate": 9.555990335921202e-06, - "loss": 3.542, - "step": 974500 - }, - { - "epoch": 4.66, - "learning_rate": 9.551987652522252e-06, - "loss": 3.5498, - "step": 975000 - }, - { - "epoch": 4.67, - "learning_rate": 9.5479849691233e-06, - "loss": 3.5428, - "step": 975500 - }, - { - "epoch": 4.67, - "learning_rate": 9.54398228572435e-06, - "loss": 3.5262, - "step": 976000 - }, - { - "epoch": 4.67, - "learning_rate": 9.5399796023254e-06, - "loss": 3.5596, - "step": 976500 - }, - { - "epoch": 4.67, - "learning_rate": 9.53597691892645e-06, - "loss": 3.5245, - "step": 977000 - }, - { - "epoch": 4.68, - "learning_rate": 9.531974235527498e-06, - "loss": 3.5314, - "step": 977500 - }, - { - "epoch": 4.68, - "learning_rate": 9.527971552128547e-06, - "loss": 3.5227, - "step": 978000 - }, - { - "epoch": 4.68, - "learning_rate": 9.523968868729598e-06, - "loss": 3.5274, - "step": 978500 - }, - { - "epoch": 4.68, - "learning_rate": 9.519966185330647e-06, - "loss": 3.5329, - "step": 979000 - }, - { - "epoch": 4.69, - "learning_rate": 9.515963501931696e-06, - "loss": 3.52, - "step": 979500 - }, - { - "epoch": 4.69, - "learning_rate": 9.511960818532745e-06, - "loss": 3.5284, - "step": 980000 - }, - { - "epoch": 4.69, - "learning_rate": 9.507958135133794e-06, - "loss": 3.527, - "step": 980500 - }, - { - "epoch": 4.69, - "learning_rate": 9.503955451734844e-06, - "loss": 3.5231, - "step": 981000 - }, - { - "epoch": 4.7, - "learning_rate": 9.499952768335893e-06, - "loss": 3.4989, - "step": 981500 - }, - { - "epoch": 4.7, - "learning_rate": 9.495950084936942e-06, - "loss": 3.5303, - "step": 982000 - }, - { - "epoch": 4.7, - "learning_rate": 9.491947401537991e-06, - "loss": 3.4973, - "step": 982500 - }, - { - "epoch": 4.7, - "learning_rate": 9.487944718139042e-06, - "loss": 3.5241, - "step": 983000 - }, - { - "epoch": 4.71, - "learning_rate": 9.48394203474009e-06, - "loss": 3.509, - "step": 983500 - }, - { - "epoch": 4.71, - "learning_rate": 9.479939351341139e-06, - "loss": 3.5171, - "step": 984000 - }, - { - "epoch": 4.71, - "learning_rate": 9.47593666794219e-06, - "loss": 3.5103, - "step": 984500 - }, - { - "epoch": 4.71, - "learning_rate": 9.471933984543239e-06, - "loss": 3.5223, - "step": 985000 - }, - { - "epoch": 4.71, - "learning_rate": 9.467931301144288e-06, - "loss": 3.5036, - "step": 985500 - }, - { - "epoch": 4.72, - "learning_rate": 9.463928617745337e-06, - "loss": 3.5212, - "step": 986000 - }, - { - "epoch": 4.72, - "learning_rate": 9.459925934346387e-06, - "loss": 3.5028, - "step": 986500 - }, - { - "epoch": 4.72, - "learning_rate": 9.455923250947436e-06, - "loss": 3.527, - "step": 987000 - }, - { - "epoch": 4.72, - "learning_rate": 9.451920567548485e-06, - "loss": 3.4991, - "step": 987500 - }, - { - "epoch": 4.73, - "learning_rate": 9.447917884149534e-06, - "loss": 3.5121, - "step": 988000 - }, - { - "epoch": 4.73, - "learning_rate": 9.443915200750583e-06, - "loss": 3.4947, - "step": 988500 - }, - { - "epoch": 4.73, - "learning_rate": 9.439912517351634e-06, - "loss": 3.5095, - "step": 989000 - }, - { - "epoch": 4.73, - "learning_rate": 9.435909833952682e-06, - "loss": 3.5161, - "step": 989500 - }, - { - "epoch": 4.74, - "learning_rate": 9.431907150553731e-06, - "loss": 3.5029, - "step": 990000 - }, - { - "epoch": 4.74, - "learning_rate": 9.427904467154782e-06, - "loss": 3.4983, - "step": 990500 - }, - { - "epoch": 4.74, - "learning_rate": 9.423901783755831e-06, - "loss": 3.5224, - "step": 991000 - }, - { - "epoch": 4.74, - "learning_rate": 9.41989910035688e-06, - "loss": 3.5059, - "step": 991500 - }, - { - "epoch": 4.75, - "learning_rate": 9.41589641695793e-06, - "loss": 3.522, - "step": 992000 - }, - { - "epoch": 4.75, - "learning_rate": 9.411893733558979e-06, - "loss": 3.5031, - "step": 992500 - }, - { - "epoch": 4.75, - "learning_rate": 9.407891050160028e-06, - "loss": 3.4977, - "step": 993000 - }, - { - "epoch": 4.75, - "learning_rate": 9.403888366761077e-06, - "loss": 3.4896, - "step": 993500 - }, - { - "epoch": 4.76, - "learning_rate": 9.399885683362126e-06, - "loss": 3.4966, - "step": 994000 - }, - { - "epoch": 4.76, - "learning_rate": 9.395882999963176e-06, - "loss": 3.4921, - "step": 994500 - }, - { - "epoch": 4.76, - "learning_rate": 9.391880316564226e-06, - "loss": 3.5007, - "step": 995000 - }, - { - "epoch": 4.76, - "learning_rate": 9.387877633165274e-06, - "loss": 3.4917, - "step": 995500 - }, - { - "epoch": 4.76, - "learning_rate": 9.383874949766323e-06, - "loss": 3.4926, - "step": 996000 - }, - { - "epoch": 4.77, - "learning_rate": 9.379872266367374e-06, - "loss": 3.5001, - "step": 996500 - }, - { - "epoch": 4.77, - "learning_rate": 9.375869582968423e-06, - "loss": 3.4814, - "step": 997000 - }, - { - "epoch": 4.77, - "learning_rate": 9.371866899569472e-06, - "loss": 3.4847, - "step": 997500 - }, - { - "epoch": 4.77, - "learning_rate": 9.367864216170522e-06, - "loss": 3.4825, - "step": 998000 - }, - { - "epoch": 4.78, - "learning_rate": 9.36386153277157e-06, - "loss": 3.489, - "step": 998500 - }, - { - "epoch": 4.78, - "learning_rate": 9.35985884937262e-06, - "loss": 3.4716, - "step": 999000 - }, - { - "epoch": 4.78, - "learning_rate": 9.35585616597367e-06, - "loss": 3.4849, - "step": 999500 - }, - { - "epoch": 4.78, - "learning_rate": 9.351853482574718e-06, - "loss": 3.4844, - "step": 1000000 - }, - { - "epoch": 4.79, - "learning_rate": 9.347850799175768e-06, - "loss": 3.4868, - "step": 1000500 - }, - { - "epoch": 4.79, - "learning_rate": 9.343848115776819e-06, - "loss": 3.4658, - "step": 1001000 - }, - { - "epoch": 4.79, - "learning_rate": 9.339845432377866e-06, - "loss": 3.474, - "step": 1001500 - }, - { - "epoch": 4.79, - "learning_rate": 9.335842748978915e-06, - "loss": 3.4939, - "step": 1002000 - }, - { - "epoch": 4.8, - "learning_rate": 9.331840065579966e-06, - "loss": 3.4664, - "step": 1002500 - }, - { - "epoch": 4.8, - "learning_rate": 9.327837382181015e-06, - "loss": 3.4715, - "step": 1003000 - }, - { - "epoch": 4.8, - "learning_rate": 9.323834698782065e-06, - "loss": 3.4739, - "step": 1003500 - }, - { - "epoch": 4.8, - "learning_rate": 9.319832015383114e-06, - "loss": 3.4763, - "step": 1004000 - }, - { - "epoch": 4.81, - "learning_rate": 9.315829331984163e-06, - "loss": 3.4815, - "step": 1004500 - }, - { - "epoch": 4.81, - "learning_rate": 9.311826648585212e-06, - "loss": 3.4816, - "step": 1005000 - }, - { - "epoch": 4.81, - "learning_rate": 9.307823965186261e-06, - "loss": 3.4704, - "step": 1005500 - }, - { - "epoch": 4.81, - "learning_rate": 9.30382128178731e-06, - "loss": 3.47, - "step": 1006000 - }, - { - "epoch": 4.82, - "learning_rate": 9.29981859838836e-06, - "loss": 3.4674, - "step": 1006500 - }, - { - "epoch": 4.82, - "learning_rate": 9.29581591498941e-06, - "loss": 3.4659, - "step": 1007000 - }, - { - "epoch": 4.82, - "learning_rate": 9.291813231590458e-06, - "loss": 3.4701, - "step": 1007500 - }, - { - "epoch": 4.82, - "learning_rate": 9.287810548191507e-06, - "loss": 3.4679, - "step": 1008000 - }, - { - "epoch": 4.82, - "learning_rate": 9.283807864792558e-06, - "loss": 3.4887, - "step": 1008500 - }, - { - "epoch": 4.83, - "learning_rate": 9.279805181393607e-06, - "loss": 3.4622, - "step": 1009000 - }, - { - "epoch": 4.83, - "learning_rate": 9.275802497994657e-06, - "loss": 3.4561, - "step": 1009500 - }, - { - "epoch": 4.83, - "learning_rate": 9.271799814595706e-06, - "loss": 3.4557, - "step": 1010000 - }, - { - "epoch": 4.83, - "learning_rate": 9.267797131196755e-06, - "loss": 3.4776, - "step": 1010500 - }, - { - "epoch": 4.84, - "learning_rate": 9.263794447797804e-06, - "loss": 3.4496, - "step": 1011000 - }, - { - "epoch": 4.84, - "learning_rate": 9.259791764398853e-06, - "loss": 3.471, - "step": 1011500 - }, - { - "epoch": 4.84, - "learning_rate": 9.255789080999903e-06, - "loss": 3.4426, - "step": 1012000 - }, - { - "epoch": 4.84, - "learning_rate": 9.251786397600952e-06, - "loss": 3.4546, - "step": 1012500 - }, - { - "epoch": 4.85, - "learning_rate": 9.247783714202003e-06, - "loss": 3.4477, - "step": 1013000 - }, - { - "epoch": 4.85, - "learning_rate": 9.24378103080305e-06, - "loss": 3.47, - "step": 1013500 - }, - { - "epoch": 4.85, - "learning_rate": 9.2397783474041e-06, - "loss": 3.4587, - "step": 1014000 - }, - { - "epoch": 4.85, - "learning_rate": 9.23577566400515e-06, - "loss": 3.4485, - "step": 1014500 - }, - { - "epoch": 4.86, - "learning_rate": 9.2317729806062e-06, - "loss": 3.4658, - "step": 1015000 - }, - { - "epoch": 4.86, - "learning_rate": 9.227770297207249e-06, - "loss": 3.4314, - "step": 1015500 - }, - { - "epoch": 4.86, - "learning_rate": 9.223767613808298e-06, - "loss": 3.447, - "step": 1016000 - }, - { - "epoch": 4.86, - "learning_rate": 9.219764930409347e-06, - "loss": 3.4478, - "step": 1016500 - }, - { - "epoch": 4.87, - "learning_rate": 9.215762247010396e-06, - "loss": 3.4497, - "step": 1017000 - }, - { - "epoch": 4.87, - "learning_rate": 9.211759563611446e-06, - "loss": 3.458, - "step": 1017500 - }, - { - "epoch": 4.87, - "learning_rate": 9.207756880212495e-06, - "loss": 3.461, - "step": 1018000 - }, - { - "epoch": 4.87, - "learning_rate": 9.203754196813544e-06, - "loss": 3.4581, - "step": 1018500 - }, - { - "epoch": 4.87, - "learning_rate": 9.199751513414595e-06, - "loss": 3.4544, - "step": 1019000 - }, - { - "epoch": 4.88, - "learning_rate": 9.195748830015642e-06, - "loss": 3.4346, - "step": 1019500 - }, - { - "epoch": 4.88, - "learning_rate": 9.191746146616692e-06, - "loss": 3.4412, - "step": 1020000 - }, - { - "epoch": 4.88, - "learning_rate": 9.187743463217742e-06, - "loss": 3.4563, - "step": 1020500 - }, - { - "epoch": 4.88, - "learning_rate": 9.183740779818792e-06, - "loss": 3.4442, - "step": 1021000 - }, - { - "epoch": 4.89, - "learning_rate": 9.179738096419841e-06, - "loss": 3.4518, - "step": 1021500 - }, - { - "epoch": 4.89, - "learning_rate": 9.17573541302089e-06, - "loss": 3.448, - "step": 1022000 - }, - { - "epoch": 4.89, - "learning_rate": 9.17173272962194e-06, - "loss": 3.4234, - "step": 1022500 - }, - { - "epoch": 4.89, - "learning_rate": 9.167730046222988e-06, - "loss": 3.435, - "step": 1023000 - }, - { - "epoch": 4.9, - "learning_rate": 9.163727362824038e-06, - "loss": 3.4533, - "step": 1023500 - }, - { - "epoch": 4.9, - "learning_rate": 9.159724679425087e-06, - "loss": 3.4432, - "step": 1024000 - }, - { - "epoch": 4.9, - "learning_rate": 9.155721996026136e-06, - "loss": 3.4313, - "step": 1024500 - }, - { - "epoch": 4.9, - "learning_rate": 9.151719312627187e-06, - "loss": 3.4425, - "step": 1025000 - }, - { - "epoch": 4.91, - "learning_rate": 9.147716629228234e-06, - "loss": 3.4533, - "step": 1025500 - }, - { - "epoch": 4.91, - "learning_rate": 9.143713945829284e-06, - "loss": 3.4374, - "step": 1026000 - }, - { - "epoch": 4.91, - "learning_rate": 9.139711262430335e-06, - "loss": 3.4225, - "step": 1026500 - }, - { - "epoch": 4.91, - "learning_rate": 9.135708579031384e-06, - "loss": 3.4367, - "step": 1027000 - }, - { - "epoch": 4.92, - "learning_rate": 9.131705895632433e-06, - "loss": 3.44, - "step": 1027500 - }, - { - "epoch": 4.92, - "learning_rate": 9.127703212233482e-06, - "loss": 3.4377, - "step": 1028000 - }, - { - "epoch": 4.92, - "learning_rate": 9.123700528834531e-06, - "loss": 3.4361, - "step": 1028500 - }, - { - "epoch": 4.92, - "learning_rate": 9.11969784543558e-06, - "loss": 3.4369, - "step": 1029000 - }, - { - "epoch": 4.93, - "learning_rate": 9.11569516203663e-06, - "loss": 3.4374, - "step": 1029500 - }, - { - "epoch": 4.93, - "learning_rate": 9.111692478637679e-06, - "loss": 3.4377, - "step": 1030000 - }, - { - "epoch": 4.93, - "learning_rate": 9.107689795238728e-06, - "loss": 3.422, - "step": 1030500 - }, - { - "epoch": 4.93, - "learning_rate": 9.103687111839779e-06, - "loss": 3.4309, - "step": 1031000 - }, - { - "epoch": 4.93, - "learning_rate": 9.099684428440827e-06, - "loss": 3.4217, - "step": 1031500 - }, - { - "epoch": 4.94, - "learning_rate": 9.095681745041876e-06, - "loss": 3.4228, - "step": 1032000 - }, - { - "epoch": 4.94, - "learning_rate": 9.091679061642927e-06, - "loss": 3.4227, - "step": 1032500 - }, - { - "epoch": 4.94, - "learning_rate": 9.087676378243976e-06, - "loss": 3.4247, - "step": 1033000 - }, - { - "epoch": 4.94, - "learning_rate": 9.083673694845025e-06, - "loss": 3.4238, - "step": 1033500 - }, - { - "epoch": 4.95, - "learning_rate": 9.079671011446074e-06, - "loss": 3.4359, - "step": 1034000 - }, - { - "epoch": 4.95, - "learning_rate": 9.075668328047123e-06, - "loss": 3.4259, - "step": 1034500 - }, - { - "epoch": 4.95, - "learning_rate": 9.071665644648173e-06, - "loss": 3.4207, - "step": 1035000 - }, - { - "epoch": 4.95, - "learning_rate": 9.067662961249222e-06, - "loss": 3.4115, - "step": 1035500 - }, - { - "epoch": 4.96, - "learning_rate": 9.063660277850271e-06, - "loss": 3.408, - "step": 1036000 - }, - { - "epoch": 4.96, - "learning_rate": 9.05965759445132e-06, - "loss": 3.4158, - "step": 1036500 - }, - { - "epoch": 4.96, - "learning_rate": 9.055654911052371e-06, - "loss": 3.4278, - "step": 1037000 - }, - { - "epoch": 4.96, - "learning_rate": 9.051652227653419e-06, - "loss": 3.4451, - "step": 1037500 - }, - { - "epoch": 4.97, - "learning_rate": 9.047649544254468e-06, - "loss": 3.417, - "step": 1038000 - }, - { - "epoch": 4.97, - "learning_rate": 9.043646860855519e-06, - "loss": 3.4113, - "step": 1038500 - }, - { - "epoch": 4.97, - "learning_rate": 9.039644177456568e-06, - "loss": 3.4053, - "step": 1039000 - }, - { - "epoch": 4.97, - "learning_rate": 9.035641494057617e-06, - "loss": 3.4014, - "step": 1039500 - }, - { - "epoch": 4.98, - "learning_rate": 9.031638810658666e-06, - "loss": 3.4174, - "step": 1040000 - }, - { - "epoch": 4.98, - "learning_rate": 9.027636127259716e-06, - "loss": 3.3954, - "step": 1040500 - }, - { - "epoch": 4.98, - "learning_rate": 9.023633443860765e-06, - "loss": 3.4071, - "step": 1041000 - }, - { - "epoch": 4.98, - "learning_rate": 9.019630760461814e-06, - "loss": 3.4283, - "step": 1041500 - }, - { - "epoch": 4.99, - "learning_rate": 9.015628077062863e-06, - "loss": 3.4063, - "step": 1042000 - }, - { - "epoch": 4.99, - "learning_rate": 9.011625393663912e-06, - "loss": 3.3895, - "step": 1042500 - }, - { - "epoch": 4.99, - "learning_rate": 9.007622710264963e-06, - "loss": 3.4066, - "step": 1043000 - }, - { - "epoch": 4.99, - "learning_rate": 9.00362002686601e-06, - "loss": 3.4119, - "step": 1043500 - }, - { - "epoch": 4.99, - "learning_rate": 8.99961734346706e-06, - "loss": 3.3966, - "step": 1044000 - }, - { - "epoch": 5.0, - "learning_rate": 8.995614660068111e-06, - "loss": 3.3983, - "step": 1044500 - }, - { - "epoch": 5.0, - "learning_rate": 8.99161197666916e-06, - "loss": 3.4122, - "step": 1045000 - }, - { - "epoch": 5.0, - "learning_rate": 8.98760929327021e-06, - "loss": 3.403, - "step": 1045500 - }, - { - "epoch": 5.0, - "learning_rate": 8.983606609871259e-06, - "loss": 3.4052, - "step": 1046000 - }, - { - "epoch": 5.01, - "learning_rate": 8.979603926472308e-06, - "loss": 3.4156, - "step": 1046500 - }, - { - "epoch": 5.01, - "learning_rate": 8.975601243073357e-06, - "loss": 3.4046, - "step": 1047000 - }, - { - "epoch": 5.01, - "learning_rate": 8.971598559674406e-06, - "loss": 3.3866, - "step": 1047500 - }, - { - "epoch": 5.01, - "learning_rate": 8.967595876275455e-06, - "loss": 3.397, - "step": 1048000 - }, - { - "epoch": 5.02, - "learning_rate": 8.963593192876505e-06, - "loss": 3.3934, - "step": 1048500 - }, - { - "epoch": 5.02, - "learning_rate": 8.959590509477555e-06, - "loss": 3.4057, - "step": 1049000 - }, - { - "epoch": 5.02, - "learning_rate": 8.955587826078603e-06, - "loss": 3.4034, - "step": 1049500 - }, - { - "epoch": 5.02, - "learning_rate": 8.951585142679652e-06, - "loss": 3.3912, - "step": 1050000 - }, - { - "epoch": 5.03, - "learning_rate": 8.947582459280703e-06, - "loss": 3.3995, - "step": 1050500 - }, - { - "epoch": 5.03, - "learning_rate": 8.943579775881752e-06, - "loss": 3.3758, - "step": 1051000 - }, - { - "epoch": 5.03, - "learning_rate": 8.939577092482801e-06, - "loss": 3.3933, - "step": 1051500 - }, - { - "epoch": 5.03, - "learning_rate": 8.93557440908385e-06, - "loss": 3.3974, - "step": 1052000 - }, - { - "epoch": 5.04, - "learning_rate": 8.9315717256849e-06, - "loss": 3.3947, - "step": 1052500 - }, - { - "epoch": 5.04, - "learning_rate": 8.927569042285949e-06, - "loss": 3.3824, - "step": 1053000 - }, - { - "epoch": 5.04, - "learning_rate": 8.923566358886998e-06, - "loss": 3.3874, - "step": 1053500 - }, - { - "epoch": 5.04, - "learning_rate": 8.919563675488047e-06, - "loss": 3.3792, - "step": 1054000 - }, - { - "epoch": 5.04, - "learning_rate": 8.915560992089097e-06, - "loss": 3.3918, - "step": 1054500 - }, - { - "epoch": 5.05, - "learning_rate": 8.911558308690148e-06, - "loss": 3.3781, - "step": 1055000 - }, - { - "epoch": 5.05, - "learning_rate": 8.907555625291195e-06, - "loss": 3.3948, - "step": 1055500 - }, - { - "epoch": 5.05, - "learning_rate": 8.903552941892244e-06, - "loss": 3.3855, - "step": 1056000 - }, - { - "epoch": 5.05, - "learning_rate": 8.899550258493295e-06, - "loss": 3.3945, - "step": 1056500 - }, - { - "epoch": 5.06, - "learning_rate": 8.895547575094344e-06, - "loss": 3.3716, - "step": 1057000 - }, - { - "epoch": 5.06, - "learning_rate": 8.891544891695394e-06, - "loss": 3.3838, - "step": 1057500 - }, - { - "epoch": 5.06, - "learning_rate": 8.887542208296443e-06, - "loss": 3.3963, - "step": 1058000 - }, - { - "epoch": 5.06, - "learning_rate": 8.883539524897492e-06, - "loss": 3.4065, - "step": 1058500 - }, - { - "epoch": 5.07, - "learning_rate": 8.879536841498541e-06, - "loss": 3.3982, - "step": 1059000 - }, - { - "epoch": 5.07, - "learning_rate": 8.87553415809959e-06, - "loss": 3.3815, - "step": 1059500 - }, - { - "epoch": 5.07, - "learning_rate": 8.87153147470064e-06, - "loss": 3.3836, - "step": 1060000 - }, - { - "epoch": 5.07, - "learning_rate": 8.867528791301689e-06, - "loss": 3.3915, - "step": 1060500 - }, - { - "epoch": 5.08, - "learning_rate": 8.86352610790274e-06, - "loss": 3.3789, - "step": 1061000 - }, - { - "epoch": 5.08, - "learning_rate": 8.859523424503787e-06, - "loss": 3.3921, - "step": 1061500 - }, - { - "epoch": 5.08, - "learning_rate": 8.855520741104836e-06, - "loss": 3.39, - "step": 1062000 - }, - { - "epoch": 5.08, - "learning_rate": 8.851518057705887e-06, - "loss": 3.3655, - "step": 1062500 - }, - { - "epoch": 5.09, - "learning_rate": 8.847515374306936e-06, - "loss": 3.3851, - "step": 1063000 - }, - { - "epoch": 5.09, - "learning_rate": 8.843512690907986e-06, - "loss": 3.3707, - "step": 1063500 - }, - { - "epoch": 5.09, - "learning_rate": 8.839510007509035e-06, - "loss": 3.3615, - "step": 1064000 - }, - { - "epoch": 5.09, - "learning_rate": 8.835507324110084e-06, - "loss": 3.3788, - "step": 1064500 - }, - { - "epoch": 5.1, - "learning_rate": 8.831504640711133e-06, - "loss": 3.3719, - "step": 1065000 - }, - { - "epoch": 5.1, - "learning_rate": 8.827501957312182e-06, - "loss": 3.3626, - "step": 1065500 - }, - { - "epoch": 5.1, - "learning_rate": 8.823499273913232e-06, - "loss": 3.3631, - "step": 1066000 - }, - { - "epoch": 5.1, - "learning_rate": 8.81949659051428e-06, - "loss": 3.3576, - "step": 1066500 - }, - { - "epoch": 5.1, - "learning_rate": 8.815493907115332e-06, - "loss": 3.3631, - "step": 1067000 - }, - { - "epoch": 5.11, - "learning_rate": 8.81149122371638e-06, - "loss": 3.3458, - "step": 1067500 - }, - { - "epoch": 5.11, - "learning_rate": 8.807488540317428e-06, - "loss": 3.3737, - "step": 1068000 - }, - { - "epoch": 5.11, - "learning_rate": 8.80348585691848e-06, - "loss": 3.3795, - "step": 1068500 - }, - { - "epoch": 5.11, - "learning_rate": 8.799483173519529e-06, - "loss": 3.3823, - "step": 1069000 - }, - { - "epoch": 5.12, - "learning_rate": 8.795480490120578e-06, - "loss": 3.3549, - "step": 1069500 - }, - { - "epoch": 5.12, - "learning_rate": 8.791477806721627e-06, - "loss": 3.3519, - "step": 1070000 - }, - { - "epoch": 5.12, - "learning_rate": 8.787475123322676e-06, - "loss": 3.3695, - "step": 1070500 - }, - { - "epoch": 5.12, - "learning_rate": 8.783472439923725e-06, - "loss": 3.3603, - "step": 1071000 - }, - { - "epoch": 5.13, - "learning_rate": 8.779469756524775e-06, - "loss": 3.3586, - "step": 1071500 - }, - { - "epoch": 5.13, - "learning_rate": 8.775467073125824e-06, - "loss": 3.3561, - "step": 1072000 - }, - { - "epoch": 5.13, - "learning_rate": 8.771464389726873e-06, - "loss": 3.3482, - "step": 1072500 - }, - { - "epoch": 5.13, - "learning_rate": 8.767461706327924e-06, - "loss": 3.3564, - "step": 1073000 - }, - { - "epoch": 5.14, - "learning_rate": 8.763459022928971e-06, - "loss": 3.3558, - "step": 1073500 - }, - { - "epoch": 5.14, - "learning_rate": 8.75945633953002e-06, - "loss": 3.3778, - "step": 1074000 - }, - { - "epoch": 5.14, - "learning_rate": 8.755453656131071e-06, - "loss": 3.3673, - "step": 1074500 - }, - { - "epoch": 5.14, - "learning_rate": 8.75145097273212e-06, - "loss": 3.3794, - "step": 1075000 - }, - { - "epoch": 5.15, - "learning_rate": 8.74744828933317e-06, - "loss": 3.3445, - "step": 1075500 - }, - { - "epoch": 5.15, - "learning_rate": 8.743445605934219e-06, - "loss": 3.353, - "step": 1076000 - }, - { - "epoch": 5.15, - "learning_rate": 8.739442922535268e-06, - "loss": 3.3647, - "step": 1076500 - }, - { - "epoch": 5.15, - "learning_rate": 8.735440239136317e-06, - "loss": 3.3595, - "step": 1077000 - }, - { - "epoch": 5.15, - "learning_rate": 8.731437555737367e-06, - "loss": 3.3579, - "step": 1077500 - }, - { - "epoch": 5.16, - "learning_rate": 8.727434872338416e-06, - "loss": 3.3764, - "step": 1078000 - }, - { - "epoch": 5.16, - "learning_rate": 8.723432188939465e-06, - "loss": 3.3575, - "step": 1078500 - }, - { - "epoch": 5.16, - "learning_rate": 8.719429505540516e-06, - "loss": 3.3581, - "step": 1079000 - }, - { - "epoch": 5.16, - "learning_rate": 8.715426822141563e-06, - "loss": 3.3582, - "step": 1079500 - }, - { - "epoch": 5.17, - "learning_rate": 8.711424138742613e-06, - "loss": 3.364, - "step": 1080000 - }, - { - "epoch": 5.17, - "learning_rate": 8.707421455343664e-06, - "loss": 3.3689, - "step": 1080500 - }, - { - "epoch": 5.17, - "learning_rate": 8.703418771944713e-06, - "loss": 3.3486, - "step": 1081000 - }, - { - "epoch": 5.17, - "learning_rate": 8.699416088545762e-06, - "loss": 3.3392, - "step": 1081500 - }, - { - "epoch": 5.18, - "learning_rate": 8.695413405146811e-06, - "loss": 3.3616, - "step": 1082000 - }, - { - "epoch": 5.18, - "learning_rate": 8.69141072174786e-06, - "loss": 3.3624, - "step": 1082500 - }, - { - "epoch": 5.18, - "learning_rate": 8.68740803834891e-06, - "loss": 3.3314, - "step": 1083000 - }, - { - "epoch": 5.18, - "learning_rate": 8.683405354949959e-06, - "loss": 3.346, - "step": 1083500 - }, - { - "epoch": 5.19, - "learning_rate": 8.679402671551008e-06, - "loss": 3.3643, - "step": 1084000 - }, - { - "epoch": 5.19, - "learning_rate": 8.675399988152057e-06, - "loss": 3.3358, - "step": 1084500 - }, - { - "epoch": 5.19, - "learning_rate": 8.671397304753108e-06, - "loss": 3.3508, - "step": 1085000 - }, - { - "epoch": 5.19, - "learning_rate": 8.667394621354156e-06, - "loss": 3.3515, - "step": 1085500 - }, - { - "epoch": 5.2, - "learning_rate": 8.663391937955205e-06, - "loss": 3.3562, - "step": 1086000 - }, - { - "epoch": 5.2, - "learning_rate": 8.659389254556256e-06, - "loss": 3.3287, - "step": 1086500 - }, - { - "epoch": 5.2, - "learning_rate": 8.655386571157305e-06, - "loss": 3.3401, - "step": 1087000 - }, - { - "epoch": 5.2, - "learning_rate": 8.651383887758354e-06, - "loss": 3.3571, - "step": 1087500 - }, - { - "epoch": 5.21, - "learning_rate": 8.647381204359403e-06, - "loss": 3.3407, - "step": 1088000 - }, - { - "epoch": 5.21, - "learning_rate": 8.643378520960452e-06, - "loss": 3.3256, - "step": 1088500 - }, - { - "epoch": 5.21, - "learning_rate": 8.639375837561502e-06, - "loss": 3.3403, - "step": 1089000 - }, - { - "epoch": 5.21, - "learning_rate": 8.635373154162551e-06, - "loss": 3.3475, - "step": 1089500 - }, - { - "epoch": 5.21, - "learning_rate": 8.6313704707636e-06, - "loss": 3.3381, - "step": 1090000 - }, - { - "epoch": 5.22, - "learning_rate": 8.62736778736465e-06, - "loss": 3.3287, - "step": 1090500 - }, - { - "epoch": 5.22, - "learning_rate": 8.6233651039657e-06, - "loss": 3.3423, - "step": 1091000 - }, - { - "epoch": 5.22, - "learning_rate": 8.619362420566748e-06, - "loss": 3.3206, - "step": 1091500 - }, - { - "epoch": 5.22, - "learning_rate": 8.615359737167797e-06, - "loss": 3.3258, - "step": 1092000 - }, - { - "epoch": 5.23, - "learning_rate": 8.611357053768848e-06, - "loss": 3.327, - "step": 1092500 - }, - { - "epoch": 5.23, - "learning_rate": 8.607354370369897e-06, - "loss": 3.326, - "step": 1093000 - }, - { - "epoch": 5.23, - "learning_rate": 8.603351686970946e-06, - "loss": 3.3478, - "step": 1093500 - }, - { - "epoch": 5.23, - "learning_rate": 8.599349003571995e-06, - "loss": 3.3329, - "step": 1094000 - }, - { - "epoch": 5.24, - "learning_rate": 8.595346320173045e-06, - "loss": 3.3348, - "step": 1094500 - }, - { - "epoch": 5.24, - "learning_rate": 8.591343636774094e-06, - "loss": 3.3243, - "step": 1095000 - }, - { - "epoch": 5.24, - "learning_rate": 8.587340953375145e-06, - "loss": 3.3454, - "step": 1095500 - }, - { - "epoch": 5.24, - "learning_rate": 8.583338269976192e-06, - "loss": 3.3306, - "step": 1096000 - }, - { - "epoch": 5.25, - "learning_rate": 8.579335586577241e-06, - "loss": 3.3134, - "step": 1096500 - }, - { - "epoch": 5.25, - "learning_rate": 8.575332903178292e-06, - "loss": 3.3141, - "step": 1097000 - }, - { - "epoch": 5.25, - "learning_rate": 8.571330219779341e-06, - "loss": 3.3206, - "step": 1097500 - }, - { - "epoch": 5.25, - "learning_rate": 8.567327536380389e-06, - "loss": 3.3307, - "step": 1098000 - }, - { - "epoch": 5.26, - "learning_rate": 8.56332485298144e-06, - "loss": 3.3218, - "step": 1098500 - }, - { - "epoch": 5.26, - "learning_rate": 8.559322169582489e-06, - "loss": 3.3011, - "step": 1099000 - }, - { - "epoch": 5.26, - "learning_rate": 8.555319486183538e-06, - "loss": 3.3387, - "step": 1099500 - }, - { - "epoch": 5.26, - "learning_rate": 8.551316802784587e-06, - "loss": 3.3237, - "step": 1100000 - }, - { - "epoch": 5.26, - "learning_rate": 8.547314119385637e-06, - "loss": 3.3094, - "step": 1100500 - }, - { - "epoch": 5.27, - "learning_rate": 8.543311435986686e-06, - "loss": 3.332, - "step": 1101000 - }, - { - "epoch": 5.27, - "learning_rate": 8.539308752587737e-06, - "loss": 3.3344, - "step": 1101500 - }, - { - "epoch": 5.27, - "learning_rate": 8.535306069188784e-06, - "loss": 3.3262, - "step": 1102000 - }, - { - "epoch": 5.27, - "learning_rate": 8.531303385789834e-06, - "loss": 3.3267, - "step": 1102500 - }, - { - "epoch": 5.28, - "learning_rate": 8.527300702390884e-06, - "loss": 3.3155, - "step": 1103000 - }, - { - "epoch": 5.28, - "learning_rate": 8.523298018991934e-06, - "loss": 3.3035, - "step": 1103500 - }, - { - "epoch": 5.28, - "learning_rate": 8.519295335592981e-06, - "loss": 3.3111, - "step": 1104000 - }, - { - "epoch": 5.28, - "learning_rate": 8.515292652194032e-06, - "loss": 3.3118, - "step": 1104500 - }, - { - "epoch": 5.29, - "learning_rate": 8.511289968795081e-06, - "loss": 3.3005, - "step": 1105000 - }, - { - "epoch": 5.29, - "learning_rate": 8.50728728539613e-06, - "loss": 3.3014, - "step": 1105500 - }, - { - "epoch": 5.29, - "learning_rate": 8.50328460199718e-06, - "loss": 3.3032, - "step": 1106000 - }, - { - "epoch": 5.29, - "learning_rate": 8.499281918598229e-06, - "loss": 3.3133, - "step": 1106500 - }, - { - "epoch": 5.3, - "learning_rate": 8.495279235199278e-06, - "loss": 3.3007, - "step": 1107000 - }, - { - "epoch": 5.3, - "learning_rate": 8.491276551800329e-06, - "loss": 3.3218, - "step": 1107500 - }, - { - "epoch": 5.3, - "learning_rate": 8.487273868401376e-06, - "loss": 3.3353, - "step": 1108000 - }, - { - "epoch": 5.3, - "learning_rate": 8.483271185002426e-06, - "loss": 3.3056, - "step": 1108500 - }, - { - "epoch": 5.31, - "learning_rate": 8.479268501603477e-06, - "loss": 3.3195, - "step": 1109000 - }, - { - "epoch": 5.31, - "learning_rate": 8.475265818204526e-06, - "loss": 3.2965, - "step": 1109500 - }, - { - "epoch": 5.31, - "learning_rate": 8.471263134805573e-06, - "loss": 3.3079, - "step": 1110000 - }, - { - "epoch": 5.31, - "learning_rate": 8.467260451406624e-06, - "loss": 3.3237, - "step": 1110500 - }, - { - "epoch": 5.32, - "learning_rate": 8.463257768007673e-06, - "loss": 3.3059, - "step": 1111000 - }, - { - "epoch": 5.32, - "learning_rate": 8.459255084608723e-06, - "loss": 3.3178, - "step": 1111500 - }, - { - "epoch": 5.32, - "learning_rate": 8.455252401209772e-06, - "loss": 3.3127, - "step": 1112000 - }, - { - "epoch": 5.32, - "learning_rate": 8.451249717810821e-06, - "loss": 3.3192, - "step": 1112500 - }, - { - "epoch": 5.32, - "learning_rate": 8.44724703441187e-06, - "loss": 3.3053, - "step": 1113000 - }, - { - "epoch": 5.33, - "learning_rate": 8.443244351012921e-06, - "loss": 3.3102, - "step": 1113500 - }, - { - "epoch": 5.33, - "learning_rate": 8.439241667613969e-06, - "loss": 3.3199, - "step": 1114000 - }, - { - "epoch": 5.33, - "learning_rate": 8.435238984215018e-06, - "loss": 3.3016, - "step": 1114500 - }, - { - "epoch": 5.33, - "learning_rate": 8.431236300816069e-06, - "loss": 3.3186, - "step": 1115000 - }, - { - "epoch": 5.34, - "learning_rate": 8.427233617417118e-06, - "loss": 3.2908, - "step": 1115500 - }, - { - "epoch": 5.34, - "learning_rate": 8.423230934018165e-06, - "loss": 3.2958, - "step": 1116000 - }, - { - "epoch": 5.34, - "learning_rate": 8.419228250619216e-06, - "loss": 3.3082, - "step": 1116500 - }, - { - "epoch": 5.34, - "learning_rate": 8.415225567220265e-06, - "loss": 3.2966, - "step": 1117000 - }, - { - "epoch": 5.35, - "learning_rate": 8.411222883821315e-06, - "loss": 3.306, - "step": 1117500 - }, - { - "epoch": 5.35, - "learning_rate": 8.407220200422364e-06, - "loss": 3.3072, - "step": 1118000 - }, - { - "epoch": 5.35, - "learning_rate": 8.403217517023413e-06, - "loss": 3.2981, - "step": 1118500 - }, - { - "epoch": 5.35, - "learning_rate": 8.399214833624462e-06, - "loss": 3.2855, - "step": 1119000 - }, - { - "epoch": 5.36, - "learning_rate": 8.395212150225513e-06, - "loss": 3.3125, - "step": 1119500 - }, - { - "epoch": 5.36, - "learning_rate": 8.39120946682656e-06, - "loss": 3.3075, - "step": 1120000 - }, - { - "epoch": 5.36, - "learning_rate": 8.38720678342761e-06, - "loss": 3.3052, - "step": 1120500 - }, - { - "epoch": 5.36, - "learning_rate": 8.38320410002866e-06, - "loss": 3.2915, - "step": 1121000 - }, - { - "epoch": 5.37, - "learning_rate": 8.37920141662971e-06, - "loss": 3.2974, - "step": 1121500 - }, - { - "epoch": 5.37, - "learning_rate": 8.375198733230757e-06, - "loss": 3.2958, - "step": 1122000 - }, - { - "epoch": 5.37, - "learning_rate": 8.371196049831808e-06, - "loss": 3.2962, - "step": 1122500 - }, - { - "epoch": 5.37, - "learning_rate": 8.367193366432858e-06, - "loss": 3.3038, - "step": 1123000 - }, - { - "epoch": 5.37, - "learning_rate": 8.363190683033907e-06, - "loss": 3.2942, - "step": 1123500 - }, - { - "epoch": 5.38, - "learning_rate": 8.359187999634956e-06, - "loss": 3.2961, - "step": 1124000 - }, - { - "epoch": 5.38, - "learning_rate": 8.355185316236005e-06, - "loss": 3.3067, - "step": 1124500 - }, - { - "epoch": 5.38, - "learning_rate": 8.351182632837054e-06, - "loss": 3.2838, - "step": 1125000 - }, - { - "epoch": 5.38, - "learning_rate": 8.347179949438105e-06, - "loss": 3.3056, - "step": 1125500 - }, - { - "epoch": 5.39, - "learning_rate": 8.343177266039153e-06, - "loss": 3.2759, - "step": 1126000 - }, - { - "epoch": 5.39, - "learning_rate": 8.339174582640202e-06, - "loss": 3.2721, - "step": 1126500 - }, - { - "epoch": 5.39, - "learning_rate": 8.335171899241253e-06, - "loss": 3.2923, - "step": 1127000 - }, - { - "epoch": 5.39, - "learning_rate": 8.331169215842302e-06, - "loss": 3.2961, - "step": 1127500 - }, - { - "epoch": 5.4, - "learning_rate": 8.32716653244335e-06, - "loss": 3.3039, - "step": 1128000 - }, - { - "epoch": 5.4, - "learning_rate": 8.3231638490444e-06, - "loss": 3.2968, - "step": 1128500 - }, - { - "epoch": 5.4, - "learning_rate": 8.31916116564545e-06, - "loss": 3.2871, - "step": 1129000 - }, - { - "epoch": 5.4, - "learning_rate": 8.315158482246499e-06, - "loss": 3.2979, - "step": 1129500 - }, - { - "epoch": 5.41, - "learning_rate": 8.311155798847548e-06, - "loss": 3.2824, - "step": 1130000 - }, - { - "epoch": 5.41, - "learning_rate": 8.307153115448597e-06, - "loss": 3.2944, - "step": 1130500 - }, - { - "epoch": 5.41, - "learning_rate": 8.303150432049646e-06, - "loss": 3.2941, - "step": 1131000 - }, - { - "epoch": 5.41, - "learning_rate": 8.299147748650697e-06, - "loss": 3.2811, - "step": 1131500 - }, - { - "epoch": 5.42, - "learning_rate": 8.295145065251745e-06, - "loss": 3.2813, - "step": 1132000 - }, - { - "epoch": 5.42, - "learning_rate": 8.291142381852794e-06, - "loss": 3.2939, - "step": 1132500 - }, - { - "epoch": 5.42, - "learning_rate": 8.287139698453845e-06, - "loss": 3.2819, - "step": 1133000 - }, - { - "epoch": 5.42, - "learning_rate": 8.283137015054894e-06, - "loss": 3.2774, - "step": 1133500 - }, - { - "epoch": 5.43, - "learning_rate": 8.279134331655942e-06, - "loss": 3.2964, - "step": 1134000 - }, - { - "epoch": 5.43, - "learning_rate": 8.275131648256993e-06, - "loss": 3.2655, - "step": 1134500 - }, - { - "epoch": 5.43, - "learning_rate": 8.271128964858042e-06, - "loss": 3.2791, - "step": 1135000 - }, - { - "epoch": 5.43, - "learning_rate": 8.267126281459091e-06, - "loss": 3.2829, - "step": 1135500 - }, - { - "epoch": 5.43, - "learning_rate": 8.26312359806014e-06, - "loss": 3.281, - "step": 1136000 - }, - { - "epoch": 5.44, - "learning_rate": 8.25912091466119e-06, - "loss": 3.2728, - "step": 1136500 - }, - { - "epoch": 5.44, - "learning_rate": 8.255118231262239e-06, - "loss": 3.2873, - "step": 1137000 - }, - { - "epoch": 5.44, - "learning_rate": 8.25111554786329e-06, - "loss": 3.2807, - "step": 1137500 - }, - { - "epoch": 5.44, - "learning_rate": 8.247112864464337e-06, - "loss": 3.2678, - "step": 1138000 - }, - { - "epoch": 5.45, - "learning_rate": 8.243110181065386e-06, - "loss": 3.2703, - "step": 1138500 - }, - { - "epoch": 5.45, - "learning_rate": 8.239107497666437e-06, - "loss": 3.2813, - "step": 1139000 - }, - { - "epoch": 5.45, - "learning_rate": 8.235104814267486e-06, - "loss": 3.2883, - "step": 1139500 - }, - { - "epoch": 5.45, - "learning_rate": 8.231102130868534e-06, - "loss": 3.266, - "step": 1140000 - }, - { - "epoch": 5.46, - "learning_rate": 8.227099447469585e-06, - "loss": 3.2726, - "step": 1140500 - }, - { - "epoch": 5.46, - "learning_rate": 8.223096764070634e-06, - "loss": 3.2736, - "step": 1141000 - }, - { - "epoch": 5.46, - "learning_rate": 8.219094080671683e-06, - "loss": 3.2734, - "step": 1141500 - }, - { - "epoch": 5.46, - "learning_rate": 8.215091397272732e-06, - "loss": 3.2866, - "step": 1142000 - }, - { - "epoch": 5.47, - "learning_rate": 8.211088713873781e-06, - "loss": 3.2704, - "step": 1142500 - }, - { - "epoch": 5.47, - "learning_rate": 8.20708603047483e-06, - "loss": 3.2742, - "step": 1143000 - }, - { - "epoch": 5.47, - "learning_rate": 8.203083347075882e-06, - "loss": 3.2874, - "step": 1143500 - }, - { - "epoch": 5.47, - "learning_rate": 8.199080663676929e-06, - "loss": 3.2823, - "step": 1144000 - }, - { - "epoch": 5.48, - "learning_rate": 8.195077980277978e-06, - "loss": 3.256, - "step": 1144500 - }, - { - "epoch": 5.48, - "learning_rate": 8.19107529687903e-06, - "loss": 3.274, - "step": 1145000 - }, - { - "epoch": 5.48, - "learning_rate": 8.187072613480078e-06, - "loss": 3.2788, - "step": 1145500 - }, - { - "epoch": 5.48, - "learning_rate": 8.183069930081126e-06, - "loss": 3.2601, - "step": 1146000 - }, - { - "epoch": 5.48, - "learning_rate": 8.179067246682177e-06, - "loss": 3.2771, - "step": 1146500 - }, - { - "epoch": 5.49, - "learning_rate": 8.175064563283226e-06, - "loss": 3.2826, - "step": 1147000 - }, - { - "epoch": 5.49, - "learning_rate": 8.171061879884275e-06, - "loss": 3.2721, - "step": 1147500 - }, - { - "epoch": 5.49, - "learning_rate": 8.167059196485324e-06, - "loss": 3.2869, - "step": 1148000 - }, - { - "epoch": 5.49, - "learning_rate": 8.163056513086374e-06, - "loss": 3.2787, - "step": 1148500 - }, - { - "epoch": 5.5, - "learning_rate": 8.159053829687423e-06, - "loss": 3.2689, - "step": 1149000 - }, - { - "epoch": 5.5, - "learning_rate": 8.155051146288474e-06, - "loss": 3.2545, - "step": 1149500 - }, - { - "epoch": 5.5, - "learning_rate": 8.151048462889521e-06, - "loss": 3.2727, - "step": 1150000 - }, - { - "epoch": 5.5, - "learning_rate": 8.14704577949057e-06, - "loss": 3.2654, - "step": 1150500 - }, - { - "epoch": 5.51, - "learning_rate": 8.143043096091621e-06, - "loss": 3.2624, - "step": 1151000 - }, - { - "epoch": 5.51, - "learning_rate": 8.13904041269267e-06, - "loss": 3.2515, - "step": 1151500 - }, - { - "epoch": 5.51, - "learning_rate": 8.135037729293718e-06, - "loss": 3.2505, - "step": 1152000 - }, - { - "epoch": 5.51, - "learning_rate": 8.131035045894769e-06, - "loss": 3.2632, - "step": 1152500 - }, - { - "epoch": 5.52, - "learning_rate": 8.127032362495818e-06, - "loss": 3.2662, - "step": 1153000 - }, - { - "epoch": 5.52, - "learning_rate": 8.123029679096867e-06, - "loss": 3.2461, - "step": 1153500 - }, - { - "epoch": 5.52, - "learning_rate": 8.119026995697916e-06, - "loss": 3.2541, - "step": 1154000 - }, - { - "epoch": 5.52, - "learning_rate": 8.115024312298966e-06, - "loss": 3.2546, - "step": 1154500 - }, - { - "epoch": 5.53, - "learning_rate": 8.111021628900015e-06, - "loss": 3.2552, - "step": 1155000 - }, - { - "epoch": 5.53, - "learning_rate": 8.107018945501066e-06, - "loss": 3.2635, - "step": 1155500 - }, - { - "epoch": 5.53, - "learning_rate": 8.103016262102113e-06, - "loss": 3.2563, - "step": 1156000 - }, - { - "epoch": 5.53, - "learning_rate": 8.099013578703162e-06, - "loss": 3.2706, - "step": 1156500 - }, - { - "epoch": 5.54, - "learning_rate": 8.095010895304213e-06, - "loss": 3.2538, - "step": 1157000 - }, - { - "epoch": 5.54, - "learning_rate": 8.091008211905263e-06, - "loss": 3.2605, - "step": 1157500 - }, - { - "epoch": 5.54, - "learning_rate": 8.08700552850631e-06, - "loss": 3.2468, - "step": 1158000 - }, - { - "epoch": 5.54, - "learning_rate": 8.083002845107361e-06, - "loss": 3.2514, - "step": 1158500 - }, - { - "epoch": 5.54, - "learning_rate": 8.07900016170841e-06, - "loss": 3.2688, - "step": 1159000 - }, - { - "epoch": 5.55, - "learning_rate": 8.07499747830946e-06, - "loss": 3.2543, - "step": 1159500 - }, - { - "epoch": 5.55, - "learning_rate": 8.070994794910509e-06, - "loss": 3.2374, - "step": 1160000 - }, - { - "epoch": 5.55, - "learning_rate": 8.066992111511558e-06, - "loss": 3.2605, - "step": 1160500 - }, - { - "epoch": 5.55, - "learning_rate": 8.062989428112607e-06, - "loss": 3.2453, - "step": 1161000 - }, - { - "epoch": 5.56, - "learning_rate": 8.058986744713658e-06, - "loss": 3.2419, - "step": 1161500 - }, - { - "epoch": 5.56, - "learning_rate": 8.054984061314705e-06, - "loss": 3.2599, - "step": 1162000 - }, - { - "epoch": 5.56, - "learning_rate": 8.050981377915755e-06, - "loss": 3.2566, - "step": 1162500 - }, - { - "epoch": 5.56, - "learning_rate": 8.046978694516806e-06, - "loss": 3.2559, - "step": 1163000 - }, - { - "epoch": 5.57, - "learning_rate": 8.042976011117855e-06, - "loss": 3.2489, - "step": 1163500 - }, - { - "epoch": 5.57, - "learning_rate": 8.038973327718902e-06, - "loss": 3.2448, - "step": 1164000 - }, - { - "epoch": 5.57, - "learning_rate": 8.034970644319953e-06, - "loss": 3.246, - "step": 1164500 - }, - { - "epoch": 5.57, - "learning_rate": 8.030967960921002e-06, - "loss": 3.2343, - "step": 1165000 - }, - { - "epoch": 5.58, - "learning_rate": 8.026965277522052e-06, - "loss": 3.2286, - "step": 1165500 - }, - { - "epoch": 5.58, - "learning_rate": 8.0229625941231e-06, - "loss": 3.2526, - "step": 1166000 - }, - { - "epoch": 5.58, - "learning_rate": 8.01895991072415e-06, - "loss": 3.2441, - "step": 1166500 - }, - { - "epoch": 5.58, - "learning_rate": 8.014957227325199e-06, - "loss": 3.2316, - "step": 1167000 - }, - { - "epoch": 5.59, - "learning_rate": 8.01095454392625e-06, - "loss": 3.2599, - "step": 1167500 - }, - { - "epoch": 5.59, - "learning_rate": 8.006951860527298e-06, - "loss": 3.2614, - "step": 1168000 - }, - { - "epoch": 5.59, - "learning_rate": 8.002949177128347e-06, - "loss": 3.2432, - "step": 1168500 - }, - { - "epoch": 5.59, - "learning_rate": 7.998946493729398e-06, - "loss": 3.2465, - "step": 1169000 - }, - { - "epoch": 5.59, - "learning_rate": 7.994943810330447e-06, - "loss": 3.2357, - "step": 1169500 - }, - { - "epoch": 5.6, - "learning_rate": 7.990941126931494e-06, - "loss": 3.247, - "step": 1170000 - }, - { - "epoch": 5.6, - "learning_rate": 7.986938443532545e-06, - "loss": 3.2461, - "step": 1170500 - }, - { - "epoch": 5.6, - "learning_rate": 7.982935760133594e-06, - "loss": 3.2429, - "step": 1171000 - }, - { - "epoch": 5.6, - "learning_rate": 7.978933076734644e-06, - "loss": 3.2473, - "step": 1171500 - }, - { - "epoch": 5.61, - "learning_rate": 7.974930393335693e-06, - "loss": 3.2314, - "step": 1172000 - }, - { - "epoch": 5.61, - "learning_rate": 7.970927709936742e-06, - "loss": 3.2502, - "step": 1172500 - }, - { - "epoch": 5.61, - "learning_rate": 7.966925026537791e-06, - "loss": 3.2342, - "step": 1173000 - }, - { - "epoch": 5.61, - "learning_rate": 7.962922343138842e-06, - "loss": 3.229, - "step": 1173500 - }, - { - "epoch": 5.62, - "learning_rate": 7.95891965973989e-06, - "loss": 3.2453, - "step": 1174000 - }, - { - "epoch": 5.62, - "learning_rate": 7.954916976340939e-06, - "loss": 3.2478, - "step": 1174500 - }, - { - "epoch": 5.62, - "learning_rate": 7.95091429294199e-06, - "loss": 3.248, - "step": 1175000 - }, - { - "epoch": 5.62, - "learning_rate": 7.946911609543039e-06, - "loss": 3.242, - "step": 1175500 - }, - { - "epoch": 5.63, - "learning_rate": 7.942908926144086e-06, - "loss": 3.2378, - "step": 1176000 - }, - { - "epoch": 5.63, - "learning_rate": 7.938906242745137e-06, - "loss": 3.2453, - "step": 1176500 - }, - { - "epoch": 5.63, - "learning_rate": 7.934903559346187e-06, - "loss": 3.2363, - "step": 1177000 - }, - { - "epoch": 5.63, - "learning_rate": 7.930900875947236e-06, - "loss": 3.2372, - "step": 1177500 - }, - { - "epoch": 5.64, - "learning_rate": 7.926898192548285e-06, - "loss": 3.2232, - "step": 1178000 - }, - { - "epoch": 5.64, - "learning_rate": 7.922895509149334e-06, - "loss": 3.2244, - "step": 1178500 - }, - { - "epoch": 5.64, - "learning_rate": 7.918892825750383e-06, - "loss": 3.2392, - "step": 1179000 - }, - { - "epoch": 5.64, - "learning_rate": 7.914890142351434e-06, - "loss": 3.2361, - "step": 1179500 - }, - { - "epoch": 5.65, - "learning_rate": 7.910887458952482e-06, - "loss": 3.2229, - "step": 1180000 - }, - { - "epoch": 5.65, - "learning_rate": 7.906884775553531e-06, - "loss": 3.2502, - "step": 1180500 - }, - { - "epoch": 5.65, - "learning_rate": 7.902882092154582e-06, - "loss": 3.2407, - "step": 1181000 - }, - { - "epoch": 5.65, - "learning_rate": 7.898879408755631e-06, - "loss": 3.2298, - "step": 1181500 - }, - { - "epoch": 5.65, - "learning_rate": 7.894876725356679e-06, - "loss": 3.2216, - "step": 1182000 - }, - { - "epoch": 5.66, - "learning_rate": 7.89087404195773e-06, - "loss": 3.2201, - "step": 1182500 - }, - { - "epoch": 5.66, - "learning_rate": 7.886871358558779e-06, - "loss": 3.2355, - "step": 1183000 - }, - { - "epoch": 5.66, - "learning_rate": 7.882868675159828e-06, - "loss": 3.2362, - "step": 1183500 - }, - { - "epoch": 5.66, - "learning_rate": 7.878865991760877e-06, - "loss": 3.2239, - "step": 1184000 - }, - { - "epoch": 5.67, - "learning_rate": 7.874863308361926e-06, - "loss": 3.2249, - "step": 1184500 - }, - { - "epoch": 5.67, - "learning_rate": 7.870860624962975e-06, - "loss": 3.2411, - "step": 1185000 - }, - { - "epoch": 5.67, - "learning_rate": 7.866857941564026e-06, - "loss": 3.2443, - "step": 1185500 - }, - { - "epoch": 5.67, - "learning_rate": 7.862855258165074e-06, - "loss": 3.2211, - "step": 1186000 - }, - { - "epoch": 5.68, - "learning_rate": 7.858852574766123e-06, - "loss": 3.2286, - "step": 1186500 - }, - { - "epoch": 5.68, - "learning_rate": 7.854849891367174e-06, - "loss": 3.2384, - "step": 1187000 - }, - { - "epoch": 5.68, - "learning_rate": 7.850847207968223e-06, - "loss": 3.2304, - "step": 1187500 - }, - { - "epoch": 5.68, - "learning_rate": 7.84684452456927e-06, - "loss": 3.2317, - "step": 1188000 - }, - { - "epoch": 5.69, - "learning_rate": 7.842841841170322e-06, - "loss": 3.223, - "step": 1188500 - }, - { - "epoch": 5.69, - "learning_rate": 7.83883915777137e-06, - "loss": 3.231, - "step": 1189000 - }, - { - "epoch": 5.69, - "learning_rate": 7.83483647437242e-06, - "loss": 3.2226, - "step": 1189500 - }, - { - "epoch": 5.69, - "learning_rate": 7.830833790973469e-06, - "loss": 3.2199, - "step": 1190000 - }, - { - "epoch": 5.7, - "learning_rate": 7.826831107574518e-06, - "loss": 3.2368, - "step": 1190500 - }, - { - "epoch": 5.7, - "learning_rate": 7.822828424175568e-06, - "loss": 3.2386, - "step": 1191000 - }, - { - "epoch": 5.7, - "learning_rate": 7.818825740776618e-06, - "loss": 3.2184, - "step": 1191500 - }, - { - "epoch": 5.7, - "learning_rate": 7.814823057377666e-06, - "loss": 3.2167, - "step": 1192000 - }, - { - "epoch": 5.71, - "learning_rate": 7.810820373978715e-06, - "loss": 3.2251, - "step": 1192500 - }, - { - "epoch": 5.71, - "learning_rate": 7.806817690579766e-06, - "loss": 3.2358, - "step": 1193000 - }, - { - "epoch": 5.71, - "learning_rate": 7.802815007180815e-06, - "loss": 3.2141, - "step": 1193500 - }, - { - "epoch": 5.71, - "learning_rate": 7.798812323781863e-06, - "loss": 3.2126, - "step": 1194000 - }, - { - "epoch": 5.71, - "learning_rate": 7.794809640382914e-06, - "loss": 3.2383, - "step": 1194500 - }, - { - "epoch": 5.72, - "learning_rate": 7.790806956983963e-06, - "loss": 3.2188, - "step": 1195000 - }, - { - "epoch": 5.72, - "learning_rate": 7.786804273585012e-06, - "loss": 3.2143, - "step": 1195500 - }, - { - "epoch": 5.72, - "learning_rate": 7.782801590186061e-06, - "loss": 3.2256, - "step": 1196000 - }, - { - "epoch": 5.72, - "learning_rate": 7.77879890678711e-06, - "loss": 3.2247, - "step": 1196500 - }, - { - "epoch": 5.73, - "learning_rate": 7.77479622338816e-06, - "loss": 3.2005, - "step": 1197000 - }, - { - "epoch": 5.73, - "learning_rate": 7.77079353998921e-06, - "loss": 3.2231, - "step": 1197500 - }, - { - "epoch": 5.73, - "learning_rate": 7.766790856590258e-06, - "loss": 3.215, - "step": 1198000 - }, - { - "epoch": 5.73, - "learning_rate": 7.762788173191307e-06, - "loss": 3.2147, - "step": 1198500 - }, - { - "epoch": 5.74, - "learning_rate": 7.758785489792358e-06, - "loss": 3.2175, - "step": 1199000 - }, - { - "epoch": 5.74, - "learning_rate": 7.754782806393407e-06, - "loss": 3.2086, - "step": 1199500 - }, - { - "epoch": 5.74, - "learning_rate": 7.750780122994455e-06, - "loss": 3.207, - "step": 1200000 - }, - { - "epoch": 5.74, - "learning_rate": 7.746777439595506e-06, - "loss": 3.2035, - "step": 1200500 - }, - { - "epoch": 5.75, - "learning_rate": 7.742774756196555e-06, - "loss": 3.2304, - "step": 1201000 - }, - { - "epoch": 5.75, - "learning_rate": 7.738772072797604e-06, - "loss": 3.2161, - "step": 1201500 - }, - { - "epoch": 5.75, - "learning_rate": 7.734769389398653e-06, - "loss": 3.2194, - "step": 1202000 - }, - { - "epoch": 5.75, - "learning_rate": 7.730766705999703e-06, - "loss": 3.2104, - "step": 1202500 - }, - { - "epoch": 5.76, - "learning_rate": 7.726764022600752e-06, - "loss": 3.2134, - "step": 1203000 - }, - { - "epoch": 5.76, - "learning_rate": 7.722761339201803e-06, - "loss": 3.219, - "step": 1203500 - }, - { - "epoch": 5.76, - "learning_rate": 7.71875865580285e-06, - "loss": 3.2207, - "step": 1204000 - }, - { - "epoch": 5.76, - "learning_rate": 7.7147559724039e-06, - "loss": 3.2118, - "step": 1204500 - }, - { - "epoch": 5.76, - "learning_rate": 7.71075328900495e-06, - "loss": 3.2275, - "step": 1205000 - }, - { - "epoch": 5.77, - "learning_rate": 7.706750605606e-06, - "loss": 3.1857, - "step": 1205500 - }, - { - "epoch": 5.77, - "learning_rate": 7.702747922207047e-06, - "loss": 3.215, - "step": 1206000 - }, - { - "epoch": 5.77, - "learning_rate": 7.698745238808098e-06, - "loss": 3.2049, - "step": 1206500 - }, - { - "epoch": 5.77, - "learning_rate": 7.694742555409147e-06, - "loss": 3.2012, - "step": 1207000 - }, - { - "epoch": 5.78, - "learning_rate": 7.690739872010196e-06, - "loss": 3.2215, - "step": 1207500 - }, - { - "epoch": 5.78, - "learning_rate": 7.686737188611245e-06, - "loss": 3.2027, - "step": 1208000 - }, - { - "epoch": 5.78, - "learning_rate": 7.682734505212295e-06, - "loss": 3.2169, - "step": 1208500 - }, - { - "epoch": 5.78, - "learning_rate": 7.678731821813344e-06, - "loss": 3.2064, - "step": 1209000 - }, - { - "epoch": 5.79, - "learning_rate": 7.674729138414395e-06, - "loss": 3.2188, - "step": 1209500 - }, - { - "epoch": 5.79, - "learning_rate": 7.670726455015442e-06, - "loss": 3.1964, - "step": 1210000 - }, - { - "epoch": 5.79, - "learning_rate": 7.666723771616491e-06, - "loss": 3.1835, - "step": 1210500 - }, - { - "epoch": 5.79, - "learning_rate": 7.662721088217542e-06, - "loss": 3.1982, - "step": 1211000 - }, - { - "epoch": 5.8, - "learning_rate": 7.658718404818592e-06, - "loss": 3.2069, - "step": 1211500 - }, - { - "epoch": 5.8, - "learning_rate": 7.654715721419639e-06, - "loss": 3.1984, - "step": 1212000 - }, - { - "epoch": 5.8, - "learning_rate": 7.65071303802069e-06, - "loss": 3.197, - "step": 1212500 - }, - { - "epoch": 5.8, - "learning_rate": 7.64671035462174e-06, - "loss": 3.2164, - "step": 1213000 - }, - { - "epoch": 5.81, - "learning_rate": 7.642707671222788e-06, - "loss": 3.2134, - "step": 1213500 - }, - { - "epoch": 5.81, - "learning_rate": 7.638704987823838e-06, - "loss": 3.1984, - "step": 1214000 - }, - { - "epoch": 5.81, - "learning_rate": 7.634702304424887e-06, - "loss": 3.2043, - "step": 1214500 - }, - { - "epoch": 5.81, - "learning_rate": 7.630699621025936e-06, - "loss": 3.1946, - "step": 1215000 - }, - { - "epoch": 5.82, - "learning_rate": 7.626696937626986e-06, - "loss": 3.2126, - "step": 1215500 - }, - { - "epoch": 5.82, - "learning_rate": 7.622694254228034e-06, - "loss": 3.1757, - "step": 1216000 - }, - { - "epoch": 5.82, - "learning_rate": 7.6186915708290844e-06, - "loss": 3.1974, - "step": 1216500 - }, - { - "epoch": 5.82, - "learning_rate": 7.614688887430134e-06, - "loss": 3.1973, - "step": 1217000 - }, - { - "epoch": 5.82, - "learning_rate": 7.610686204031184e-06, - "loss": 3.1989, - "step": 1217500 - }, - { - "epoch": 5.83, - "learning_rate": 7.606683520632232e-06, - "loss": 3.2144, - "step": 1218000 - }, - { - "epoch": 5.83, - "learning_rate": 7.602680837233281e-06, - "loss": 3.1938, - "step": 1218500 - }, - { - "epoch": 5.83, - "learning_rate": 7.598678153834331e-06, - "loss": 3.1877, - "step": 1219000 - }, - { - "epoch": 5.83, - "learning_rate": 7.5946754704353805e-06, - "loss": 3.2009, - "step": 1219500 - }, - { - "epoch": 5.84, - "learning_rate": 7.59067278703643e-06, - "loss": 3.1983, - "step": 1220000 - }, - { - "epoch": 5.84, - "learning_rate": 7.586670103637479e-06, - "loss": 3.197, - "step": 1220500 - }, - { - "epoch": 5.84, - "learning_rate": 7.582667420238529e-06, - "loss": 3.2096, - "step": 1221000 - }, - { - "epoch": 5.84, - "learning_rate": 7.578664736839578e-06, - "loss": 3.1755, - "step": 1221500 - }, - { - "epoch": 5.85, - "learning_rate": 7.5746620534406265e-06, - "loss": 3.1916, - "step": 1222000 - }, - { - "epoch": 5.85, - "learning_rate": 7.5706593700416766e-06, - "loss": 3.2051, - "step": 1222500 - }, - { - "epoch": 5.85, - "learning_rate": 7.566656686642726e-06, - "loss": 3.1812, - "step": 1223000 - }, - { - "epoch": 5.85, - "learning_rate": 7.562654003243776e-06, - "loss": 3.1993, - "step": 1223500 - }, - { - "epoch": 5.86, - "learning_rate": 7.558651319844824e-06, - "loss": 3.1907, - "step": 1224000 - }, - { - "epoch": 5.86, - "learning_rate": 7.554648636445873e-06, - "loss": 3.1972, - "step": 1224500 - }, - { - "epoch": 5.86, - "learning_rate": 7.550645953046923e-06, - "loss": 3.2049, - "step": 1225000 - }, - { - "epoch": 5.86, - "learning_rate": 7.546643269647973e-06, - "loss": 3.1825, - "step": 1225500 - }, - { - "epoch": 5.87, - "learning_rate": 7.542640586249022e-06, - "loss": 3.1768, - "step": 1226000 - }, - { - "epoch": 5.87, - "learning_rate": 7.538637902850071e-06, - "loss": 3.1902, - "step": 1226500 - }, - { - "epoch": 5.87, - "learning_rate": 7.534635219451121e-06, - "loss": 3.1875, - "step": 1227000 - }, - { - "epoch": 5.87, - "learning_rate": 7.53063253605217e-06, - "loss": 3.198, - "step": 1227500 - }, - { - "epoch": 5.87, - "learning_rate": 7.526629852653219e-06, - "loss": 3.1851, - "step": 1228000 - }, - { - "epoch": 5.88, - "learning_rate": 7.522627169254269e-06, - "loss": 3.1934, - "step": 1228500 - }, - { - "epoch": 5.88, - "learning_rate": 7.518624485855318e-06, - "loss": 3.1717, - "step": 1229000 - }, - { - "epoch": 5.88, - "learning_rate": 7.514621802456368e-06, - "loss": 3.1948, - "step": 1229500 - }, - { - "epoch": 5.88, - "learning_rate": 7.510619119057416e-06, - "loss": 3.2118, - "step": 1230000 - }, - { - "epoch": 5.89, - "learning_rate": 7.5066164356584655e-06, - "loss": 3.1771, - "step": 1230500 - }, - { - "epoch": 5.89, - "learning_rate": 7.5026137522595155e-06, - "loss": 3.1818, - "step": 1231000 - }, - { - "epoch": 5.89, - "learning_rate": 7.498611068860565e-06, - "loss": 3.189, - "step": 1231500 - }, - { - "epoch": 5.89, - "learning_rate": 7.494608385461614e-06, - "loss": 3.1778, - "step": 1232000 - }, - { - "epoch": 5.9, - "learning_rate": 7.490605702062663e-06, - "loss": 3.1775, - "step": 1232500 - }, - { - "epoch": 5.9, - "learning_rate": 7.486603018663713e-06, - "loss": 3.1775, - "step": 1233000 - }, - { - "epoch": 5.9, - "learning_rate": 7.482600335264762e-06, - "loss": 3.1882, - "step": 1233500 - }, - { - "epoch": 5.9, - "learning_rate": 7.478597651865811e-06, - "loss": 3.1981, - "step": 1234000 - }, - { - "epoch": 5.91, - "learning_rate": 7.474594968466861e-06, - "loss": 3.1832, - "step": 1234500 - }, - { - "epoch": 5.91, - "learning_rate": 7.47059228506791e-06, - "loss": 3.1902, - "step": 1235000 - }, - { - "epoch": 5.91, - "learning_rate": 7.46658960166896e-06, - "loss": 3.1851, - "step": 1235500 - }, - { - "epoch": 5.91, - "learning_rate": 7.462586918270008e-06, - "loss": 3.1938, - "step": 1236000 - }, - { - "epoch": 5.92, - "learning_rate": 7.458584234871058e-06, - "loss": 3.1565, - "step": 1236500 - }, - { - "epoch": 5.92, - "learning_rate": 7.454581551472108e-06, - "loss": 3.1785, - "step": 1237000 - }, - { - "epoch": 5.92, - "learning_rate": 7.450578868073157e-06, - "loss": 3.1825, - "step": 1237500 - }, - { - "epoch": 5.92, - "learning_rate": 7.446576184674206e-06, - "loss": 3.1712, - "step": 1238000 - }, - { - "epoch": 5.93, - "learning_rate": 7.442573501275255e-06, - "loss": 3.1838, - "step": 1238500 - }, - { - "epoch": 5.93, - "learning_rate": 7.438570817876305e-06, - "loss": 3.1843, - "step": 1239000 - }, - { - "epoch": 5.93, - "learning_rate": 7.4345681344773545e-06, - "loss": 3.1868, - "step": 1239500 - }, - { - "epoch": 5.93, - "learning_rate": 7.430565451078403e-06, - "loss": 3.1785, - "step": 1240000 - }, - { - "epoch": 5.93, - "learning_rate": 7.426562767679453e-06, - "loss": 3.1836, - "step": 1240500 - }, - { - "epoch": 5.94, - "learning_rate": 7.422560084280502e-06, - "loss": 3.1799, - "step": 1241000 - }, - { - "epoch": 5.94, - "learning_rate": 7.418557400881552e-06, - "loss": 3.1859, - "step": 1241500 - }, - { - "epoch": 5.94, - "learning_rate": 7.4145547174826005e-06, - "loss": 3.1834, - "step": 1242000 - }, - { - "epoch": 5.94, - "learning_rate": 7.41055203408365e-06, - "loss": 3.1853, - "step": 1242500 - }, - { - "epoch": 5.95, - "learning_rate": 7.4065493506847e-06, - "loss": 3.166, - "step": 1243000 - }, - { - "epoch": 5.95, - "learning_rate": 7.402546667285749e-06, - "loss": 3.1666, - "step": 1243500 - }, - { - "epoch": 5.95, - "learning_rate": 7.398543983886798e-06, - "loss": 3.1739, - "step": 1244000 - }, - { - "epoch": 5.95, - "learning_rate": 7.394541300487847e-06, - "loss": 3.1669, - "step": 1244500 - }, - { - "epoch": 5.96, - "learning_rate": 7.390538617088897e-06, - "loss": 3.1841, - "step": 1245000 - }, - { - "epoch": 5.96, - "learning_rate": 7.386535933689947e-06, - "loss": 3.1653, - "step": 1245500 - }, - { - "epoch": 5.96, - "learning_rate": 7.382533250290995e-06, - "loss": 3.1598, - "step": 1246000 - }, - { - "epoch": 5.96, - "learning_rate": 7.378530566892045e-06, - "loss": 3.1673, - "step": 1246500 - }, - { - "epoch": 5.97, - "learning_rate": 7.374527883493094e-06, - "loss": 3.1755, - "step": 1247000 - }, - { - "epoch": 5.97, - "learning_rate": 7.370525200094144e-06, - "loss": 3.1797, - "step": 1247500 - }, - { - "epoch": 5.97, - "learning_rate": 7.366522516695193e-06, - "loss": 3.1844, - "step": 1248000 - }, - { - "epoch": 5.97, - "learning_rate": 7.362519833296242e-06, - "loss": 3.1611, - "step": 1248500 - }, - { - "epoch": 5.98, - "learning_rate": 7.358517149897292e-06, - "loss": 3.1623, - "step": 1249000 - }, - { - "epoch": 5.98, - "learning_rate": 7.354514466498341e-06, - "loss": 3.1857, - "step": 1249500 - }, - { - "epoch": 5.98, - "learning_rate": 7.35051178309939e-06, - "loss": 3.1599, - "step": 1250000 - }, - { - "epoch": 5.98, - "learning_rate": 7.3465090997004395e-06, - "loss": 3.1663, - "step": 1250500 - }, - { - "epoch": 5.98, - "learning_rate": 7.3425064163014895e-06, - "loss": 3.1652, - "step": 1251000 - }, - { - "epoch": 5.99, - "learning_rate": 7.338503732902539e-06, - "loss": 3.17, - "step": 1251500 - }, - { - "epoch": 5.99, - "learning_rate": 7.334501049503587e-06, - "loss": 3.1542, - "step": 1252000 - }, - { - "epoch": 5.99, - "learning_rate": 7.330498366104637e-06, - "loss": 3.1714, - "step": 1252500 - }, - { - "epoch": 5.99, - "learning_rate": 7.326495682705686e-06, - "loss": 3.1672, - "step": 1253000 - }, - { - "epoch": 6.0, - "learning_rate": 7.322492999306736e-06, - "loss": 3.1768, - "step": 1253500 - }, - { - "epoch": 6.0, - "learning_rate": 7.318490315907785e-06, - "loss": 3.1719, - "step": 1254000 - }, - { - "epoch": 6.0, - "step": 1254162, - "total_flos": 5550828628572068352, - "train_runtime": 106122.2527, - "train_samples_per_second": 11.818 + "step": 190474, + "total_flos": 812201236873720320, + "train_runtime": 31399.0337, + "train_samples_per_second": 6.066 } ], - "max_steps": 1254162, - "num_train_epochs": 6, - "total_flos": 5550828628572068352, + "max_steps": 190474, + "num_train_epochs": 1, + "total_flos": 812201236873720320, "trial_name": null, "trial_params": null }