{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.565962534874451, "global_step": 54000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0004991142604074402, "loss": 2.333, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.0004982285208148805, "loss": 2.3899, "step": 200 }, { "epoch": 0.05, "learning_rate": 0.0004973427812223207, "loss": 2.4422, "step": 300 }, { "epoch": 0.07, "learning_rate": 0.0004964570416297609, "loss": 2.4359, "step": 400 }, { "epoch": 0.09, "learning_rate": 0.0004955713020372011, "loss": 2.3725, "step": 500 }, { "epoch": 0.11, "learning_rate": 0.0004946855624446413, "loss": 2.4478, "step": 600 }, { "epoch": 0.12, "learning_rate": 0.0004937998228520815, "loss": 2.4036, "step": 700 }, { "epoch": 0.14, "learning_rate": 0.0004929140832595217, "loss": 2.3984, "step": 800 }, { "epoch": 0.16, "learning_rate": 0.0004920283436669619, "loss": 2.3836, "step": 900 }, { "epoch": 0.18, "learning_rate": 0.0004911426040744021, "loss": 2.3708, "step": 1000 }, { "epoch": 0.19, "learning_rate": 0.0004902568644818423, "loss": 2.3009, "step": 1100 }, { "epoch": 0.21, "learning_rate": 0.0004893711248892826, "loss": 2.367, "step": 1200 }, { "epoch": 0.23, "learning_rate": 0.0004884853852967228, "loss": 2.3713, "step": 1300 }, { "epoch": 0.25, "learning_rate": 0.000487599645704163, "loss": 2.3609, "step": 1400 }, { "epoch": 0.27, "learning_rate": 0.00048671390611160323, "loss": 2.3181, "step": 1500 }, { "epoch": 0.28, "learning_rate": 0.00048582816651904345, "loss": 2.3249, "step": 1600 }, { "epoch": 0.3, "learning_rate": 0.0004849424269264836, "loss": 2.3505, "step": 1700 }, { "epoch": 0.32, "learning_rate": 0.00048405668733392384, "loss": 2.2962, "step": 1800 }, { "epoch": 0.34, "learning_rate": 0.00048317094774136406, "loss": 2.3446, "step": 1900 }, { "epoch": 0.35, "learning_rate": 0.0004822852081488043, "loss": 2.308, "step": 2000 }, { "epoch": 0.37, "learning_rate": 0.00048139946855624445, "loss": 2.2919, "step": 2100 }, { "epoch": 0.39, "learning_rate": 0.0004805137289636847, "loss": 2.308, "step": 2200 }, { "epoch": 0.41, "learning_rate": 0.0004796279893711249, "loss": 2.2863, "step": 2300 }, { "epoch": 0.43, "learning_rate": 0.0004787422497785651, "loss": 2.2976, "step": 2400 }, { "epoch": 0.44, "learning_rate": 0.00047785651018600534, "loss": 2.288, "step": 2500 }, { "epoch": 0.46, "learning_rate": 0.0004769707705934455, "loss": 2.2771, "step": 2600 }, { "epoch": 0.48, "learning_rate": 0.00047608503100088573, "loss": 2.2608, "step": 2700 }, { "epoch": 0.5, "learning_rate": 0.00047519929140832595, "loss": 2.2227, "step": 2800 }, { "epoch": 0.51, "learning_rate": 0.0004743135518157662, "loss": 2.2823, "step": 2900 }, { "epoch": 0.53, "learning_rate": 0.0004734278122232064, "loss": 2.1959, "step": 3000 }, { "epoch": 0.55, "learning_rate": 0.00047254207263064656, "loss": 2.2388, "step": 3100 }, { "epoch": 0.57, "learning_rate": 0.0004716563330380868, "loss": 2.252, "step": 3200 }, { "epoch": 0.58, "learning_rate": 0.000470770593445527, "loss": 2.2295, "step": 3300 }, { "epoch": 0.6, "learning_rate": 0.00046988485385296723, "loss": 2.2035, "step": 3400 }, { "epoch": 0.62, "learning_rate": 0.0004689991142604074, "loss": 2.285, "step": 3500 }, { "epoch": 0.64, "learning_rate": 0.00046811337466784767, "loss": 2.2439, "step": 3600 }, { "epoch": 0.66, "learning_rate": 0.0004672276350752879, "loss": 2.2378, "step": 3700 }, { "epoch": 0.67, "learning_rate": 0.0004663418954827281, "loss": 2.2499, "step": 3800 }, { "epoch": 0.69, "learning_rate": 0.00046545615589016834, "loss": 2.1999, "step": 3900 }, { "epoch": 0.71, "learning_rate": 0.0004645704162976085, "loss": 2.1862, "step": 4000 }, { "epoch": 0.73, "learning_rate": 0.00046368467670504873, "loss": 2.217, "step": 4100 }, { "epoch": 0.74, "learning_rate": 0.00046279893711248895, "loss": 2.2083, "step": 4200 }, { "epoch": 0.76, "learning_rate": 0.00046191319751992917, "loss": 2.1784, "step": 4300 }, { "epoch": 0.78, "learning_rate": 0.0004610274579273694, "loss": 2.1467, "step": 4400 }, { "epoch": 0.8, "learning_rate": 0.00046014171833480956, "loss": 2.2254, "step": 4500 }, { "epoch": 0.81, "learning_rate": 0.0004592559787422498, "loss": 2.2067, "step": 4600 }, { "epoch": 0.83, "learning_rate": 0.00045837023914969, "loss": 2.2011, "step": 4700 }, { "epoch": 0.85, "learning_rate": 0.0004574844995571302, "loss": 2.1521, "step": 4800 }, { "epoch": 0.87, "learning_rate": 0.0004565987599645704, "loss": 2.193, "step": 4900 }, { "epoch": 0.89, "learning_rate": 0.0004557130203720106, "loss": 2.1709, "step": 5000 }, { "epoch": 0.9, "learning_rate": 0.00045482728077945084, "loss": 2.1806, "step": 5100 }, { "epoch": 0.92, "learning_rate": 0.00045394154118689106, "loss": 2.1895, "step": 5200 }, { "epoch": 0.94, "learning_rate": 0.0004530558015943313, "loss": 2.203, "step": 5300 }, { "epoch": 0.96, "learning_rate": 0.00045217006200177145, "loss": 2.1686, "step": 5400 }, { "epoch": 0.97, "learning_rate": 0.00045128432240921167, "loss": 2.1461, "step": 5500 }, { "epoch": 0.99, "learning_rate": 0.0004503985828166519, "loss": 2.1239, "step": 5600 }, { "epoch": 1.01, "learning_rate": 0.0004495128432240921, "loss": 2.0857, "step": 5700 }, { "epoch": 1.03, "learning_rate": 0.00044862710363153234, "loss": 2.0997, "step": 5800 }, { "epoch": 1.05, "learning_rate": 0.0004477413640389725, "loss": 2.0756, "step": 5900 }, { "epoch": 1.06, "learning_rate": 0.0004468556244464128, "loss": 2.0468, "step": 6000 }, { "epoch": 1.08, "learning_rate": 0.000445969884853853, "loss": 2.0412, "step": 6100 }, { "epoch": 1.1, "learning_rate": 0.0004450841452612932, "loss": 2.0289, "step": 6200 }, { "epoch": 1.12, "learning_rate": 0.0004441984056687334, "loss": 2.07, "step": 6300 }, { "epoch": 1.13, "learning_rate": 0.0004433126660761736, "loss": 2.039, "step": 6400 }, { "epoch": 1.15, "learning_rate": 0.00044242692648361384, "loss": 2.0749, "step": 6500 }, { "epoch": 1.17, "learning_rate": 0.00044154118689105406, "loss": 2.0776, "step": 6600 }, { "epoch": 1.19, "learning_rate": 0.0004406554472984943, "loss": 2.0375, "step": 6700 }, { "epoch": 1.2, "learning_rate": 0.00043976970770593445, "loss": 2.0512, "step": 6800 }, { "epoch": 1.22, "learning_rate": 0.00043888396811337467, "loss": 2.0449, "step": 6900 }, { "epoch": 1.24, "learning_rate": 0.0004379982285208149, "loss": 2.0157, "step": 7000 }, { "epoch": 1.26, "learning_rate": 0.0004371124889282551, "loss": 2.0406, "step": 7100 }, { "epoch": 1.28, "learning_rate": 0.00043622674933569533, "loss": 2.039, "step": 7200 }, { "epoch": 1.29, "learning_rate": 0.0004353410097431355, "loss": 2.0525, "step": 7300 }, { "epoch": 1.31, "learning_rate": 0.0004344552701505757, "loss": 2.0217, "step": 7400 }, { "epoch": 1.33, "learning_rate": 0.00043356953055801595, "loss": 2.0398, "step": 7500 }, { "epoch": 1.35, "learning_rate": 0.00043268379096545617, "loss": 2.0426, "step": 7600 }, { "epoch": 1.36, "learning_rate": 0.00043179805137289634, "loss": 2.0565, "step": 7700 }, { "epoch": 1.38, "learning_rate": 0.00043091231178033656, "loss": 1.9831, "step": 7800 }, { "epoch": 1.4, "learning_rate": 0.0004300265721877768, "loss": 2.0202, "step": 7900 }, { "epoch": 1.42, "learning_rate": 0.000429140832595217, "loss": 2.0607, "step": 8000 }, { "epoch": 1.43, "learning_rate": 0.0004282550930026572, "loss": 2.0394, "step": 8100 }, { "epoch": 1.45, "learning_rate": 0.0004273693534100974, "loss": 2.0303, "step": 8200 }, { "epoch": 1.47, "learning_rate": 0.00042648361381753767, "loss": 2.0451, "step": 8300 }, { "epoch": 1.49, "learning_rate": 0.0004255978742249779, "loss": 2.0185, "step": 8400 }, { "epoch": 1.51, "learning_rate": 0.0004247121346324181, "loss": 2.0064, "step": 8500 }, { "epoch": 1.52, "learning_rate": 0.00042382639503985833, "loss": 2.0093, "step": 8600 }, { "epoch": 1.54, "learning_rate": 0.0004229406554472985, "loss": 1.9924, "step": 8700 }, { "epoch": 1.56, "learning_rate": 0.0004220549158547387, "loss": 2.0301, "step": 8800 }, { "epoch": 1.58, "learning_rate": 0.00042116917626217894, "loss": 1.9849, "step": 8900 }, { "epoch": 1.59, "learning_rate": 0.00042028343666961917, "loss": 2.0016, "step": 9000 }, { "epoch": 1.61, "learning_rate": 0.00041939769707705933, "loss": 1.9805, "step": 9100 }, { "epoch": 1.63, "learning_rate": 0.00041851195748449956, "loss": 2.0216, "step": 9200 }, { "epoch": 1.65, "learning_rate": 0.0004176262178919398, "loss": 1.9921, "step": 9300 }, { "epoch": 1.67, "learning_rate": 0.00041674047829938, "loss": 1.9857, "step": 9400 }, { "epoch": 1.68, "learning_rate": 0.0004158547387068202, "loss": 2.011, "step": 9500 }, { "epoch": 1.7, "learning_rate": 0.0004149689991142604, "loss": 2.0079, "step": 9600 }, { "epoch": 1.72, "learning_rate": 0.0004140832595217006, "loss": 2.0178, "step": 9700 }, { "epoch": 1.74, "learning_rate": 0.00041319751992914083, "loss": 1.9978, "step": 9800 }, { "epoch": 1.75, "learning_rate": 0.00041231178033658105, "loss": 1.9585, "step": 9900 }, { "epoch": 1.77, "learning_rate": 0.0004114260407440213, "loss": 1.9802, "step": 10000 }, { "epoch": 1.79, "learning_rate": 0.00041054030115146144, "loss": 1.9912, "step": 10100 }, { "epoch": 1.81, "learning_rate": 0.00040965456155890167, "loss": 2.0093, "step": 10200 }, { "epoch": 1.82, "learning_rate": 0.0004087688219663419, "loss": 1.9695, "step": 10300 }, { "epoch": 1.84, "learning_rate": 0.0004078830823737821, "loss": 2.0003, "step": 10400 }, { "epoch": 1.86, "learning_rate": 0.0004069973427812223, "loss": 2.0081, "step": 10500 }, { "epoch": 1.88, "learning_rate": 0.0004061116031886625, "loss": 1.9888, "step": 10600 }, { "epoch": 1.9, "learning_rate": 0.0004052258635961028, "loss": 2.0072, "step": 10700 }, { "epoch": 1.91, "learning_rate": 0.000404340124003543, "loss": 2.0097, "step": 10800 }, { "epoch": 1.93, "learning_rate": 0.0004034543844109832, "loss": 2.0066, "step": 10900 }, { "epoch": 1.95, "learning_rate": 0.0004025686448184234, "loss": 1.9976, "step": 11000 }, { "epoch": 1.97, "learning_rate": 0.0004016829052258636, "loss": 1.9543, "step": 11100 }, { "epoch": 1.98, "learning_rate": 0.00040079716563330383, "loss": 1.9676, "step": 11200 }, { "epoch": 2.0, "learning_rate": 0.00039991142604074405, "loss": 1.9547, "step": 11300 }, { "epoch": 2.02, "learning_rate": 0.0003990256864481843, "loss": 1.8447, "step": 11400 }, { "epoch": 2.04, "learning_rate": 0.00039813994685562444, "loss": 1.9108, "step": 11500 }, { "epoch": 2.05, "learning_rate": 0.00039725420726306466, "loss": 1.8746, "step": 11600 }, { "epoch": 2.07, "learning_rate": 0.0003963684676705049, "loss": 1.864, "step": 11700 }, { "epoch": 2.09, "learning_rate": 0.0003954827280779451, "loss": 1.8742, "step": 11800 }, { "epoch": 2.11, "learning_rate": 0.0003945969884853853, "loss": 1.8111, "step": 11900 }, { "epoch": 2.13, "learning_rate": 0.0003937112488928255, "loss": 1.8708, "step": 12000 }, { "epoch": 2.14, "learning_rate": 0.0003928255093002657, "loss": 1.8909, "step": 12100 }, { "epoch": 2.16, "learning_rate": 0.00039193976970770594, "loss": 1.8941, "step": 12200 }, { "epoch": 2.18, "learning_rate": 0.00039105403011514616, "loss": 1.8875, "step": 12300 }, { "epoch": 2.2, "learning_rate": 0.00039016829052258633, "loss": 1.8948, "step": 12400 }, { "epoch": 2.21, "learning_rate": 0.00038928255093002655, "loss": 1.8865, "step": 12500 }, { "epoch": 2.23, "learning_rate": 0.0003883968113374668, "loss": 1.8871, "step": 12600 }, { "epoch": 2.25, "learning_rate": 0.000387511071744907, "loss": 1.8811, "step": 12700 }, { "epoch": 2.27, "learning_rate": 0.0003866253321523472, "loss": 1.8876, "step": 12800 }, { "epoch": 2.29, "learning_rate": 0.0003857395925597874, "loss": 1.8702, "step": 12900 }, { "epoch": 2.3, "learning_rate": 0.00038485385296722766, "loss": 1.9055, "step": 13000 }, { "epoch": 2.32, "learning_rate": 0.0003839681133746679, "loss": 1.8816, "step": 13100 }, { "epoch": 2.34, "learning_rate": 0.0003830823737821081, "loss": 1.8652, "step": 13200 }, { "epoch": 2.36, "learning_rate": 0.00038219663418954827, "loss": 1.8891, "step": 13300 }, { "epoch": 2.37, "learning_rate": 0.0003813108945969885, "loss": 1.9039, "step": 13400 }, { "epoch": 2.39, "learning_rate": 0.0003804251550044287, "loss": 1.8558, "step": 13500 }, { "epoch": 2.41, "learning_rate": 0.00037953941541186894, "loss": 1.8471, "step": 13600 }, { "epoch": 2.43, "learning_rate": 0.00037865367581930916, "loss": 1.8532, "step": 13700 }, { "epoch": 2.44, "learning_rate": 0.00037776793622674933, "loss": 1.8786, "step": 13800 }, { "epoch": 2.46, "learning_rate": 0.00037688219663418955, "loss": 1.8285, "step": 13900 }, { "epoch": 2.48, "learning_rate": 0.00037599645704162977, "loss": 1.8505, "step": 14000 }, { "epoch": 2.5, "learning_rate": 0.00037511071744907, "loss": 1.8604, "step": 14100 }, { "epoch": 2.52, "learning_rate": 0.0003742249778565102, "loss": 1.8684, "step": 14200 }, { "epoch": 2.53, "learning_rate": 0.0003733392382639504, "loss": 1.8241, "step": 14300 }, { "epoch": 2.55, "learning_rate": 0.0003724534986713906, "loss": 1.8601, "step": 14400 }, { "epoch": 2.57, "learning_rate": 0.0003715677590788308, "loss": 1.8582, "step": 14500 }, { "epoch": 2.59, "learning_rate": 0.00037068201948627105, "loss": 1.8555, "step": 14600 }, { "epoch": 2.6, "learning_rate": 0.0003697962798937112, "loss": 1.8905, "step": 14700 }, { "epoch": 2.62, "learning_rate": 0.00036891054030115144, "loss": 1.8754, "step": 14800 }, { "epoch": 2.64, "learning_rate": 0.00036802480070859166, "loss": 1.8835, "step": 14900 }, { "epoch": 2.66, "learning_rate": 0.0003671390611160319, "loss": 1.847, "step": 15000 }, { "epoch": 2.67, "learning_rate": 0.0003662533215234721, "loss": 1.8621, "step": 15100 }, { "epoch": 2.69, "learning_rate": 0.00036536758193091227, "loss": 1.8558, "step": 15200 }, { "epoch": 2.71, "learning_rate": 0.0003644818423383525, "loss": 1.8349, "step": 15300 }, { "epoch": 2.73, "learning_rate": 0.00036359610274579277, "loss": 1.8698, "step": 15400 }, { "epoch": 2.75, "learning_rate": 0.000362710363153233, "loss": 1.8925, "step": 15500 }, { "epoch": 2.76, "learning_rate": 0.0003618246235606732, "loss": 1.8347, "step": 15600 }, { "epoch": 2.78, "learning_rate": 0.0003609388839681134, "loss": 1.869, "step": 15700 }, { "epoch": 2.8, "learning_rate": 0.0003600531443755536, "loss": 1.82, "step": 15800 }, { "epoch": 2.82, "learning_rate": 0.0003591674047829938, "loss": 1.8407, "step": 15900 }, { "epoch": 2.83, "learning_rate": 0.00035828166519043405, "loss": 1.8458, "step": 16000 }, { "epoch": 2.85, "learning_rate": 0.0003573959255978742, "loss": 1.8568, "step": 16100 }, { "epoch": 2.87, "learning_rate": 0.00035651018600531444, "loss": 1.855, "step": 16200 }, { "epoch": 2.89, "learning_rate": 0.00035562444641275466, "loss": 1.841, "step": 16300 }, { "epoch": 2.91, "learning_rate": 0.0003547387068201949, "loss": 1.814, "step": 16400 }, { "epoch": 2.92, "learning_rate": 0.0003538529672276351, "loss": 1.8102, "step": 16500 }, { "epoch": 2.94, "learning_rate": 0.00035296722763507527, "loss": 1.8517, "step": 16600 }, { "epoch": 2.96, "learning_rate": 0.0003520814880425155, "loss": 1.8326, "step": 16700 }, { "epoch": 2.98, "learning_rate": 0.0003511957484499557, "loss": 1.8514, "step": 16800 }, { "epoch": 2.99, "learning_rate": 0.00035031000885739594, "loss": 1.863, "step": 16900 }, { "epoch": 3.01, "learning_rate": 0.00034942426926483616, "loss": 1.779, "step": 17000 }, { "epoch": 3.03, "learning_rate": 0.0003485385296722763, "loss": 1.7412, "step": 17100 }, { "epoch": 3.05, "learning_rate": 0.00034765279007971655, "loss": 1.7293, "step": 17200 }, { "epoch": 3.06, "learning_rate": 0.00034676705048715677, "loss": 1.7102, "step": 17300 }, { "epoch": 3.08, "learning_rate": 0.000345881310894597, "loss": 1.7713, "step": 17400 }, { "epoch": 3.1, "learning_rate": 0.00034499557130203716, "loss": 1.7884, "step": 17500 }, { "epoch": 3.12, "learning_rate": 0.0003441098317094774, "loss": 1.7567, "step": 17600 }, { "epoch": 3.14, "learning_rate": 0.00034322409211691766, "loss": 1.7192, "step": 17700 }, { "epoch": 3.15, "learning_rate": 0.0003423383525243579, "loss": 1.7205, "step": 17800 }, { "epoch": 3.17, "learning_rate": 0.0003414526129317981, "loss": 1.7175, "step": 17900 }, { "epoch": 3.19, "learning_rate": 0.00034056687333923827, "loss": 1.7627, "step": 18000 }, { "epoch": 3.21, "learning_rate": 0.0003396811337466785, "loss": 1.7404, "step": 18100 }, { "epoch": 3.22, "learning_rate": 0.0003387953941541187, "loss": 1.7664, "step": 18200 }, { "epoch": 3.24, "learning_rate": 0.00033790965456155893, "loss": 1.7307, "step": 18300 }, { "epoch": 3.26, "learning_rate": 0.00033702391496899915, "loss": 1.7245, "step": 18400 }, { "epoch": 3.28, "learning_rate": 0.0003361381753764393, "loss": 1.7643, "step": 18500 }, { "epoch": 3.29, "learning_rate": 0.00033525243578387954, "loss": 1.7469, "step": 18600 }, { "epoch": 3.31, "learning_rate": 0.00033436669619131977, "loss": 1.7639, "step": 18700 }, { "epoch": 3.33, "learning_rate": 0.00033348095659876, "loss": 1.7364, "step": 18800 }, { "epoch": 3.35, "learning_rate": 0.00033259521700620016, "loss": 1.7284, "step": 18900 }, { "epoch": 3.37, "learning_rate": 0.0003317094774136404, "loss": 1.7461, "step": 19000 }, { "epoch": 3.38, "learning_rate": 0.0003308237378210806, "loss": 1.7558, "step": 19100 }, { "epoch": 3.4, "learning_rate": 0.0003299379982285208, "loss": 1.7578, "step": 19200 }, { "epoch": 3.42, "learning_rate": 0.00032905225863596104, "loss": 1.7989, "step": 19300 }, { "epoch": 3.44, "learning_rate": 0.0003281665190434012, "loss": 1.7949, "step": 19400 }, { "epoch": 3.45, "learning_rate": 0.00032728077945084143, "loss": 1.777, "step": 19500 }, { "epoch": 3.47, "learning_rate": 0.00032639503985828165, "loss": 1.777, "step": 19600 }, { "epoch": 3.49, "learning_rate": 0.0003255093002657219, "loss": 1.7626, "step": 19700 }, { "epoch": 3.51, "learning_rate": 0.0003246235606731621, "loss": 1.7843, "step": 19800 }, { "epoch": 3.53, "learning_rate": 0.00032373782108060227, "loss": 1.7301, "step": 19900 }, { "epoch": 3.54, "learning_rate": 0.0003228520814880425, "loss": 1.7561, "step": 20000 }, { "epoch": 3.56, "learning_rate": 0.00032196634189548276, "loss": 1.7534, "step": 20100 }, { "epoch": 3.58, "learning_rate": 0.000321080602302923, "loss": 1.753, "step": 20200 }, { "epoch": 3.6, "learning_rate": 0.00032019486271036315, "loss": 1.7259, "step": 20300 }, { "epoch": 3.61, "learning_rate": 0.0003193091231178034, "loss": 1.789, "step": 20400 }, { "epoch": 3.63, "learning_rate": 0.0003184233835252436, "loss": 1.7771, "step": 20500 }, { "epoch": 3.65, "learning_rate": 0.0003175376439326838, "loss": 1.7308, "step": 20600 }, { "epoch": 3.67, "learning_rate": 0.00031665190434012404, "loss": 1.7568, "step": 20700 }, { "epoch": 3.68, "learning_rate": 0.0003157661647475642, "loss": 1.7393, "step": 20800 }, { "epoch": 3.7, "learning_rate": 0.00031488042515500443, "loss": 1.7593, "step": 20900 }, { "epoch": 3.72, "learning_rate": 0.00031399468556244465, "loss": 1.732, "step": 21000 }, { "epoch": 3.74, "learning_rate": 0.0003131089459698849, "loss": 1.7342, "step": 21100 }, { "epoch": 3.76, "learning_rate": 0.0003122232063773251, "loss": 1.7386, "step": 21200 }, { "epoch": 3.77, "learning_rate": 0.00031133746678476526, "loss": 1.7394, "step": 21300 }, { "epoch": 3.79, "learning_rate": 0.0003104517271922055, "loss": 1.7439, "step": 21400 }, { "epoch": 3.81, "learning_rate": 0.0003095659875996457, "loss": 1.7053, "step": 21500 }, { "epoch": 3.83, "learning_rate": 0.00030868024800708593, "loss": 1.7022, "step": 21600 }, { "epoch": 3.84, "learning_rate": 0.0003077945084145261, "loss": 1.7254, "step": 21700 }, { "epoch": 3.86, "learning_rate": 0.0003069087688219663, "loss": 1.712, "step": 21800 }, { "epoch": 3.88, "learning_rate": 0.00030602302922940654, "loss": 1.7565, "step": 21900 }, { "epoch": 3.9, "learning_rate": 0.00030513728963684676, "loss": 1.7495, "step": 22000 }, { "epoch": 3.91, "learning_rate": 0.000304251550044287, "loss": 1.7435, "step": 22100 }, { "epoch": 3.93, "learning_rate": 0.00030336581045172715, "loss": 1.7649, "step": 22200 }, { "epoch": 3.95, "learning_rate": 0.0003024800708591674, "loss": 1.743, "step": 22300 }, { "epoch": 3.97, "learning_rate": 0.00030159433126660765, "loss": 1.7332, "step": 22400 }, { "epoch": 3.99, "learning_rate": 0.00030070859167404787, "loss": 1.6918, "step": 22500 }, { "epoch": 4.0, "learning_rate": 0.0002998228520814881, "loss": 1.6936, "step": 22600 }, { "epoch": 4.02, "learning_rate": 0.00029893711248892826, "loss": 1.6353, "step": 22700 }, { "epoch": 4.04, "learning_rate": 0.0002980513728963685, "loss": 1.6311, "step": 22800 }, { "epoch": 4.06, "learning_rate": 0.0002971656333038087, "loss": 1.6385, "step": 22900 }, { "epoch": 4.07, "learning_rate": 0.00029627989371124893, "loss": 1.6188, "step": 23000 }, { "epoch": 4.09, "learning_rate": 0.0002953941541186891, "loss": 1.6043, "step": 23100 }, { "epoch": 4.11, "learning_rate": 0.0002945084145261293, "loss": 1.6233, "step": 23200 }, { "epoch": 4.13, "learning_rate": 0.00029362267493356954, "loss": 1.6249, "step": 23300 }, { "epoch": 4.15, "learning_rate": 0.00029273693534100976, "loss": 1.6264, "step": 23400 }, { "epoch": 4.16, "learning_rate": 0.00029185119574845, "loss": 1.6343, "step": 23500 }, { "epoch": 4.18, "learning_rate": 0.00029096545615589015, "loss": 1.6157, "step": 23600 }, { "epoch": 4.2, "learning_rate": 0.00029007971656333037, "loss": 1.6384, "step": 23700 }, { "epoch": 4.22, "learning_rate": 0.0002891939769707706, "loss": 1.65, "step": 23800 }, { "epoch": 4.23, "learning_rate": 0.0002883082373782108, "loss": 1.6624, "step": 23900 }, { "epoch": 4.25, "learning_rate": 0.00028742249778565104, "loss": 1.6375, "step": 24000 }, { "epoch": 4.27, "learning_rate": 0.0002865367581930912, "loss": 1.6722, "step": 24100 }, { "epoch": 4.29, "learning_rate": 0.00028565101860053143, "loss": 1.6491, "step": 24200 }, { "epoch": 4.3, "learning_rate": 0.00028476527900797165, "loss": 1.6498, "step": 24300 }, { "epoch": 4.32, "learning_rate": 0.00028387953941541187, "loss": 1.6559, "step": 24400 }, { "epoch": 4.34, "learning_rate": 0.00028299379982285204, "loss": 1.6044, "step": 24500 }, { "epoch": 4.36, "learning_rate": 0.00028210806023029226, "loss": 1.6661, "step": 24600 }, { "epoch": 4.38, "learning_rate": 0.0002812223206377325, "loss": 1.6478, "step": 24700 }, { "epoch": 4.39, "learning_rate": 0.00028033658104517276, "loss": 1.6382, "step": 24800 }, { "epoch": 4.41, "learning_rate": 0.000279450841452613, "loss": 1.6339, "step": 24900 }, { "epoch": 4.43, "learning_rate": 0.00027856510186005315, "loss": 1.6223, "step": 25000 }, { "epoch": 4.45, "learning_rate": 0.00027767936226749337, "loss": 1.6472, "step": 25100 }, { "epoch": 4.46, "learning_rate": 0.0002767936226749336, "loss": 1.6333, "step": 25200 }, { "epoch": 4.48, "learning_rate": 0.0002759078830823738, "loss": 1.6384, "step": 25300 }, { "epoch": 4.5, "learning_rate": 0.00027502214348981404, "loss": 1.6005, "step": 25400 }, { "epoch": 4.52, "learning_rate": 0.0002741364038972542, "loss": 1.6455, "step": 25500 }, { "epoch": 4.53, "learning_rate": 0.0002732506643046944, "loss": 1.6201, "step": 25600 }, { "epoch": 4.55, "learning_rate": 0.00027236492471213465, "loss": 1.6439, "step": 25700 }, { "epoch": 4.57, "learning_rate": 0.00027147918511957487, "loss": 1.6261, "step": 25800 }, { "epoch": 4.59, "learning_rate": 0.00027059344552701504, "loss": 1.6236, "step": 25900 }, { "epoch": 4.61, "learning_rate": 0.00026970770593445526, "loss": 1.6156, "step": 26000 }, { "epoch": 4.62, "learning_rate": 0.0002688219663418955, "loss": 1.6597, "step": 26100 }, { "epoch": 4.64, "learning_rate": 0.0002679362267493357, "loss": 1.635, "step": 26200 }, { "epoch": 4.66, "learning_rate": 0.0002670504871567759, "loss": 1.6202, "step": 26300 }, { "epoch": 4.68, "learning_rate": 0.0002661647475642161, "loss": 1.6413, "step": 26400 }, { "epoch": 4.69, "learning_rate": 0.0002652790079716563, "loss": 1.6363, "step": 26500 }, { "epoch": 4.71, "learning_rate": 0.00026439326837909654, "loss": 1.6283, "step": 26600 }, { "epoch": 4.73, "learning_rate": 0.00026350752878653676, "loss": 1.6437, "step": 26700 }, { "epoch": 4.75, "learning_rate": 0.000262621789193977, "loss": 1.6456, "step": 26800 }, { "epoch": 4.77, "learning_rate": 0.00026173604960141715, "loss": 1.6076, "step": 26900 }, { "epoch": 4.78, "learning_rate": 0.00026085031000885737, "loss": 1.6429, "step": 27000 }, { "epoch": 4.8, "learning_rate": 0.00025996457041629764, "loss": 1.6635, "step": 27100 }, { "epoch": 4.82, "learning_rate": 0.00025907883082373787, "loss": 1.6269, "step": 27200 }, { "epoch": 4.84, "learning_rate": 0.00025819309123117803, "loss": 1.6282, "step": 27300 }, { "epoch": 4.85, "learning_rate": 0.00025730735163861826, "loss": 1.5984, "step": 27400 }, { "epoch": 4.87, "learning_rate": 0.0002564216120460585, "loss": 1.6434, "step": 27500 }, { "epoch": 4.89, "learning_rate": 0.0002555358724534987, "loss": 1.641, "step": 27600 }, { "epoch": 4.91, "learning_rate": 0.0002546501328609389, "loss": 1.6232, "step": 27700 }, { "epoch": 4.92, "learning_rate": 0.0002537643932683791, "loss": 1.5679, "step": 27800 }, { "epoch": 4.94, "learning_rate": 0.0002528786536758193, "loss": 1.6289, "step": 27900 }, { "epoch": 4.96, "learning_rate": 0.00025199291408325953, "loss": 1.6466, "step": 28000 }, { "epoch": 4.98, "learning_rate": 0.00025110717449069975, "loss": 1.6467, "step": 28100 }, { "epoch": 5.0, "learning_rate": 0.00025022143489814, "loss": 1.6348, "step": 28200 }, { "epoch": 5.01, "learning_rate": 0.0002493356953055802, "loss": 1.58, "step": 28300 }, { "epoch": 5.03, "learning_rate": 0.00024844995571302037, "loss": 1.5345, "step": 28400 }, { "epoch": 5.05, "learning_rate": 0.0002475642161204606, "loss": 1.5501, "step": 28500 }, { "epoch": 5.07, "learning_rate": 0.0002466784765279008, "loss": 1.5667, "step": 28600 }, { "epoch": 5.08, "learning_rate": 0.00024579273693534103, "loss": 1.5045, "step": 28700 }, { "epoch": 5.1, "learning_rate": 0.00024490699734278125, "loss": 1.5721, "step": 28800 }, { "epoch": 5.12, "learning_rate": 0.00024402125775022145, "loss": 1.5339, "step": 28900 }, { "epoch": 5.14, "learning_rate": 0.00024313551815766167, "loss": 1.5543, "step": 29000 }, { "epoch": 5.15, "learning_rate": 0.00024224977856510187, "loss": 1.5568, "step": 29100 }, { "epoch": 5.17, "learning_rate": 0.0002413640389725421, "loss": 1.5496, "step": 29200 }, { "epoch": 5.19, "learning_rate": 0.00024047829937998228, "loss": 1.5492, "step": 29300 }, { "epoch": 5.21, "learning_rate": 0.0002395925597874225, "loss": 1.571, "step": 29400 }, { "epoch": 5.23, "learning_rate": 0.00023870682019486273, "loss": 1.5495, "step": 29500 }, { "epoch": 5.24, "learning_rate": 0.00023782108060230292, "loss": 1.538, "step": 29600 }, { "epoch": 5.26, "learning_rate": 0.00023693534100974314, "loss": 1.5464, "step": 29700 }, { "epoch": 5.28, "learning_rate": 0.00023604960141718334, "loss": 1.5523, "step": 29800 }, { "epoch": 5.3, "learning_rate": 0.00023516386182462356, "loss": 1.526, "step": 29900 }, { "epoch": 5.31, "learning_rate": 0.00023427812223206375, "loss": 1.5692, "step": 30000 }, { "epoch": 5.33, "learning_rate": 0.000233392382639504, "loss": 1.548, "step": 30100 }, { "epoch": 5.35, "learning_rate": 0.00023250664304694422, "loss": 1.575, "step": 30200 }, { "epoch": 5.37, "learning_rate": 0.00023162090345438442, "loss": 1.5104, "step": 30300 }, { "epoch": 5.39, "learning_rate": 0.00023073516386182464, "loss": 1.558, "step": 30400 }, { "epoch": 5.4, "learning_rate": 0.00022984942426926484, "loss": 1.5481, "step": 30500 }, { "epoch": 5.42, "learning_rate": 0.00022896368467670506, "loss": 1.5538, "step": 30600 }, { "epoch": 5.44, "learning_rate": 0.00022807794508414525, "loss": 1.4957, "step": 30700 }, { "epoch": 5.46, "learning_rate": 0.00022719220549158547, "loss": 1.5547, "step": 30800 }, { "epoch": 5.47, "learning_rate": 0.0002263064658990257, "loss": 1.573, "step": 30900 }, { "epoch": 5.49, "learning_rate": 0.0002254207263064659, "loss": 1.5508, "step": 31000 }, { "epoch": 5.51, "learning_rate": 0.0002245349867139061, "loss": 1.5452, "step": 31100 }, { "epoch": 5.53, "learning_rate": 0.00022364924712134633, "loss": 1.5031, "step": 31200 }, { "epoch": 5.54, "learning_rate": 0.00022276350752878656, "loss": 1.5238, "step": 31300 }, { "epoch": 5.56, "learning_rate": 0.00022187776793622675, "loss": 1.5511, "step": 31400 }, { "epoch": 5.58, "learning_rate": 0.00022099202834366697, "loss": 1.5784, "step": 31500 }, { "epoch": 5.6, "learning_rate": 0.0002201062887511072, "loss": 1.5487, "step": 31600 }, { "epoch": 5.62, "learning_rate": 0.0002192205491585474, "loss": 1.5496, "step": 31700 }, { "epoch": 5.63, "learning_rate": 0.0002183348095659876, "loss": 1.5414, "step": 31800 }, { "epoch": 5.65, "learning_rate": 0.0002174490699734278, "loss": 1.5677, "step": 31900 }, { "epoch": 5.67, "learning_rate": 0.00021656333038086803, "loss": 1.5408, "step": 32000 }, { "epoch": 5.69, "learning_rate": 0.00021567759078830822, "loss": 1.5122, "step": 32100 }, { "epoch": 5.7, "learning_rate": 0.00021479185119574845, "loss": 1.5275, "step": 32200 }, { "epoch": 5.72, "learning_rate": 0.00021390611160318867, "loss": 1.5314, "step": 32300 }, { "epoch": 5.74, "learning_rate": 0.0002130203720106289, "loss": 1.5405, "step": 32400 }, { "epoch": 5.76, "learning_rate": 0.0002121346324180691, "loss": 1.5039, "step": 32500 }, { "epoch": 5.77, "learning_rate": 0.0002112488928255093, "loss": 1.5173, "step": 32600 }, { "epoch": 5.79, "learning_rate": 0.00021036315323294953, "loss": 1.564, "step": 32700 }, { "epoch": 5.81, "learning_rate": 0.00020947741364038972, "loss": 1.5666, "step": 32800 }, { "epoch": 5.83, "learning_rate": 0.00020859167404782994, "loss": 1.5539, "step": 32900 }, { "epoch": 5.85, "learning_rate": 0.00020770593445527017, "loss": 1.5436, "step": 33000 }, { "epoch": 5.86, "learning_rate": 0.00020682019486271036, "loss": 1.5431, "step": 33100 }, { "epoch": 5.88, "learning_rate": 0.00020593445527015058, "loss": 1.5364, "step": 33200 }, { "epoch": 5.9, "learning_rate": 0.00020504871567759078, "loss": 1.5186, "step": 33300 }, { "epoch": 5.92, "learning_rate": 0.000204162976085031, "loss": 1.5308, "step": 33400 }, { "epoch": 5.93, "learning_rate": 0.0002032772364924712, "loss": 1.5218, "step": 33500 }, { "epoch": 5.95, "learning_rate": 0.00020239149689991144, "loss": 1.547, "step": 33600 }, { "epoch": 5.97, "learning_rate": 0.00020150575730735166, "loss": 1.554, "step": 33700 }, { "epoch": 5.99, "learning_rate": 0.00020062001771479186, "loss": 1.5563, "step": 33800 }, { "epoch": 6.01, "learning_rate": 0.00019973427812223208, "loss": 1.5311, "step": 33900 }, { "epoch": 6.02, "learning_rate": 0.00019884853852967228, "loss": 1.4537, "step": 34000 }, { "epoch": 6.04, "learning_rate": 0.0001979627989371125, "loss": 1.4493, "step": 34100 }, { "epoch": 6.06, "learning_rate": 0.0001970770593445527, "loss": 1.462, "step": 34200 }, { "epoch": 6.08, "learning_rate": 0.00019619131975199291, "loss": 1.4879, "step": 34300 }, { "epoch": 6.09, "learning_rate": 0.00019530558015943314, "loss": 1.46, "step": 34400 }, { "epoch": 6.11, "learning_rate": 0.00019441984056687333, "loss": 1.4318, "step": 34500 }, { "epoch": 6.13, "learning_rate": 0.00019353410097431355, "loss": 1.448, "step": 34600 }, { "epoch": 6.15, "learning_rate": 0.00019264836138175375, "loss": 1.4615, "step": 34700 }, { "epoch": 6.16, "learning_rate": 0.000191762621789194, "loss": 1.4537, "step": 34800 }, { "epoch": 6.18, "learning_rate": 0.0001908768821966342, "loss": 1.4697, "step": 34900 }, { "epoch": 6.2, "learning_rate": 0.00018999114260407441, "loss": 1.4502, "step": 35000 }, { "epoch": 6.22, "learning_rate": 0.00018910540301151464, "loss": 1.4421, "step": 35100 }, { "epoch": 6.24, "learning_rate": 0.00018821966341895483, "loss": 1.456, "step": 35200 }, { "epoch": 6.25, "learning_rate": 0.00018733392382639505, "loss": 1.4647, "step": 35300 }, { "epoch": 6.27, "learning_rate": 0.00018644818423383525, "loss": 1.4854, "step": 35400 }, { "epoch": 6.29, "learning_rate": 0.00018556244464127547, "loss": 1.4589, "step": 35500 }, { "epoch": 6.31, "learning_rate": 0.00018467670504871566, "loss": 1.4537, "step": 35600 }, { "epoch": 6.32, "learning_rate": 0.00018379096545615589, "loss": 1.4602, "step": 35700 }, { "epoch": 6.34, "learning_rate": 0.0001829052258635961, "loss": 1.487, "step": 35800 }, { "epoch": 6.36, "learning_rate": 0.00018201948627103633, "loss": 1.4731, "step": 35900 }, { "epoch": 6.38, "learning_rate": 0.00018113374667847655, "loss": 1.4487, "step": 36000 }, { "epoch": 6.4, "learning_rate": 0.00018024800708591675, "loss": 1.4505, "step": 36100 }, { "epoch": 6.41, "learning_rate": 0.00017936226749335697, "loss": 1.4525, "step": 36200 }, { "epoch": 6.43, "learning_rate": 0.00017847652790079716, "loss": 1.4717, "step": 36300 }, { "epoch": 6.45, "learning_rate": 0.00017759078830823738, "loss": 1.4803, "step": 36400 }, { "epoch": 6.47, "learning_rate": 0.0001767050487156776, "loss": 1.4443, "step": 36500 }, { "epoch": 6.48, "learning_rate": 0.0001758193091231178, "loss": 1.4659, "step": 36600 }, { "epoch": 6.5, "learning_rate": 0.00017493356953055802, "loss": 1.4918, "step": 36700 }, { "epoch": 6.52, "learning_rate": 0.00017404782993799822, "loss": 1.4444, "step": 36800 }, { "epoch": 6.54, "learning_rate": 0.00017316209034543844, "loss": 1.4804, "step": 36900 }, { "epoch": 6.55, "learning_rate": 0.00017227635075287863, "loss": 1.4815, "step": 37000 }, { "epoch": 6.57, "learning_rate": 0.00017139061116031888, "loss": 1.4791, "step": 37100 }, { "epoch": 6.59, "learning_rate": 0.0001705048715677591, "loss": 1.4677, "step": 37200 }, { "epoch": 6.61, "learning_rate": 0.0001696191319751993, "loss": 1.4453, "step": 37300 }, { "epoch": 6.63, "learning_rate": 0.00016873339238263952, "loss": 1.4632, "step": 37400 }, { "epoch": 6.64, "learning_rate": 0.00016784765279007972, "loss": 1.4676, "step": 37500 }, { "epoch": 6.66, "learning_rate": 0.00016696191319751994, "loss": 1.4879, "step": 37600 }, { "epoch": 6.68, "learning_rate": 0.00016607617360496013, "loss": 1.4755, "step": 37700 }, { "epoch": 6.7, "learning_rate": 0.00016519043401240036, "loss": 1.4617, "step": 37800 }, { "epoch": 6.71, "learning_rate": 0.00016430469441984058, "loss": 1.4597, "step": 37900 }, { "epoch": 6.73, "learning_rate": 0.00016341895482728077, "loss": 1.4442, "step": 38000 }, { "epoch": 6.75, "learning_rate": 0.000162533215234721, "loss": 1.4734, "step": 38100 }, { "epoch": 6.77, "learning_rate": 0.0001616474756421612, "loss": 1.471, "step": 38200 }, { "epoch": 6.78, "learning_rate": 0.00016076173604960144, "loss": 1.4606, "step": 38300 }, { "epoch": 6.8, "learning_rate": 0.00015987599645704163, "loss": 1.4626, "step": 38400 }, { "epoch": 6.82, "learning_rate": 0.00015899025686448185, "loss": 1.4648, "step": 38500 }, { "epoch": 6.84, "learning_rate": 0.00015810451727192208, "loss": 1.4665, "step": 38600 }, { "epoch": 6.86, "learning_rate": 0.00015721877767936227, "loss": 1.4483, "step": 38700 }, { "epoch": 6.87, "learning_rate": 0.0001563330380868025, "loss": 1.4638, "step": 38800 }, { "epoch": 6.89, "learning_rate": 0.0001554472984942427, "loss": 1.4695, "step": 38900 }, { "epoch": 6.91, "learning_rate": 0.0001545615589016829, "loss": 1.4487, "step": 39000 }, { "epoch": 6.93, "learning_rate": 0.0001536758193091231, "loss": 1.4547, "step": 39100 }, { "epoch": 6.94, "learning_rate": 0.00015279007971656333, "loss": 1.4757, "step": 39200 }, { "epoch": 6.96, "learning_rate": 0.00015190434012400355, "loss": 1.4709, "step": 39300 }, { "epoch": 6.98, "learning_rate": 0.00015101860053144374, "loss": 1.438, "step": 39400 }, { "epoch": 7.0, "learning_rate": 0.000150132860938884, "loss": 1.4504, "step": 39500 }, { "epoch": 7.02, "learning_rate": 0.00014924712134632419, "loss": 1.4182, "step": 39600 }, { "epoch": 7.03, "learning_rate": 0.0001483613817537644, "loss": 1.3538, "step": 39700 }, { "epoch": 7.05, "learning_rate": 0.0001474756421612046, "loss": 1.3627, "step": 39800 }, { "epoch": 7.07, "learning_rate": 0.00014658990256864482, "loss": 1.3909, "step": 39900 }, { "epoch": 7.09, "learning_rate": 0.00014570416297608505, "loss": 1.4015, "step": 40000 }, { "epoch": 7.1, "learning_rate": 0.00014481842338352524, "loss": 1.4001, "step": 40100 }, { "epoch": 7.12, "learning_rate": 0.00014393268379096546, "loss": 1.3875, "step": 40200 }, { "epoch": 7.14, "learning_rate": 0.00014304694419840566, "loss": 1.4087, "step": 40300 }, { "epoch": 7.16, "learning_rate": 0.00014216120460584588, "loss": 1.4053, "step": 40400 }, { "epoch": 7.17, "learning_rate": 0.00014127546501328607, "loss": 1.3912, "step": 40500 }, { "epoch": 7.19, "learning_rate": 0.00014038972542072632, "loss": 1.4045, "step": 40600 }, { "epoch": 7.21, "learning_rate": 0.00013950398582816655, "loss": 1.3781, "step": 40700 }, { "epoch": 7.23, "learning_rate": 0.00013861824623560674, "loss": 1.4017, "step": 40800 }, { "epoch": 7.25, "learning_rate": 0.00013773250664304696, "loss": 1.3905, "step": 40900 }, { "epoch": 7.26, "learning_rate": 0.00013684676705048716, "loss": 1.4088, "step": 41000 }, { "epoch": 7.28, "learning_rate": 0.00013596102745792738, "loss": 1.427, "step": 41100 }, { "epoch": 7.3, "learning_rate": 0.00013507528786536757, "loss": 1.3799, "step": 41200 }, { "epoch": 7.32, "learning_rate": 0.0001341895482728078, "loss": 1.3973, "step": 41300 }, { "epoch": 7.33, "learning_rate": 0.00013330380868024802, "loss": 1.4444, "step": 41400 }, { "epoch": 7.35, "learning_rate": 0.0001324180690876882, "loss": 1.3977, "step": 41500 }, { "epoch": 7.37, "learning_rate": 0.00013153232949512843, "loss": 1.413, "step": 41600 }, { "epoch": 7.39, "learning_rate": 0.00013064658990256863, "loss": 1.3855, "step": 41700 }, { "epoch": 7.4, "learning_rate": 0.00012976085031000888, "loss": 1.3802, "step": 41800 }, { "epoch": 7.42, "learning_rate": 0.00012887511071744907, "loss": 1.3871, "step": 41900 }, { "epoch": 7.44, "learning_rate": 0.0001279893711248893, "loss": 1.3736, "step": 42000 }, { "epoch": 7.46, "learning_rate": 0.00012710363153232952, "loss": 1.4049, "step": 42100 }, { "epoch": 7.48, "learning_rate": 0.0001262178919397697, "loss": 1.3547, "step": 42200 }, { "epoch": 7.49, "learning_rate": 0.00012533215234720993, "loss": 1.3899, "step": 42300 }, { "epoch": 7.51, "learning_rate": 0.00012444641275465013, "loss": 1.3879, "step": 42400 }, { "epoch": 7.53, "learning_rate": 0.00012356067316209035, "loss": 1.386, "step": 42500 }, { "epoch": 7.55, "learning_rate": 0.00012267493356953057, "loss": 1.3889, "step": 42600 }, { "epoch": 7.56, "learning_rate": 0.00012178919397697078, "loss": 1.4047, "step": 42700 }, { "epoch": 7.58, "learning_rate": 0.00012090345438441099, "loss": 1.3835, "step": 42800 }, { "epoch": 7.6, "learning_rate": 0.0001200177147918512, "loss": 1.3969, "step": 42900 }, { "epoch": 7.62, "learning_rate": 0.0001191319751992914, "loss": 1.4, "step": 43000 }, { "epoch": 7.64, "learning_rate": 0.00011824623560673161, "loss": 1.3735, "step": 43100 }, { "epoch": 7.65, "learning_rate": 0.00011736049601417183, "loss": 1.3813, "step": 43200 }, { "epoch": 7.67, "learning_rate": 0.00011647475642161206, "loss": 1.3986, "step": 43300 }, { "epoch": 7.69, "learning_rate": 0.00011558901682905227, "loss": 1.3971, "step": 43400 }, { "epoch": 7.71, "learning_rate": 0.00011470327723649247, "loss": 1.4061, "step": 43500 }, { "epoch": 7.72, "learning_rate": 0.00011381753764393268, "loss": 1.3679, "step": 43600 }, { "epoch": 7.74, "learning_rate": 0.00011293179805137289, "loss": 1.3995, "step": 43700 }, { "epoch": 7.76, "learning_rate": 0.0001120460584588131, "loss": 1.3876, "step": 43800 }, { "epoch": 7.78, "learning_rate": 0.00011116031886625333, "loss": 1.3797, "step": 43900 }, { "epoch": 7.79, "learning_rate": 0.00011027457927369354, "loss": 1.3738, "step": 44000 }, { "epoch": 7.81, "learning_rate": 0.00010938883968113375, "loss": 1.3694, "step": 44100 }, { "epoch": 7.83, "learning_rate": 0.00010850310008857396, "loss": 1.4214, "step": 44200 }, { "epoch": 7.85, "learning_rate": 0.00010761736049601417, "loss": 1.4109, "step": 44300 }, { "epoch": 7.87, "learning_rate": 0.00010673162090345438, "loss": 1.3824, "step": 44400 }, { "epoch": 7.88, "learning_rate": 0.0001058458813108946, "loss": 1.399, "step": 44500 }, { "epoch": 7.9, "learning_rate": 0.00010496014171833482, "loss": 1.3917, "step": 44600 }, { "epoch": 7.92, "learning_rate": 0.00010407440212577503, "loss": 1.422, "step": 44700 }, { "epoch": 7.94, "learning_rate": 0.00010318866253321524, "loss": 1.3856, "step": 44800 }, { "epoch": 7.95, "learning_rate": 0.00010230292294065544, "loss": 1.3747, "step": 44900 }, { "epoch": 7.97, "learning_rate": 0.00010141718334809567, "loss": 1.3847, "step": 45000 }, { "epoch": 7.99, "learning_rate": 0.00010053144375553587, "loss": 1.3937, "step": 45100 }, { "epoch": 8.01, "learning_rate": 9.964570416297608e-05, "loss": 1.3614, "step": 45200 }, { "epoch": 8.02, "learning_rate": 9.87599645704163e-05, "loss": 1.3446, "step": 45300 }, { "epoch": 8.04, "learning_rate": 9.787422497785651e-05, "loss": 1.3427, "step": 45400 }, { "epoch": 8.06, "learning_rate": 9.698848538529672e-05, "loss": 1.3139, "step": 45500 }, { "epoch": 8.08, "learning_rate": 9.610274579273694e-05, "loss": 1.319, "step": 45600 }, { "epoch": 8.1, "learning_rate": 9.521700620017715e-05, "loss": 1.3558, "step": 45700 }, { "epoch": 8.11, "learning_rate": 9.433126660761736e-05, "loss": 1.3171, "step": 45800 }, { "epoch": 8.13, "learning_rate": 9.344552701505757e-05, "loss": 1.3402, "step": 45900 }, { "epoch": 8.15, "learning_rate": 9.255978742249779e-05, "loss": 1.3222, "step": 46000 }, { "epoch": 8.17, "learning_rate": 9.1674047829938e-05, "loss": 1.3373, "step": 46100 }, { "epoch": 8.18, "learning_rate": 9.078830823737822e-05, "loss": 1.3553, "step": 46200 }, { "epoch": 8.2, "learning_rate": 8.990256864481843e-05, "loss": 1.3406, "step": 46300 }, { "epoch": 8.22, "learning_rate": 8.901682905225864e-05, "loss": 1.3249, "step": 46400 }, { "epoch": 8.24, "learning_rate": 8.813108945969885e-05, "loss": 1.3145, "step": 46500 }, { "epoch": 8.26, "learning_rate": 8.724534986713905e-05, "loss": 1.3527, "step": 46600 }, { "epoch": 8.27, "learning_rate": 8.635961027457928e-05, "loss": 1.3283, "step": 46700 }, { "epoch": 8.29, "learning_rate": 8.54738706820195e-05, "loss": 1.3233, "step": 46800 }, { "epoch": 8.31, "learning_rate": 8.45881310894597e-05, "loss": 1.3495, "step": 46900 }, { "epoch": 8.33, "learning_rate": 8.370239149689991e-05, "loss": 1.3476, "step": 47000 }, { "epoch": 8.34, "learning_rate": 8.281665190434012e-05, "loss": 1.3237, "step": 47100 }, { "epoch": 8.36, "learning_rate": 8.193091231178033e-05, "loss": 1.3465, "step": 47200 }, { "epoch": 8.38, "learning_rate": 8.104517271922054e-05, "loss": 1.3308, "step": 47300 }, { "epoch": 8.4, "learning_rate": 8.015943312666077e-05, "loss": 1.3414, "step": 47400 }, { "epoch": 8.41, "learning_rate": 7.927369353410098e-05, "loss": 1.3329, "step": 47500 }, { "epoch": 8.43, "learning_rate": 7.838795394154119e-05, "loss": 1.346, "step": 47600 }, { "epoch": 8.45, "learning_rate": 7.75022143489814e-05, "loss": 1.3453, "step": 47700 }, { "epoch": 8.47, "learning_rate": 7.661647475642161e-05, "loss": 1.3197, "step": 47800 }, { "epoch": 8.49, "learning_rate": 7.573073516386182e-05, "loss": 1.3247, "step": 47900 }, { "epoch": 8.5, "learning_rate": 7.484499557130204e-05, "loss": 1.3389, "step": 48000 }, { "epoch": 8.52, "learning_rate": 7.395925597874226e-05, "loss": 1.3421, "step": 48100 }, { "epoch": 8.54, "learning_rate": 7.307351638618247e-05, "loss": 1.3203, "step": 48200 }, { "epoch": 8.56, "learning_rate": 7.218777679362268e-05, "loss": 1.3218, "step": 48300 }, { "epoch": 8.57, "learning_rate": 7.130203720106288e-05, "loss": 1.3637, "step": 48400 }, { "epoch": 8.59, "learning_rate": 7.041629760850309e-05, "loss": 1.3239, "step": 48500 }, { "epoch": 8.61, "learning_rate": 6.953055801594331e-05, "loss": 1.3559, "step": 48600 }, { "epoch": 8.63, "learning_rate": 6.864481842338352e-05, "loss": 1.3263, "step": 48700 }, { "epoch": 8.64, "learning_rate": 6.775907883082374e-05, "loss": 1.3626, "step": 48800 }, { "epoch": 8.66, "learning_rate": 6.687333923826395e-05, "loss": 1.3125, "step": 48900 }, { "epoch": 8.68, "learning_rate": 6.598759964570416e-05, "loss": 1.3079, "step": 49000 }, { "epoch": 8.7, "learning_rate": 6.510186005314437e-05, "loss": 1.3478, "step": 49100 }, { "epoch": 8.72, "learning_rate": 6.421612046058459e-05, "loss": 1.3252, "step": 49200 }, { "epoch": 8.73, "learning_rate": 6.33303808680248e-05, "loss": 1.3619, "step": 49300 }, { "epoch": 8.75, "learning_rate": 6.244464127546502e-05, "loss": 1.3281, "step": 49400 }, { "epoch": 8.77, "learning_rate": 6.155890168290523e-05, "loss": 1.316, "step": 49500 }, { "epoch": 8.79, "learning_rate": 6.067316209034544e-05, "loss": 1.3324, "step": 49600 }, { "epoch": 8.8, "learning_rate": 5.9787422497785654e-05, "loss": 1.3282, "step": 49700 }, { "epoch": 8.82, "learning_rate": 5.890168290522586e-05, "loss": 1.3412, "step": 49800 }, { "epoch": 8.84, "learning_rate": 5.801594331266608e-05, "loss": 1.3228, "step": 49900 }, { "epoch": 8.86, "learning_rate": 5.713020372010629e-05, "loss": 1.3367, "step": 50000 }, { "epoch": 8.88, "learning_rate": 5.62444641275465e-05, "loss": 1.3259, "step": 50100 }, { "epoch": 8.89, "learning_rate": 5.5358724534986716e-05, "loss": 1.3245, "step": 50200 }, { "epoch": 8.91, "learning_rate": 5.4472984942426924e-05, "loss": 1.3405, "step": 50300 }, { "epoch": 8.93, "learning_rate": 5.358724534986714e-05, "loss": 1.3278, "step": 50400 }, { "epoch": 8.95, "learning_rate": 5.2701505757307354e-05, "loss": 1.3012, "step": 50500 }, { "epoch": 8.96, "learning_rate": 5.181576616474756e-05, "loss": 1.2933, "step": 50600 }, { "epoch": 8.98, "learning_rate": 5.093002657218778e-05, "loss": 1.3753, "step": 50700 }, { "epoch": 9.0, "learning_rate": 5.004428697962799e-05, "loss": 1.3211, "step": 50800 }, { "epoch": 9.02, "learning_rate": 4.91585473870682e-05, "loss": 1.3282, "step": 50900 }, { "epoch": 9.03, "learning_rate": 4.827280779450841e-05, "loss": 1.2949, "step": 51000 }, { "epoch": 9.05, "learning_rate": 4.738706820194863e-05, "loss": 1.2804, "step": 51100 }, { "epoch": 9.07, "learning_rate": 4.650132860938884e-05, "loss": 1.2732, "step": 51200 }, { "epoch": 9.09, "learning_rate": 4.561558901682905e-05, "loss": 1.2743, "step": 51300 }, { "epoch": 9.11, "learning_rate": 4.472984942426927e-05, "loss": 1.2978, "step": 51400 }, { "epoch": 9.12, "learning_rate": 4.384410983170948e-05, "loss": 1.3016, "step": 51500 }, { "epoch": 9.14, "learning_rate": 4.2958370239149686e-05, "loss": 1.3, "step": 51600 }, { "epoch": 9.16, "learning_rate": 4.20726306465899e-05, "loss": 1.2877, "step": 51700 }, { "epoch": 9.18, "learning_rate": 4.1186891054030117e-05, "loss": 1.2664, "step": 51800 }, { "epoch": 9.19, "learning_rate": 4.030115146147033e-05, "loss": 1.2769, "step": 51900 }, { "epoch": 9.21, "learning_rate": 3.941541186891054e-05, "loss": 1.2926, "step": 52000 }, { "epoch": 9.23, "learning_rate": 3.8529672276350755e-05, "loss": 1.2731, "step": 52100 }, { "epoch": 9.25, "learning_rate": 3.764393268379097e-05, "loss": 1.2806, "step": 52200 }, { "epoch": 9.26, "learning_rate": 3.675819309123118e-05, "loss": 1.2822, "step": 52300 }, { "epoch": 9.28, "learning_rate": 3.587245349867139e-05, "loss": 1.2866, "step": 52400 }, { "epoch": 9.3, "learning_rate": 3.498671390611161e-05, "loss": 1.29, "step": 52500 }, { "epoch": 9.32, "learning_rate": 3.410097431355182e-05, "loss": 1.2734, "step": 52600 }, { "epoch": 9.34, "learning_rate": 3.3215234720992025e-05, "loss": 1.309, "step": 52700 }, { "epoch": 9.35, "learning_rate": 3.232949512843225e-05, "loss": 1.2796, "step": 52800 }, { "epoch": 9.37, "learning_rate": 3.1443755535872456e-05, "loss": 1.2581, "step": 52900 }, { "epoch": 9.39, "learning_rate": 3.0558015943312664e-05, "loss": 1.2936, "step": 53000 }, { "epoch": 9.41, "learning_rate": 2.967227635075288e-05, "loss": 1.3346, "step": 53100 }, { "epoch": 9.42, "learning_rate": 2.878653675819309e-05, "loss": 1.3168, "step": 53200 }, { "epoch": 9.44, "learning_rate": 2.7900797165633306e-05, "loss": 1.3255, "step": 53300 }, { "epoch": 9.46, "learning_rate": 2.7015057573073517e-05, "loss": 1.2798, "step": 53400 }, { "epoch": 9.48, "learning_rate": 2.612931798051373e-05, "loss": 1.2846, "step": 53500 }, { "epoch": 9.5, "learning_rate": 2.5243578387953944e-05, "loss": 1.2615, "step": 53600 }, { "epoch": 9.51, "learning_rate": 2.4357838795394153e-05, "loss": 1.2692, "step": 53700 }, { "epoch": 9.53, "learning_rate": 2.3472099202834368e-05, "loss": 1.2845, "step": 53800 }, { "epoch": 9.55, "learning_rate": 2.258635961027458e-05, "loss": 1.284, "step": 53900 }, { "epoch": 9.57, "learning_rate": 2.170062001771479e-05, "loss": 1.3194, "step": 54000 } ], "max_steps": 56450, "num_train_epochs": 10, "total_flos": 1.0185474088615219e+18, "trial_name": null, "trial_params": null }