|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.565962534874451, |
|
"global_step": 54000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004991142604074402, |
|
"loss": 2.333, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004982285208148805, |
|
"loss": 2.3899, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004973427812223207, |
|
"loss": 2.4422, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004964570416297609, |
|
"loss": 2.4359, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004955713020372011, |
|
"loss": 2.3725, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004946855624446413, |
|
"loss": 2.4478, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004937998228520815, |
|
"loss": 2.4036, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004929140832595217, |
|
"loss": 2.3984, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004920283436669619, |
|
"loss": 2.3836, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004911426040744021, |
|
"loss": 2.3708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004902568644818423, |
|
"loss": 2.3009, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004893711248892826, |
|
"loss": 2.367, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004884853852967228, |
|
"loss": 2.3713, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000487599645704163, |
|
"loss": 2.3609, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00048671390611160323, |
|
"loss": 2.3181, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00048582816651904345, |
|
"loss": 2.3249, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004849424269264836, |
|
"loss": 2.3505, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048405668733392384, |
|
"loss": 2.2962, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048317094774136406, |
|
"loss": 2.3446, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004822852081488043, |
|
"loss": 2.308, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048139946855624445, |
|
"loss": 2.2919, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004805137289636847, |
|
"loss": 2.308, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004796279893711249, |
|
"loss": 2.2863, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004787422497785651, |
|
"loss": 2.2976, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047785651018600534, |
|
"loss": 2.288, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004769707705934455, |
|
"loss": 2.2771, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047608503100088573, |
|
"loss": 2.2608, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00047519929140832595, |
|
"loss": 2.2227, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004743135518157662, |
|
"loss": 2.2823, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004734278122232064, |
|
"loss": 2.1959, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00047254207263064656, |
|
"loss": 2.2388, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004716563330380868, |
|
"loss": 2.252, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.000470770593445527, |
|
"loss": 2.2295, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00046988485385296723, |
|
"loss": 2.2035, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004689991142604074, |
|
"loss": 2.285, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00046811337466784767, |
|
"loss": 2.2439, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004672276350752879, |
|
"loss": 2.2378, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004663418954827281, |
|
"loss": 2.2499, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00046545615589016834, |
|
"loss": 2.1999, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0004645704162976085, |
|
"loss": 2.1862, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00046368467670504873, |
|
"loss": 2.217, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00046279893711248895, |
|
"loss": 2.2083, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00046191319751992917, |
|
"loss": 2.1784, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004610274579273694, |
|
"loss": 2.1467, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00046014171833480956, |
|
"loss": 2.2254, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004592559787422498, |
|
"loss": 2.2067, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00045837023914969, |
|
"loss": 2.2011, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004574844995571302, |
|
"loss": 2.1521, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004565987599645704, |
|
"loss": 2.193, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004557130203720106, |
|
"loss": 2.1709, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00045482728077945084, |
|
"loss": 2.1806, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00045394154118689106, |
|
"loss": 2.1895, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0004530558015943313, |
|
"loss": 2.203, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00045217006200177145, |
|
"loss": 2.1686, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00045128432240921167, |
|
"loss": 2.1461, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0004503985828166519, |
|
"loss": 2.1239, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004495128432240921, |
|
"loss": 2.0857, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00044862710363153234, |
|
"loss": 2.0997, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0004477413640389725, |
|
"loss": 2.0756, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0004468556244464128, |
|
"loss": 2.0468, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.000445969884853853, |
|
"loss": 2.0412, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0004450841452612932, |
|
"loss": 2.0289, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0004441984056687334, |
|
"loss": 2.07, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0004433126660761736, |
|
"loss": 2.039, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00044242692648361384, |
|
"loss": 2.0749, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00044154118689105406, |
|
"loss": 2.0776, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0004406554472984943, |
|
"loss": 2.0375, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00043976970770593445, |
|
"loss": 2.0512, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00043888396811337467, |
|
"loss": 2.0449, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0004379982285208149, |
|
"loss": 2.0157, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0004371124889282551, |
|
"loss": 2.0406, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00043622674933569533, |
|
"loss": 2.039, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0004353410097431355, |
|
"loss": 2.0525, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0004344552701505757, |
|
"loss": 2.0217, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00043356953055801595, |
|
"loss": 2.0398, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00043268379096545617, |
|
"loss": 2.0426, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00043179805137289634, |
|
"loss": 2.0565, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00043091231178033656, |
|
"loss": 1.9831, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0004300265721877768, |
|
"loss": 2.0202, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.000429140832595217, |
|
"loss": 2.0607, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0004282550930026572, |
|
"loss": 2.0394, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0004273693534100974, |
|
"loss": 2.0303, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00042648361381753767, |
|
"loss": 2.0451, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0004255978742249779, |
|
"loss": 2.0185, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0004247121346324181, |
|
"loss": 2.0064, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00042382639503985833, |
|
"loss": 2.0093, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0004229406554472985, |
|
"loss": 1.9924, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0004220549158547387, |
|
"loss": 2.0301, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00042116917626217894, |
|
"loss": 1.9849, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00042028343666961917, |
|
"loss": 2.0016, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00041939769707705933, |
|
"loss": 1.9805, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00041851195748449956, |
|
"loss": 2.0216, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0004176262178919398, |
|
"loss": 1.9921, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00041674047829938, |
|
"loss": 1.9857, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0004158547387068202, |
|
"loss": 2.011, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0004149689991142604, |
|
"loss": 2.0079, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0004140832595217006, |
|
"loss": 2.0178, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00041319751992914083, |
|
"loss": 1.9978, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00041231178033658105, |
|
"loss": 1.9585, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0004114260407440213, |
|
"loss": 1.9802, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00041054030115146144, |
|
"loss": 1.9912, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00040965456155890167, |
|
"loss": 2.0093, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0004087688219663419, |
|
"loss": 1.9695, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0004078830823737821, |
|
"loss": 2.0003, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0004069973427812223, |
|
"loss": 2.0081, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0004061116031886625, |
|
"loss": 1.9888, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0004052258635961028, |
|
"loss": 2.0072, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.000404340124003543, |
|
"loss": 2.0097, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0004034543844109832, |
|
"loss": 2.0066, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0004025686448184234, |
|
"loss": 1.9976, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0004016829052258636, |
|
"loss": 1.9543, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00040079716563330383, |
|
"loss": 1.9676, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00039991142604074405, |
|
"loss": 1.9547, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0003990256864481843, |
|
"loss": 1.8447, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00039813994685562444, |
|
"loss": 1.9108, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00039725420726306466, |
|
"loss": 1.8746, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0003963684676705049, |
|
"loss": 1.864, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0003954827280779451, |
|
"loss": 1.8742, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0003945969884853853, |
|
"loss": 1.8111, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0003937112488928255, |
|
"loss": 1.8708, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0003928255093002657, |
|
"loss": 1.8909, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00039193976970770594, |
|
"loss": 1.8941, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00039105403011514616, |
|
"loss": 1.8875, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00039016829052258633, |
|
"loss": 1.8948, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00038928255093002655, |
|
"loss": 1.8865, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0003883968113374668, |
|
"loss": 1.8871, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.000387511071744907, |
|
"loss": 1.8811, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0003866253321523472, |
|
"loss": 1.8876, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0003857395925597874, |
|
"loss": 1.8702, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00038485385296722766, |
|
"loss": 1.9055, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0003839681133746679, |
|
"loss": 1.8816, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0003830823737821081, |
|
"loss": 1.8652, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00038219663418954827, |
|
"loss": 1.8891, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0003813108945969885, |
|
"loss": 1.9039, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0003804251550044287, |
|
"loss": 1.8558, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00037953941541186894, |
|
"loss": 1.8471, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00037865367581930916, |
|
"loss": 1.8532, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00037776793622674933, |
|
"loss": 1.8786, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00037688219663418955, |
|
"loss": 1.8285, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00037599645704162977, |
|
"loss": 1.8505, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00037511071744907, |
|
"loss": 1.8604, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0003742249778565102, |
|
"loss": 1.8684, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0003733392382639504, |
|
"loss": 1.8241, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0003724534986713906, |
|
"loss": 1.8601, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0003715677590788308, |
|
"loss": 1.8582, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00037068201948627105, |
|
"loss": 1.8555, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0003697962798937112, |
|
"loss": 1.8905, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00036891054030115144, |
|
"loss": 1.8754, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00036802480070859166, |
|
"loss": 1.8835, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0003671390611160319, |
|
"loss": 1.847, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0003662533215234721, |
|
"loss": 1.8621, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00036536758193091227, |
|
"loss": 1.8558, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0003644818423383525, |
|
"loss": 1.8349, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00036359610274579277, |
|
"loss": 1.8698, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.000362710363153233, |
|
"loss": 1.8925, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0003618246235606732, |
|
"loss": 1.8347, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0003609388839681134, |
|
"loss": 1.869, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0003600531443755536, |
|
"loss": 1.82, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0003591674047829938, |
|
"loss": 1.8407, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00035828166519043405, |
|
"loss": 1.8458, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0003573959255978742, |
|
"loss": 1.8568, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00035651018600531444, |
|
"loss": 1.855, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00035562444641275466, |
|
"loss": 1.841, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0003547387068201949, |
|
"loss": 1.814, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0003538529672276351, |
|
"loss": 1.8102, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00035296722763507527, |
|
"loss": 1.8517, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0003520814880425155, |
|
"loss": 1.8326, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0003511957484499557, |
|
"loss": 1.8514, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00035031000885739594, |
|
"loss": 1.863, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00034942426926483616, |
|
"loss": 1.779, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0003485385296722763, |
|
"loss": 1.7412, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00034765279007971655, |
|
"loss": 1.7293, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00034676705048715677, |
|
"loss": 1.7102, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.000345881310894597, |
|
"loss": 1.7713, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.00034499557130203716, |
|
"loss": 1.7884, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0003441098317094774, |
|
"loss": 1.7567, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00034322409211691766, |
|
"loss": 1.7192, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0003423383525243579, |
|
"loss": 1.7205, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0003414526129317981, |
|
"loss": 1.7175, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00034056687333923827, |
|
"loss": 1.7627, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0003396811337466785, |
|
"loss": 1.7404, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0003387953941541187, |
|
"loss": 1.7664, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00033790965456155893, |
|
"loss": 1.7307, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00033702391496899915, |
|
"loss": 1.7245, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0003361381753764393, |
|
"loss": 1.7643, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00033525243578387954, |
|
"loss": 1.7469, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00033436669619131977, |
|
"loss": 1.7639, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00033348095659876, |
|
"loss": 1.7364, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00033259521700620016, |
|
"loss": 1.7284, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0003317094774136404, |
|
"loss": 1.7461, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0003308237378210806, |
|
"loss": 1.7558, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0003299379982285208, |
|
"loss": 1.7578, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00032905225863596104, |
|
"loss": 1.7989, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0003281665190434012, |
|
"loss": 1.7949, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00032728077945084143, |
|
"loss": 1.777, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00032639503985828165, |
|
"loss": 1.777, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0003255093002657219, |
|
"loss": 1.7626, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0003246235606731621, |
|
"loss": 1.7843, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00032373782108060227, |
|
"loss": 1.7301, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0003228520814880425, |
|
"loss": 1.7561, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00032196634189548276, |
|
"loss": 1.7534, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.000321080602302923, |
|
"loss": 1.753, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00032019486271036315, |
|
"loss": 1.7259, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.0003193091231178034, |
|
"loss": 1.789, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0003184233835252436, |
|
"loss": 1.7771, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0003175376439326838, |
|
"loss": 1.7308, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00031665190434012404, |
|
"loss": 1.7568, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0003157661647475642, |
|
"loss": 1.7393, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.00031488042515500443, |
|
"loss": 1.7593, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.00031399468556244465, |
|
"loss": 1.732, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0003131089459698849, |
|
"loss": 1.7342, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0003122232063773251, |
|
"loss": 1.7386, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00031133746678476526, |
|
"loss": 1.7394, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0003104517271922055, |
|
"loss": 1.7439, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0003095659875996457, |
|
"loss": 1.7053, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00030868024800708593, |
|
"loss": 1.7022, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0003077945084145261, |
|
"loss": 1.7254, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0003069087688219663, |
|
"loss": 1.712, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00030602302922940654, |
|
"loss": 1.7565, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.00030513728963684676, |
|
"loss": 1.7495, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.000304251550044287, |
|
"loss": 1.7435, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00030336581045172715, |
|
"loss": 1.7649, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0003024800708591674, |
|
"loss": 1.743, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00030159433126660765, |
|
"loss": 1.7332, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00030070859167404787, |
|
"loss": 1.6918, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0002998228520814881, |
|
"loss": 1.6936, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00029893711248892826, |
|
"loss": 1.6353, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0002980513728963685, |
|
"loss": 1.6311, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0002971656333038087, |
|
"loss": 1.6385, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00029627989371124893, |
|
"loss": 1.6188, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0002953941541186891, |
|
"loss": 1.6043, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0002945084145261293, |
|
"loss": 1.6233, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00029362267493356954, |
|
"loss": 1.6249, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00029273693534100976, |
|
"loss": 1.6264, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00029185119574845, |
|
"loss": 1.6343, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00029096545615589015, |
|
"loss": 1.6157, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00029007971656333037, |
|
"loss": 1.6384, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002891939769707706, |
|
"loss": 1.65, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.0002883082373782108, |
|
"loss": 1.6624, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00028742249778565104, |
|
"loss": 1.6375, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0002865367581930912, |
|
"loss": 1.6722, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00028565101860053143, |
|
"loss": 1.6491, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00028476527900797165, |
|
"loss": 1.6498, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00028387953941541187, |
|
"loss": 1.6559, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.00028299379982285204, |
|
"loss": 1.6044, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.00028210806023029226, |
|
"loss": 1.6661, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0002812223206377325, |
|
"loss": 1.6478, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00028033658104517276, |
|
"loss": 1.6382, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.000279450841452613, |
|
"loss": 1.6339, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00027856510186005315, |
|
"loss": 1.6223, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.00027767936226749337, |
|
"loss": 1.6472, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0002767936226749336, |
|
"loss": 1.6333, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0002759078830823738, |
|
"loss": 1.6384, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.00027502214348981404, |
|
"loss": 1.6005, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002741364038972542, |
|
"loss": 1.6455, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0002732506643046944, |
|
"loss": 1.6201, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00027236492471213465, |
|
"loss": 1.6439, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00027147918511957487, |
|
"loss": 1.6261, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.00027059344552701504, |
|
"loss": 1.6236, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00026970770593445526, |
|
"loss": 1.6156, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.0002688219663418955, |
|
"loss": 1.6597, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002679362267493357, |
|
"loss": 1.635, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0002670504871567759, |
|
"loss": 1.6202, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002661647475642161, |
|
"loss": 1.6413, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0002652790079716563, |
|
"loss": 1.6363, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.00026439326837909654, |
|
"loss": 1.6283, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.00026350752878653676, |
|
"loss": 1.6437, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.000262621789193977, |
|
"loss": 1.6456, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.00026173604960141715, |
|
"loss": 1.6076, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00026085031000885737, |
|
"loss": 1.6429, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00025996457041629764, |
|
"loss": 1.6635, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.00025907883082373787, |
|
"loss": 1.6269, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00025819309123117803, |
|
"loss": 1.6282, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00025730735163861826, |
|
"loss": 1.5984, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.0002564216120460585, |
|
"loss": 1.6434, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0002555358724534987, |
|
"loss": 1.641, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0002546501328609389, |
|
"loss": 1.6232, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0002537643932683791, |
|
"loss": 1.5679, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002528786536758193, |
|
"loss": 1.6289, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00025199291408325953, |
|
"loss": 1.6466, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00025110717449069975, |
|
"loss": 1.6467, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00025022143489814, |
|
"loss": 1.6348, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0002493356953055802, |
|
"loss": 1.58, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00024844995571302037, |
|
"loss": 1.5345, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002475642161204606, |
|
"loss": 1.5501, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0002466784765279008, |
|
"loss": 1.5667, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00024579273693534103, |
|
"loss": 1.5045, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00024490699734278125, |
|
"loss": 1.5721, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00024402125775022145, |
|
"loss": 1.5339, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00024313551815766167, |
|
"loss": 1.5543, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.00024224977856510187, |
|
"loss": 1.5568, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0002413640389725421, |
|
"loss": 1.5496, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00024047829937998228, |
|
"loss": 1.5492, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0002395925597874225, |
|
"loss": 1.571, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.00023870682019486273, |
|
"loss": 1.5495, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00023782108060230292, |
|
"loss": 1.538, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00023693534100974314, |
|
"loss": 1.5464, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00023604960141718334, |
|
"loss": 1.5523, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00023516386182462356, |
|
"loss": 1.526, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00023427812223206375, |
|
"loss": 1.5692, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.000233392382639504, |
|
"loss": 1.548, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00023250664304694422, |
|
"loss": 1.575, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00023162090345438442, |
|
"loss": 1.5104, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00023073516386182464, |
|
"loss": 1.558, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00022984942426926484, |
|
"loss": 1.5481, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00022896368467670506, |
|
"loss": 1.5538, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00022807794508414525, |
|
"loss": 1.4957, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00022719220549158547, |
|
"loss": 1.5547, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0002263064658990257, |
|
"loss": 1.573, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.0002254207263064659, |
|
"loss": 1.5508, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002245349867139061, |
|
"loss": 1.5452, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00022364924712134633, |
|
"loss": 1.5031, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.00022276350752878656, |
|
"loss": 1.5238, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00022187776793622675, |
|
"loss": 1.5511, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.00022099202834366697, |
|
"loss": 1.5784, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0002201062887511072, |
|
"loss": 1.5487, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0002192205491585474, |
|
"loss": 1.5496, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002183348095659876, |
|
"loss": 1.5414, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0002174490699734278, |
|
"loss": 1.5677, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00021656333038086803, |
|
"loss": 1.5408, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.00021567759078830822, |
|
"loss": 1.5122, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00021479185119574845, |
|
"loss": 1.5275, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00021390611160318867, |
|
"loss": 1.5314, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0002130203720106289, |
|
"loss": 1.5405, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002121346324180691, |
|
"loss": 1.5039, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0002112488928255093, |
|
"loss": 1.5173, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00021036315323294953, |
|
"loss": 1.564, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00020947741364038972, |
|
"loss": 1.5666, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00020859167404782994, |
|
"loss": 1.5539, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00020770593445527017, |
|
"loss": 1.5436, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00020682019486271036, |
|
"loss": 1.5431, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.00020593445527015058, |
|
"loss": 1.5364, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.00020504871567759078, |
|
"loss": 1.5186, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.000204162976085031, |
|
"loss": 1.5308, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.0002032772364924712, |
|
"loss": 1.5218, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.00020239149689991144, |
|
"loss": 1.547, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00020150575730735166, |
|
"loss": 1.554, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.00020062001771479186, |
|
"loss": 1.5563, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00019973427812223208, |
|
"loss": 1.5311, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00019884853852967228, |
|
"loss": 1.4537, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0001979627989371125, |
|
"loss": 1.4493, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0001970770593445527, |
|
"loss": 1.462, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00019619131975199291, |
|
"loss": 1.4879, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00019530558015943314, |
|
"loss": 1.46, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00019441984056687333, |
|
"loss": 1.4318, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00019353410097431355, |
|
"loss": 1.448, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00019264836138175375, |
|
"loss": 1.4615, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.000191762621789194, |
|
"loss": 1.4537, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.0001908768821966342, |
|
"loss": 1.4697, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00018999114260407441, |
|
"loss": 1.4502, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00018910540301151464, |
|
"loss": 1.4421, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00018821966341895483, |
|
"loss": 1.456, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00018733392382639505, |
|
"loss": 1.4647, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00018644818423383525, |
|
"loss": 1.4854, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00018556244464127547, |
|
"loss": 1.4589, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00018467670504871566, |
|
"loss": 1.4537, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00018379096545615589, |
|
"loss": 1.4602, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.0001829052258635961, |
|
"loss": 1.487, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00018201948627103633, |
|
"loss": 1.4731, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00018113374667847655, |
|
"loss": 1.4487, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00018024800708591675, |
|
"loss": 1.4505, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00017936226749335697, |
|
"loss": 1.4525, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.00017847652790079716, |
|
"loss": 1.4717, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00017759078830823738, |
|
"loss": 1.4803, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.0001767050487156776, |
|
"loss": 1.4443, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0001758193091231178, |
|
"loss": 1.4659, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00017493356953055802, |
|
"loss": 1.4918, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00017404782993799822, |
|
"loss": 1.4444, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00017316209034543844, |
|
"loss": 1.4804, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00017227635075287863, |
|
"loss": 1.4815, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00017139061116031888, |
|
"loss": 1.4791, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0001705048715677591, |
|
"loss": 1.4677, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0001696191319751993, |
|
"loss": 1.4453, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00016873339238263952, |
|
"loss": 1.4632, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00016784765279007972, |
|
"loss": 1.4676, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00016696191319751994, |
|
"loss": 1.4879, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.00016607617360496013, |
|
"loss": 1.4755, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00016519043401240036, |
|
"loss": 1.4617, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00016430469441984058, |
|
"loss": 1.4597, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.00016341895482728077, |
|
"loss": 1.4442, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.000162533215234721, |
|
"loss": 1.4734, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.0001616474756421612, |
|
"loss": 1.471, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00016076173604960144, |
|
"loss": 1.4606, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00015987599645704163, |
|
"loss": 1.4626, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00015899025686448185, |
|
"loss": 1.4648, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00015810451727192208, |
|
"loss": 1.4665, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00015721877767936227, |
|
"loss": 1.4483, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0001563330380868025, |
|
"loss": 1.4638, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0001554472984942427, |
|
"loss": 1.4695, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.0001545615589016829, |
|
"loss": 1.4487, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0001536758193091231, |
|
"loss": 1.4547, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00015279007971656333, |
|
"loss": 1.4757, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00015190434012400355, |
|
"loss": 1.4709, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00015101860053144374, |
|
"loss": 1.438, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.000150132860938884, |
|
"loss": 1.4504, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00014924712134632419, |
|
"loss": 1.4182, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001483613817537644, |
|
"loss": 1.3538, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0001474756421612046, |
|
"loss": 1.3627, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00014658990256864482, |
|
"loss": 1.3909, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00014570416297608505, |
|
"loss": 1.4015, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00014481842338352524, |
|
"loss": 1.4001, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00014393268379096546, |
|
"loss": 1.3875, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00014304694419840566, |
|
"loss": 1.4087, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00014216120460584588, |
|
"loss": 1.4053, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00014127546501328607, |
|
"loss": 1.3912, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.00014038972542072632, |
|
"loss": 1.4045, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00013950398582816655, |
|
"loss": 1.3781, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00013861824623560674, |
|
"loss": 1.4017, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00013773250664304696, |
|
"loss": 1.3905, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00013684676705048716, |
|
"loss": 1.4088, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00013596102745792738, |
|
"loss": 1.427, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00013507528786536757, |
|
"loss": 1.3799, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0001341895482728078, |
|
"loss": 1.3973, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00013330380868024802, |
|
"loss": 1.4444, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.0001324180690876882, |
|
"loss": 1.3977, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00013153232949512843, |
|
"loss": 1.413, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00013064658990256863, |
|
"loss": 1.3855, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00012976085031000888, |
|
"loss": 1.3802, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00012887511071744907, |
|
"loss": 1.3871, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.0001279893711248893, |
|
"loss": 1.3736, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00012710363153232952, |
|
"loss": 1.4049, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.0001262178919397697, |
|
"loss": 1.3547, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00012533215234720993, |
|
"loss": 1.3899, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00012444641275465013, |
|
"loss": 1.3879, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00012356067316209035, |
|
"loss": 1.386, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00012267493356953057, |
|
"loss": 1.3889, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.00012178919397697078, |
|
"loss": 1.4047, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00012090345438441099, |
|
"loss": 1.3835, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0001200177147918512, |
|
"loss": 1.3969, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0001191319751992914, |
|
"loss": 1.4, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00011824623560673161, |
|
"loss": 1.3735, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.00011736049601417183, |
|
"loss": 1.3813, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00011647475642161206, |
|
"loss": 1.3986, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00011558901682905227, |
|
"loss": 1.3971, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.00011470327723649247, |
|
"loss": 1.4061, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00011381753764393268, |
|
"loss": 1.3679, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00011293179805137289, |
|
"loss": 1.3995, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.0001120460584588131, |
|
"loss": 1.3876, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00011116031886625333, |
|
"loss": 1.3797, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.00011027457927369354, |
|
"loss": 1.3738, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.00010938883968113375, |
|
"loss": 1.3694, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00010850310008857396, |
|
"loss": 1.4214, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.00010761736049601417, |
|
"loss": 1.4109, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00010673162090345438, |
|
"loss": 1.3824, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0001058458813108946, |
|
"loss": 1.399, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00010496014171833482, |
|
"loss": 1.3917, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00010407440212577503, |
|
"loss": 1.422, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00010318866253321524, |
|
"loss": 1.3856, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00010230292294065544, |
|
"loss": 1.3747, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00010141718334809567, |
|
"loss": 1.3847, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00010053144375553587, |
|
"loss": 1.3937, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.964570416297608e-05, |
|
"loss": 1.3614, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.87599645704163e-05, |
|
"loss": 1.3446, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.787422497785651e-05, |
|
"loss": 1.3427, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.698848538529672e-05, |
|
"loss": 1.3139, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 9.610274579273694e-05, |
|
"loss": 1.319, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.521700620017715e-05, |
|
"loss": 1.3558, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 9.433126660761736e-05, |
|
"loss": 1.3171, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 9.344552701505757e-05, |
|
"loss": 1.3402, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.255978742249779e-05, |
|
"loss": 1.3222, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 9.1674047829938e-05, |
|
"loss": 1.3373, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.078830823737822e-05, |
|
"loss": 1.3553, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 8.990256864481843e-05, |
|
"loss": 1.3406, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 8.901682905225864e-05, |
|
"loss": 1.3249, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.813108945969885e-05, |
|
"loss": 1.3145, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.724534986713905e-05, |
|
"loss": 1.3527, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.635961027457928e-05, |
|
"loss": 1.3283, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.54738706820195e-05, |
|
"loss": 1.3233, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.45881310894597e-05, |
|
"loss": 1.3495, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 8.370239149689991e-05, |
|
"loss": 1.3476, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.281665190434012e-05, |
|
"loss": 1.3237, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.193091231178033e-05, |
|
"loss": 1.3465, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.104517271922054e-05, |
|
"loss": 1.3308, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.015943312666077e-05, |
|
"loss": 1.3414, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 7.927369353410098e-05, |
|
"loss": 1.3329, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 7.838795394154119e-05, |
|
"loss": 1.346, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 7.75022143489814e-05, |
|
"loss": 1.3453, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 7.661647475642161e-05, |
|
"loss": 1.3197, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.573073516386182e-05, |
|
"loss": 1.3247, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 7.484499557130204e-05, |
|
"loss": 1.3389, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 7.395925597874226e-05, |
|
"loss": 1.3421, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 7.307351638618247e-05, |
|
"loss": 1.3203, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.218777679362268e-05, |
|
"loss": 1.3218, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.130203720106288e-05, |
|
"loss": 1.3637, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.041629760850309e-05, |
|
"loss": 1.3239, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 6.953055801594331e-05, |
|
"loss": 1.3559, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 6.864481842338352e-05, |
|
"loss": 1.3263, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.775907883082374e-05, |
|
"loss": 1.3626, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 6.687333923826395e-05, |
|
"loss": 1.3125, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 6.598759964570416e-05, |
|
"loss": 1.3079, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.510186005314437e-05, |
|
"loss": 1.3478, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 6.421612046058459e-05, |
|
"loss": 1.3252, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.33303808680248e-05, |
|
"loss": 1.3619, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 6.244464127546502e-05, |
|
"loss": 1.3281, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.155890168290523e-05, |
|
"loss": 1.316, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 6.067316209034544e-05, |
|
"loss": 1.3324, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 5.9787422497785654e-05, |
|
"loss": 1.3282, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 5.890168290522586e-05, |
|
"loss": 1.3412, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.801594331266608e-05, |
|
"loss": 1.3228, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.713020372010629e-05, |
|
"loss": 1.3367, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 5.62444641275465e-05, |
|
"loss": 1.3259, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.5358724534986716e-05, |
|
"loss": 1.3245, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.4472984942426924e-05, |
|
"loss": 1.3405, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 5.358724534986714e-05, |
|
"loss": 1.3278, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 5.2701505757307354e-05, |
|
"loss": 1.3012, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 5.181576616474756e-05, |
|
"loss": 1.2933, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.093002657218778e-05, |
|
"loss": 1.3753, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.004428697962799e-05, |
|
"loss": 1.3211, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.91585473870682e-05, |
|
"loss": 1.3282, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.827280779450841e-05, |
|
"loss": 1.2949, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 4.738706820194863e-05, |
|
"loss": 1.2804, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 4.650132860938884e-05, |
|
"loss": 1.2732, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.561558901682905e-05, |
|
"loss": 1.2743, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 4.472984942426927e-05, |
|
"loss": 1.2978, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.384410983170948e-05, |
|
"loss": 1.3016, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 4.2958370239149686e-05, |
|
"loss": 1.3, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.20726306465899e-05, |
|
"loss": 1.2877, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.1186891054030117e-05, |
|
"loss": 1.2664, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.030115146147033e-05, |
|
"loss": 1.2769, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 3.941541186891054e-05, |
|
"loss": 1.2926, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 3.8529672276350755e-05, |
|
"loss": 1.2731, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.764393268379097e-05, |
|
"loss": 1.2806, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 3.675819309123118e-05, |
|
"loss": 1.2822, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 3.587245349867139e-05, |
|
"loss": 1.2866, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.498671390611161e-05, |
|
"loss": 1.29, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.410097431355182e-05, |
|
"loss": 1.2734, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 3.3215234720992025e-05, |
|
"loss": 1.309, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3.232949512843225e-05, |
|
"loss": 1.2796, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.1443755535872456e-05, |
|
"loss": 1.2581, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 3.0558015943312664e-05, |
|
"loss": 1.2936, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 2.967227635075288e-05, |
|
"loss": 1.3346, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 2.878653675819309e-05, |
|
"loss": 1.3168, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 2.7900797165633306e-05, |
|
"loss": 1.3255, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.7015057573073517e-05, |
|
"loss": 1.2798, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.612931798051373e-05, |
|
"loss": 1.2846, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.5243578387953944e-05, |
|
"loss": 1.2615, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 2.4357838795394153e-05, |
|
"loss": 1.2692, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.3472099202834368e-05, |
|
"loss": 1.2845, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 2.258635961027458e-05, |
|
"loss": 1.284, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.170062001771479e-05, |
|
"loss": 1.3194, |
|
"step": 54000 |
|
} |
|
], |
|
"max_steps": 56450, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.0185474088615219e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|