|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.999740596627756, |
|
"eval_steps": 500, |
|
"global_step": 38548, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00648508430609598, |
|
"grad_norm": 75.29356384277344, |
|
"learning_rate": 9.997838305231302e-06, |
|
"loss": 3.9392, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01297016861219196, |
|
"grad_norm": 76.29737091064453, |
|
"learning_rate": 9.995676610462604e-06, |
|
"loss": 3.2764, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.019455252918287938, |
|
"grad_norm": 41.812530517578125, |
|
"learning_rate": 9.993514915693906e-06, |
|
"loss": 2.7916, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02594033722438392, |
|
"grad_norm": 73.83484649658203, |
|
"learning_rate": 9.991353220925206e-06, |
|
"loss": 2.5989, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0324254215304799, |
|
"grad_norm": 117.84382629394531, |
|
"learning_rate": 9.989191526156507e-06, |
|
"loss": 2.5196, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.038910505836575876, |
|
"grad_norm": 66.67141723632812, |
|
"learning_rate": 9.987029831387809e-06, |
|
"loss": 2.4034, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04539559014267185, |
|
"grad_norm": 87.57711791992188, |
|
"learning_rate": 9.98486813661911e-06, |
|
"loss": 2.4226, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05188067444876784, |
|
"grad_norm": 40.56406784057617, |
|
"learning_rate": 9.982706441850412e-06, |
|
"loss": 2.3475, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.058365758754863814, |
|
"grad_norm": 168.55384826660156, |
|
"learning_rate": 9.980544747081713e-06, |
|
"loss": 2.306, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0648508430609598, |
|
"grad_norm": 72.2697982788086, |
|
"learning_rate": 9.978383052313015e-06, |
|
"loss": 2.3191, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07133592736705577, |
|
"grad_norm": 140.37997436523438, |
|
"learning_rate": 9.976221357544315e-06, |
|
"loss": 2.2995, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.07782101167315175, |
|
"grad_norm": 48.368682861328125, |
|
"learning_rate": 9.974059662775617e-06, |
|
"loss": 2.2649, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08430609597924774, |
|
"grad_norm": 49.51039123535156, |
|
"learning_rate": 9.971897968006918e-06, |
|
"loss": 2.273, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0907911802853437, |
|
"grad_norm": 95.86265563964844, |
|
"learning_rate": 9.96973627323822e-06, |
|
"loss": 2.2113, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09727626459143969, |
|
"grad_norm": 59.293392181396484, |
|
"learning_rate": 9.967574578469521e-06, |
|
"loss": 2.2497, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.10376134889753567, |
|
"grad_norm": 96.03455352783203, |
|
"learning_rate": 9.965412883700823e-06, |
|
"loss": 2.1745, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.11024643320363164, |
|
"grad_norm": 58.87235641479492, |
|
"learning_rate": 9.963251188932125e-06, |
|
"loss": 2.2074, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.11673151750972763, |
|
"grad_norm": 60.09347152709961, |
|
"learning_rate": 9.961089494163424e-06, |
|
"loss": 2.2143, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12321660181582361, |
|
"grad_norm": 33.75955581665039, |
|
"learning_rate": 9.958927799394726e-06, |
|
"loss": 2.1443, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1297016861219196, |
|
"grad_norm": 67.69246673583984, |
|
"learning_rate": 9.956766104626028e-06, |
|
"loss": 2.1568, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13618677042801555, |
|
"grad_norm": 61.85429763793945, |
|
"learning_rate": 9.95460440985733e-06, |
|
"loss": 2.1036, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.14267185473411154, |
|
"grad_norm": 47.63957595825195, |
|
"learning_rate": 9.95244271508863e-06, |
|
"loss": 2.0988, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14915693904020752, |
|
"grad_norm": 72.68657684326172, |
|
"learning_rate": 9.950281020319932e-06, |
|
"loss": 2.0674, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1556420233463035, |
|
"grad_norm": 137.7786865234375, |
|
"learning_rate": 9.948119325551234e-06, |
|
"loss": 2.0437, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1621271076523995, |
|
"grad_norm": 28.063282012939453, |
|
"learning_rate": 9.945957630782534e-06, |
|
"loss": 2.0778, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.16861219195849547, |
|
"grad_norm": 183.83932495117188, |
|
"learning_rate": 9.943795936013836e-06, |
|
"loss": 2.0364, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.17509727626459143, |
|
"grad_norm": 69.06026458740234, |
|
"learning_rate": 9.941634241245137e-06, |
|
"loss": 2.0518, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1815823605706874, |
|
"grad_norm": 46.897403717041016, |
|
"learning_rate": 9.939472546476439e-06, |
|
"loss": 2.0278, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1880674448767834, |
|
"grad_norm": 58.688289642333984, |
|
"learning_rate": 9.93731085170774e-06, |
|
"loss": 2.0535, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.19455252918287938, |
|
"grad_norm": 45.544673919677734, |
|
"learning_rate": 9.935149156939042e-06, |
|
"loss": 2.0326, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.20103761348897536, |
|
"grad_norm": 52.032257080078125, |
|
"learning_rate": 9.932987462170342e-06, |
|
"loss": 2.0506, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.20752269779507135, |
|
"grad_norm": 85.55571746826172, |
|
"learning_rate": 9.930825767401643e-06, |
|
"loss": 2.0022, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2140077821011673, |
|
"grad_norm": 82.76732635498047, |
|
"learning_rate": 9.928664072632945e-06, |
|
"loss": 2.0245, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2204928664072633, |
|
"grad_norm": 132.71685791015625, |
|
"learning_rate": 9.926502377864247e-06, |
|
"loss": 2.034, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.22697795071335927, |
|
"grad_norm": 37.891571044921875, |
|
"learning_rate": 9.924340683095548e-06, |
|
"loss": 2.0265, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.23346303501945526, |
|
"grad_norm": 74.43115234375, |
|
"learning_rate": 9.92217898832685e-06, |
|
"loss": 1.9993, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.23994811932555124, |
|
"grad_norm": 43.991119384765625, |
|
"learning_rate": 9.920017293558151e-06, |
|
"loss": 1.95, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.24643320363164722, |
|
"grad_norm": 71.00599670410156, |
|
"learning_rate": 9.917855598789451e-06, |
|
"loss": 1.9378, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2529182879377432, |
|
"grad_norm": 99.50753784179688, |
|
"learning_rate": 9.915693904020753e-06, |
|
"loss": 1.9923, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.2594033722438392, |
|
"grad_norm": 139.31951904296875, |
|
"learning_rate": 9.913532209252054e-06, |
|
"loss": 1.9471, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26588845654993515, |
|
"grad_norm": 58.99448776245117, |
|
"learning_rate": 9.911370514483356e-06, |
|
"loss": 1.8731, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.2723735408560311, |
|
"grad_norm": 72.88382720947266, |
|
"learning_rate": 9.909208819714658e-06, |
|
"loss": 1.9395, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2788586251621271, |
|
"grad_norm": 91.64574432373047, |
|
"learning_rate": 9.90704712494596e-06, |
|
"loss": 1.835, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.2853437094682231, |
|
"grad_norm": 69.67208862304688, |
|
"learning_rate": 9.90488543017726e-06, |
|
"loss": 1.8692, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.2918287937743191, |
|
"grad_norm": 81.73461151123047, |
|
"learning_rate": 9.90272373540856e-06, |
|
"loss": 1.8508, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.29831387808041504, |
|
"grad_norm": 89.83018493652344, |
|
"learning_rate": 9.900562040639862e-06, |
|
"loss": 1.8786, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.30479896238651105, |
|
"grad_norm": 63.75005340576172, |
|
"learning_rate": 9.898400345871164e-06, |
|
"loss": 1.8883, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.311284046692607, |
|
"grad_norm": 52.06178283691406, |
|
"learning_rate": 9.896238651102465e-06, |
|
"loss": 1.8696, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.31776913099870296, |
|
"grad_norm": 164.74493408203125, |
|
"learning_rate": 9.894076956333767e-06, |
|
"loss": 1.8852, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.324254215304799, |
|
"grad_norm": 132.9710235595703, |
|
"learning_rate": 9.891915261565069e-06, |
|
"loss": 1.8436, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.33073929961089493, |
|
"grad_norm": 26.191741943359375, |
|
"learning_rate": 9.88975356679637e-06, |
|
"loss": 1.8739, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.33722438391699094, |
|
"grad_norm": 39.41121292114258, |
|
"learning_rate": 9.88759187202767e-06, |
|
"loss": 1.8541, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3437094682230869, |
|
"grad_norm": 162.25184631347656, |
|
"learning_rate": 9.885430177258972e-06, |
|
"loss": 1.838, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.35019455252918286, |
|
"grad_norm": 45.588375091552734, |
|
"learning_rate": 9.883268482490273e-06, |
|
"loss": 1.8151, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.35667963683527887, |
|
"grad_norm": 58.42121887207031, |
|
"learning_rate": 9.881106787721575e-06, |
|
"loss": 1.8489, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.3631647211413748, |
|
"grad_norm": 58.45185470581055, |
|
"learning_rate": 9.878945092952877e-06, |
|
"loss": 1.834, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.36964980544747084, |
|
"grad_norm": 43.55636215209961, |
|
"learning_rate": 9.876783398184178e-06, |
|
"loss": 1.8172, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.3761348897535668, |
|
"grad_norm": 111.3084487915039, |
|
"learning_rate": 9.87462170341548e-06, |
|
"loss": 1.7982, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.38261997405966275, |
|
"grad_norm": 48.10725402832031, |
|
"learning_rate": 9.87246000864678e-06, |
|
"loss": 1.815, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.38910505836575876, |
|
"grad_norm": 204.3961639404297, |
|
"learning_rate": 9.870298313878081e-06, |
|
"loss": 1.7829, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3955901426718547, |
|
"grad_norm": 57.22758483886719, |
|
"learning_rate": 9.868136619109383e-06, |
|
"loss": 1.8291, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.40207522697795073, |
|
"grad_norm": 129.09658813476562, |
|
"learning_rate": 9.865974924340684e-06, |
|
"loss": 1.8161, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.4085603112840467, |
|
"grad_norm": 36.433780670166016, |
|
"learning_rate": 9.863813229571986e-06, |
|
"loss": 1.7766, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.4150453955901427, |
|
"grad_norm": 61.64483642578125, |
|
"learning_rate": 9.861651534803288e-06, |
|
"loss": 1.8117, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.42153047989623865, |
|
"grad_norm": 107.76203918457031, |
|
"learning_rate": 9.85948984003459e-06, |
|
"loss": 1.7823, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.4280155642023346, |
|
"grad_norm": 118.72686004638672, |
|
"learning_rate": 9.857328145265889e-06, |
|
"loss": 1.7688, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.4345006485084306, |
|
"grad_norm": 166.0326385498047, |
|
"learning_rate": 9.85516645049719e-06, |
|
"loss": 1.759, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.4409857328145266, |
|
"grad_norm": 81.56283569335938, |
|
"learning_rate": 9.853004755728492e-06, |
|
"loss": 1.7628, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4474708171206226, |
|
"grad_norm": 80.8810043334961, |
|
"learning_rate": 9.850843060959794e-06, |
|
"loss": 1.7637, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.45395590142671854, |
|
"grad_norm": 61.64249801635742, |
|
"learning_rate": 9.848681366191095e-06, |
|
"loss": 1.779, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4604409857328145, |
|
"grad_norm": 88.62084197998047, |
|
"learning_rate": 9.846519671422397e-06, |
|
"loss": 1.7477, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.4669260700389105, |
|
"grad_norm": 52.68846893310547, |
|
"learning_rate": 9.844357976653699e-06, |
|
"loss": 1.7579, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.47341115434500647, |
|
"grad_norm": 100.87641143798828, |
|
"learning_rate": 9.842196281884999e-06, |
|
"loss": 1.7269, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.4798962386511025, |
|
"grad_norm": 110.61991882324219, |
|
"learning_rate": 9.8400345871163e-06, |
|
"loss": 1.718, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.48638132295719844, |
|
"grad_norm": 191.92201232910156, |
|
"learning_rate": 9.8378728923476e-06, |
|
"loss": 1.7211, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.49286640726329445, |
|
"grad_norm": 92.11019897460938, |
|
"learning_rate": 9.835711197578902e-06, |
|
"loss": 1.6597, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4993514915693904, |
|
"grad_norm": 113.60994720458984, |
|
"learning_rate": 9.833549502810203e-06, |
|
"loss": 1.702, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5058365758754864, |
|
"grad_norm": 36.078678131103516, |
|
"learning_rate": 9.831387808041505e-06, |
|
"loss": 1.6789, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.5123216601815823, |
|
"grad_norm": 36.602169036865234, |
|
"learning_rate": 9.829226113272806e-06, |
|
"loss": 1.6852, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.5188067444876784, |
|
"grad_norm": 35.471885681152344, |
|
"learning_rate": 9.827064418504108e-06, |
|
"loss": 1.7237, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5252918287937743, |
|
"grad_norm": 82.1302261352539, |
|
"learning_rate": 9.82490272373541e-06, |
|
"loss": 1.6826, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.5317769130998703, |
|
"grad_norm": 88.32125854492188, |
|
"learning_rate": 9.82274102896671e-06, |
|
"loss": 1.6928, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.5382619974059663, |
|
"grad_norm": 46.689208984375, |
|
"learning_rate": 9.820579334198011e-06, |
|
"loss": 1.6711, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.5447470817120622, |
|
"grad_norm": 51.88264465332031, |
|
"learning_rate": 9.818417639429313e-06, |
|
"loss": 1.661, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5512321660181583, |
|
"grad_norm": 26.311506271362305, |
|
"learning_rate": 9.816255944660614e-06, |
|
"loss": 1.647, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.5577172503242542, |
|
"grad_norm": 56.531463623046875, |
|
"learning_rate": 9.814094249891916e-06, |
|
"loss": 1.6632, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5642023346303502, |
|
"grad_norm": 38.8277702331543, |
|
"learning_rate": 9.811932555123218e-06, |
|
"loss": 1.6396, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.5706874189364461, |
|
"grad_norm": 110.19149017333984, |
|
"learning_rate": 9.809770860354519e-06, |
|
"loss": 1.631, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5771725032425421, |
|
"grad_norm": 31.21686553955078, |
|
"learning_rate": 9.807609165585819e-06, |
|
"loss": 1.6272, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.5836575875486382, |
|
"grad_norm": 54.84867477416992, |
|
"learning_rate": 9.80544747081712e-06, |
|
"loss": 1.6055, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5901426718547341, |
|
"grad_norm": 73.87213134765625, |
|
"learning_rate": 9.803285776048422e-06, |
|
"loss": 1.5888, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.5966277561608301, |
|
"grad_norm": 33.05928039550781, |
|
"learning_rate": 9.801124081279724e-06, |
|
"loss": 1.6019, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.603112840466926, |
|
"grad_norm": 58.189022064208984, |
|
"learning_rate": 9.798962386511025e-06, |
|
"loss": 1.5611, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.6095979247730221, |
|
"grad_norm": 98.57378387451172, |
|
"learning_rate": 9.796800691742327e-06, |
|
"loss": 1.597, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.6160830090791181, |
|
"grad_norm": 40.484683990478516, |
|
"learning_rate": 9.794638996973629e-06, |
|
"loss": 1.5792, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.622568093385214, |
|
"grad_norm": 137.65402221679688, |
|
"learning_rate": 9.792477302204928e-06, |
|
"loss": 1.5859, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.62905317769131, |
|
"grad_norm": 222.86753845214844, |
|
"learning_rate": 9.79031560743623e-06, |
|
"loss": 1.5545, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.6355382619974059, |
|
"grad_norm": 61.394309997558594, |
|
"learning_rate": 9.788153912667532e-06, |
|
"loss": 1.5275, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.642023346303502, |
|
"grad_norm": 56.79536437988281, |
|
"learning_rate": 9.785992217898833e-06, |
|
"loss": 1.5688, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.648508430609598, |
|
"grad_norm": 35.67102813720703, |
|
"learning_rate": 9.783830523130135e-06, |
|
"loss": 1.5359, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6549935149156939, |
|
"grad_norm": 106.68746948242188, |
|
"learning_rate": 9.781668828361436e-06, |
|
"loss": 1.5443, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.6614785992217899, |
|
"grad_norm": 118.8326644897461, |
|
"learning_rate": 9.779507133592736e-06, |
|
"loss": 1.5223, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6679636835278858, |
|
"grad_norm": 98.10018920898438, |
|
"learning_rate": 9.777345438824038e-06, |
|
"loss": 1.5243, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.6744487678339819, |
|
"grad_norm": 77.71648406982422, |
|
"learning_rate": 9.77518374405534e-06, |
|
"loss": 1.5383, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6809338521400778, |
|
"grad_norm": 58.089149475097656, |
|
"learning_rate": 9.773022049286641e-06, |
|
"loss": 1.4545, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.6874189364461738, |
|
"grad_norm": 37.549198150634766, |
|
"learning_rate": 9.770860354517943e-06, |
|
"loss": 1.454, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6939040207522698, |
|
"grad_norm": 105.52291107177734, |
|
"learning_rate": 9.768698659749244e-06, |
|
"loss": 1.4583, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.7003891050583657, |
|
"grad_norm": 48.97454833984375, |
|
"learning_rate": 9.766536964980546e-06, |
|
"loss": 1.4752, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7068741893644618, |
|
"grad_norm": 46.371768951416016, |
|
"learning_rate": 9.764375270211846e-06, |
|
"loss": 1.4495, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.7133592736705577, |
|
"grad_norm": 46.270530700683594, |
|
"learning_rate": 9.762213575443147e-06, |
|
"loss": 1.4777, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7198443579766537, |
|
"grad_norm": 74.16635131835938, |
|
"learning_rate": 9.760051880674449e-06, |
|
"loss": 1.4826, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.7263294422827496, |
|
"grad_norm": 128.18382263183594, |
|
"learning_rate": 9.75789018590575e-06, |
|
"loss": 1.4844, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.7328145265888456, |
|
"grad_norm": 69.79833984375, |
|
"learning_rate": 9.755728491137052e-06, |
|
"loss": 1.5204, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.7392996108949417, |
|
"grad_norm": 60.08644104003906, |
|
"learning_rate": 9.753566796368354e-06, |
|
"loss": 1.4153, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.7457846952010376, |
|
"grad_norm": 28.086040496826172, |
|
"learning_rate": 9.751405101599655e-06, |
|
"loss": 1.4365, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.7522697795071336, |
|
"grad_norm": 123.76924133300781, |
|
"learning_rate": 9.749243406830955e-06, |
|
"loss": 1.4517, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.7587548638132295, |
|
"grad_norm": 51.66524124145508, |
|
"learning_rate": 9.747081712062257e-06, |
|
"loss": 1.3986, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.7652399481193255, |
|
"grad_norm": 108.23607635498047, |
|
"learning_rate": 9.744920017293558e-06, |
|
"loss": 1.4244, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.7717250324254216, |
|
"grad_norm": 231.63455200195312, |
|
"learning_rate": 9.74275832252486e-06, |
|
"loss": 1.4064, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.7782101167315175, |
|
"grad_norm": 81.5296630859375, |
|
"learning_rate": 9.740596627756162e-06, |
|
"loss": 1.4159, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7846952010376135, |
|
"grad_norm": 88.66846466064453, |
|
"learning_rate": 9.738434932987463e-06, |
|
"loss": 1.3972, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.7911802853437094, |
|
"grad_norm": 51.40989303588867, |
|
"learning_rate": 9.736273238218765e-06, |
|
"loss": 1.3936, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.7976653696498055, |
|
"grad_norm": 125.95860290527344, |
|
"learning_rate": 9.734111543450065e-06, |
|
"loss": 1.4171, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.8041504539559015, |
|
"grad_norm": 106.81758117675781, |
|
"learning_rate": 9.731949848681366e-06, |
|
"loss": 1.3815, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.8106355382619974, |
|
"grad_norm": 59.69066619873047, |
|
"learning_rate": 9.729788153912668e-06, |
|
"loss": 1.3268, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.8171206225680934, |
|
"grad_norm": 58.66751480102539, |
|
"learning_rate": 9.72762645914397e-06, |
|
"loss": 1.3593, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.8236057068741893, |
|
"grad_norm": 67.43231964111328, |
|
"learning_rate": 9.725464764375271e-06, |
|
"loss": 1.3639, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.8300907911802854, |
|
"grad_norm": 42.44819259643555, |
|
"learning_rate": 9.723303069606573e-06, |
|
"loss": 1.3664, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.8365758754863813, |
|
"grad_norm": 31.140090942382812, |
|
"learning_rate": 9.721141374837874e-06, |
|
"loss": 1.3689, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.8430609597924773, |
|
"grad_norm": 295.80523681640625, |
|
"learning_rate": 9.718979680069174e-06, |
|
"loss": 1.3763, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8495460440985733, |
|
"grad_norm": 88.26421356201172, |
|
"learning_rate": 9.716817985300476e-06, |
|
"loss": 1.3537, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.8560311284046692, |
|
"grad_norm": 193.05654907226562, |
|
"learning_rate": 9.714656290531777e-06, |
|
"loss": 1.3507, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.8625162127107653, |
|
"grad_norm": 79.03955078125, |
|
"learning_rate": 9.712494595763079e-06, |
|
"loss": 1.3388, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.8690012970168612, |
|
"grad_norm": 50.94293975830078, |
|
"learning_rate": 9.71033290099438e-06, |
|
"loss": 1.3568, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.8754863813229572, |
|
"grad_norm": 39.64507293701172, |
|
"learning_rate": 9.708171206225682e-06, |
|
"loss": 1.3824, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.8819714656290532, |
|
"grad_norm": 72.73592376708984, |
|
"learning_rate": 9.706009511456984e-06, |
|
"loss": 1.3592, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8884565499351491, |
|
"grad_norm": 25.956851959228516, |
|
"learning_rate": 9.703847816688284e-06, |
|
"loss": 1.3338, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.8949416342412452, |
|
"grad_norm": 96.9691162109375, |
|
"learning_rate": 9.701686121919585e-06, |
|
"loss": 1.3522, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.9014267185473411, |
|
"grad_norm": 24.184741973876953, |
|
"learning_rate": 9.699524427150887e-06, |
|
"loss": 1.3276, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.9079118028534371, |
|
"grad_norm": 38.254638671875, |
|
"learning_rate": 9.697362732382188e-06, |
|
"loss": 1.3185, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.914396887159533, |
|
"grad_norm": 160.06329345703125, |
|
"learning_rate": 9.69520103761349e-06, |
|
"loss": 1.3145, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.920881971465629, |
|
"grad_norm": 194.62234497070312, |
|
"learning_rate": 9.693039342844792e-06, |
|
"loss": 1.3505, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.9273670557717251, |
|
"grad_norm": 63.943546295166016, |
|
"learning_rate": 9.690877648076093e-06, |
|
"loss": 1.3353, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.933852140077821, |
|
"grad_norm": 64.3890151977539, |
|
"learning_rate": 9.688715953307393e-06, |
|
"loss": 1.2994, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.940337224383917, |
|
"grad_norm": 42.98583984375, |
|
"learning_rate": 9.686554258538695e-06, |
|
"loss": 1.3034, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.9468223086900129, |
|
"grad_norm": 112.66468811035156, |
|
"learning_rate": 9.684392563769996e-06, |
|
"loss": 1.2642, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.953307392996109, |
|
"grad_norm": 100.0568618774414, |
|
"learning_rate": 9.682230869001298e-06, |
|
"loss": 1.2919, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.959792477302205, |
|
"grad_norm": 26.995040893554688, |
|
"learning_rate": 9.6800691742326e-06, |
|
"loss": 1.2775, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.9662775616083009, |
|
"grad_norm": 26.033170700073242, |
|
"learning_rate": 9.677907479463901e-06, |
|
"loss": 1.2675, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.9727626459143969, |
|
"grad_norm": 53.325523376464844, |
|
"learning_rate": 9.675745784695201e-06, |
|
"loss": 1.2947, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9792477302204928, |
|
"grad_norm": 71.00118255615234, |
|
"learning_rate": 9.673584089926503e-06, |
|
"loss": 1.3299, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.9857328145265889, |
|
"grad_norm": 115.75421142578125, |
|
"learning_rate": 9.671422395157804e-06, |
|
"loss": 1.3157, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.9922178988326849, |
|
"grad_norm": 40.338565826416016, |
|
"learning_rate": 9.669260700389106e-06, |
|
"loss": 1.3313, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.9987029831387808, |
|
"grad_norm": 44.46931457519531, |
|
"learning_rate": 9.667099005620407e-06, |
|
"loss": 1.3073, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.0051880674448768, |
|
"grad_norm": 38.489139556884766, |
|
"learning_rate": 9.664937310851709e-06, |
|
"loss": 1.2656, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.0116731517509727, |
|
"grad_norm": 118.02386474609375, |
|
"learning_rate": 9.66277561608301e-06, |
|
"loss": 1.2827, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.0181582360570687, |
|
"grad_norm": 73.46105194091797, |
|
"learning_rate": 9.66061392131431e-06, |
|
"loss": 1.3098, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.0246433203631646, |
|
"grad_norm": 76.57545471191406, |
|
"learning_rate": 9.658452226545612e-06, |
|
"loss": 1.2673, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.0311284046692606, |
|
"grad_norm": 34.5427360534668, |
|
"learning_rate": 9.656290531776914e-06, |
|
"loss": 1.2507, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.0376134889753568, |
|
"grad_norm": 93.37530517578125, |
|
"learning_rate": 9.654128837008215e-06, |
|
"loss": 1.2523, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.0440985732814527, |
|
"grad_norm": 104.25950622558594, |
|
"learning_rate": 9.651967142239517e-06, |
|
"loss": 1.2446, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.0505836575875487, |
|
"grad_norm": 139.35931396484375, |
|
"learning_rate": 9.649805447470818e-06, |
|
"loss": 1.24, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.0570687418936446, |
|
"grad_norm": 160.05386352539062, |
|
"learning_rate": 9.64764375270212e-06, |
|
"loss": 1.2476, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.0635538261997406, |
|
"grad_norm": 28.035104751586914, |
|
"learning_rate": 9.64548205793342e-06, |
|
"loss": 1.265, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.0700389105058365, |
|
"grad_norm": 37.27667236328125, |
|
"learning_rate": 9.643320363164722e-06, |
|
"loss": 1.206, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.0765239948119325, |
|
"grad_norm": 35.973751068115234, |
|
"learning_rate": 9.641158668396023e-06, |
|
"loss": 1.2393, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.0830090791180285, |
|
"grad_norm": 101.7255859375, |
|
"learning_rate": 9.638996973627325e-06, |
|
"loss": 1.2775, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.0894941634241244, |
|
"grad_norm": 54.50769805908203, |
|
"learning_rate": 9.636835278858626e-06, |
|
"loss": 1.2302, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.0959792477302206, |
|
"grad_norm": 68.53856658935547, |
|
"learning_rate": 9.634673584089928e-06, |
|
"loss": 1.2285, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.1024643320363166, |
|
"grad_norm": 106.03568267822266, |
|
"learning_rate": 9.63251188932123e-06, |
|
"loss": 1.2285, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.1089494163424125, |
|
"grad_norm": 110.32369232177734, |
|
"learning_rate": 9.63035019455253e-06, |
|
"loss": 1.2455, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.1154345006485085, |
|
"grad_norm": 71.17969512939453, |
|
"learning_rate": 9.628188499783831e-06, |
|
"loss": 1.2908, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.1219195849546044, |
|
"grad_norm": 100.68138885498047, |
|
"learning_rate": 9.626026805015133e-06, |
|
"loss": 1.2489, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.1284046692607004, |
|
"grad_norm": 31.05600929260254, |
|
"learning_rate": 9.623865110246434e-06, |
|
"loss": 1.2033, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.1348897535667963, |
|
"grad_norm": 56.18037033081055, |
|
"learning_rate": 9.621703415477736e-06, |
|
"loss": 1.2507, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.1413748378728923, |
|
"grad_norm": 66.67138671875, |
|
"learning_rate": 9.619541720709037e-06, |
|
"loss": 1.2256, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.1478599221789882, |
|
"grad_norm": 89.71563720703125, |
|
"learning_rate": 9.617380025940339e-06, |
|
"loss": 1.2045, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.1543450064850842, |
|
"grad_norm": 83.06657409667969, |
|
"learning_rate": 9.615218331171639e-06, |
|
"loss": 1.1957, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.1608300907911804, |
|
"grad_norm": 32.97713851928711, |
|
"learning_rate": 9.61305663640294e-06, |
|
"loss": 1.2178, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.1673151750972763, |
|
"grad_norm": 40.30419158935547, |
|
"learning_rate": 9.610894941634242e-06, |
|
"loss": 1.2114, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.1738002594033723, |
|
"grad_norm": 26.891326904296875, |
|
"learning_rate": 9.608733246865544e-06, |
|
"loss": 1.2049, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.1802853437094682, |
|
"grad_norm": 95.95230102539062, |
|
"learning_rate": 9.606571552096845e-06, |
|
"loss": 1.1984, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.1867704280155642, |
|
"grad_norm": 137.105712890625, |
|
"learning_rate": 9.604409857328147e-06, |
|
"loss": 1.1697, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.1932555123216602, |
|
"grad_norm": 145.55722045898438, |
|
"learning_rate": 9.602248162559448e-06, |
|
"loss": 1.2145, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.1997405966277561, |
|
"grad_norm": 200.1924591064453, |
|
"learning_rate": 9.600086467790748e-06, |
|
"loss": 1.1548, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.206225680933852, |
|
"grad_norm": 119.41325378417969, |
|
"learning_rate": 9.59792477302205e-06, |
|
"loss": 1.1784, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.212710765239948, |
|
"grad_norm": 33.50049591064453, |
|
"learning_rate": 9.595763078253352e-06, |
|
"loss": 1.2024, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.2191958495460442, |
|
"grad_norm": 106.87812805175781, |
|
"learning_rate": 9.593601383484653e-06, |
|
"loss": 1.211, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.2256809338521402, |
|
"grad_norm": 50.5958366394043, |
|
"learning_rate": 9.591439688715955e-06, |
|
"loss": 1.1891, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.2321660181582361, |
|
"grad_norm": 66.71959686279297, |
|
"learning_rate": 9.589277993947256e-06, |
|
"loss": 1.1643, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.238651102464332, |
|
"grad_norm": 74.7675552368164, |
|
"learning_rate": 9.587116299178558e-06, |
|
"loss": 1.1527, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.245136186770428, |
|
"grad_norm": 48.216217041015625, |
|
"learning_rate": 9.584954604409858e-06, |
|
"loss": 1.1978, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.251621271076524, |
|
"grad_norm": 49.2801628112793, |
|
"learning_rate": 9.58279290964116e-06, |
|
"loss": 1.1669, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.25810635538262, |
|
"grad_norm": 31.61471939086914, |
|
"learning_rate": 9.580631214872461e-06, |
|
"loss": 1.1393, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.264591439688716, |
|
"grad_norm": 84.38628387451172, |
|
"learning_rate": 9.578469520103763e-06, |
|
"loss": 1.146, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.2710765239948119, |
|
"grad_norm": 64.64751434326172, |
|
"learning_rate": 9.576307825335064e-06, |
|
"loss": 1.178, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.2775616083009078, |
|
"grad_norm": 38.89653396606445, |
|
"learning_rate": 9.574146130566366e-06, |
|
"loss": 1.174, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.2840466926070038, |
|
"grad_norm": 154.65513610839844, |
|
"learning_rate": 9.571984435797666e-06, |
|
"loss": 1.1372, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.2905317769131, |
|
"grad_norm": 44.31837844848633, |
|
"learning_rate": 9.569822741028967e-06, |
|
"loss": 1.1371, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.297016861219196, |
|
"grad_norm": 66.79319763183594, |
|
"learning_rate": 9.567661046260269e-06, |
|
"loss": 1.1161, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.3035019455252919, |
|
"grad_norm": 109.10542297363281, |
|
"learning_rate": 9.56549935149157e-06, |
|
"loss": 1.1321, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.3099870298313878, |
|
"grad_norm": 52.56429672241211, |
|
"learning_rate": 9.563337656722872e-06, |
|
"loss": 1.1606, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.3164721141374838, |
|
"grad_norm": 106.3527603149414, |
|
"learning_rate": 9.561175961954174e-06, |
|
"loss": 1.154, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.3229571984435797, |
|
"grad_norm": 275.15216064453125, |
|
"learning_rate": 9.559014267185475e-06, |
|
"loss": 1.1477, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.3294422827496757, |
|
"grad_norm": 146.71636962890625, |
|
"learning_rate": 9.556852572416775e-06, |
|
"loss": 1.154, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.3359273670557716, |
|
"grad_norm": 91.92938995361328, |
|
"learning_rate": 9.554690877648077e-06, |
|
"loss": 1.1194, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.3424124513618678, |
|
"grad_norm": 34.08713912963867, |
|
"learning_rate": 9.552529182879378e-06, |
|
"loss": 1.1176, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.3488975356679638, |
|
"grad_norm": 234.9642791748047, |
|
"learning_rate": 9.55036748811068e-06, |
|
"loss": 1.1294, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.3553826199740597, |
|
"grad_norm": 89.80880737304688, |
|
"learning_rate": 9.548205793341982e-06, |
|
"loss": 1.1138, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.3618677042801557, |
|
"grad_norm": 54.69230651855469, |
|
"learning_rate": 9.546044098573283e-06, |
|
"loss": 1.1153, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.3683527885862516, |
|
"grad_norm": 43.032466888427734, |
|
"learning_rate": 9.543882403804585e-06, |
|
"loss": 1.1041, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.3748378728923476, |
|
"grad_norm": 110.12335205078125, |
|
"learning_rate": 9.541720709035885e-06, |
|
"loss": 1.1056, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.3813229571984436, |
|
"grad_norm": 109.36446380615234, |
|
"learning_rate": 9.539559014267186e-06, |
|
"loss": 1.1288, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.3878080415045395, |
|
"grad_norm": 200.33387756347656, |
|
"learning_rate": 9.537397319498488e-06, |
|
"loss": 1.101, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.3942931258106355, |
|
"grad_norm": 31.898895263671875, |
|
"learning_rate": 9.53523562472979e-06, |
|
"loss": 1.0919, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.4007782101167314, |
|
"grad_norm": 41.42584991455078, |
|
"learning_rate": 9.533073929961091e-06, |
|
"loss": 1.0914, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.4072632944228274, |
|
"grad_norm": 55.4241828918457, |
|
"learning_rate": 9.530912235192391e-06, |
|
"loss": 1.0457, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.4137483787289233, |
|
"grad_norm": 92.30503845214844, |
|
"learning_rate": 9.528750540423693e-06, |
|
"loss": 1.1102, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.4202334630350195, |
|
"grad_norm": 183.83782958984375, |
|
"learning_rate": 9.526588845654994e-06, |
|
"loss": 1.0788, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.4267185473411155, |
|
"grad_norm": 58.78367614746094, |
|
"learning_rate": 9.524427150886296e-06, |
|
"loss": 1.0883, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.4332036316472114, |
|
"grad_norm": 122.31067657470703, |
|
"learning_rate": 9.522265456117596e-06, |
|
"loss": 1.0949, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.4396887159533074, |
|
"grad_norm": 92.71478271484375, |
|
"learning_rate": 9.520103761348897e-06, |
|
"loss": 1.1128, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.4461738002594033, |
|
"grad_norm": 63.70943832397461, |
|
"learning_rate": 9.517942066580199e-06, |
|
"loss": 1.1321, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.4526588845654993, |
|
"grad_norm": 55.85942840576172, |
|
"learning_rate": 9.5157803718115e-06, |
|
"loss": 1.1075, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.4591439688715953, |
|
"grad_norm": 141.53460693359375, |
|
"learning_rate": 9.513618677042802e-06, |
|
"loss": 1.0777, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.4656290531776914, |
|
"grad_norm": 52.136348724365234, |
|
"learning_rate": 9.511456982274104e-06, |
|
"loss": 1.094, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.4721141374837874, |
|
"grad_norm": 66.62019348144531, |
|
"learning_rate": 9.509295287505405e-06, |
|
"loss": 1.1122, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.4785992217898833, |
|
"grad_norm": 37.23124313354492, |
|
"learning_rate": 9.507133592736705e-06, |
|
"loss": 1.1029, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.4850843060959793, |
|
"grad_norm": 141.20053100585938, |
|
"learning_rate": 9.504971897968007e-06, |
|
"loss": 1.0891, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.4915693904020753, |
|
"grad_norm": 70.93553924560547, |
|
"learning_rate": 9.502810203199308e-06, |
|
"loss": 1.0799, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.4980544747081712, |
|
"grad_norm": 78.5645980834961, |
|
"learning_rate": 9.50064850843061e-06, |
|
"loss": 1.0771, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.5045395590142672, |
|
"grad_norm": 110.03238677978516, |
|
"learning_rate": 9.498486813661911e-06, |
|
"loss": 1.052, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.5110246433203631, |
|
"grad_norm": 94.38980865478516, |
|
"learning_rate": 9.496325118893213e-06, |
|
"loss": 1.0831, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.517509727626459, |
|
"grad_norm": 72.01763153076172, |
|
"learning_rate": 9.494163424124515e-06, |
|
"loss": 1.0674, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.523994811932555, |
|
"grad_norm": 51.9689826965332, |
|
"learning_rate": 9.492001729355815e-06, |
|
"loss": 1.0831, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.530479896238651, |
|
"grad_norm": 107.07817840576172, |
|
"learning_rate": 9.489840034587116e-06, |
|
"loss": 1.0426, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.536964980544747, |
|
"grad_norm": 47.38414764404297, |
|
"learning_rate": 9.487678339818418e-06, |
|
"loss": 1.0711, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.543450064850843, |
|
"grad_norm": 45.52274703979492, |
|
"learning_rate": 9.48551664504972e-06, |
|
"loss": 1.0781, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.549935149156939, |
|
"grad_norm": 26.50186538696289, |
|
"learning_rate": 9.483354950281021e-06, |
|
"loss": 1.0574, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.556420233463035, |
|
"grad_norm": 41.2259521484375, |
|
"learning_rate": 9.481193255512323e-06, |
|
"loss": 1.0754, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.562905317769131, |
|
"grad_norm": 35.60273361206055, |
|
"learning_rate": 9.479031560743624e-06, |
|
"loss": 1.0288, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.569390402075227, |
|
"grad_norm": 41.92966842651367, |
|
"learning_rate": 9.476869865974924e-06, |
|
"loss": 1.0488, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.575875486381323, |
|
"grad_norm": 18.675764083862305, |
|
"learning_rate": 9.474708171206226e-06, |
|
"loss": 1.0531, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.582360570687419, |
|
"grad_norm": 108.12574005126953, |
|
"learning_rate": 9.472546476437527e-06, |
|
"loss": 1.0376, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.588845654993515, |
|
"grad_norm": 112.02227020263672, |
|
"learning_rate": 9.470384781668829e-06, |
|
"loss": 1.0343, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.595330739299611, |
|
"grad_norm": 98.89630126953125, |
|
"learning_rate": 9.46822308690013e-06, |
|
"loss": 1.0823, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.601815823605707, |
|
"grad_norm": 118.65319061279297, |
|
"learning_rate": 9.466061392131432e-06, |
|
"loss": 1.0556, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.608300907911803, |
|
"grad_norm": 129.68385314941406, |
|
"learning_rate": 9.463899697362734e-06, |
|
"loss": 1.0468, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.6147859922178989, |
|
"grad_norm": 76.83685302734375, |
|
"learning_rate": 9.461738002594033e-06, |
|
"loss": 1.0861, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.6212710765239948, |
|
"grad_norm": 157.33180236816406, |
|
"learning_rate": 9.459576307825335e-06, |
|
"loss": 1.0452, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.6277561608300908, |
|
"grad_norm": 68.68656158447266, |
|
"learning_rate": 9.457414613056637e-06, |
|
"loss": 1.0309, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.6342412451361867, |
|
"grad_norm": 168.48919677734375, |
|
"learning_rate": 9.455252918287938e-06, |
|
"loss": 1.0167, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.6407263294422827, |
|
"grad_norm": 57.15532684326172, |
|
"learning_rate": 9.45309122351924e-06, |
|
"loss": 0.9928, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.6472114137483787, |
|
"grad_norm": 108.53094482421875, |
|
"learning_rate": 9.450929528750541e-06, |
|
"loss": 0.9977, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.6536964980544746, |
|
"grad_norm": 51.94723129272461, |
|
"learning_rate": 9.448767833981843e-06, |
|
"loss": 1.0463, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.6601815823605706, |
|
"grad_norm": 190.38787841796875, |
|
"learning_rate": 9.446606139213143e-06, |
|
"loss": 1.0505, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 53.69584274291992, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 1.0064, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.6731517509727627, |
|
"grad_norm": 116.31331634521484, |
|
"learning_rate": 9.442282749675746e-06, |
|
"loss": 1.0066, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.6796368352788587, |
|
"grad_norm": 102.2055892944336, |
|
"learning_rate": 9.440121054907048e-06, |
|
"loss": 0.9889, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.6861219195849546, |
|
"grad_norm": 78.99929809570312, |
|
"learning_rate": 9.43795936013835e-06, |
|
"loss": 1.0151, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.6926070038910506, |
|
"grad_norm": 97.79879760742188, |
|
"learning_rate": 9.435797665369651e-06, |
|
"loss": 0.9862, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.6990920881971465, |
|
"grad_norm": 30.27912139892578, |
|
"learning_rate": 9.433635970600953e-06, |
|
"loss": 1.0031, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.7055771725032427, |
|
"grad_norm": 47.64608383178711, |
|
"learning_rate": 9.431474275832252e-06, |
|
"loss": 0.9915, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.7120622568093387, |
|
"grad_norm": 73.29598236083984, |
|
"learning_rate": 9.429312581063554e-06, |
|
"loss": 0.9978, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.7185473411154346, |
|
"grad_norm": 54.02968215942383, |
|
"learning_rate": 9.427150886294856e-06, |
|
"loss": 0.9901, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.7250324254215306, |
|
"grad_norm": 100.88853454589844, |
|
"learning_rate": 9.424989191526157e-06, |
|
"loss": 1.0254, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.7315175097276265, |
|
"grad_norm": 105.97042083740234, |
|
"learning_rate": 9.422827496757459e-06, |
|
"loss": 1.0053, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.7380025940337225, |
|
"grad_norm": 110.1412582397461, |
|
"learning_rate": 9.42066580198876e-06, |
|
"loss": 0.996, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.7444876783398184, |
|
"grad_norm": 102.8427505493164, |
|
"learning_rate": 9.41850410722006e-06, |
|
"loss": 1.0155, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.7509727626459144, |
|
"grad_norm": 87.20895385742188, |
|
"learning_rate": 9.416342412451362e-06, |
|
"loss": 0.9852, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.7574578469520103, |
|
"grad_norm": 77.43791198730469, |
|
"learning_rate": 9.414180717682663e-06, |
|
"loss": 0.9895, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.7639429312581063, |
|
"grad_norm": 172.30885314941406, |
|
"learning_rate": 9.412019022913965e-06, |
|
"loss": 0.9743, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.7704280155642023, |
|
"grad_norm": 70.97063446044922, |
|
"learning_rate": 9.409857328145267e-06, |
|
"loss": 0.9753, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.7769130998702982, |
|
"grad_norm": 123.29631805419922, |
|
"learning_rate": 9.407695633376568e-06, |
|
"loss": 0.9374, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.7833981841763942, |
|
"grad_norm": 169.43450927734375, |
|
"learning_rate": 9.40553393860787e-06, |
|
"loss": 0.9733, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.7898832684824901, |
|
"grad_norm": 94.36160278320312, |
|
"learning_rate": 9.40337224383917e-06, |
|
"loss": 0.9741, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.796368352788586, |
|
"grad_norm": 67.74256896972656, |
|
"learning_rate": 9.401210549070471e-06, |
|
"loss": 0.9556, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.8028534370946823, |
|
"grad_norm": 171.77330017089844, |
|
"learning_rate": 9.399048854301773e-06, |
|
"loss": 0.9841, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.8093385214007782, |
|
"grad_norm": 110.4674301147461, |
|
"learning_rate": 9.396887159533075e-06, |
|
"loss": 1.0008, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.8158236057068742, |
|
"grad_norm": 171.76177978515625, |
|
"learning_rate": 9.394725464764376e-06, |
|
"loss": 0.9835, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.8223086900129701, |
|
"grad_norm": 130.97406005859375, |
|
"learning_rate": 9.392563769995678e-06, |
|
"loss": 0.9498, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.8287937743190663, |
|
"grad_norm": 74.91665649414062, |
|
"learning_rate": 9.39040207522698e-06, |
|
"loss": 0.9621, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.8352788586251623, |
|
"grad_norm": 48.18241500854492, |
|
"learning_rate": 9.38824038045828e-06, |
|
"loss": 0.9802, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.8417639429312582, |
|
"grad_norm": 133.60479736328125, |
|
"learning_rate": 9.38607868568958e-06, |
|
"loss": 0.9622, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.8482490272373542, |
|
"grad_norm": 163.2623291015625, |
|
"learning_rate": 9.383916990920882e-06, |
|
"loss": 0.959, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.8547341115434501, |
|
"grad_norm": 56.28314208984375, |
|
"learning_rate": 9.381755296152184e-06, |
|
"loss": 0.9607, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.861219195849546, |
|
"grad_norm": 115.68190002441406, |
|
"learning_rate": 9.379593601383486e-06, |
|
"loss": 0.9643, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.867704280155642, |
|
"grad_norm": 57.527828216552734, |
|
"learning_rate": 9.377431906614787e-06, |
|
"loss": 0.9458, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.874189364461738, |
|
"grad_norm": 83.91288757324219, |
|
"learning_rate": 9.375270211846089e-06, |
|
"loss": 0.937, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.880674448767834, |
|
"grad_norm": 205.3312530517578, |
|
"learning_rate": 9.373108517077389e-06, |
|
"loss": 0.9382, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.88715953307393, |
|
"grad_norm": 84.1654281616211, |
|
"learning_rate": 9.37094682230869e-06, |
|
"loss": 0.9737, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.8936446173800259, |
|
"grad_norm": 59.71659469604492, |
|
"learning_rate": 9.368785127539992e-06, |
|
"loss": 0.9766, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.9001297016861218, |
|
"grad_norm": 62.78482437133789, |
|
"learning_rate": 9.366623432771293e-06, |
|
"loss": 0.9746, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.9066147859922178, |
|
"grad_norm": 41.973777770996094, |
|
"learning_rate": 9.364461738002595e-06, |
|
"loss": 0.965, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.9130998702983137, |
|
"grad_norm": 26.649688720703125, |
|
"learning_rate": 9.362300043233897e-06, |
|
"loss": 0.949, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.9195849546044097, |
|
"grad_norm": 43.40812683105469, |
|
"learning_rate": 9.360138348465198e-06, |
|
"loss": 0.9204, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.9260700389105059, |
|
"grad_norm": 72.37606811523438, |
|
"learning_rate": 9.357976653696498e-06, |
|
"loss": 0.9599, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.9325551232166018, |
|
"grad_norm": 24.634532928466797, |
|
"learning_rate": 9.3558149589278e-06, |
|
"loss": 0.9309, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.9390402075226978, |
|
"grad_norm": 74.19110870361328, |
|
"learning_rate": 9.353653264159101e-06, |
|
"loss": 0.9173, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.9455252918287937, |
|
"grad_norm": 76.68376922607422, |
|
"learning_rate": 9.351491569390403e-06, |
|
"loss": 0.9305, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.9520103761348897, |
|
"grad_norm": 80.31610107421875, |
|
"learning_rate": 9.349329874621705e-06, |
|
"loss": 0.9095, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 1.9584954604409859, |
|
"grad_norm": 59.694969177246094, |
|
"learning_rate": 9.347168179853006e-06, |
|
"loss": 0.9157, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.9649805447470818, |
|
"grad_norm": 47.985164642333984, |
|
"learning_rate": 9.345006485084308e-06, |
|
"loss": 0.9553, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 1.9714656290531778, |
|
"grad_norm": 119.7039566040039, |
|
"learning_rate": 9.342844790315608e-06, |
|
"loss": 0.9479, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.9779507133592737, |
|
"grad_norm": 84.06747436523438, |
|
"learning_rate": 9.34068309554691e-06, |
|
"loss": 0.9539, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.9844357976653697, |
|
"grad_norm": 182.93211364746094, |
|
"learning_rate": 9.33852140077821e-06, |
|
"loss": 0.9279, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.9909208819714657, |
|
"grad_norm": 33.33463668823242, |
|
"learning_rate": 9.336359706009512e-06, |
|
"loss": 0.9237, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 1.9974059662775616, |
|
"grad_norm": 98.06361389160156, |
|
"learning_rate": 9.334198011240814e-06, |
|
"loss": 0.9449, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.0038910505836576, |
|
"grad_norm": 40.220664978027344, |
|
"learning_rate": 9.332036316472116e-06, |
|
"loss": 0.9192, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.0103761348897535, |
|
"grad_norm": 67.13005828857422, |
|
"learning_rate": 9.329874621703417e-06, |
|
"loss": 0.9487, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.0168612191958495, |
|
"grad_norm": 163.42137145996094, |
|
"learning_rate": 9.327712926934717e-06, |
|
"loss": 0.9684, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.0233463035019454, |
|
"grad_norm": 82.5510025024414, |
|
"learning_rate": 9.325551232166019e-06, |
|
"loss": 0.9344, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.0298313878080414, |
|
"grad_norm": 203.52099609375, |
|
"learning_rate": 9.32338953739732e-06, |
|
"loss": 0.8956, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.0363164721141374, |
|
"grad_norm": 72.38980865478516, |
|
"learning_rate": 9.321227842628622e-06, |
|
"loss": 0.9391, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.0428015564202333, |
|
"grad_norm": 50.11948013305664, |
|
"learning_rate": 9.319066147859923e-06, |
|
"loss": 0.9348, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.0492866407263293, |
|
"grad_norm": 122.09666442871094, |
|
"learning_rate": 9.316904453091225e-06, |
|
"loss": 0.8871, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.0557717250324252, |
|
"grad_norm": 59.022274017333984, |
|
"learning_rate": 9.314742758322527e-06, |
|
"loss": 0.9101, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.062256809338521, |
|
"grad_norm": 76.15840148925781, |
|
"learning_rate": 9.312581063553827e-06, |
|
"loss": 0.9273, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.0687418936446176, |
|
"grad_norm": 47.89101791381836, |
|
"learning_rate": 9.310419368785128e-06, |
|
"loss": 0.9355, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.0752269779507135, |
|
"grad_norm": 229.04345703125, |
|
"learning_rate": 9.30825767401643e-06, |
|
"loss": 0.9091, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.0817120622568095, |
|
"grad_norm": 133.06822204589844, |
|
"learning_rate": 9.306095979247731e-06, |
|
"loss": 0.9448, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.0881971465629054, |
|
"grad_norm": 58.23340606689453, |
|
"learning_rate": 9.303934284479033e-06, |
|
"loss": 0.9028, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.0946822308690014, |
|
"grad_norm": 232.0340118408203, |
|
"learning_rate": 9.301772589710335e-06, |
|
"loss": 0.9424, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.1011673151750974, |
|
"grad_norm": 22.04237937927246, |
|
"learning_rate": 9.299610894941634e-06, |
|
"loss": 0.9227, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.1076523994811933, |
|
"grad_norm": 116.25421142578125, |
|
"learning_rate": 9.297449200172936e-06, |
|
"loss": 0.8914, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.1141374837872893, |
|
"grad_norm": 69.3602066040039, |
|
"learning_rate": 9.295287505404238e-06, |
|
"loss": 0.8985, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.1206225680933852, |
|
"grad_norm": 145.1238555908203, |
|
"learning_rate": 9.29312581063554e-06, |
|
"loss": 0.8987, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.127107652399481, |
|
"grad_norm": 24.35103988647461, |
|
"learning_rate": 9.29096411586684e-06, |
|
"loss": 0.9213, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.133592736705577, |
|
"grad_norm": 37.310787200927734, |
|
"learning_rate": 9.288802421098142e-06, |
|
"loss": 0.8847, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.140077821011673, |
|
"grad_norm": 132.53892517089844, |
|
"learning_rate": 9.286640726329444e-06, |
|
"loss": 0.9068, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.146562905317769, |
|
"grad_norm": 75.88333892822266, |
|
"learning_rate": 9.284479031560744e-06, |
|
"loss": 0.894, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.153047989623865, |
|
"grad_norm": 251.23751831054688, |
|
"learning_rate": 9.282317336792046e-06, |
|
"loss": 0.9422, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.159533073929961, |
|
"grad_norm": 32.46202850341797, |
|
"learning_rate": 9.280155642023347e-06, |
|
"loss": 0.9291, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.166018158236057, |
|
"grad_norm": 53.387718200683594, |
|
"learning_rate": 9.277993947254649e-06, |
|
"loss": 0.8967, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.172503242542153, |
|
"grad_norm": 209.8604278564453, |
|
"learning_rate": 9.27583225248595e-06, |
|
"loss": 0.9111, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.178988326848249, |
|
"grad_norm": 96.47901153564453, |
|
"learning_rate": 9.273670557717252e-06, |
|
"loss": 0.9166, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.1854734111543452, |
|
"grad_norm": 52.16880798339844, |
|
"learning_rate": 9.271508862948553e-06, |
|
"loss": 0.8909, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 2.191958495460441, |
|
"grad_norm": 170.49676513671875, |
|
"learning_rate": 9.269347168179853e-06, |
|
"loss": 0.898, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.198443579766537, |
|
"grad_norm": 55.0761604309082, |
|
"learning_rate": 9.267185473411155e-06, |
|
"loss": 0.9078, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 2.204928664072633, |
|
"grad_norm": 124.61663055419922, |
|
"learning_rate": 9.265023778642457e-06, |
|
"loss": 0.928, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.211413748378729, |
|
"grad_norm": 49.64213562011719, |
|
"learning_rate": 9.262862083873758e-06, |
|
"loss": 0.9018, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 2.217898832684825, |
|
"grad_norm": 143.7904052734375, |
|
"learning_rate": 9.26070038910506e-06, |
|
"loss": 0.8655, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.224383916990921, |
|
"grad_norm": 139.10025024414062, |
|
"learning_rate": 9.258538694336361e-06, |
|
"loss": 0.9088, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 2.230869001297017, |
|
"grad_norm": 18.64621925354004, |
|
"learning_rate": 9.256376999567663e-06, |
|
"loss": 0.8923, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.237354085603113, |
|
"grad_norm": 154.90325927734375, |
|
"learning_rate": 9.254215304798963e-06, |
|
"loss": 0.912, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 2.243839169909209, |
|
"grad_norm": 87.64720916748047, |
|
"learning_rate": 9.252053610030264e-06, |
|
"loss": 0.8789, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.250324254215305, |
|
"grad_norm": 56.62800216674805, |
|
"learning_rate": 9.249891915261566e-06, |
|
"loss": 0.8899, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 2.2568093385214008, |
|
"grad_norm": 37.476234436035156, |
|
"learning_rate": 9.247730220492868e-06, |
|
"loss": 0.8846, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.2632944228274967, |
|
"grad_norm": 60.178428649902344, |
|
"learning_rate": 9.24556852572417e-06, |
|
"loss": 0.9088, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 2.2697795071335927, |
|
"grad_norm": 113.12017059326172, |
|
"learning_rate": 9.24340683095547e-06, |
|
"loss": 0.8523, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.2762645914396886, |
|
"grad_norm": 70.21991729736328, |
|
"learning_rate": 9.241245136186772e-06, |
|
"loss": 0.8874, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 2.2827496757457846, |
|
"grad_norm": 20.540199279785156, |
|
"learning_rate": 9.239083441418072e-06, |
|
"loss": 0.8262, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.2892347600518805, |
|
"grad_norm": 32.57448959350586, |
|
"learning_rate": 9.236921746649374e-06, |
|
"loss": 0.8445, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 2.2957198443579765, |
|
"grad_norm": 232.79153442382812, |
|
"learning_rate": 9.234760051880676e-06, |
|
"loss": 0.8666, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.3022049286640724, |
|
"grad_norm": 52.618385314941406, |
|
"learning_rate": 9.232598357111977e-06, |
|
"loss": 0.8744, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 2.3086900129701684, |
|
"grad_norm": 47.01662826538086, |
|
"learning_rate": 9.230436662343279e-06, |
|
"loss": 0.8673, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.3151750972762644, |
|
"grad_norm": 52.647891998291016, |
|
"learning_rate": 9.22827496757458e-06, |
|
"loss": 0.8946, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 2.3216601815823608, |
|
"grad_norm": 66.30323791503906, |
|
"learning_rate": 9.226113272805882e-06, |
|
"loss": 0.9222, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.3281452658884567, |
|
"grad_norm": 78.40735626220703, |
|
"learning_rate": 9.223951578037182e-06, |
|
"loss": 0.8958, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 2.3346303501945527, |
|
"grad_norm": 156.3478240966797, |
|
"learning_rate": 9.221789883268483e-06, |
|
"loss": 0.8631, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.3411154345006486, |
|
"grad_norm": 46.133201599121094, |
|
"learning_rate": 9.219628188499785e-06, |
|
"loss": 0.8669, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 2.3476005188067446, |
|
"grad_norm": 117.3602523803711, |
|
"learning_rate": 9.217466493731085e-06, |
|
"loss": 0.8592, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.3540856031128405, |
|
"grad_norm": 99.78243255615234, |
|
"learning_rate": 9.215304798962386e-06, |
|
"loss": 0.8563, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 2.3605706874189365, |
|
"grad_norm": 47.9234504699707, |
|
"learning_rate": 9.213143104193688e-06, |
|
"loss": 0.8678, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.3670557717250325, |
|
"grad_norm": 83.74739837646484, |
|
"learning_rate": 9.21098140942499e-06, |
|
"loss": 0.8752, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 2.3735408560311284, |
|
"grad_norm": 36.51896667480469, |
|
"learning_rate": 9.208819714656291e-06, |
|
"loss": 0.9021, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.3800259403372244, |
|
"grad_norm": 31.101106643676758, |
|
"learning_rate": 9.206658019887593e-06, |
|
"loss": 0.8338, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 2.3865110246433203, |
|
"grad_norm": 103.6131591796875, |
|
"learning_rate": 9.204496325118893e-06, |
|
"loss": 0.9075, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.3929961089494163, |
|
"grad_norm": 36.490447998046875, |
|
"learning_rate": 9.202334630350194e-06, |
|
"loss": 0.8567, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 2.3994811932555122, |
|
"grad_norm": 55.931556701660156, |
|
"learning_rate": 9.200172935581496e-06, |
|
"loss": 0.8783, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.405966277561608, |
|
"grad_norm": 78.6571044921875, |
|
"learning_rate": 9.198011240812798e-06, |
|
"loss": 0.8902, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 2.412451361867704, |
|
"grad_norm": 106.48160552978516, |
|
"learning_rate": 9.195849546044099e-06, |
|
"loss": 0.8735, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.4189364461738, |
|
"grad_norm": 160.64849853515625, |
|
"learning_rate": 9.1936878512754e-06, |
|
"loss": 0.8662, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 2.425421530479896, |
|
"grad_norm": 97.8504867553711, |
|
"learning_rate": 9.191526156506702e-06, |
|
"loss": 0.8682, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.4319066147859925, |
|
"grad_norm": 70.43258666992188, |
|
"learning_rate": 9.189364461738002e-06, |
|
"loss": 0.8945, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 2.4383916990920884, |
|
"grad_norm": 112.30128479003906, |
|
"learning_rate": 9.187202766969304e-06, |
|
"loss": 0.8751, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.4448767833981844, |
|
"grad_norm": 112.90283203125, |
|
"learning_rate": 9.185041072200605e-06, |
|
"loss": 0.8573, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 2.4513618677042803, |
|
"grad_norm": 36.05859375, |
|
"learning_rate": 9.182879377431907e-06, |
|
"loss": 0.8304, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.4578469520103763, |
|
"grad_norm": 72.84355163574219, |
|
"learning_rate": 9.180717682663209e-06, |
|
"loss": 0.8208, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 2.4643320363164722, |
|
"grad_norm": 125.35198974609375, |
|
"learning_rate": 9.17855598789451e-06, |
|
"loss": 0.8643, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.470817120622568, |
|
"grad_norm": 93.8465805053711, |
|
"learning_rate": 9.176394293125812e-06, |
|
"loss": 0.8591, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 2.477302204928664, |
|
"grad_norm": 114.83902740478516, |
|
"learning_rate": 9.174232598357112e-06, |
|
"loss": 0.836, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.48378728923476, |
|
"grad_norm": 61.47188949584961, |
|
"learning_rate": 9.172070903588413e-06, |
|
"loss": 0.8594, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 2.490272373540856, |
|
"grad_norm": 81.23229217529297, |
|
"learning_rate": 9.169909208819715e-06, |
|
"loss": 0.8223, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.496757457846952, |
|
"grad_norm": 143.3751678466797, |
|
"learning_rate": 9.167747514051016e-06, |
|
"loss": 0.8492, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 2.503242542153048, |
|
"grad_norm": 75.92655181884766, |
|
"learning_rate": 9.165585819282318e-06, |
|
"loss": 0.834, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.509727626459144, |
|
"grad_norm": 67.34745788574219, |
|
"learning_rate": 9.16342412451362e-06, |
|
"loss": 0.8322, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 2.51621271076524, |
|
"grad_norm": 125.6097640991211, |
|
"learning_rate": 9.161262429744921e-06, |
|
"loss": 0.8121, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.522697795071336, |
|
"grad_norm": 104.31269836425781, |
|
"learning_rate": 9.159100734976221e-06, |
|
"loss": 0.8242, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 2.529182879377432, |
|
"grad_norm": 88.86971282958984, |
|
"learning_rate": 9.156939040207523e-06, |
|
"loss": 0.8465, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.5356679636835278, |
|
"grad_norm": 84.49606323242188, |
|
"learning_rate": 9.154777345438824e-06, |
|
"loss": 0.8228, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 2.5421530479896237, |
|
"grad_norm": 72.06951904296875, |
|
"learning_rate": 9.152615650670126e-06, |
|
"loss": 0.8321, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.5486381322957197, |
|
"grad_norm": 51.27252197265625, |
|
"learning_rate": 9.150453955901428e-06, |
|
"loss": 0.8377, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 2.5551232166018156, |
|
"grad_norm": 82.98815155029297, |
|
"learning_rate": 9.148292261132729e-06, |
|
"loss": 0.8577, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.5616083009079116, |
|
"grad_norm": 86.29476928710938, |
|
"learning_rate": 9.146130566364029e-06, |
|
"loss": 0.838, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 2.5680933852140075, |
|
"grad_norm": 201.86570739746094, |
|
"learning_rate": 9.14396887159533e-06, |
|
"loss": 0.8341, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.5745784695201035, |
|
"grad_norm": 48.80326461791992, |
|
"learning_rate": 9.141807176826632e-06, |
|
"loss": 0.8183, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 2.5810635538262, |
|
"grad_norm": 123.20867156982422, |
|
"learning_rate": 9.139645482057934e-06, |
|
"loss": 0.8041, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.587548638132296, |
|
"grad_norm": 77.76668548583984, |
|
"learning_rate": 9.137483787289235e-06, |
|
"loss": 0.8027, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 2.594033722438392, |
|
"grad_norm": 60.8740119934082, |
|
"learning_rate": 9.135322092520537e-06, |
|
"loss": 0.8354, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.6005188067444878, |
|
"grad_norm": 33.433929443359375, |
|
"learning_rate": 9.133160397751839e-06, |
|
"loss": 0.8297, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 2.6070038910505837, |
|
"grad_norm": 101.6844253540039, |
|
"learning_rate": 9.130998702983139e-06, |
|
"loss": 0.8595, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.6134889753566797, |
|
"grad_norm": 56.76240921020508, |
|
"learning_rate": 9.12883700821444e-06, |
|
"loss": 0.8318, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 2.6199740596627756, |
|
"grad_norm": 77.91346740722656, |
|
"learning_rate": 9.126675313445742e-06, |
|
"loss": 0.8171, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.6264591439688716, |
|
"grad_norm": 22.083127975463867, |
|
"learning_rate": 9.124513618677043e-06, |
|
"loss": 0.8436, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 2.6329442282749675, |
|
"grad_norm": 32.83180618286133, |
|
"learning_rate": 9.122351923908345e-06, |
|
"loss": 0.8413, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.6394293125810635, |
|
"grad_norm": 80.33685302734375, |
|
"learning_rate": 9.120190229139646e-06, |
|
"loss": 0.8307, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 2.6459143968871595, |
|
"grad_norm": 106.72901916503906, |
|
"learning_rate": 9.118028534370948e-06, |
|
"loss": 0.8143, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.6523994811932554, |
|
"grad_norm": 84.93223571777344, |
|
"learning_rate": 9.115866839602248e-06, |
|
"loss": 0.8212, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 2.6588845654993514, |
|
"grad_norm": 100.1551513671875, |
|
"learning_rate": 9.11370514483355e-06, |
|
"loss": 0.8289, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.6653696498054473, |
|
"grad_norm": 86.93508911132812, |
|
"learning_rate": 9.111543450064851e-06, |
|
"loss": 0.8047, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 2.6718547341115433, |
|
"grad_norm": 43.016624450683594, |
|
"learning_rate": 9.109381755296153e-06, |
|
"loss": 0.7988, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.6783398184176397, |
|
"grad_norm": 310.767822265625, |
|
"learning_rate": 9.107220060527454e-06, |
|
"loss": 0.8227, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 2.6848249027237356, |
|
"grad_norm": 82.60010528564453, |
|
"learning_rate": 9.105058365758756e-06, |
|
"loss": 0.8148, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.6913099870298316, |
|
"grad_norm": 76.9372329711914, |
|
"learning_rate": 9.102896670990058e-06, |
|
"loss": 0.7776, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 2.6977950713359276, |
|
"grad_norm": 41.984886169433594, |
|
"learning_rate": 9.100734976221357e-06, |
|
"loss": 0.8093, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.7042801556420235, |
|
"grad_norm": 58.13618850708008, |
|
"learning_rate": 9.098573281452659e-06, |
|
"loss": 0.8415, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 2.7107652399481195, |
|
"grad_norm": 66.05621337890625, |
|
"learning_rate": 9.09641158668396e-06, |
|
"loss": 0.8096, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.7172503242542154, |
|
"grad_norm": 63.902557373046875, |
|
"learning_rate": 9.094249891915262e-06, |
|
"loss": 0.7865, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 2.7237354085603114, |
|
"grad_norm": 41.3662109375, |
|
"learning_rate": 9.092088197146564e-06, |
|
"loss": 0.8213, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.7302204928664073, |
|
"grad_norm": 53.82701873779297, |
|
"learning_rate": 9.089926502377865e-06, |
|
"loss": 0.8267, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 2.7367055771725033, |
|
"grad_norm": 76.71524047851562, |
|
"learning_rate": 9.087764807609167e-06, |
|
"loss": 0.8065, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.7431906614785992, |
|
"grad_norm": 34.62066650390625, |
|
"learning_rate": 9.085603112840467e-06, |
|
"loss": 0.764, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 2.749675745784695, |
|
"grad_norm": 165.2742462158203, |
|
"learning_rate": 9.083441418071769e-06, |
|
"loss": 0.7828, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.756160830090791, |
|
"grad_norm": 82.91865539550781, |
|
"learning_rate": 9.08127972330307e-06, |
|
"loss": 0.8355, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 2.762645914396887, |
|
"grad_norm": 60.068851470947266, |
|
"learning_rate": 9.079118028534372e-06, |
|
"loss": 0.8015, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.769130998702983, |
|
"grad_norm": 194.20948791503906, |
|
"learning_rate": 9.076956333765673e-06, |
|
"loss": 0.8166, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 2.775616083009079, |
|
"grad_norm": 49.6822509765625, |
|
"learning_rate": 9.074794638996975e-06, |
|
"loss": 0.7728, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.782101167315175, |
|
"grad_norm": 73.5209732055664, |
|
"learning_rate": 9.072632944228276e-06, |
|
"loss": 0.7917, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 2.788586251621271, |
|
"grad_norm": 156.21685791015625, |
|
"learning_rate": 9.070471249459576e-06, |
|
"loss": 0.7691, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.795071335927367, |
|
"grad_norm": 85.61043548583984, |
|
"learning_rate": 9.068309554690878e-06, |
|
"loss": 0.7911, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 2.801556420233463, |
|
"grad_norm": 144.1258087158203, |
|
"learning_rate": 9.06614785992218e-06, |
|
"loss": 0.7966, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.808041504539559, |
|
"grad_norm": 45.8646125793457, |
|
"learning_rate": 9.063986165153481e-06, |
|
"loss": 0.8261, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 2.8145265888456548, |
|
"grad_norm": 58.49191665649414, |
|
"learning_rate": 9.061824470384783e-06, |
|
"loss": 0.8226, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.8210116731517507, |
|
"grad_norm": 105.04296112060547, |
|
"learning_rate": 9.059662775616084e-06, |
|
"loss": 0.7782, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 2.8274967574578467, |
|
"grad_norm": 62.90886688232422, |
|
"learning_rate": 9.057501080847386e-06, |
|
"loss": 0.7693, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.833981841763943, |
|
"grad_norm": 79.02916717529297, |
|
"learning_rate": 9.055339386078686e-06, |
|
"loss": 0.7863, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 2.840466926070039, |
|
"grad_norm": 92.87028503417969, |
|
"learning_rate": 9.053177691309987e-06, |
|
"loss": 0.7804, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.846952010376135, |
|
"grad_norm": 88.81787872314453, |
|
"learning_rate": 9.051015996541289e-06, |
|
"loss": 0.802, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 2.853437094682231, |
|
"grad_norm": 140.72811889648438, |
|
"learning_rate": 9.04885430177259e-06, |
|
"loss": 0.801, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.859922178988327, |
|
"grad_norm": 190.2725067138672, |
|
"learning_rate": 9.046692607003892e-06, |
|
"loss": 0.793, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 2.866407263294423, |
|
"grad_norm": 122.08084869384766, |
|
"learning_rate": 9.044530912235194e-06, |
|
"loss": 0.7703, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 2.872892347600519, |
|
"grad_norm": 217.95184326171875, |
|
"learning_rate": 9.042369217466494e-06, |
|
"loss": 0.8127, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 2.8793774319066148, |
|
"grad_norm": 71.10440826416016, |
|
"learning_rate": 9.040207522697795e-06, |
|
"loss": 0.7741, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.8858625162127107, |
|
"grad_norm": 101.68942260742188, |
|
"learning_rate": 9.038045827929097e-06, |
|
"loss": 0.7949, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 2.8923476005188067, |
|
"grad_norm": 55.40034484863281, |
|
"learning_rate": 9.035884133160399e-06, |
|
"loss": 0.7572, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 2.8988326848249026, |
|
"grad_norm": 33.14478302001953, |
|
"learning_rate": 9.0337224383917e-06, |
|
"loss": 0.7708, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 2.9053177691309986, |
|
"grad_norm": 182.9443359375, |
|
"learning_rate": 9.031560743623002e-06, |
|
"loss": 0.7756, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.9118028534370946, |
|
"grad_norm": 55.46072769165039, |
|
"learning_rate": 9.029399048854303e-06, |
|
"loss": 0.8147, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 2.9182879377431905, |
|
"grad_norm": 122.65208435058594, |
|
"learning_rate": 9.027237354085603e-06, |
|
"loss": 0.7468, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.924773022049287, |
|
"grad_norm": 127.9378662109375, |
|
"learning_rate": 9.025075659316905e-06, |
|
"loss": 0.7679, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 2.931258106355383, |
|
"grad_norm": 105.78032684326172, |
|
"learning_rate": 9.022913964548206e-06, |
|
"loss": 0.7804, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.937743190661479, |
|
"grad_norm": 24.228551864624023, |
|
"learning_rate": 9.020752269779508e-06, |
|
"loss": 0.7805, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 2.9442282749675748, |
|
"grad_norm": 106.55142974853516, |
|
"learning_rate": 9.01859057501081e-06, |
|
"loss": 0.7818, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 2.9507133592736707, |
|
"grad_norm": 188.49441528320312, |
|
"learning_rate": 9.016428880242111e-06, |
|
"loss": 0.7815, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 2.9571984435797667, |
|
"grad_norm": 130.7115478515625, |
|
"learning_rate": 9.014267185473413e-06, |
|
"loss": 0.7834, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.9636835278858626, |
|
"grad_norm": 46.354881286621094, |
|
"learning_rate": 9.012105490704713e-06, |
|
"loss": 0.7945, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 2.9701686121919586, |
|
"grad_norm": 52.4910774230957, |
|
"learning_rate": 9.009943795936014e-06, |
|
"loss": 0.769, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 2.9766536964980546, |
|
"grad_norm": 231.7021026611328, |
|
"learning_rate": 9.007782101167316e-06, |
|
"loss": 0.7773, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 2.9831387808041505, |
|
"grad_norm": 68.80513763427734, |
|
"learning_rate": 9.005620406398617e-06, |
|
"loss": 0.7665, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.9896238651102465, |
|
"grad_norm": 21.473207473754883, |
|
"learning_rate": 9.003458711629919e-06, |
|
"loss": 0.7801, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 2.9961089494163424, |
|
"grad_norm": 120.36124420166016, |
|
"learning_rate": 9.00129701686122e-06, |
|
"loss": 0.7948, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 3.0025940337224384, |
|
"grad_norm": 74.29264831542969, |
|
"learning_rate": 8.999135322092522e-06, |
|
"loss": 0.7434, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 3.0090791180285343, |
|
"grad_norm": 93.22494506835938, |
|
"learning_rate": 8.996973627323822e-06, |
|
"loss": 0.7349, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 3.0155642023346303, |
|
"grad_norm": 30.082307815551758, |
|
"learning_rate": 8.994811932555124e-06, |
|
"loss": 0.7339, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 3.0220492866407263, |
|
"grad_norm": 31.523271560668945, |
|
"learning_rate": 8.992650237786425e-06, |
|
"loss": 0.7256, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 3.028534370946822, |
|
"grad_norm": 43.012237548828125, |
|
"learning_rate": 8.990488543017727e-06, |
|
"loss": 0.7657, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 3.035019455252918, |
|
"grad_norm": 55.685081481933594, |
|
"learning_rate": 8.988326848249028e-06, |
|
"loss": 0.7587, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 3.041504539559014, |
|
"grad_norm": 51.72869110107422, |
|
"learning_rate": 8.98616515348033e-06, |
|
"loss": 0.7697, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 3.04798962386511, |
|
"grad_norm": 49.6856689453125, |
|
"learning_rate": 8.984003458711632e-06, |
|
"loss": 0.7699, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.054474708171206, |
|
"grad_norm": 62.46233367919922, |
|
"learning_rate": 8.981841763942932e-06, |
|
"loss": 0.7785, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 3.060959792477302, |
|
"grad_norm": 129.84275817871094, |
|
"learning_rate": 8.979680069174233e-06, |
|
"loss": 0.7474, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 3.0674448767833984, |
|
"grad_norm": 28.303911209106445, |
|
"learning_rate": 8.977518374405535e-06, |
|
"loss": 0.7629, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 3.0739299610894943, |
|
"grad_norm": 74.46251678466797, |
|
"learning_rate": 8.975356679636836e-06, |
|
"loss": 0.7633, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 3.0804150453955903, |
|
"grad_norm": 27.983522415161133, |
|
"learning_rate": 8.973194984868138e-06, |
|
"loss": 0.7769, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 3.0869001297016863, |
|
"grad_norm": 71.08908081054688, |
|
"learning_rate": 8.97103329009944e-06, |
|
"loss": 0.7656, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 3.093385214007782, |
|
"grad_norm": 100.88603210449219, |
|
"learning_rate": 8.968871595330741e-06, |
|
"loss": 0.7546, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 3.099870298313878, |
|
"grad_norm": 159.69082641601562, |
|
"learning_rate": 8.966709900562041e-06, |
|
"loss": 0.7591, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 3.106355382619974, |
|
"grad_norm": 28.74492073059082, |
|
"learning_rate": 8.964548205793343e-06, |
|
"loss": 0.7779, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 3.11284046692607, |
|
"grad_norm": 86.59606170654297, |
|
"learning_rate": 8.962386511024644e-06, |
|
"loss": 0.7592, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.119325551232166, |
|
"grad_norm": 77.73062133789062, |
|
"learning_rate": 8.960224816255946e-06, |
|
"loss": 0.7932, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 3.125810635538262, |
|
"grad_norm": 82.81999969482422, |
|
"learning_rate": 8.958063121487247e-06, |
|
"loss": 0.7468, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 3.132295719844358, |
|
"grad_norm": 106.86148834228516, |
|
"learning_rate": 8.955901426718549e-06, |
|
"loss": 0.7473, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 3.138780804150454, |
|
"grad_norm": 73.26065063476562, |
|
"learning_rate": 8.95373973194985e-06, |
|
"loss": 0.7653, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 3.14526588845655, |
|
"grad_norm": 154.48199462890625, |
|
"learning_rate": 8.95157803718115e-06, |
|
"loss": 0.7799, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 3.151750972762646, |
|
"grad_norm": 165.397216796875, |
|
"learning_rate": 8.949416342412452e-06, |
|
"loss": 0.7668, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 3.1582360570687418, |
|
"grad_norm": 54.25576400756836, |
|
"learning_rate": 8.947254647643754e-06, |
|
"loss": 0.7501, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 3.1647211413748377, |
|
"grad_norm": 78.98974609375, |
|
"learning_rate": 8.945092952875055e-06, |
|
"loss": 0.7546, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 3.1712062256809337, |
|
"grad_norm": 69.79071807861328, |
|
"learning_rate": 8.942931258106357e-06, |
|
"loss": 0.7455, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 3.1776913099870296, |
|
"grad_norm": 99.46908569335938, |
|
"learning_rate": 8.940769563337658e-06, |
|
"loss": 0.7438, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.184176394293126, |
|
"grad_norm": 87.56387329101562, |
|
"learning_rate": 8.938607868568958e-06, |
|
"loss": 0.7421, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 3.190661478599222, |
|
"grad_norm": 53.633941650390625, |
|
"learning_rate": 8.93644617380026e-06, |
|
"loss": 0.7625, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 3.197146562905318, |
|
"grad_norm": 108.66197967529297, |
|
"learning_rate": 8.934284479031562e-06, |
|
"loss": 0.7474, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 3.203631647211414, |
|
"grad_norm": 62.14433670043945, |
|
"learning_rate": 8.932122784262863e-06, |
|
"loss": 0.7359, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 3.21011673151751, |
|
"grad_norm": 110.50857543945312, |
|
"learning_rate": 8.929961089494165e-06, |
|
"loss": 0.7402, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 3.216601815823606, |
|
"grad_norm": 36.320377349853516, |
|
"learning_rate": 8.927799394725466e-06, |
|
"loss": 0.7385, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 3.223086900129702, |
|
"grad_norm": 119.52420043945312, |
|
"learning_rate": 8.925637699956768e-06, |
|
"loss": 0.755, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 3.2295719844357977, |
|
"grad_norm": 229.50978088378906, |
|
"learning_rate": 8.923476005188068e-06, |
|
"loss": 0.7408, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 3.2360570687418937, |
|
"grad_norm": 29.48551368713379, |
|
"learning_rate": 8.92131431041937e-06, |
|
"loss": 0.7422, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 3.2425421530479897, |
|
"grad_norm": 77.79827880859375, |
|
"learning_rate": 8.919152615650671e-06, |
|
"loss": 0.748, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.2490272373540856, |
|
"grad_norm": 58.29311752319336, |
|
"learning_rate": 8.916990920881973e-06, |
|
"loss": 0.7363, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 3.2555123216601816, |
|
"grad_norm": 22.339330673217773, |
|
"learning_rate": 8.914829226113274e-06, |
|
"loss": 0.7558, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 3.2619974059662775, |
|
"grad_norm": 154.0586700439453, |
|
"learning_rate": 8.912667531344576e-06, |
|
"loss": 0.7527, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 3.2684824902723735, |
|
"grad_norm": 33.30474090576172, |
|
"learning_rate": 8.910505836575877e-06, |
|
"loss": 0.7338, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 3.2749675745784694, |
|
"grad_norm": 70.1267318725586, |
|
"learning_rate": 8.908344141807177e-06, |
|
"loss": 0.7626, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 3.2814526588845654, |
|
"grad_norm": 214.113525390625, |
|
"learning_rate": 8.906182447038479e-06, |
|
"loss": 0.7451, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 3.2879377431906613, |
|
"grad_norm": 83.08194732666016, |
|
"learning_rate": 8.90402075226978e-06, |
|
"loss": 0.7545, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 3.2944228274967573, |
|
"grad_norm": 100.41940307617188, |
|
"learning_rate": 8.90185905750108e-06, |
|
"loss": 0.7442, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 3.3009079118028533, |
|
"grad_norm": 67.69851684570312, |
|
"learning_rate": 8.899697362732382e-06, |
|
"loss": 0.7333, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 3.307392996108949, |
|
"grad_norm": 35.9471549987793, |
|
"learning_rate": 8.897535667963684e-06, |
|
"loss": 0.7444, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.313878080415045, |
|
"grad_norm": 192.07264709472656, |
|
"learning_rate": 8.895373973194985e-06, |
|
"loss": 0.7427, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 3.3203631647211416, |
|
"grad_norm": 71.07801055908203, |
|
"learning_rate": 8.893212278426287e-06, |
|
"loss": 0.762, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 3.3268482490272375, |
|
"grad_norm": 94.97274780273438, |
|
"learning_rate": 8.891050583657588e-06, |
|
"loss": 0.7571, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 64.86588287353516, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.7267, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 3.3398184176394294, |
|
"grad_norm": 46.446414947509766, |
|
"learning_rate": 8.88672719412019e-06, |
|
"loss": 0.7433, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 3.3463035019455254, |
|
"grad_norm": 141.70608520507812, |
|
"learning_rate": 8.884565499351491e-06, |
|
"loss": 0.7115, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 3.3527885862516213, |
|
"grad_norm": 131.68763732910156, |
|
"learning_rate": 8.882403804582793e-06, |
|
"loss": 0.7456, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 3.3592736705577173, |
|
"grad_norm": 44.90886306762695, |
|
"learning_rate": 8.880242109814095e-06, |
|
"loss": 0.7671, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 3.3657587548638133, |
|
"grad_norm": 76.7698974609375, |
|
"learning_rate": 8.878080415045396e-06, |
|
"loss": 0.7414, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 3.372243839169909, |
|
"grad_norm": 73.65957641601562, |
|
"learning_rate": 8.875918720276698e-06, |
|
"loss": 0.7381, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.378728923476005, |
|
"grad_norm": 98.279052734375, |
|
"learning_rate": 8.873757025507998e-06, |
|
"loss": 0.7289, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 3.385214007782101, |
|
"grad_norm": 87.40727233886719, |
|
"learning_rate": 8.8715953307393e-06, |
|
"loss": 0.7374, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 3.391699092088197, |
|
"grad_norm": 147.8469696044922, |
|
"learning_rate": 8.869433635970601e-06, |
|
"loss": 0.7193, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 3.398184176394293, |
|
"grad_norm": 57.17820358276367, |
|
"learning_rate": 8.867271941201903e-06, |
|
"loss": 0.7522, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 3.404669260700389, |
|
"grad_norm": 527.1165771484375, |
|
"learning_rate": 8.865110246433204e-06, |
|
"loss": 0.7249, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 3.411154345006485, |
|
"grad_norm": 110.1869125366211, |
|
"learning_rate": 8.862948551664506e-06, |
|
"loss": 0.7226, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 3.417639429312581, |
|
"grad_norm": 86.62249755859375, |
|
"learning_rate": 8.860786856895807e-06, |
|
"loss": 0.7605, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 3.424124513618677, |
|
"grad_norm": 53.44112014770508, |
|
"learning_rate": 8.858625162127107e-06, |
|
"loss": 0.7259, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 3.4306095979247733, |
|
"grad_norm": 53.45317840576172, |
|
"learning_rate": 8.856463467358409e-06, |
|
"loss": 0.7322, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 3.4370946822308692, |
|
"grad_norm": 75.9814682006836, |
|
"learning_rate": 8.85430177258971e-06, |
|
"loss": 0.7389, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.443579766536965, |
|
"grad_norm": 72.01563262939453, |
|
"learning_rate": 8.852140077821012e-06, |
|
"loss": 0.7388, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 3.450064850843061, |
|
"grad_norm": 108.14093017578125, |
|
"learning_rate": 8.849978383052314e-06, |
|
"loss": 0.7414, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.456549935149157, |
|
"grad_norm": 146.429443359375, |
|
"learning_rate": 8.847816688283615e-06, |
|
"loss": 0.7322, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 3.463035019455253, |
|
"grad_norm": 254.16734313964844, |
|
"learning_rate": 8.845654993514917e-06, |
|
"loss": 0.7494, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 3.469520103761349, |
|
"grad_norm": 186.4697265625, |
|
"learning_rate": 8.843493298746217e-06, |
|
"loss": 0.7497, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 3.476005188067445, |
|
"grad_norm": 110.53705596923828, |
|
"learning_rate": 8.841331603977518e-06, |
|
"loss": 0.7513, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.482490272373541, |
|
"grad_norm": 95.7660903930664, |
|
"learning_rate": 8.83916990920882e-06, |
|
"loss": 0.7256, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 3.488975356679637, |
|
"grad_norm": 60.745643615722656, |
|
"learning_rate": 8.837008214440121e-06, |
|
"loss": 0.7224, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 3.495460440985733, |
|
"grad_norm": 40.43708419799805, |
|
"learning_rate": 8.834846519671423e-06, |
|
"loss": 0.7239, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 3.501945525291829, |
|
"grad_norm": 42.59388732910156, |
|
"learning_rate": 8.832684824902725e-06, |
|
"loss": 0.7199, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.5084306095979247, |
|
"grad_norm": 71.25556945800781, |
|
"learning_rate": 8.830523130134026e-06, |
|
"loss": 0.7256, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 3.5149156939040207, |
|
"grad_norm": 92.77458190917969, |
|
"learning_rate": 8.828361435365326e-06, |
|
"loss": 0.7285, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 3.5214007782101167, |
|
"grad_norm": 52.927757263183594, |
|
"learning_rate": 8.826199740596628e-06, |
|
"loss": 0.7478, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 3.5278858625162126, |
|
"grad_norm": 71.94493865966797, |
|
"learning_rate": 8.82403804582793e-06, |
|
"loss": 0.7345, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.5343709468223086, |
|
"grad_norm": 58.30330276489258, |
|
"learning_rate": 8.821876351059231e-06, |
|
"loss": 0.6936, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 3.5408560311284045, |
|
"grad_norm": 54.03791046142578, |
|
"learning_rate": 8.819714656290533e-06, |
|
"loss": 0.7284, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 3.5473411154345005, |
|
"grad_norm": 86.29717254638672, |
|
"learning_rate": 8.817552961521834e-06, |
|
"loss": 0.7216, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 3.5538261997405964, |
|
"grad_norm": 105.19668579101562, |
|
"learning_rate": 8.815391266753136e-06, |
|
"loss": 0.7469, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.5603112840466924, |
|
"grad_norm": 174.84385681152344, |
|
"learning_rate": 8.813229571984436e-06, |
|
"loss": 0.7122, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 3.5667963683527883, |
|
"grad_norm": 88.76931762695312, |
|
"learning_rate": 8.811067877215737e-06, |
|
"loss": 0.7361, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.5732814526588843, |
|
"grad_norm": 238.61947631835938, |
|
"learning_rate": 8.808906182447039e-06, |
|
"loss": 0.7396, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 3.5797665369649807, |
|
"grad_norm": 105.16651916503906, |
|
"learning_rate": 8.80674448767834e-06, |
|
"loss": 0.7316, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.5862516212710767, |
|
"grad_norm": 219.0015869140625, |
|
"learning_rate": 8.804582792909642e-06, |
|
"loss": 0.7254, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 3.5927367055771726, |
|
"grad_norm": 140.21543884277344, |
|
"learning_rate": 8.802421098140944e-06, |
|
"loss": 0.7555, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 3.5992217898832686, |
|
"grad_norm": 89.52685546875, |
|
"learning_rate": 8.800259403372245e-06, |
|
"loss": 0.7362, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 3.6057068741893645, |
|
"grad_norm": 59.332977294921875, |
|
"learning_rate": 8.798097708603545e-06, |
|
"loss": 0.7284, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.6121919584954605, |
|
"grad_norm": 110.76482391357422, |
|
"learning_rate": 8.795936013834847e-06, |
|
"loss": 0.6894, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 3.6186770428015564, |
|
"grad_norm": 38.199073791503906, |
|
"learning_rate": 8.793774319066148e-06, |
|
"loss": 0.6988, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 3.6251621271076524, |
|
"grad_norm": 63.44047927856445, |
|
"learning_rate": 8.79161262429745e-06, |
|
"loss": 0.734, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 3.6316472114137484, |
|
"grad_norm": 121.4446029663086, |
|
"learning_rate": 8.789450929528751e-06, |
|
"loss": 0.7129, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.6381322957198443, |
|
"grad_norm": 70.12100982666016, |
|
"learning_rate": 8.787289234760053e-06, |
|
"loss": 0.6867, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 3.6446173800259403, |
|
"grad_norm": 51.042972564697266, |
|
"learning_rate": 8.785127539991353e-06, |
|
"loss": 0.7204, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 3.6511024643320362, |
|
"grad_norm": 43.0015869140625, |
|
"learning_rate": 8.782965845222655e-06, |
|
"loss": 0.7225, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 3.657587548638132, |
|
"grad_norm": 59.59611129760742, |
|
"learning_rate": 8.780804150453956e-06, |
|
"loss": 0.7149, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.664072632944228, |
|
"grad_norm": 25.105127334594727, |
|
"learning_rate": 8.778642455685258e-06, |
|
"loss": 0.6864, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 3.670557717250324, |
|
"grad_norm": 62.92705154418945, |
|
"learning_rate": 8.77648076091656e-06, |
|
"loss": 0.7048, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 3.6770428015564205, |
|
"grad_norm": 154.20318603515625, |
|
"learning_rate": 8.774319066147861e-06, |
|
"loss": 0.6617, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 3.6835278858625164, |
|
"grad_norm": 212.035400390625, |
|
"learning_rate": 8.772157371379163e-06, |
|
"loss": 0.6981, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.6900129701686124, |
|
"grad_norm": 98.92573547363281, |
|
"learning_rate": 8.769995676610462e-06, |
|
"loss": 0.7024, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 3.6964980544747084, |
|
"grad_norm": 136.00390625, |
|
"learning_rate": 8.767833981841764e-06, |
|
"loss": 0.7532, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.7029831387808043, |
|
"grad_norm": 249.03781127929688, |
|
"learning_rate": 8.765672287073066e-06, |
|
"loss": 0.6805, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 3.7094682230869003, |
|
"grad_norm": 37.31251525878906, |
|
"learning_rate": 8.763510592304367e-06, |
|
"loss": 0.7054, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.7159533073929962, |
|
"grad_norm": 75.17498779296875, |
|
"learning_rate": 8.761348897535669e-06, |
|
"loss": 0.7303, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 3.722438391699092, |
|
"grad_norm": 146.90443420410156, |
|
"learning_rate": 8.75918720276697e-06, |
|
"loss": 0.7149, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 3.728923476005188, |
|
"grad_norm": 37.123870849609375, |
|
"learning_rate": 8.757025507998272e-06, |
|
"loss": 0.7096, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 3.735408560311284, |
|
"grad_norm": 54.98661422729492, |
|
"learning_rate": 8.754863813229572e-06, |
|
"loss": 0.7424, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.74189364461738, |
|
"grad_norm": 135.15431213378906, |
|
"learning_rate": 8.752702118460874e-06, |
|
"loss": 0.7179, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 3.748378728923476, |
|
"grad_norm": 159.3280792236328, |
|
"learning_rate": 8.750540423692175e-06, |
|
"loss": 0.7466, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 3.754863813229572, |
|
"grad_norm": 111.12368774414062, |
|
"learning_rate": 8.748378728923477e-06, |
|
"loss": 0.71, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 3.761348897535668, |
|
"grad_norm": 95.70431518554688, |
|
"learning_rate": 8.746217034154778e-06, |
|
"loss": 0.6719, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.767833981841764, |
|
"grad_norm": 116.32410430908203, |
|
"learning_rate": 8.74405533938608e-06, |
|
"loss": 0.6959, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 3.77431906614786, |
|
"grad_norm": 48.57170867919922, |
|
"learning_rate": 8.741893644617381e-06, |
|
"loss": 0.7021, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 3.780804150453956, |
|
"grad_norm": 145.74124145507812, |
|
"learning_rate": 8.739731949848681e-06, |
|
"loss": 0.6982, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 3.7872892347600517, |
|
"grad_norm": 110.97146606445312, |
|
"learning_rate": 8.737570255079983e-06, |
|
"loss": 0.6707, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.7937743190661477, |
|
"grad_norm": 80.89407348632812, |
|
"learning_rate": 8.735408560311285e-06, |
|
"loss": 0.704, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 3.8002594033722437, |
|
"grad_norm": 117.62003326416016, |
|
"learning_rate": 8.733246865542586e-06, |
|
"loss": 0.737, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 3.8067444876783396, |
|
"grad_norm": 236.39186096191406, |
|
"learning_rate": 8.731085170773888e-06, |
|
"loss": 0.6954, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 3.8132295719844356, |
|
"grad_norm": 204.6386260986328, |
|
"learning_rate": 8.72892347600519e-06, |
|
"loss": 0.7258, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.8197146562905315, |
|
"grad_norm": 175.2502899169922, |
|
"learning_rate": 8.726761781236491e-06, |
|
"loss": 0.7116, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 3.8261997405966275, |
|
"grad_norm": 64.19542694091797, |
|
"learning_rate": 8.724600086467791e-06, |
|
"loss": 0.7335, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.832684824902724, |
|
"grad_norm": 67.48596954345703, |
|
"learning_rate": 8.722438391699092e-06, |
|
"loss": 0.6889, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 3.83916990920882, |
|
"grad_norm": 87.38389587402344, |
|
"learning_rate": 8.720276696930394e-06, |
|
"loss": 0.6961, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.845654993514916, |
|
"grad_norm": 42.56321334838867, |
|
"learning_rate": 8.718115002161696e-06, |
|
"loss": 0.7061, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 3.8521400778210118, |
|
"grad_norm": 104.84762573242188, |
|
"learning_rate": 8.715953307392997e-06, |
|
"loss": 0.6809, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 3.8586251621271077, |
|
"grad_norm": 84.26802062988281, |
|
"learning_rate": 8.713791612624299e-06, |
|
"loss": 0.6955, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 3.8651102464332037, |
|
"grad_norm": 76.20053100585938, |
|
"learning_rate": 8.7116299178556e-06, |
|
"loss": 0.6869, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.8715953307392996, |
|
"grad_norm": 52.06394958496094, |
|
"learning_rate": 8.7094682230869e-06, |
|
"loss": 0.6838, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 3.8780804150453956, |
|
"grad_norm": 31.091880798339844, |
|
"learning_rate": 8.707306528318202e-06, |
|
"loss": 0.6914, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 3.8845654993514915, |
|
"grad_norm": 154.26475524902344, |
|
"learning_rate": 8.705144833549504e-06, |
|
"loss": 0.7303, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 3.8910505836575875, |
|
"grad_norm": 70.6423568725586, |
|
"learning_rate": 8.702983138780805e-06, |
|
"loss": 0.6856, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.8975356679636834, |
|
"grad_norm": 70.91290283203125, |
|
"learning_rate": 8.700821444012107e-06, |
|
"loss": 0.6972, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 3.9040207522697794, |
|
"grad_norm": 87.57475280761719, |
|
"learning_rate": 8.698659749243408e-06, |
|
"loss": 0.6854, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 3.9105058365758754, |
|
"grad_norm": 63.372528076171875, |
|
"learning_rate": 8.69649805447471e-06, |
|
"loss": 0.6723, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 3.9169909208819713, |
|
"grad_norm": 108.40290069580078, |
|
"learning_rate": 8.69433635970601e-06, |
|
"loss": 0.686, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.9234760051880677, |
|
"grad_norm": 134.44715881347656, |
|
"learning_rate": 8.692174664937311e-06, |
|
"loss": 0.6884, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 3.9299610894941637, |
|
"grad_norm": 45.21245574951172, |
|
"learning_rate": 8.690012970168613e-06, |
|
"loss": 0.6974, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 3.9364461738002596, |
|
"grad_norm": 64.46482849121094, |
|
"learning_rate": 8.687851275399915e-06, |
|
"loss": 0.6928, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 3.9429312581063556, |
|
"grad_norm": 113.75922393798828, |
|
"learning_rate": 8.685689580631216e-06, |
|
"loss": 0.7125, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.9494163424124515, |
|
"grad_norm": 106.91778564453125, |
|
"learning_rate": 8.683527885862518e-06, |
|
"loss": 0.6689, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 3.9559014267185475, |
|
"grad_norm": 145.61880493164062, |
|
"learning_rate": 8.681366191093818e-06, |
|
"loss": 0.7005, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.9623865110246435, |
|
"grad_norm": 147.24017333984375, |
|
"learning_rate": 8.67920449632512e-06, |
|
"loss": 0.6987, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 3.9688715953307394, |
|
"grad_norm": 86.30076599121094, |
|
"learning_rate": 8.677042801556421e-06, |
|
"loss": 0.7059, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.9753566796368354, |
|
"grad_norm": 118.67623138427734, |
|
"learning_rate": 8.674881106787722e-06, |
|
"loss": 0.6777, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 3.9818417639429313, |
|
"grad_norm": 48.108436584472656, |
|
"learning_rate": 8.672719412019024e-06, |
|
"loss": 0.6791, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 3.9883268482490273, |
|
"grad_norm": 81.96046447753906, |
|
"learning_rate": 8.670557717250326e-06, |
|
"loss": 0.6913, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 3.9948119325551232, |
|
"grad_norm": 197.388916015625, |
|
"learning_rate": 8.668396022481627e-06, |
|
"loss": 0.7043, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 4.001297016861219, |
|
"grad_norm": 35.324703216552734, |
|
"learning_rate": 8.666234327712927e-06, |
|
"loss": 0.7125, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 4.007782101167315, |
|
"grad_norm": 105.50518035888672, |
|
"learning_rate": 8.664072632944229e-06, |
|
"loss": 0.707, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 4.014267185473411, |
|
"grad_norm": 23.028858184814453, |
|
"learning_rate": 8.66191093817553e-06, |
|
"loss": 0.6371, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 4.020752269779507, |
|
"grad_norm": 72.48033142089844, |
|
"learning_rate": 8.659749243406832e-06, |
|
"loss": 0.6719, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.027237354085603, |
|
"grad_norm": 186.94964599609375, |
|
"learning_rate": 8.657587548638134e-06, |
|
"loss": 0.657, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 4.033722438391699, |
|
"grad_norm": 77.0679702758789, |
|
"learning_rate": 8.655425853869435e-06, |
|
"loss": 0.6947, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 4.040207522697795, |
|
"grad_norm": 44.03890609741211, |
|
"learning_rate": 8.653264159100737e-06, |
|
"loss": 0.6778, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 4.046692607003891, |
|
"grad_norm": 71.50305938720703, |
|
"learning_rate": 8.651102464332037e-06, |
|
"loss": 0.6779, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 4.053177691309987, |
|
"grad_norm": 81.2274398803711, |
|
"learning_rate": 8.648940769563338e-06, |
|
"loss": 0.6696, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 4.059662775616083, |
|
"grad_norm": 169.8217315673828, |
|
"learning_rate": 8.64677907479464e-06, |
|
"loss": 0.6846, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 4.066147859922179, |
|
"grad_norm": 87.5166244506836, |
|
"learning_rate": 8.644617380025941e-06, |
|
"loss": 0.6826, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 4.072632944228275, |
|
"grad_norm": 189.89439392089844, |
|
"learning_rate": 8.642455685257243e-06, |
|
"loss": 0.6891, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 4.079118028534371, |
|
"grad_norm": 34.17830276489258, |
|
"learning_rate": 8.640293990488545e-06, |
|
"loss": 0.7317, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 4.085603112840467, |
|
"grad_norm": 135.60418701171875, |
|
"learning_rate": 8.638132295719846e-06, |
|
"loss": 0.6752, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.092088197146563, |
|
"grad_norm": 81.31814575195312, |
|
"learning_rate": 8.635970600951146e-06, |
|
"loss": 0.7003, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 4.0985732814526585, |
|
"grad_norm": 302.2116394042969, |
|
"learning_rate": 8.633808906182448e-06, |
|
"loss": 0.6662, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 4.1050583657587545, |
|
"grad_norm": 36.41209030151367, |
|
"learning_rate": 8.63164721141375e-06, |
|
"loss": 0.6726, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 4.1115434500648504, |
|
"grad_norm": 164.67007446289062, |
|
"learning_rate": 8.629485516645051e-06, |
|
"loss": 0.7068, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 4.118028534370946, |
|
"grad_norm": 43.526405334472656, |
|
"learning_rate": 8.627323821876352e-06, |
|
"loss": 0.6729, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 4.124513618677042, |
|
"grad_norm": 110.07795715332031, |
|
"learning_rate": 8.625162127107654e-06, |
|
"loss": 0.683, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 4.130998702983139, |
|
"grad_norm": 89.71601867675781, |
|
"learning_rate": 8.623000432338956e-06, |
|
"loss": 0.6792, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 4.137483787289235, |
|
"grad_norm": 101.19843292236328, |
|
"learning_rate": 8.620838737570256e-06, |
|
"loss": 0.6505, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 4.143968871595331, |
|
"grad_norm": 230.81871032714844, |
|
"learning_rate": 8.618677042801557e-06, |
|
"loss": 0.6748, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 4.150453955901427, |
|
"grad_norm": 114.94778442382812, |
|
"learning_rate": 8.616515348032859e-06, |
|
"loss": 0.6755, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.156939040207523, |
|
"grad_norm": 34.266761779785156, |
|
"learning_rate": 8.61435365326416e-06, |
|
"loss": 0.6572, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 4.163424124513619, |
|
"grad_norm": 80.04161071777344, |
|
"learning_rate": 8.612191958495462e-06, |
|
"loss": 0.6804, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 4.169909208819715, |
|
"grad_norm": 350.19573974609375, |
|
"learning_rate": 8.610030263726764e-06, |
|
"loss": 0.6955, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 4.176394293125811, |
|
"grad_norm": 83.74986267089844, |
|
"learning_rate": 8.607868568958065e-06, |
|
"loss": 0.6856, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 4.182879377431907, |
|
"grad_norm": 137.72669982910156, |
|
"learning_rate": 8.605706874189365e-06, |
|
"loss": 0.6541, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 4.189364461738003, |
|
"grad_norm": 45.202903747558594, |
|
"learning_rate": 8.603545179420667e-06, |
|
"loss": 0.679, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 4.195849546044099, |
|
"grad_norm": 191.8456268310547, |
|
"learning_rate": 8.601383484651968e-06, |
|
"loss": 0.6594, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 4.202334630350195, |
|
"grad_norm": 152.2100830078125, |
|
"learning_rate": 8.59922178988327e-06, |
|
"loss": 0.6553, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 4.208819714656291, |
|
"grad_norm": 56.737754821777344, |
|
"learning_rate": 8.597060095114571e-06, |
|
"loss": 0.6915, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 4.215304798962387, |
|
"grad_norm": 49.64228057861328, |
|
"learning_rate": 8.594898400345873e-06, |
|
"loss": 0.6773, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.221789883268483, |
|
"grad_norm": 57.6026496887207, |
|
"learning_rate": 8.592736705577173e-06, |
|
"loss": 0.7089, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 4.2282749675745785, |
|
"grad_norm": 58.62641143798828, |
|
"learning_rate": 8.590575010808474e-06, |
|
"loss": 0.6433, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 4.2347600518806745, |
|
"grad_norm": 101.03966522216797, |
|
"learning_rate": 8.588413316039776e-06, |
|
"loss": 0.6496, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 4.2412451361867705, |
|
"grad_norm": 198.1434326171875, |
|
"learning_rate": 8.586251621271076e-06, |
|
"loss": 0.6757, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 4.247730220492866, |
|
"grad_norm": 78.59976196289062, |
|
"learning_rate": 8.584089926502378e-06, |
|
"loss": 0.6913, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 4.254215304798962, |
|
"grad_norm": 94.35735321044922, |
|
"learning_rate": 8.58192823173368e-06, |
|
"loss": 0.6982, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 4.260700389105058, |
|
"grad_norm": 140.77769470214844, |
|
"learning_rate": 8.57976653696498e-06, |
|
"loss": 0.6753, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 4.267185473411154, |
|
"grad_norm": 59.85847091674805, |
|
"learning_rate": 8.577604842196282e-06, |
|
"loss": 0.644, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 4.27367055771725, |
|
"grad_norm": 49.44724655151367, |
|
"learning_rate": 8.575443147427584e-06, |
|
"loss": 0.676, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 4.280155642023346, |
|
"grad_norm": 58.50251007080078, |
|
"learning_rate": 8.573281452658886e-06, |
|
"loss": 0.6919, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.286640726329442, |
|
"grad_norm": 52.0682258605957, |
|
"learning_rate": 8.571119757890185e-06, |
|
"loss": 0.6517, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 4.293125810635538, |
|
"grad_norm": 100.81246948242188, |
|
"learning_rate": 8.568958063121487e-06, |
|
"loss": 0.7115, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 4.299610894941634, |
|
"grad_norm": 126.4149398803711, |
|
"learning_rate": 8.566796368352789e-06, |
|
"loss": 0.6621, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 4.30609597924773, |
|
"grad_norm": 82.7846908569336, |
|
"learning_rate": 8.56463467358409e-06, |
|
"loss": 0.6653, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 4.312581063553826, |
|
"grad_norm": 111.23580932617188, |
|
"learning_rate": 8.562472978815392e-06, |
|
"loss": 0.6779, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 4.319066147859922, |
|
"grad_norm": 88.04605102539062, |
|
"learning_rate": 8.560311284046693e-06, |
|
"loss": 0.6984, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 4.325551232166018, |
|
"grad_norm": 34.93830871582031, |
|
"learning_rate": 8.558149589277995e-06, |
|
"loss": 0.6901, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 4.332036316472114, |
|
"grad_norm": 86.28446197509766, |
|
"learning_rate": 8.555987894509295e-06, |
|
"loss": 0.6808, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 4.33852140077821, |
|
"grad_norm": 52.025169372558594, |
|
"learning_rate": 8.553826199740596e-06, |
|
"loss": 0.6769, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 4.345006485084306, |
|
"grad_norm": 103.3537826538086, |
|
"learning_rate": 8.551664504971898e-06, |
|
"loss": 0.674, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.351491569390402, |
|
"grad_norm": 131.0025634765625, |
|
"learning_rate": 8.5495028102032e-06, |
|
"loss": 0.6634, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 4.357976653696498, |
|
"grad_norm": 36.6743049621582, |
|
"learning_rate": 8.547341115434501e-06, |
|
"loss": 0.6771, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 4.364461738002594, |
|
"grad_norm": 91.82353210449219, |
|
"learning_rate": 8.545179420665803e-06, |
|
"loss": 0.6709, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 4.3709468223086905, |
|
"grad_norm": 67.49322509765625, |
|
"learning_rate": 8.543017725897104e-06, |
|
"loss": 0.6381, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 4.377431906614786, |
|
"grad_norm": 53.42247772216797, |
|
"learning_rate": 8.540856031128404e-06, |
|
"loss": 0.6886, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 4.383916990920882, |
|
"grad_norm": 221.73178100585938, |
|
"learning_rate": 8.538694336359706e-06, |
|
"loss": 0.6354, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 4.390402075226978, |
|
"grad_norm": 103.88397216796875, |
|
"learning_rate": 8.536532641591008e-06, |
|
"loss": 0.6807, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 4.396887159533074, |
|
"grad_norm": 45.40660858154297, |
|
"learning_rate": 8.53437094682231e-06, |
|
"loss": 0.6404, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 4.40337224383917, |
|
"grad_norm": 65.8223876953125, |
|
"learning_rate": 8.53220925205361e-06, |
|
"loss": 0.6567, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 4.409857328145266, |
|
"grad_norm": 245.63230895996094, |
|
"learning_rate": 8.530047557284912e-06, |
|
"loss": 0.6437, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.416342412451362, |
|
"grad_norm": 125.60919952392578, |
|
"learning_rate": 8.527885862516212e-06, |
|
"loss": 0.647, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 4.422827496757458, |
|
"grad_norm": 147.76620483398438, |
|
"learning_rate": 8.525724167747514e-06, |
|
"loss": 0.6771, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 4.429312581063554, |
|
"grad_norm": 118.33441925048828, |
|
"learning_rate": 8.523562472978815e-06, |
|
"loss": 0.6826, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 4.43579766536965, |
|
"grad_norm": 289.2904052734375, |
|
"learning_rate": 8.521400778210117e-06, |
|
"loss": 0.6607, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 4.442282749675746, |
|
"grad_norm": 53.50255584716797, |
|
"learning_rate": 8.519239083441419e-06, |
|
"loss": 0.6409, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 4.448767833981842, |
|
"grad_norm": 133.48831176757812, |
|
"learning_rate": 8.51707738867272e-06, |
|
"loss": 0.6691, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 4.455252918287938, |
|
"grad_norm": 119.14691925048828, |
|
"learning_rate": 8.514915693904022e-06, |
|
"loss": 0.6595, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 4.461738002594034, |
|
"grad_norm": 323.05889892578125, |
|
"learning_rate": 8.512753999135322e-06, |
|
"loss": 0.6648, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 4.46822308690013, |
|
"grad_norm": 193.7076873779297, |
|
"learning_rate": 8.510592304366623e-06, |
|
"loss": 0.6661, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 4.474708171206226, |
|
"grad_norm": 85.69574737548828, |
|
"learning_rate": 8.508430609597925e-06, |
|
"loss": 0.6683, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.481193255512322, |
|
"grad_norm": 23.649465560913086, |
|
"learning_rate": 8.506268914829226e-06, |
|
"loss": 0.6423, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 4.487678339818418, |
|
"grad_norm": 109.6485366821289, |
|
"learning_rate": 8.504107220060528e-06, |
|
"loss": 0.6924, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 4.494163424124514, |
|
"grad_norm": 76.71481323242188, |
|
"learning_rate": 8.50194552529183e-06, |
|
"loss": 0.6041, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 4.50064850843061, |
|
"grad_norm": 33.22921371459961, |
|
"learning_rate": 8.499783830523131e-06, |
|
"loss": 0.6208, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 4.5071335927367056, |
|
"grad_norm": 47.12236022949219, |
|
"learning_rate": 8.497622135754431e-06, |
|
"loss": 0.6477, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 4.5136186770428015, |
|
"grad_norm": 51.28311538696289, |
|
"learning_rate": 8.495460440985733e-06, |
|
"loss": 0.628, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 4.5201037613488975, |
|
"grad_norm": 25.358299255371094, |
|
"learning_rate": 8.493298746217034e-06, |
|
"loss": 0.6582, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 4.526588845654993, |
|
"grad_norm": 97.27490997314453, |
|
"learning_rate": 8.491137051448336e-06, |
|
"loss": 0.6382, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 4.533073929961089, |
|
"grad_norm": 176.92462158203125, |
|
"learning_rate": 8.488975356679638e-06, |
|
"loss": 0.6489, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 4.539559014267185, |
|
"grad_norm": 46.83137130737305, |
|
"learning_rate": 8.486813661910939e-06, |
|
"loss": 0.6509, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.546044098573281, |
|
"grad_norm": 27.511350631713867, |
|
"learning_rate": 8.48465196714224e-06, |
|
"loss": 0.6425, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 4.552529182879377, |
|
"grad_norm": 85.7640609741211, |
|
"learning_rate": 8.48249027237354e-06, |
|
"loss": 0.6727, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 4.559014267185473, |
|
"grad_norm": 246.8522491455078, |
|
"learning_rate": 8.480328577604842e-06, |
|
"loss": 0.6551, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 4.565499351491569, |
|
"grad_norm": 145.3149871826172, |
|
"learning_rate": 8.478166882836144e-06, |
|
"loss": 0.655, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 4.571984435797665, |
|
"grad_norm": 98.9753189086914, |
|
"learning_rate": 8.476005188067445e-06, |
|
"loss": 0.6396, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 4.578469520103761, |
|
"grad_norm": 103.30072021484375, |
|
"learning_rate": 8.473843493298747e-06, |
|
"loss": 0.6491, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 4.584954604409857, |
|
"grad_norm": 46.122684478759766, |
|
"learning_rate": 8.471681798530049e-06, |
|
"loss": 0.6505, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 4.591439688715953, |
|
"grad_norm": 183.2648468017578, |
|
"learning_rate": 8.46952010376135e-06, |
|
"loss": 0.6504, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 4.597924773022049, |
|
"grad_norm": 37.44175338745117, |
|
"learning_rate": 8.46735840899265e-06, |
|
"loss": 0.6747, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 4.604409857328145, |
|
"grad_norm": 42.08739471435547, |
|
"learning_rate": 8.465196714223952e-06, |
|
"loss": 0.6365, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.610894941634241, |
|
"grad_norm": 86.90052032470703, |
|
"learning_rate": 8.463035019455253e-06, |
|
"loss": 0.6565, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 4.617380025940337, |
|
"grad_norm": 134.0282440185547, |
|
"learning_rate": 8.460873324686555e-06, |
|
"loss": 0.6702, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 4.623865110246433, |
|
"grad_norm": 47.65680694580078, |
|
"learning_rate": 8.458711629917856e-06, |
|
"loss": 0.6805, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 4.630350194552529, |
|
"grad_norm": 74.25086212158203, |
|
"learning_rate": 8.456549935149158e-06, |
|
"loss": 0.697, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 4.636835278858625, |
|
"grad_norm": 245.19024658203125, |
|
"learning_rate": 8.45438824038046e-06, |
|
"loss": 0.6648, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 4.6433203631647215, |
|
"grad_norm": 59.609580993652344, |
|
"learning_rate": 8.45222654561176e-06, |
|
"loss": 0.6457, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 4.6498054474708175, |
|
"grad_norm": 68.63418579101562, |
|
"learning_rate": 8.450064850843061e-06, |
|
"loss": 0.6434, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 4.656290531776913, |
|
"grad_norm": 153.90467834472656, |
|
"learning_rate": 8.447903156074363e-06, |
|
"loss": 0.6385, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 4.662775616083009, |
|
"grad_norm": 68.64386749267578, |
|
"learning_rate": 8.445741461305664e-06, |
|
"loss": 0.6758, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 4.669260700389105, |
|
"grad_norm": 103.12224578857422, |
|
"learning_rate": 8.443579766536966e-06, |
|
"loss": 0.6526, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.675745784695201, |
|
"grad_norm": 35.21643829345703, |
|
"learning_rate": 8.441418071768268e-06, |
|
"loss": 0.6469, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 4.682230869001297, |
|
"grad_norm": 48.4489631652832, |
|
"learning_rate": 8.439256376999569e-06, |
|
"loss": 0.6483, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 4.688715953307393, |
|
"grad_norm": 181.4416046142578, |
|
"learning_rate": 8.437094682230869e-06, |
|
"loss": 0.6434, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 4.695201037613489, |
|
"grad_norm": 153.2976837158203, |
|
"learning_rate": 8.43493298746217e-06, |
|
"loss": 0.6435, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 4.701686121919585, |
|
"grad_norm": 80.14440155029297, |
|
"learning_rate": 8.432771292693472e-06, |
|
"loss": 0.6385, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 4.708171206225681, |
|
"grad_norm": 30.52111053466797, |
|
"learning_rate": 8.430609597924774e-06, |
|
"loss": 0.6292, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 4.714656290531777, |
|
"grad_norm": 192.3052520751953, |
|
"learning_rate": 8.428447903156075e-06, |
|
"loss": 0.6395, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 4.721141374837873, |
|
"grad_norm": 105.61079406738281, |
|
"learning_rate": 8.426286208387377e-06, |
|
"loss": 0.6241, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 4.727626459143969, |
|
"grad_norm": 111.08782196044922, |
|
"learning_rate": 8.424124513618679e-06, |
|
"loss": 0.6408, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 4.734111543450065, |
|
"grad_norm": 140.2386932373047, |
|
"learning_rate": 8.421962818849979e-06, |
|
"loss": 0.63, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 4.740596627756161, |
|
"grad_norm": 104.75723266601562, |
|
"learning_rate": 8.41980112408128e-06, |
|
"loss": 0.651, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 4.747081712062257, |
|
"grad_norm": 38.98159408569336, |
|
"learning_rate": 8.417639429312582e-06, |
|
"loss": 0.6435, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.753566796368353, |
|
"grad_norm": 250.14450073242188, |
|
"learning_rate": 8.415477734543883e-06, |
|
"loss": 0.6344, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 4.760051880674449, |
|
"grad_norm": 50.4091796875, |
|
"learning_rate": 8.413316039775185e-06, |
|
"loss": 0.6409, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 4.766536964980545, |
|
"grad_norm": 146.32968139648438, |
|
"learning_rate": 8.411154345006486e-06, |
|
"loss": 0.6509, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 4.773022049286641, |
|
"grad_norm": 95.01649475097656, |
|
"learning_rate": 8.408992650237786e-06, |
|
"loss": 0.6637, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 4.779507133592737, |
|
"grad_norm": 52.520076751708984, |
|
"learning_rate": 8.406830955469088e-06, |
|
"loss": 0.6525, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 4.785992217898833, |
|
"grad_norm": 106.26171112060547, |
|
"learning_rate": 8.40466926070039e-06, |
|
"loss": 0.6759, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 4.7924773022049285, |
|
"grad_norm": 82.12842559814453, |
|
"learning_rate": 8.402507565931691e-06, |
|
"loss": 0.6305, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 4.7989623865110245, |
|
"grad_norm": 110.25924682617188, |
|
"learning_rate": 8.400345871162993e-06, |
|
"loss": 0.6231, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.80544747081712, |
|
"grad_norm": 61.60184860229492, |
|
"learning_rate": 8.398184176394294e-06, |
|
"loss": 0.636, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 4.811932555123216, |
|
"grad_norm": 106.20768737792969, |
|
"learning_rate": 8.396022481625596e-06, |
|
"loss": 0.663, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 4.818417639429312, |
|
"grad_norm": 24.003427505493164, |
|
"learning_rate": 8.393860786856896e-06, |
|
"loss": 0.6515, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 4.824902723735408, |
|
"grad_norm": 162.2716522216797, |
|
"learning_rate": 8.391699092088197e-06, |
|
"loss": 0.6714, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.831387808041504, |
|
"grad_norm": 246.6392059326172, |
|
"learning_rate": 8.389537397319499e-06, |
|
"loss": 0.6578, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 4.8378728923476, |
|
"grad_norm": 122.14068603515625, |
|
"learning_rate": 8.3873757025508e-06, |
|
"loss": 0.6262, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 4.844357976653696, |
|
"grad_norm": 33.177120208740234, |
|
"learning_rate": 8.385214007782102e-06, |
|
"loss": 0.6666, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 4.850843060959792, |
|
"grad_norm": 106.25698852539062, |
|
"learning_rate": 8.383052313013404e-06, |
|
"loss": 0.6421, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 4.857328145265888, |
|
"grad_norm": 119.15618133544922, |
|
"learning_rate": 8.380890618244705e-06, |
|
"loss": 0.6409, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 4.863813229571985, |
|
"grad_norm": 82.73539733886719, |
|
"learning_rate": 8.378728923476005e-06, |
|
"loss": 0.6363, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 4.870298313878081, |
|
"grad_norm": 37.429141998291016, |
|
"learning_rate": 8.376567228707307e-06, |
|
"loss": 0.6361, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 4.876783398184177, |
|
"grad_norm": 152.13327026367188, |
|
"learning_rate": 8.374405533938609e-06, |
|
"loss": 0.6386, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 4.883268482490273, |
|
"grad_norm": 57.97270584106445, |
|
"learning_rate": 8.37224383916991e-06, |
|
"loss": 0.6475, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 4.889753566796369, |
|
"grad_norm": 87.77135467529297, |
|
"learning_rate": 8.370082144401212e-06, |
|
"loss": 0.6235, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 4.896238651102465, |
|
"grad_norm": 206.52565002441406, |
|
"learning_rate": 8.367920449632513e-06, |
|
"loss": 0.6211, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 4.902723735408561, |
|
"grad_norm": 115.79866027832031, |
|
"learning_rate": 8.365758754863815e-06, |
|
"loss": 0.6338, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.909208819714657, |
|
"grad_norm": 147.11058044433594, |
|
"learning_rate": 8.363597060095115e-06, |
|
"loss": 0.6615, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 4.915693904020753, |
|
"grad_norm": 107.96685028076172, |
|
"learning_rate": 8.361435365326416e-06, |
|
"loss": 0.6437, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 4.9221789883268485, |
|
"grad_norm": 75.72913360595703, |
|
"learning_rate": 8.359273670557718e-06, |
|
"loss": 0.6257, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 4.9286640726329445, |
|
"grad_norm": 217.0497283935547, |
|
"learning_rate": 8.35711197578902e-06, |
|
"loss": 0.6603, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.93514915693904, |
|
"grad_norm": 40.60713577270508, |
|
"learning_rate": 8.354950281020321e-06, |
|
"loss": 0.6293, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 4.941634241245136, |
|
"grad_norm": 71.73409271240234, |
|
"learning_rate": 8.352788586251623e-06, |
|
"loss": 0.6409, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 4.948119325551232, |
|
"grad_norm": 91.70991516113281, |
|
"learning_rate": 8.350626891482924e-06, |
|
"loss": 0.6299, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 4.954604409857328, |
|
"grad_norm": 117.5611572265625, |
|
"learning_rate": 8.348465196714224e-06, |
|
"loss": 0.6322, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 4.961089494163424, |
|
"grad_norm": 119.25588989257812, |
|
"learning_rate": 8.346303501945526e-06, |
|
"loss": 0.6419, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 4.96757457846952, |
|
"grad_norm": 215.7095184326172, |
|
"learning_rate": 8.344141807176827e-06, |
|
"loss": 0.6331, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 4.974059662775616, |
|
"grad_norm": 63.63528060913086, |
|
"learning_rate": 8.341980112408129e-06, |
|
"loss": 0.6632, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 4.980544747081712, |
|
"grad_norm": 44.25017547607422, |
|
"learning_rate": 8.33981841763943e-06, |
|
"loss": 0.6519, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.987029831387808, |
|
"grad_norm": 68.59965515136719, |
|
"learning_rate": 8.337656722870732e-06, |
|
"loss": 0.637, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 4.993514915693904, |
|
"grad_norm": 136.69644165039062, |
|
"learning_rate": 8.335495028102034e-06, |
|
"loss": 0.6145, |
|
"step": 38500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 231300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 38548, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4735422916204544e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|