|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5936927565773118, |
|
"eval_steps": 500, |
|
"global_step": 120000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 296.6432189941406, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 36.1442, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 282.62713623046875, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 36.2463, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 271.8738098144531, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 36.3589, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 287.5734558105469, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 36.0855, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 259.08685302734375, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 35.8685, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 292.2701416015625, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 35.4046, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 307.0222473144531, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 34.6875, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 274.7489929199219, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 34.1517, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 240.46612548828125, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 33.4424, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 268.32684326171875, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 32.5807, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 258.87274169921875, |
|
"learning_rate": 2.2e-06, |
|
"loss": 30.8752, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 288.45611572265625, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 29.5351, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 261.72149658203125, |
|
"learning_rate": 2.6e-06, |
|
"loss": 27.398, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 331.3612365722656, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 24.6465, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 215.5654296875, |
|
"learning_rate": 3e-06, |
|
"loss": 21.7979, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 222.94651794433594, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 18.6465, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 252.55087280273438, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 15.7462, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 273.9644470214844, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 13.9379, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 205.786376953125, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 12.2574, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 151.63124084472656, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 9.865, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 120.38298034667969, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 8.9936, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 93.93321990966797, |
|
"learning_rate": 4.4e-06, |
|
"loss": 8.3415, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 102.68135833740234, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 7.4711, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 250.72817993164062, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 6.5714, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 249.8506317138672, |
|
"learning_rate": 5e-06, |
|
"loss": 5.9448, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 66.83155059814453, |
|
"learning_rate": 5.2e-06, |
|
"loss": 5.6368, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 51.391082763671875, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 4.8538, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 50.51924133300781, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 4.5733, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 33.91701889038086, |
|
"learning_rate": 5.8e-06, |
|
"loss": 4.1586, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 41.544532775878906, |
|
"learning_rate": 6e-06, |
|
"loss": 3.8914, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 44.3348274230957, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 3.4145, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 27.11107063293457, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 3.2646, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 144.55479431152344, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 3.1211, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 16.845191955566406, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 3.0997, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.76279067993164, |
|
"learning_rate": 7e-06, |
|
"loss": 2.7326, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.799291610717773, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 2.743, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.540129661560059, |
|
"learning_rate": 7.4e-06, |
|
"loss": 2.6056, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.966181755065918, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 2.4526, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 16.517465591430664, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 2.419, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.533743858337402, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.3781, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.458216667175293, |
|
"learning_rate": 8.2e-06, |
|
"loss": 2.296, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.576855182647705, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 2.269, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.14406681060791, |
|
"learning_rate": 8.6e-06, |
|
"loss": 2.2047, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.389999389648438, |
|
"learning_rate": 8.8e-06, |
|
"loss": 2.1709, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.2235689163208, |
|
"learning_rate": 9e-06, |
|
"loss": 2.0764, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 19.56966209411621, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 2.0616, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 14.260141372680664, |
|
"learning_rate": 9.4e-06, |
|
"loss": 2.0158, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.950816631317139, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 1.9903, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.523265361785889, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 1.9903, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.682758331298828, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0119, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.691493988037109, |
|
"learning_rate": 9.99937486520531e-06, |
|
"loss": 1.966, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.045505046844482, |
|
"learning_rate": 9.99874973041062e-06, |
|
"loss": 1.9175, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.446534633636475, |
|
"learning_rate": 9.99812459561593e-06, |
|
"loss": 1.9086, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.22329044342041, |
|
"learning_rate": 9.99749946082124e-06, |
|
"loss": 1.8706, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.028206825256348, |
|
"learning_rate": 9.99687432602655e-06, |
|
"loss": 1.8183, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.474781036376953, |
|
"learning_rate": 9.99624919123186e-06, |
|
"loss": 1.9045, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.7177886962890625, |
|
"learning_rate": 9.99562405643717e-06, |
|
"loss": 1.8141, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.469454288482666, |
|
"learning_rate": 9.99499892164248e-06, |
|
"loss": 1.8079, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.826772689819336, |
|
"learning_rate": 9.99437378684779e-06, |
|
"loss": 1.852, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.368248462677002, |
|
"learning_rate": 9.9937486520531e-06, |
|
"loss": 1.8207, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.686717987060547, |
|
"learning_rate": 9.99312351725841e-06, |
|
"loss": 1.7868, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.562761306762695, |
|
"learning_rate": 9.99249838246372e-06, |
|
"loss": 1.8201, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.788825511932373, |
|
"learning_rate": 9.99187324766903e-06, |
|
"loss": 1.7568, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.2414870262146, |
|
"learning_rate": 9.991248112874338e-06, |
|
"loss": 1.7635, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.775527477264404, |
|
"learning_rate": 9.99062297807965e-06, |
|
"loss": 1.7465, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.719698429107666, |
|
"learning_rate": 9.989997843284958e-06, |
|
"loss": 1.7776, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.103610515594482, |
|
"learning_rate": 9.98937270849027e-06, |
|
"loss": 1.7364, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.553420543670654, |
|
"learning_rate": 9.988747573695578e-06, |
|
"loss": 1.7341, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.687087535858154, |
|
"learning_rate": 9.98812243890089e-06, |
|
"loss": 1.7586, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.242082595825195, |
|
"learning_rate": 9.987497304106198e-06, |
|
"loss": 1.7255, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.58695650100708, |
|
"learning_rate": 9.98687216931151e-06, |
|
"loss": 1.724, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.764819622039795, |
|
"learning_rate": 9.986247034516818e-06, |
|
"loss": 1.6934, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.300253868103027, |
|
"learning_rate": 9.985621899722129e-06, |
|
"loss": 1.6773, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.917991638183594, |
|
"learning_rate": 9.984996764927437e-06, |
|
"loss": 1.7492, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 9.860074043273926, |
|
"learning_rate": 9.984371630132749e-06, |
|
"loss": 1.6835, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.517050743103027, |
|
"learning_rate": 9.983746495338059e-06, |
|
"loss": 1.6981, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.88366174697876, |
|
"learning_rate": 9.983121360543367e-06, |
|
"loss": 1.7226, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.612452983856201, |
|
"learning_rate": 9.982496225748679e-06, |
|
"loss": 1.6904, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.865972518920898, |
|
"learning_rate": 9.981871090953987e-06, |
|
"loss": 1.6969, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.375401973724365, |
|
"learning_rate": 9.981245956159299e-06, |
|
"loss": 1.6524, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.660288333892822, |
|
"learning_rate": 9.980620821364607e-06, |
|
"loss": 1.6866, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.874125957489014, |
|
"learning_rate": 9.979995686569918e-06, |
|
"loss": 1.6697, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.102114200592041, |
|
"learning_rate": 9.979370551775227e-06, |
|
"loss": 1.6429, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.398207664489746, |
|
"learning_rate": 9.978745416980538e-06, |
|
"loss": 1.6489, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 8.623647689819336, |
|
"learning_rate": 9.978120282185847e-06, |
|
"loss": 1.6278, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.62777853012085, |
|
"learning_rate": 9.977495147391158e-06, |
|
"loss": 1.6555, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.873004913330078, |
|
"learning_rate": 9.976870012596466e-06, |
|
"loss": 1.6624, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.456192493438721, |
|
"learning_rate": 9.976244877801778e-06, |
|
"loss": 1.6201, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.2842183113098145, |
|
"learning_rate": 9.975619743007086e-06, |
|
"loss": 1.6334, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.298410415649414, |
|
"learning_rate": 9.974994608212396e-06, |
|
"loss": 1.6472, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.887086391448975, |
|
"learning_rate": 9.974369473417706e-06, |
|
"loss": 1.6124, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.660162925720215, |
|
"learning_rate": 9.973744338623016e-06, |
|
"loss": 1.6131, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.750434398651123, |
|
"learning_rate": 9.973119203828326e-06, |
|
"loss": 1.6205, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.6416707038879395, |
|
"learning_rate": 9.972494069033636e-06, |
|
"loss": 1.619, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.29620361328125, |
|
"learning_rate": 9.971868934238946e-06, |
|
"loss": 1.6062, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.172244071960449, |
|
"learning_rate": 9.971243799444256e-06, |
|
"loss": 1.6139, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.9731390476226807, |
|
"learning_rate": 9.970618664649566e-06, |
|
"loss": 1.5706, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.9260454177856445, |
|
"learning_rate": 9.969993529854876e-06, |
|
"loss": 1.6224, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.228986740112305, |
|
"learning_rate": 9.969368395060186e-06, |
|
"loss": 1.5969, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.494061470031738, |
|
"learning_rate": 9.968743260265495e-06, |
|
"loss": 1.6129, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.893834590911865, |
|
"learning_rate": 9.968118125470805e-06, |
|
"loss": 1.5918, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.201370716094971, |
|
"learning_rate": 9.967492990676115e-06, |
|
"loss": 1.571, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.3033576011657715, |
|
"learning_rate": 9.966867855881425e-06, |
|
"loss": 1.6216, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.211835861206055, |
|
"learning_rate": 9.966242721086735e-06, |
|
"loss": 1.6006, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.3779730796813965, |
|
"learning_rate": 9.965617586292045e-06, |
|
"loss": 1.6184, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.778099060058594, |
|
"learning_rate": 9.964992451497355e-06, |
|
"loss": 1.5855, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 7.489856243133545, |
|
"learning_rate": 9.964367316702665e-06, |
|
"loss": 1.5827, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.601972579956055, |
|
"learning_rate": 9.963742181907975e-06, |
|
"loss": 1.6029, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.222909450531006, |
|
"learning_rate": 9.963117047113285e-06, |
|
"loss": 1.6027, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.561893939971924, |
|
"learning_rate": 9.962491912318595e-06, |
|
"loss": 1.592, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.908820152282715, |
|
"learning_rate": 9.961866777523905e-06, |
|
"loss": 1.612, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.185163974761963, |
|
"learning_rate": 9.961241642729215e-06, |
|
"loss": 1.5834, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.215177536010742, |
|
"learning_rate": 9.960616507934524e-06, |
|
"loss": 1.5741, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.018292427062988, |
|
"learning_rate": 9.959991373139834e-06, |
|
"loss": 1.5687, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.539705276489258, |
|
"learning_rate": 9.959366238345144e-06, |
|
"loss": 1.5967, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.168763637542725, |
|
"learning_rate": 9.958741103550454e-06, |
|
"loss": 1.5716, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.548024654388428, |
|
"learning_rate": 9.958115968755764e-06, |
|
"loss": 1.5576, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.756062030792236, |
|
"learning_rate": 9.957490833961072e-06, |
|
"loss": 1.5742, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.219858646392822, |
|
"learning_rate": 9.956865699166384e-06, |
|
"loss": 1.5807, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.460545063018799, |
|
"learning_rate": 9.956240564371692e-06, |
|
"loss": 1.5859, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.683807849884033, |
|
"learning_rate": 9.955615429577004e-06, |
|
"loss": 1.5769, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.083448886871338, |
|
"learning_rate": 9.954990294782312e-06, |
|
"loss": 1.5701, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.151342391967773, |
|
"learning_rate": 9.954365159987624e-06, |
|
"loss": 1.5834, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.23958158493042, |
|
"learning_rate": 9.953740025192932e-06, |
|
"loss": 1.5797, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.883495330810547, |
|
"learning_rate": 9.953114890398244e-06, |
|
"loss": 1.5584, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.5151190757751465, |
|
"learning_rate": 9.952489755603552e-06, |
|
"loss": 1.5723, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.234920024871826, |
|
"learning_rate": 9.951864620808863e-06, |
|
"loss": 1.5404, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.942254543304443, |
|
"learning_rate": 9.951239486014173e-06, |
|
"loss": 1.5373, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.068475723266602, |
|
"learning_rate": 9.950614351219483e-06, |
|
"loss": 1.5584, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.250300407409668, |
|
"learning_rate": 9.949989216424793e-06, |
|
"loss": 1.6023, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.279661178588867, |
|
"learning_rate": 9.949364081630101e-06, |
|
"loss": 1.5517, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.5876946449279785, |
|
"learning_rate": 9.948738946835413e-06, |
|
"loss": 1.5142, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.220710754394531, |
|
"learning_rate": 9.948113812040721e-06, |
|
"loss": 1.601, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.183436870574951, |
|
"learning_rate": 9.947488677246033e-06, |
|
"loss": 1.5508, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.20064640045166, |
|
"learning_rate": 9.946863542451341e-06, |
|
"loss": 1.532, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.151244640350342, |
|
"learning_rate": 9.946238407656653e-06, |
|
"loss": 1.5256, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.308995246887207, |
|
"learning_rate": 9.945613272861961e-06, |
|
"loss": 1.5349, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.477377891540527, |
|
"learning_rate": 9.944988138067273e-06, |
|
"loss": 1.5365, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.085025310516357, |
|
"learning_rate": 9.944363003272581e-06, |
|
"loss": 1.5038, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.769080638885498, |
|
"learning_rate": 9.943737868477893e-06, |
|
"loss": 1.5387, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.0054931640625, |
|
"learning_rate": 9.9431127336832e-06, |
|
"loss": 1.5018, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.712356090545654, |
|
"learning_rate": 9.942487598888512e-06, |
|
"loss": 1.5049, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.574007034301758, |
|
"learning_rate": 9.94186246409382e-06, |
|
"loss": 1.5316, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.079704761505127, |
|
"learning_rate": 9.94123732929913e-06, |
|
"loss": 1.527, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.1134490966796875, |
|
"learning_rate": 9.94061219450444e-06, |
|
"loss": 1.5472, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.486052989959717, |
|
"learning_rate": 9.93998705970975e-06, |
|
"loss": 1.5063, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.8249921798706055, |
|
"learning_rate": 9.93936192491506e-06, |
|
"loss": 1.5116, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.1462931632995605, |
|
"learning_rate": 9.93873679012037e-06, |
|
"loss": 1.5259, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.222506999969482, |
|
"learning_rate": 9.93811165532568e-06, |
|
"loss": 1.5378, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.8890185356140137, |
|
"learning_rate": 9.93748652053099e-06, |
|
"loss": 1.5424, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.006450176239014, |
|
"learning_rate": 9.9368613857363e-06, |
|
"loss": 1.5276, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.570881366729736, |
|
"learning_rate": 9.93623625094161e-06, |
|
"loss": 1.5173, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.1144633293151855, |
|
"learning_rate": 9.93561111614692e-06, |
|
"loss": 1.4905, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.034255027770996, |
|
"learning_rate": 9.93498598135223e-06, |
|
"loss": 1.5321, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.831255912780762, |
|
"learning_rate": 9.93436084655754e-06, |
|
"loss": 1.5297, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.800346851348877, |
|
"learning_rate": 9.93373571176285e-06, |
|
"loss": 1.5109, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.187744617462158, |
|
"learning_rate": 9.93311057696816e-06, |
|
"loss": 1.4896, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.569481372833252, |
|
"learning_rate": 9.93248544217347e-06, |
|
"loss": 1.5254, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.826055526733398, |
|
"learning_rate": 9.93186030737878e-06, |
|
"loss": 1.5003, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.592813491821289, |
|
"learning_rate": 9.93123517258409e-06, |
|
"loss": 1.5386, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.18519926071167, |
|
"learning_rate": 9.9306100377894e-06, |
|
"loss": 1.5192, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.737257719039917, |
|
"learning_rate": 9.92998490299471e-06, |
|
"loss": 1.5049, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.077634334564209, |
|
"learning_rate": 9.929359768200019e-06, |
|
"loss": 1.4786, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.7141683101654053, |
|
"learning_rate": 9.928734633405329e-06, |
|
"loss": 1.5008, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.656344413757324, |
|
"learning_rate": 9.928109498610639e-06, |
|
"loss": 1.4907, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.02158784866333, |
|
"learning_rate": 9.927484363815949e-06, |
|
"loss": 1.5436, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.094794273376465, |
|
"learning_rate": 9.926859229021259e-06, |
|
"loss": 1.5135, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.3504533767700195, |
|
"learning_rate": 9.926234094226569e-06, |
|
"loss": 1.4825, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.326951503753662, |
|
"learning_rate": 9.925608959431879e-06, |
|
"loss": 1.5042, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.341583728790283, |
|
"learning_rate": 9.924983824637187e-06, |
|
"loss": 1.5239, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.5446648597717285, |
|
"learning_rate": 9.924358689842499e-06, |
|
"loss": 1.5104, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.787079811096191, |
|
"learning_rate": 9.923733555047807e-06, |
|
"loss": 1.4917, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.259307384490967, |
|
"learning_rate": 9.923108420253118e-06, |
|
"loss": 1.5162, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.553911209106445, |
|
"learning_rate": 9.922483285458427e-06, |
|
"loss": 1.4805, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.846059322357178, |
|
"learning_rate": 9.921858150663738e-06, |
|
"loss": 1.51, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.385834217071533, |
|
"learning_rate": 9.921233015869046e-06, |
|
"loss": 1.4883, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.686222553253174, |
|
"learning_rate": 9.920607881074358e-06, |
|
"loss": 1.525, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.7362844944000244, |
|
"learning_rate": 9.919982746279666e-06, |
|
"loss": 1.4877, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.107117652893066, |
|
"learning_rate": 9.919357611484978e-06, |
|
"loss": 1.5353, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.318368911743164, |
|
"learning_rate": 9.918732476690288e-06, |
|
"loss": 1.4682, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.914721488952637, |
|
"learning_rate": 9.918107341895598e-06, |
|
"loss": 1.5261, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.494168281555176, |
|
"learning_rate": 9.917482207100908e-06, |
|
"loss": 1.5013, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.026334762573242, |
|
"learning_rate": 9.916857072306216e-06, |
|
"loss": 1.5093, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.3312907218933105, |
|
"learning_rate": 9.916231937511528e-06, |
|
"loss": 1.5224, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.9422335624694824, |
|
"learning_rate": 9.915606802716836e-06, |
|
"loss": 1.5059, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.773715496063232, |
|
"learning_rate": 9.914981667922147e-06, |
|
"loss": 1.5031, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.202546119689941, |
|
"learning_rate": 9.914356533127456e-06, |
|
"loss": 1.5133, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.315513610839844, |
|
"learning_rate": 9.913731398332767e-06, |
|
"loss": 1.5343, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.325439929962158, |
|
"learning_rate": 9.913106263538076e-06, |
|
"loss": 1.4993, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.189039707183838, |
|
"learning_rate": 9.912481128743387e-06, |
|
"loss": 1.4871, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.019628524780273, |
|
"learning_rate": 9.911855993948695e-06, |
|
"loss": 1.5294, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.682359457015991, |
|
"learning_rate": 9.911230859154007e-06, |
|
"loss": 1.4728, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.9543027877807617, |
|
"learning_rate": 9.910605724359315e-06, |
|
"loss": 1.473, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.9522476196289062, |
|
"learning_rate": 9.909980589564627e-06, |
|
"loss": 1.5023, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.9608728885650635, |
|
"learning_rate": 9.909355454769935e-06, |
|
"loss": 1.4532, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.24020528793335, |
|
"learning_rate": 9.908730319975245e-06, |
|
"loss": 1.4691, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.217301845550537, |
|
"learning_rate": 9.908105185180555e-06, |
|
"loss": 1.4704, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.651137113571167, |
|
"learning_rate": 9.907480050385865e-06, |
|
"loss": 1.4933, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.657069206237793, |
|
"learning_rate": 9.906854915591175e-06, |
|
"loss": 1.4778, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.250380992889404, |
|
"learning_rate": 9.906229780796485e-06, |
|
"loss": 1.4931, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.308979034423828, |
|
"learning_rate": 9.905604646001795e-06, |
|
"loss": 1.4946, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.854809284210205, |
|
"learning_rate": 9.904979511207105e-06, |
|
"loss": 1.4399, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.725897789001465, |
|
"learning_rate": 9.904354376412414e-06, |
|
"loss": 1.4711, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.014156818389893, |
|
"learning_rate": 9.903729241617724e-06, |
|
"loss": 1.5038, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.402193546295166, |
|
"learning_rate": 9.903104106823034e-06, |
|
"loss": 1.4607, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.356836795806885, |
|
"learning_rate": 9.902478972028344e-06, |
|
"loss": 1.4767, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.174656867980957, |
|
"learning_rate": 9.901853837233654e-06, |
|
"loss": 1.4675, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.668475866317749, |
|
"learning_rate": 9.901228702438964e-06, |
|
"loss": 1.4529, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.7700912952423096, |
|
"learning_rate": 9.900603567644274e-06, |
|
"loss": 1.4521, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.570835828781128, |
|
"learning_rate": 9.899978432849584e-06, |
|
"loss": 1.4323, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.7499380111694336, |
|
"learning_rate": 9.899353298054894e-06, |
|
"loss": 1.4644, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.8630640506744385, |
|
"learning_rate": 9.898728163260204e-06, |
|
"loss": 1.4801, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.1705145835876465, |
|
"learning_rate": 9.898103028465514e-06, |
|
"loss": 1.4649, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.312972545623779, |
|
"learning_rate": 9.897477893670824e-06, |
|
"loss": 1.4696, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.0083088874816895, |
|
"learning_rate": 9.896852758876134e-06, |
|
"loss": 1.4557, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.791517496109009, |
|
"learning_rate": 9.896227624081444e-06, |
|
"loss": 1.4507, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.635531425476074, |
|
"learning_rate": 9.895602489286753e-06, |
|
"loss": 1.4922, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.6790366172790527, |
|
"learning_rate": 9.894977354492063e-06, |
|
"loss": 1.456, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.739284038543701, |
|
"learning_rate": 9.894352219697373e-06, |
|
"loss": 1.4993, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.6052489280700684, |
|
"learning_rate": 9.893727084902683e-06, |
|
"loss": 1.4599, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.582137584686279, |
|
"learning_rate": 9.893101950107993e-06, |
|
"loss": 1.464, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.414693355560303, |
|
"learning_rate": 9.892476815313303e-06, |
|
"loss": 1.4383, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.012635707855225, |
|
"learning_rate": 9.891851680518613e-06, |
|
"loss": 1.4496, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.935889482498169, |
|
"learning_rate": 9.891226545723921e-06, |
|
"loss": 1.44, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.735189199447632, |
|
"learning_rate": 9.890601410929233e-06, |
|
"loss": 1.4396, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 7.265974998474121, |
|
"learning_rate": 9.889976276134541e-06, |
|
"loss": 1.4367, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.6876790523529053, |
|
"learning_rate": 9.889351141339853e-06, |
|
"loss": 1.4477, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.87882661819458, |
|
"learning_rate": 9.888726006545161e-06, |
|
"loss": 1.4687, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.071088790893555, |
|
"learning_rate": 9.888100871750473e-06, |
|
"loss": 1.5096, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.7322299480438232, |
|
"learning_rate": 9.887475736955783e-06, |
|
"loss": 1.4701, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.085651397705078, |
|
"learning_rate": 9.886850602161092e-06, |
|
"loss": 1.4518, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.951169729232788, |
|
"learning_rate": 9.886225467366402e-06, |
|
"loss": 1.4336, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.026634693145752, |
|
"learning_rate": 9.885600332571712e-06, |
|
"loss": 1.476, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.0271477699279785, |
|
"learning_rate": 9.884975197777022e-06, |
|
"loss": 1.4925, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.727571725845337, |
|
"learning_rate": 9.884350062982332e-06, |
|
"loss": 1.452, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.8245084285736084, |
|
"learning_rate": 9.883724928187642e-06, |
|
"loss": 1.4597, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.508169174194336, |
|
"learning_rate": 9.88309979339295e-06, |
|
"loss": 1.4481, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.080810070037842, |
|
"learning_rate": 9.882474658598262e-06, |
|
"loss": 1.4807, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.269100666046143, |
|
"learning_rate": 9.88184952380357e-06, |
|
"loss": 1.4108, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.10108757019043, |
|
"learning_rate": 9.881224389008882e-06, |
|
"loss": 1.4592, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.869362831115723, |
|
"learning_rate": 9.88059925421419e-06, |
|
"loss": 1.4687, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.7101335525512695, |
|
"learning_rate": 9.879974119419502e-06, |
|
"loss": 1.4748, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.676862955093384, |
|
"learning_rate": 9.87934898462481e-06, |
|
"loss": 1.4439, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.2332844734191895, |
|
"learning_rate": 9.878723849830121e-06, |
|
"loss": 1.4371, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.947660207748413, |
|
"learning_rate": 9.87809871503543e-06, |
|
"loss": 1.4619, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.331580638885498, |
|
"learning_rate": 9.877473580240741e-06, |
|
"loss": 1.4332, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.24333381652832, |
|
"learning_rate": 9.87684844544605e-06, |
|
"loss": 1.4474, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.499051570892334, |
|
"learning_rate": 9.876223310651361e-06, |
|
"loss": 1.4303, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.9360058307647705, |
|
"learning_rate": 9.87559817585667e-06, |
|
"loss": 1.4731, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.0263352394104, |
|
"learning_rate": 9.87497304106198e-06, |
|
"loss": 1.4429, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.827765941619873, |
|
"learning_rate": 9.87434790626729e-06, |
|
"loss": 1.4312, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.680577278137207, |
|
"learning_rate": 9.8737227714726e-06, |
|
"loss": 1.4571, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.409987926483154, |
|
"learning_rate": 9.873097636677909e-06, |
|
"loss": 1.4708, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.233061790466309, |
|
"learning_rate": 9.872472501883219e-06, |
|
"loss": 1.4079, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.923621416091919, |
|
"learning_rate": 9.871847367088529e-06, |
|
"loss": 1.4389, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.695704698562622, |
|
"learning_rate": 9.871222232293839e-06, |
|
"loss": 1.448, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.188453674316406, |
|
"learning_rate": 9.870597097499149e-06, |
|
"loss": 1.4356, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.8711934089660645, |
|
"learning_rate": 9.869971962704459e-06, |
|
"loss": 1.4221, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.016773223876953, |
|
"learning_rate": 9.869346827909769e-06, |
|
"loss": 1.4409, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.488391399383545, |
|
"learning_rate": 9.868721693115079e-06, |
|
"loss": 1.4609, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.66510534286499, |
|
"learning_rate": 9.868096558320389e-06, |
|
"loss": 1.4132, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.9409704208374023, |
|
"learning_rate": 9.867471423525698e-06, |
|
"loss": 1.4406, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.3582875728607178, |
|
"learning_rate": 9.866846288731008e-06, |
|
"loss": 1.438, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.238399505615234, |
|
"learning_rate": 9.866221153936318e-06, |
|
"loss": 1.4201, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.6502299308776855, |
|
"learning_rate": 9.865596019141628e-06, |
|
"loss": 1.4592, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.8077006340026855, |
|
"learning_rate": 9.864970884346938e-06, |
|
"loss": 1.4556, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.270641326904297, |
|
"learning_rate": 9.864345749552248e-06, |
|
"loss": 1.4101, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.743376731872559, |
|
"learning_rate": 9.863720614757558e-06, |
|
"loss": 1.4719, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.345736980438232, |
|
"learning_rate": 9.863095479962868e-06, |
|
"loss": 1.4002, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.8192217350006104, |
|
"learning_rate": 9.862470345168178e-06, |
|
"loss": 1.4697, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.685102939605713, |
|
"learning_rate": 9.861845210373488e-06, |
|
"loss": 1.4519, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.690993070602417, |
|
"learning_rate": 9.861220075578798e-06, |
|
"loss": 1.4424, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.8806326389312744, |
|
"learning_rate": 9.860594940784108e-06, |
|
"loss": 1.4416, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.362546443939209, |
|
"learning_rate": 9.859969805989418e-06, |
|
"loss": 1.4245, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.167792320251465, |
|
"learning_rate": 9.859344671194728e-06, |
|
"loss": 1.4645, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.120845317840576, |
|
"learning_rate": 9.858719536400036e-06, |
|
"loss": 1.3847, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.8441598415374756, |
|
"learning_rate": 9.858094401605347e-06, |
|
"loss": 1.4244, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.3462672233581543, |
|
"learning_rate": 9.857469266810656e-06, |
|
"loss": 1.4401, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.065661430358887, |
|
"learning_rate": 9.856844132015967e-06, |
|
"loss": 1.4212, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.5907657146453857, |
|
"learning_rate": 9.856218997221275e-06, |
|
"loss": 1.4639, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.701472759246826, |
|
"learning_rate": 9.855593862426587e-06, |
|
"loss": 1.4052, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.7131853103637695, |
|
"learning_rate": 9.854968727631897e-06, |
|
"loss": 1.4237, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.156214475631714, |
|
"learning_rate": 9.854343592837207e-06, |
|
"loss": 1.4364, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.9435715675354, |
|
"learning_rate": 9.853718458042517e-06, |
|
"loss": 1.4352, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.94811749458313, |
|
"learning_rate": 9.853093323247827e-06, |
|
"loss": 1.4303, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.5269935131073, |
|
"learning_rate": 9.852468188453137e-06, |
|
"loss": 1.4214, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.688473224639893, |
|
"learning_rate": 9.851843053658447e-06, |
|
"loss": 1.3854, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.054961204528809, |
|
"learning_rate": 9.851217918863757e-06, |
|
"loss": 1.432, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.178467273712158, |
|
"learning_rate": 9.850592784069065e-06, |
|
"loss": 1.4439, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.031513690948486, |
|
"learning_rate": 9.849967649274376e-06, |
|
"loss": 1.4228, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.9268980026245117, |
|
"learning_rate": 9.849342514479685e-06, |
|
"loss": 1.4002, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.176645040512085, |
|
"learning_rate": 9.848717379684996e-06, |
|
"loss": 1.4732, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.7952117919921875, |
|
"learning_rate": 9.848092244890304e-06, |
|
"loss": 1.4609, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.6165409088134766, |
|
"learning_rate": 9.847467110095616e-06, |
|
"loss": 1.4179, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.359500408172607, |
|
"learning_rate": 9.846841975300924e-06, |
|
"loss": 1.4443, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.256430625915527, |
|
"learning_rate": 9.846216840506236e-06, |
|
"loss": 1.4205, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.939763069152832, |
|
"learning_rate": 9.845591705711544e-06, |
|
"loss": 1.3889, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.5934700965881348, |
|
"learning_rate": 9.844966570916856e-06, |
|
"loss": 1.4045, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.0760035514831543, |
|
"learning_rate": 9.844341436122164e-06, |
|
"loss": 1.4534, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.314694881439209, |
|
"learning_rate": 9.843716301327476e-06, |
|
"loss": 1.4284, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.9042022228240967, |
|
"learning_rate": 9.843091166532784e-06, |
|
"loss": 1.4249, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.454749822616577, |
|
"learning_rate": 9.842466031738094e-06, |
|
"loss": 1.4291, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.8640189170837402, |
|
"learning_rate": 9.841840896943404e-06, |
|
"loss": 1.4514, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.65750789642334, |
|
"learning_rate": 9.841215762148714e-06, |
|
"loss": 1.4626, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.030206680297852, |
|
"learning_rate": 9.840590627354024e-06, |
|
"loss": 1.4038, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.036793231964111, |
|
"learning_rate": 9.839965492559334e-06, |
|
"loss": 1.4225, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.980349063873291, |
|
"learning_rate": 9.839340357764643e-06, |
|
"loss": 1.4334, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.157260894775391, |
|
"learning_rate": 9.838715222969953e-06, |
|
"loss": 1.453, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.416947841644287, |
|
"learning_rate": 9.838090088175263e-06, |
|
"loss": 1.4176, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.0742645263671875, |
|
"learning_rate": 9.837464953380573e-06, |
|
"loss": 1.4492, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.5331027507781982, |
|
"learning_rate": 9.836839818585883e-06, |
|
"loss": 1.4258, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.268676280975342, |
|
"learning_rate": 9.836214683791193e-06, |
|
"loss": 1.4081, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.619158983230591, |
|
"learning_rate": 9.835589548996503e-06, |
|
"loss": 1.3998, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.513633966445923, |
|
"learning_rate": 9.834964414201813e-06, |
|
"loss": 1.4102, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.7973320484161377, |
|
"learning_rate": 9.834339279407123e-06, |
|
"loss": 1.3846, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.910383701324463, |
|
"learning_rate": 9.833714144612433e-06, |
|
"loss": 1.3886, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.820688009262085, |
|
"learning_rate": 9.833089009817743e-06, |
|
"loss": 1.4178, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.5854384899139404, |
|
"learning_rate": 9.832463875023053e-06, |
|
"loss": 1.4154, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.8664228916168213, |
|
"learning_rate": 9.831838740228363e-06, |
|
"loss": 1.4255, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.179574728012085, |
|
"learning_rate": 9.831213605433673e-06, |
|
"loss": 1.4039, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.206747531890869, |
|
"learning_rate": 9.830588470638982e-06, |
|
"loss": 1.4066, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.408509731292725, |
|
"learning_rate": 9.829963335844292e-06, |
|
"loss": 1.4028, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.01927375793457, |
|
"learning_rate": 9.829338201049602e-06, |
|
"loss": 1.4415, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.166085720062256, |
|
"learning_rate": 9.828713066254912e-06, |
|
"loss": 1.4698, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.4799346923828125, |
|
"learning_rate": 9.828087931460222e-06, |
|
"loss": 1.4055, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.68662428855896, |
|
"learning_rate": 9.827462796665532e-06, |
|
"loss": 1.3942, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.4798970222473145, |
|
"learning_rate": 9.826837661870842e-06, |
|
"loss": 1.4293, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.4319188594818115, |
|
"learning_rate": 9.826212527076152e-06, |
|
"loss": 1.4134, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.2756521701812744, |
|
"learning_rate": 9.825587392281462e-06, |
|
"loss": 1.4535, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.3544061183929443, |
|
"learning_rate": 9.82496225748677e-06, |
|
"loss": 1.3997, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.7909374237060547, |
|
"learning_rate": 9.824337122692082e-06, |
|
"loss": 1.4043, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.8240981101989746, |
|
"learning_rate": 9.82371198789739e-06, |
|
"loss": 1.428, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.214618682861328, |
|
"learning_rate": 9.823086853102702e-06, |
|
"loss": 1.3999, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.105681419372559, |
|
"learning_rate": 9.822461718308011e-06, |
|
"loss": 1.4082, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.449899196624756, |
|
"learning_rate": 9.821836583513321e-06, |
|
"loss": 1.4273, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.17997932434082, |
|
"learning_rate": 9.821211448718631e-06, |
|
"loss": 1.4291, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.813826322555542, |
|
"learning_rate": 9.820586313923941e-06, |
|
"loss": 1.407, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.7879397869110107, |
|
"learning_rate": 9.819961179129251e-06, |
|
"loss": 1.4159, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.9027743339538574, |
|
"learning_rate": 9.819336044334561e-06, |
|
"loss": 1.439, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.7069435119628906, |
|
"learning_rate": 9.818710909539871e-06, |
|
"loss": 1.4117, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.904519557952881, |
|
"learning_rate": 9.818085774745181e-06, |
|
"loss": 1.4227, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.545767068862915, |
|
"learning_rate": 9.817460639950491e-06, |
|
"loss": 1.4344, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.717536687850952, |
|
"learning_rate": 9.816835505155799e-06, |
|
"loss": 1.447, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.036220073699951, |
|
"learning_rate": 9.81621037036111e-06, |
|
"loss": 1.4124, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.334647178649902, |
|
"learning_rate": 9.815585235566419e-06, |
|
"loss": 1.4347, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.7795979976654053, |
|
"learning_rate": 9.81496010077173e-06, |
|
"loss": 1.417, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.4146125316619873, |
|
"learning_rate": 9.814334965977039e-06, |
|
"loss": 1.3847, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.193895101547241, |
|
"learning_rate": 9.81370983118235e-06, |
|
"loss": 1.4084, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.3683910369873047, |
|
"learning_rate": 9.813084696387659e-06, |
|
"loss": 1.4393, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.252421855926514, |
|
"learning_rate": 9.81245956159297e-06, |
|
"loss": 1.4294, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.6922848224639893, |
|
"learning_rate": 9.811834426798279e-06, |
|
"loss": 1.4212, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.27756667137146, |
|
"learning_rate": 9.81120929200359e-06, |
|
"loss": 1.4185, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.377180337905884, |
|
"learning_rate": 9.810584157208898e-06, |
|
"loss": 1.3991, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.4141881465911865, |
|
"learning_rate": 9.80995902241421e-06, |
|
"loss": 1.3816, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.6975343227386475, |
|
"learning_rate": 9.809333887619518e-06, |
|
"loss": 1.4275, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.303208112716675, |
|
"learning_rate": 9.808708752824828e-06, |
|
"loss": 1.3996, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.1281208992004395, |
|
"learning_rate": 9.808083618030138e-06, |
|
"loss": 1.4573, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.216818809509277, |
|
"learning_rate": 9.807458483235448e-06, |
|
"loss": 1.4264, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.6236705780029297, |
|
"learning_rate": 9.806833348440758e-06, |
|
"loss": 1.42, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.4652881622314453, |
|
"learning_rate": 9.806208213646068e-06, |
|
"loss": 1.4509, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.3565449714660645, |
|
"learning_rate": 9.805583078851378e-06, |
|
"loss": 1.3968, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.522129535675049, |
|
"learning_rate": 9.804957944056688e-06, |
|
"loss": 1.4402, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.9470767974853516, |
|
"learning_rate": 9.804332809261998e-06, |
|
"loss": 1.3922, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.978543758392334, |
|
"learning_rate": 9.803707674467308e-06, |
|
"loss": 1.403, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.382244110107422, |
|
"learning_rate": 9.803082539672618e-06, |
|
"loss": 1.3968, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.595647811889648, |
|
"learning_rate": 9.802457404877927e-06, |
|
"loss": 1.4002, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.6310489177703857, |
|
"learning_rate": 9.801832270083237e-06, |
|
"loss": 1.4122, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.4216363430023193, |
|
"learning_rate": 9.801207135288547e-06, |
|
"loss": 1.3859, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.4577724933624268, |
|
"learning_rate": 9.800582000493857e-06, |
|
"loss": 1.4206, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.211758136749268, |
|
"learning_rate": 9.799956865699167e-06, |
|
"loss": 1.4146, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.5187759399414062, |
|
"learning_rate": 9.799331730904477e-06, |
|
"loss": 1.3983, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.524277925491333, |
|
"learning_rate": 9.798706596109787e-06, |
|
"loss": 1.39, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.746493339538574, |
|
"learning_rate": 9.798081461315097e-06, |
|
"loss": 1.4351, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.489757537841797, |
|
"learning_rate": 9.797456326520407e-06, |
|
"loss": 1.4215, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.862546443939209, |
|
"learning_rate": 9.796831191725717e-06, |
|
"loss": 1.3839, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.700289487838745, |
|
"learning_rate": 9.796206056931027e-06, |
|
"loss": 1.4134, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.463230609893799, |
|
"learning_rate": 9.795580922136337e-06, |
|
"loss": 1.4094, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.6630661487579346, |
|
"learning_rate": 9.794955787341647e-06, |
|
"loss": 1.4008, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.630967140197754, |
|
"learning_rate": 9.794330652546956e-06, |
|
"loss": 1.3759, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.3025717735290527, |
|
"learning_rate": 9.793705517752266e-06, |
|
"loss": 1.3884, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.3258678913116455, |
|
"learning_rate": 9.793080382957576e-06, |
|
"loss": 1.386, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.719531536102295, |
|
"learning_rate": 9.792455248162885e-06, |
|
"loss": 1.3964, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.2938575744628906, |
|
"learning_rate": 9.791830113368196e-06, |
|
"loss": 1.4057, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.785384654998779, |
|
"learning_rate": 9.791204978573506e-06, |
|
"loss": 1.4134, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.3767313957214355, |
|
"learning_rate": 9.790579843778816e-06, |
|
"loss": 1.4139, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.9999425411224365, |
|
"learning_rate": 9.789954708984126e-06, |
|
"loss": 1.4039, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.019780158996582, |
|
"learning_rate": 9.789329574189436e-06, |
|
"loss": 1.3637, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.2933456897735596, |
|
"learning_rate": 9.788704439394746e-06, |
|
"loss": 1.3784, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.511465549468994, |
|
"learning_rate": 9.788079304600056e-06, |
|
"loss": 1.3845, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.031588077545166, |
|
"learning_rate": 9.787454169805366e-06, |
|
"loss": 1.3815, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.726041078567505, |
|
"learning_rate": 9.786829035010676e-06, |
|
"loss": 1.4031, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.459808349609375, |
|
"learning_rate": 9.786203900215986e-06, |
|
"loss": 1.4168, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 5.884055137634277, |
|
"learning_rate": 9.785578765421295e-06, |
|
"loss": 1.3897, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.8769099712371826, |
|
"learning_rate": 9.784953630626605e-06, |
|
"loss": 1.4214, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.353100776672363, |
|
"learning_rate": 9.784328495831914e-06, |
|
"loss": 1.3875, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.665733575820923, |
|
"learning_rate": 9.783703361037225e-06, |
|
"loss": 1.4033, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.098516941070557, |
|
"learning_rate": 9.783078226242533e-06, |
|
"loss": 1.3755, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.6719651222229004, |
|
"learning_rate": 9.782453091447845e-06, |
|
"loss": 1.4025, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.7323970794677734, |
|
"learning_rate": 9.781827956653153e-06, |
|
"loss": 1.4072, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.6012964248657227, |
|
"learning_rate": 9.781202821858465e-06, |
|
"loss": 1.3839, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.1830966472625732, |
|
"learning_rate": 9.780577687063773e-06, |
|
"loss": 1.4025, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.503458261489868, |
|
"learning_rate": 9.779952552269085e-06, |
|
"loss": 1.3849, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.934358596801758, |
|
"learning_rate": 9.779327417474393e-06, |
|
"loss": 1.3908, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.263597249984741, |
|
"learning_rate": 9.778702282679705e-06, |
|
"loss": 1.4144, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.6019351482391357, |
|
"learning_rate": 9.778077147885013e-06, |
|
"loss": 1.3966, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.211871862411499, |
|
"learning_rate": 9.777452013090324e-06, |
|
"loss": 1.4029, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.445366382598877, |
|
"learning_rate": 9.776826878295633e-06, |
|
"loss": 1.4192, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.834134340286255, |
|
"learning_rate": 9.776201743500943e-06, |
|
"loss": 1.3971, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.223153114318848, |
|
"learning_rate": 9.775576608706253e-06, |
|
"loss": 1.4199, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.168875217437744, |
|
"learning_rate": 9.774951473911563e-06, |
|
"loss": 1.403, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.082376480102539, |
|
"learning_rate": 9.774326339116872e-06, |
|
"loss": 1.3758, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.381903648376465, |
|
"learning_rate": 9.773701204322182e-06, |
|
"loss": 1.4124, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.220072031021118, |
|
"learning_rate": 9.773076069527492e-06, |
|
"loss": 1.3731, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.445113182067871, |
|
"learning_rate": 9.772450934732802e-06, |
|
"loss": 1.409, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.551964282989502, |
|
"learning_rate": 9.771825799938112e-06, |
|
"loss": 1.4092, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.120997905731201, |
|
"learning_rate": 9.771200665143422e-06, |
|
"loss": 1.4316, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.3942294120788574, |
|
"learning_rate": 9.770575530348732e-06, |
|
"loss": 1.3752, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.3764448165893555, |
|
"learning_rate": 9.769950395554042e-06, |
|
"loss": 1.4238, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.600349187850952, |
|
"learning_rate": 9.769325260759352e-06, |
|
"loss": 1.4012, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.6565279960632324, |
|
"learning_rate": 9.768700125964662e-06, |
|
"loss": 1.3768, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.65138578414917, |
|
"learning_rate": 9.768074991169972e-06, |
|
"loss": 1.4022, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.3732988834381104, |
|
"learning_rate": 9.767449856375282e-06, |
|
"loss": 1.409, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.8248541355133057, |
|
"learning_rate": 9.766824721580592e-06, |
|
"loss": 1.4121, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.3323121070861816, |
|
"learning_rate": 9.766199586785901e-06, |
|
"loss": 1.4203, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.0881714820861816, |
|
"learning_rate": 9.765574451991211e-06, |
|
"loss": 1.3993, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.6461262702941895, |
|
"learning_rate": 9.764949317196521e-06, |
|
"loss": 1.3842, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.2910470962524414, |
|
"learning_rate": 9.764324182401831e-06, |
|
"loss": 1.4108, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.8803622722625732, |
|
"learning_rate": 9.763699047607141e-06, |
|
"loss": 1.3962, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.9977149963378906, |
|
"learning_rate": 9.763073912812451e-06, |
|
"loss": 1.3876, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.4803717136383057, |
|
"learning_rate": 9.762448778017761e-06, |
|
"loss": 1.3601, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.819903612136841, |
|
"learning_rate": 9.761823643223071e-06, |
|
"loss": 1.3891, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.582854747772217, |
|
"learning_rate": 9.761198508428381e-06, |
|
"loss": 1.4145, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.323837995529175, |
|
"learning_rate": 9.76057337363369e-06, |
|
"loss": 1.4141, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.7181520462036133, |
|
"learning_rate": 9.759948238839e-06, |
|
"loss": 1.3464, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.4599475860595703, |
|
"learning_rate": 9.75932310404431e-06, |
|
"loss": 1.3775, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.005889654159546, |
|
"learning_rate": 9.75869796924962e-06, |
|
"loss": 1.4078, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.152175188064575, |
|
"learning_rate": 9.75807283445493e-06, |
|
"loss": 1.411, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.951000213623047, |
|
"learning_rate": 9.75744769966024e-06, |
|
"loss": 1.3737, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.722508192062378, |
|
"learning_rate": 9.75682256486555e-06, |
|
"loss": 1.4092, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 4.0199761390686035, |
|
"learning_rate": 9.75619743007086e-06, |
|
"loss": 1.3964, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.306147336959839, |
|
"learning_rate": 9.75557229527617e-06, |
|
"loss": 1.3772, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.0931670665740967, |
|
"learning_rate": 9.75494716048148e-06, |
|
"loss": 1.3848, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.4775798320770264, |
|
"learning_rate": 9.75432202568679e-06, |
|
"loss": 1.391, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.973440170288086, |
|
"learning_rate": 9.7536968908921e-06, |
|
"loss": 1.3993, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.9864256381988525, |
|
"learning_rate": 9.75307175609741e-06, |
|
"loss": 1.3523, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.9546356201171875, |
|
"learning_rate": 9.75244662130272e-06, |
|
"loss": 1.3527, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 6.238116264343262, |
|
"learning_rate": 9.75182148650803e-06, |
|
"loss": 1.3624, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.241060495376587, |
|
"learning_rate": 9.75119635171334e-06, |
|
"loss": 1.3875, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.17099666595459, |
|
"learning_rate": 9.750571216918648e-06, |
|
"loss": 1.381, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.647505283355713, |
|
"learning_rate": 9.74994608212396e-06, |
|
"loss": 1.3673, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.5791754722595215, |
|
"learning_rate": 9.749320947329268e-06, |
|
"loss": 1.3936, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 4.946603775024414, |
|
"learning_rate": 9.74869581253458e-06, |
|
"loss": 1.4135, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.9948465824127197, |
|
"learning_rate": 9.748070677739888e-06, |
|
"loss": 1.3824, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.355520725250244, |
|
"learning_rate": 9.7474455429452e-06, |
|
"loss": 1.3757, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.599808931350708, |
|
"learning_rate": 9.746820408150508e-06, |
|
"loss": 1.4019, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.60089111328125, |
|
"learning_rate": 9.746195273355819e-06, |
|
"loss": 1.4147, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.074833393096924, |
|
"learning_rate": 9.745570138561127e-06, |
|
"loss": 1.4084, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.3081626892089844, |
|
"learning_rate": 9.744945003766439e-06, |
|
"loss": 1.3999, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.088558673858643, |
|
"learning_rate": 9.744319868971747e-06, |
|
"loss": 1.398, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.7625129222869873, |
|
"learning_rate": 9.743694734177059e-06, |
|
"loss": 1.388, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.328056335449219, |
|
"learning_rate": 9.743069599382367e-06, |
|
"loss": 1.3721, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.1336140632629395, |
|
"learning_rate": 9.742444464587677e-06, |
|
"loss": 1.3865, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.0789365768432617, |
|
"learning_rate": 9.741819329792987e-06, |
|
"loss": 1.3609, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.569803237915039, |
|
"learning_rate": 9.741194194998297e-06, |
|
"loss": 1.3954, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.9342846870422363, |
|
"learning_rate": 9.740569060203607e-06, |
|
"loss": 1.3381, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.4400010108947754, |
|
"learning_rate": 9.739943925408917e-06, |
|
"loss": 1.3582, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.698220729827881, |
|
"learning_rate": 9.739318790614227e-06, |
|
"loss": 1.4165, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.7116315364837646, |
|
"learning_rate": 9.738693655819537e-06, |
|
"loss": 1.3643, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.515981674194336, |
|
"learning_rate": 9.738068521024846e-06, |
|
"loss": 1.3899, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.1222646236419678, |
|
"learning_rate": 9.737443386230156e-06, |
|
"loss": 1.3497, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.3143482208251953, |
|
"learning_rate": 9.736818251435466e-06, |
|
"loss": 1.3906, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.673861026763916, |
|
"learning_rate": 9.736193116640776e-06, |
|
"loss": 1.3742, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.1223580837249756, |
|
"learning_rate": 9.735567981846086e-06, |
|
"loss": 1.3679, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.321925640106201, |
|
"learning_rate": 9.734942847051396e-06, |
|
"loss": 1.3872, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.3574061393737793, |
|
"learning_rate": 9.734317712256706e-06, |
|
"loss": 1.3536, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.0097270011901855, |
|
"learning_rate": 9.733692577462016e-06, |
|
"loss": 1.3867, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 6.731925964355469, |
|
"learning_rate": 9.733067442667326e-06, |
|
"loss": 1.3671, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.5036468505859375, |
|
"learning_rate": 9.732442307872636e-06, |
|
"loss": 1.3935, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.059147357940674, |
|
"learning_rate": 9.731817173077946e-06, |
|
"loss": 1.3693, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.65627384185791, |
|
"learning_rate": 9.731192038283256e-06, |
|
"loss": 1.3838, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.8300678730010986, |
|
"learning_rate": 9.730566903488566e-06, |
|
"loss": 1.4273, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.2943530082702637, |
|
"learning_rate": 9.729941768693876e-06, |
|
"loss": 1.3598, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.6148500442504883, |
|
"learning_rate": 9.729316633899185e-06, |
|
"loss": 1.3916, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.14809250831604, |
|
"learning_rate": 9.728691499104495e-06, |
|
"loss": 1.3596, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.4826507568359375, |
|
"learning_rate": 9.728066364309805e-06, |
|
"loss": 1.4113, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.761301279067993, |
|
"learning_rate": 9.727441229515115e-06, |
|
"loss": 1.3811, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.280597448348999, |
|
"learning_rate": 9.726816094720425e-06, |
|
"loss": 1.3735, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.2056515216827393, |
|
"learning_rate": 9.726190959925735e-06, |
|
"loss": 1.3928, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.535262107849121, |
|
"learning_rate": 9.725565825131045e-06, |
|
"loss": 1.3795, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.71197509765625, |
|
"learning_rate": 9.724940690336355e-06, |
|
"loss": 1.3956, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.048292875289917, |
|
"learning_rate": 9.724315555541665e-06, |
|
"loss": 1.3756, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.6094841957092285, |
|
"learning_rate": 9.723690420746975e-06, |
|
"loss": 1.4143, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.416449546813965, |
|
"learning_rate": 9.723065285952285e-06, |
|
"loss": 1.3618, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.372152328491211, |
|
"learning_rate": 9.722440151157595e-06, |
|
"loss": 1.402, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.1622934341430664, |
|
"learning_rate": 9.721815016362905e-06, |
|
"loss": 1.3914, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.1704394817352295, |
|
"learning_rate": 9.721189881568214e-06, |
|
"loss": 1.3827, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.9178764820098877, |
|
"learning_rate": 9.720564746773524e-06, |
|
"loss": 1.371, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.839916706085205, |
|
"learning_rate": 9.719939611978834e-06, |
|
"loss": 1.361, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.5647811889648438, |
|
"learning_rate": 9.719314477184144e-06, |
|
"loss": 1.3857, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.2756240367889404, |
|
"learning_rate": 9.718689342389454e-06, |
|
"loss": 1.375, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.051654815673828, |
|
"learning_rate": 9.718064207594762e-06, |
|
"loss": 1.3941, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.137097358703613, |
|
"learning_rate": 9.717439072800074e-06, |
|
"loss": 1.3892, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.646369457244873, |
|
"learning_rate": 9.716813938005382e-06, |
|
"loss": 1.3846, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.162900686264038, |
|
"learning_rate": 9.716188803210694e-06, |
|
"loss": 1.4173, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.1182548999786377, |
|
"learning_rate": 9.715563668416002e-06, |
|
"loss": 1.3769, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 5.595252513885498, |
|
"learning_rate": 9.714938533621314e-06, |
|
"loss": 1.4087, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.782058000564575, |
|
"learning_rate": 9.714313398826622e-06, |
|
"loss": 1.4179, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.0897421836853027, |
|
"learning_rate": 9.713688264031934e-06, |
|
"loss": 1.3671, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.455578565597534, |
|
"learning_rate": 9.713063129237242e-06, |
|
"loss": 1.401, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.877256155014038, |
|
"learning_rate": 9.712437994442553e-06, |
|
"loss": 1.4114, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.3103723526000977, |
|
"learning_rate": 9.711812859647862e-06, |
|
"loss": 1.3895, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.909396171569824, |
|
"learning_rate": 9.711187724853173e-06, |
|
"loss": 1.3632, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.4170010089874268, |
|
"learning_rate": 9.710562590058482e-06, |
|
"loss": 1.3819, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.1259448528289795, |
|
"learning_rate": 9.709937455263791e-06, |
|
"loss": 1.3894, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.449690818786621, |
|
"learning_rate": 9.709312320469101e-06, |
|
"loss": 1.3998, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.47631573677063, |
|
"learning_rate": 9.708687185674411e-06, |
|
"loss": 1.374, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.5324013233184814, |
|
"learning_rate": 9.708062050879721e-06, |
|
"loss": 1.3518, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.9155433177948, |
|
"learning_rate": 9.707436916085031e-06, |
|
"loss": 1.3848, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.4327306747436523, |
|
"learning_rate": 9.706811781290341e-06, |
|
"loss": 1.3653, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.06643533706665, |
|
"learning_rate": 9.706186646495651e-06, |
|
"loss": 1.3892, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.518132448196411, |
|
"learning_rate": 9.705561511700961e-06, |
|
"loss": 1.3433, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.2915585041046143, |
|
"learning_rate": 9.704936376906271e-06, |
|
"loss": 1.3579, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.804596185684204, |
|
"learning_rate": 9.70431124211158e-06, |
|
"loss": 1.3809, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.617377758026123, |
|
"learning_rate": 9.70368610731689e-06, |
|
"loss": 1.398, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.0816659927368164, |
|
"learning_rate": 9.7030609725222e-06, |
|
"loss": 1.3652, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.5903923511505127, |
|
"learning_rate": 9.70243583772751e-06, |
|
"loss": 1.3767, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.43280029296875, |
|
"learning_rate": 9.70181070293282e-06, |
|
"loss": 1.3802, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.5906081199646, |
|
"learning_rate": 9.70118556813813e-06, |
|
"loss": 1.3726, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.5110647678375244, |
|
"learning_rate": 9.70056043334344e-06, |
|
"loss": 1.4125, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.5731992721557617, |
|
"learning_rate": 9.69993529854875e-06, |
|
"loss": 1.3971, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.522200584411621, |
|
"learning_rate": 9.69931016375406e-06, |
|
"loss": 1.3758, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.754093647003174, |
|
"learning_rate": 9.69868502895937e-06, |
|
"loss": 1.4125, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.088795185089111, |
|
"learning_rate": 9.69805989416468e-06, |
|
"loss": 1.374, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.095700263977051, |
|
"learning_rate": 9.69743475936999e-06, |
|
"loss": 1.3475, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.6446001529693604, |
|
"learning_rate": 9.6968096245753e-06, |
|
"loss": 1.3675, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.0287554264068604, |
|
"learning_rate": 9.69618448978061e-06, |
|
"loss": 1.3648, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.5153772830963135, |
|
"learning_rate": 9.69555935498592e-06, |
|
"loss": 1.3526, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.402449131011963, |
|
"learning_rate": 9.69493422019123e-06, |
|
"loss": 1.4021, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.223129749298096, |
|
"learning_rate": 9.69430908539654e-06, |
|
"loss": 1.3754, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.7301337718963623, |
|
"learning_rate": 9.69368395060185e-06, |
|
"loss": 1.411, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.3157832622528076, |
|
"learning_rate": 9.69305881580716e-06, |
|
"loss": 1.3695, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.0217671394348145, |
|
"learning_rate": 9.69243368101247e-06, |
|
"loss": 1.3822, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.575080633163452, |
|
"learning_rate": 9.69180854621778e-06, |
|
"loss": 1.3578, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.4445888996124268, |
|
"learning_rate": 9.69118341142309e-06, |
|
"loss": 1.3795, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.173060894012451, |
|
"learning_rate": 9.6905582766284e-06, |
|
"loss": 1.3446, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.8823065757751465, |
|
"learning_rate": 9.689933141833709e-06, |
|
"loss": 1.3696, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.5092880725860596, |
|
"learning_rate": 9.689308007039019e-06, |
|
"loss": 1.3262, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.270498514175415, |
|
"learning_rate": 9.688682872244329e-06, |
|
"loss": 1.3905, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.0580673217773438, |
|
"learning_rate": 9.688057737449639e-06, |
|
"loss": 1.3707, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.982407569885254, |
|
"learning_rate": 9.687432602654949e-06, |
|
"loss": 1.3909, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.194490432739258, |
|
"learning_rate": 9.686807467860259e-06, |
|
"loss": 1.3703, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.5486743450164795, |
|
"learning_rate": 9.686182333065569e-06, |
|
"loss": 1.3528, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.081116199493408, |
|
"learning_rate": 9.685557198270879e-06, |
|
"loss": 1.3929, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.644366979598999, |
|
"learning_rate": 9.684932063476189e-06, |
|
"loss": 1.3847, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.9201712608337402, |
|
"learning_rate": 9.684306928681497e-06, |
|
"loss": 1.3689, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.1488590240478516, |
|
"learning_rate": 9.683681793886808e-06, |
|
"loss": 1.3573, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.4069724082946777, |
|
"learning_rate": 9.683056659092117e-06, |
|
"loss": 1.3447, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.669130563735962, |
|
"learning_rate": 9.682431524297428e-06, |
|
"loss": 1.3629, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.5612809658050537, |
|
"learning_rate": 9.681806389502736e-06, |
|
"loss": 1.3463, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.3988492488861084, |
|
"learning_rate": 9.681181254708048e-06, |
|
"loss": 1.3262, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.286510705947876, |
|
"learning_rate": 9.680556119913356e-06, |
|
"loss": 1.3671, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.079017639160156, |
|
"learning_rate": 9.679930985118668e-06, |
|
"loss": 1.3691, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.038201808929443, |
|
"learning_rate": 9.679305850323976e-06, |
|
"loss": 1.3684, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.9335787296295166, |
|
"learning_rate": 9.678680715529288e-06, |
|
"loss": 1.3651, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.750838279724121, |
|
"learning_rate": 9.678055580734596e-06, |
|
"loss": 1.3979, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.269113779067993, |
|
"learning_rate": 9.677430445939908e-06, |
|
"loss": 1.4005, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.9525506496429443, |
|
"learning_rate": 9.676805311145216e-06, |
|
"loss": 1.3535, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.0349273681640625, |
|
"learning_rate": 9.676180176350526e-06, |
|
"loss": 1.3568, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.9644954204559326, |
|
"learning_rate": 9.675555041555836e-06, |
|
"loss": 1.3368, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.748861312866211, |
|
"learning_rate": 9.674929906761146e-06, |
|
"loss": 1.3788, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.927027940750122, |
|
"learning_rate": 9.674304771966456e-06, |
|
"loss": 1.3369, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.5700511932373047, |
|
"learning_rate": 9.673679637171766e-06, |
|
"loss": 1.3476, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.2039616107940674, |
|
"learning_rate": 9.673054502377075e-06, |
|
"loss": 1.358, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.7656171321868896, |
|
"learning_rate": 9.672429367582385e-06, |
|
"loss": 1.3656, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.84936261177063, |
|
"learning_rate": 9.671804232787695e-06, |
|
"loss": 1.4221, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.145597457885742, |
|
"learning_rate": 9.671179097993005e-06, |
|
"loss": 1.3444, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.1017513275146484, |
|
"learning_rate": 9.670553963198315e-06, |
|
"loss": 1.3604, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.460015058517456, |
|
"learning_rate": 9.669928828403625e-06, |
|
"loss": 1.3556, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.2410836219787598, |
|
"learning_rate": 9.669303693608935e-06, |
|
"loss": 1.3468, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.7556843757629395, |
|
"learning_rate": 9.668678558814245e-06, |
|
"loss": 1.3535, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.8711953163146973, |
|
"learning_rate": 9.668053424019555e-06, |
|
"loss": 1.3587, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.9098100662231445, |
|
"learning_rate": 9.667428289224865e-06, |
|
"loss": 1.3866, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.6795027256011963, |
|
"learning_rate": 9.666803154430175e-06, |
|
"loss": 1.3692, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.239531517028809, |
|
"learning_rate": 9.666178019635485e-06, |
|
"loss": 1.3898, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.871461868286133, |
|
"learning_rate": 9.665552884840795e-06, |
|
"loss": 1.3863, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.8275647163391113, |
|
"learning_rate": 9.664927750046104e-06, |
|
"loss": 1.4035, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.168945074081421, |
|
"learning_rate": 9.664302615251414e-06, |
|
"loss": 1.4028, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.4457874298095703, |
|
"learning_rate": 9.663677480456724e-06, |
|
"loss": 1.3484, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.0998809337615967, |
|
"learning_rate": 9.663052345662034e-06, |
|
"loss": 1.3533, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.2760820388793945, |
|
"learning_rate": 9.662427210867344e-06, |
|
"loss": 1.3733, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.1642961502075195, |
|
"learning_rate": 9.661802076072654e-06, |
|
"loss": 1.3675, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.5796260833740234, |
|
"learning_rate": 9.661176941277964e-06, |
|
"loss": 1.3842, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.379223108291626, |
|
"learning_rate": 9.660551806483274e-06, |
|
"loss": 1.3, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.7059500217437744, |
|
"learning_rate": 9.659926671688584e-06, |
|
"loss": 1.3801, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.9644994735717773, |
|
"learning_rate": 9.659301536893894e-06, |
|
"loss": 1.3882, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.516009569168091, |
|
"learning_rate": 9.658676402099204e-06, |
|
"loss": 1.3507, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.6307122707366943, |
|
"learning_rate": 9.658051267304514e-06, |
|
"loss": 1.3887, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.963676929473877, |
|
"learning_rate": 9.657426132509824e-06, |
|
"loss": 1.3574, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.6590583324432373, |
|
"learning_rate": 9.656800997715134e-06, |
|
"loss": 1.3766, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.9890248775482178, |
|
"learning_rate": 9.656175862920443e-06, |
|
"loss": 1.3766, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.7033519744873047, |
|
"learning_rate": 9.655550728125753e-06, |
|
"loss": 1.3653, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.2749149799346924, |
|
"learning_rate": 9.654925593331063e-06, |
|
"loss": 1.3555, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.6017117500305176, |
|
"learning_rate": 9.654300458536373e-06, |
|
"loss": 1.3379, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.2133805751800537, |
|
"learning_rate": 9.653675323741683e-06, |
|
"loss": 1.3383, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.5152649879455566, |
|
"learning_rate": 9.653050188946993e-06, |
|
"loss": 1.3821, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.8433949947357178, |
|
"learning_rate": 9.652425054152303e-06, |
|
"loss": 1.3648, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.219630241394043, |
|
"learning_rate": 9.651799919357611e-06, |
|
"loss": 1.3381, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.9422874450683594, |
|
"learning_rate": 9.651174784562923e-06, |
|
"loss": 1.3596, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.0160861015319824, |
|
"learning_rate": 9.650549649768231e-06, |
|
"loss": 1.4086, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.6983797550201416, |
|
"learning_rate": 9.649924514973543e-06, |
|
"loss": 1.3653, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.4345366954803467, |
|
"learning_rate": 9.649299380178851e-06, |
|
"loss": 1.3836, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.864907741546631, |
|
"learning_rate": 9.648674245384163e-06, |
|
"loss": 1.3395, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.0346243381500244, |
|
"learning_rate": 9.64804911058947e-06, |
|
"loss": 1.3483, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.271688938140869, |
|
"learning_rate": 9.647423975794782e-06, |
|
"loss": 1.3718, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.519439697265625, |
|
"learning_rate": 9.64679884100009e-06, |
|
"loss": 1.3516, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.676679849624634, |
|
"learning_rate": 9.646173706205402e-06, |
|
"loss": 1.3786, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.799685001373291, |
|
"learning_rate": 9.64554857141071e-06, |
|
"loss": 1.3622, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.5693440437316895, |
|
"learning_rate": 9.644923436616022e-06, |
|
"loss": 1.3506, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.081248760223389, |
|
"learning_rate": 9.64429830182133e-06, |
|
"loss": 1.3551, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.275651454925537, |
|
"learning_rate": 9.64367316702664e-06, |
|
"loss": 1.3691, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.2802531719207764, |
|
"learning_rate": 9.64304803223195e-06, |
|
"loss": 1.3654, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.8903188705444336, |
|
"learning_rate": 9.64242289743726e-06, |
|
"loss": 1.3694, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.767051935195923, |
|
"learning_rate": 9.64179776264257e-06, |
|
"loss": 1.4189, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.3065268993377686, |
|
"learning_rate": 9.64117262784788e-06, |
|
"loss": 1.3528, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.6622681617736816, |
|
"learning_rate": 9.64054749305319e-06, |
|
"loss": 1.3744, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.5773024559020996, |
|
"learning_rate": 9.6399223582585e-06, |
|
"loss": 1.3678, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.050888538360596, |
|
"learning_rate": 9.63929722346381e-06, |
|
"loss": 1.3459, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.3641510009765625, |
|
"learning_rate": 9.63867208866912e-06, |
|
"loss": 1.3289, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.2410778999328613, |
|
"learning_rate": 9.63804695387443e-06, |
|
"loss": 1.3582, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.7819199562072754, |
|
"learning_rate": 9.63742181907974e-06, |
|
"loss": 1.3535, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.4329464435577393, |
|
"learning_rate": 9.63679668428505e-06, |
|
"loss": 1.3637, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.7776496410369873, |
|
"learning_rate": 9.63617154949036e-06, |
|
"loss": 1.3427, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.6807868480682373, |
|
"learning_rate": 9.63554641469567e-06, |
|
"loss": 1.3366, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.182055711746216, |
|
"learning_rate": 9.63492127990098e-06, |
|
"loss": 1.3907, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.0613508224487305, |
|
"learning_rate": 9.63429614510629e-06, |
|
"loss": 1.3628, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.813504219055176, |
|
"learning_rate": 9.633671010311599e-06, |
|
"loss": 1.3534, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.0388875007629395, |
|
"learning_rate": 9.633045875516909e-06, |
|
"loss": 1.3701, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.5311150550842285, |
|
"learning_rate": 9.632420740722219e-06, |
|
"loss": 1.3419, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.283538341522217, |
|
"learning_rate": 9.631795605927529e-06, |
|
"loss": 1.3772, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.6988024711608887, |
|
"learning_rate": 9.631170471132839e-06, |
|
"loss": 1.3633, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.738215684890747, |
|
"learning_rate": 9.630545336338149e-06, |
|
"loss": 1.3814, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.899857997894287, |
|
"learning_rate": 9.629920201543459e-06, |
|
"loss": 1.3787, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.2490193843841553, |
|
"learning_rate": 9.629295066748769e-06, |
|
"loss": 1.347, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.262529134750366, |
|
"learning_rate": 9.628669931954079e-06, |
|
"loss": 1.3405, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.1799771785736084, |
|
"learning_rate": 9.628044797159388e-06, |
|
"loss": 1.3796, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.5044260025024414, |
|
"learning_rate": 9.627419662364698e-06, |
|
"loss": 1.322, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.560049295425415, |
|
"learning_rate": 9.626794527570008e-06, |
|
"loss": 1.356, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.3910019397735596, |
|
"learning_rate": 9.626169392775318e-06, |
|
"loss": 1.3546, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.631550312042236, |
|
"learning_rate": 9.625544257980628e-06, |
|
"loss": 1.3664, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.415191650390625, |
|
"learning_rate": 9.624919123185938e-06, |
|
"loss": 1.3818, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.071653127670288, |
|
"learning_rate": 9.624293988391248e-06, |
|
"loss": 1.3947, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.575773000717163, |
|
"learning_rate": 9.623668853596558e-06, |
|
"loss": 1.3392, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.1757047176361084, |
|
"learning_rate": 9.623043718801868e-06, |
|
"loss": 1.3619, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.487311840057373, |
|
"learning_rate": 9.622418584007178e-06, |
|
"loss": 1.3538, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.791187047958374, |
|
"learning_rate": 9.621793449212488e-06, |
|
"loss": 1.343, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.840940237045288, |
|
"learning_rate": 9.621168314417798e-06, |
|
"loss": 1.3479, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.4006075859069824, |
|
"learning_rate": 9.620543179623108e-06, |
|
"loss": 1.3742, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.8290023803710938, |
|
"learning_rate": 9.619918044828418e-06, |
|
"loss": 1.3825, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.114961624145508, |
|
"learning_rate": 9.619292910033727e-06, |
|
"loss": 1.3742, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.4792447090148926, |
|
"learning_rate": 9.618667775239037e-06, |
|
"loss": 1.3442, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.3174169063568115, |
|
"learning_rate": 9.618042640444346e-06, |
|
"loss": 1.3896, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.7161898612976074, |
|
"learning_rate": 9.617417505649657e-06, |
|
"loss": 1.3492, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.3158161640167236, |
|
"learning_rate": 9.616792370854965e-06, |
|
"loss": 1.3378, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.810378074645996, |
|
"learning_rate": 9.616167236060277e-06, |
|
"loss": 1.3482, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.8272716999053955, |
|
"learning_rate": 9.615542101265585e-06, |
|
"loss": 1.3624, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.1543540954589844, |
|
"learning_rate": 9.614916966470897e-06, |
|
"loss": 1.3703, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.571789503097534, |
|
"learning_rate": 9.614291831676205e-06, |
|
"loss": 1.3467, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.8913381099700928, |
|
"learning_rate": 9.613666696881517e-06, |
|
"loss": 1.3629, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.3162176609039307, |
|
"learning_rate": 9.613041562086825e-06, |
|
"loss": 1.399, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.26802396774292, |
|
"learning_rate": 9.612416427292137e-06, |
|
"loss": 1.3481, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.7507224082946777, |
|
"learning_rate": 9.611791292497445e-06, |
|
"loss": 1.3875, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.059849262237549, |
|
"learning_rate": 9.611166157702756e-06, |
|
"loss": 1.3402, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.9418632984161377, |
|
"learning_rate": 9.610541022908065e-06, |
|
"loss": 1.385, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.126817464828491, |
|
"learning_rate": 9.609915888113375e-06, |
|
"loss": 1.3342, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.908066749572754, |
|
"learning_rate": 9.609290753318685e-06, |
|
"loss": 1.3658, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.3799283504486084, |
|
"learning_rate": 9.608665618523994e-06, |
|
"loss": 1.3392, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.0953500270843506, |
|
"learning_rate": 9.608040483729304e-06, |
|
"loss": 1.3664, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.4090096950531006, |
|
"learning_rate": 9.607415348934614e-06, |
|
"loss": 1.3913, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.0916600227355957, |
|
"learning_rate": 9.606790214139924e-06, |
|
"loss": 1.3482, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.232104778289795, |
|
"learning_rate": 9.606165079345234e-06, |
|
"loss": 1.3663, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.300558090209961, |
|
"learning_rate": 9.605539944550544e-06, |
|
"loss": 1.3608, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.795227527618408, |
|
"learning_rate": 9.604914809755854e-06, |
|
"loss": 1.3637, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.083174467086792, |
|
"learning_rate": 9.604289674961164e-06, |
|
"loss": 1.3269, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.8292133808135986, |
|
"learning_rate": 9.603664540166474e-06, |
|
"loss": 1.3311, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.3727259635925293, |
|
"learning_rate": 9.603039405371784e-06, |
|
"loss": 1.3233, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.0696310997009277, |
|
"learning_rate": 9.602414270577094e-06, |
|
"loss": 1.3217, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.1085591316223145, |
|
"learning_rate": 9.601789135782404e-06, |
|
"loss": 1.395, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.036706447601318, |
|
"learning_rate": 9.601164000987714e-06, |
|
"loss": 1.3763, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.823237419128418, |
|
"learning_rate": 9.600538866193024e-06, |
|
"loss": 1.3868, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.535228729248047, |
|
"learning_rate": 9.599913731398333e-06, |
|
"loss": 1.3714, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.333162546157837, |
|
"learning_rate": 9.599288596603643e-06, |
|
"loss": 1.3509, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.901670455932617, |
|
"learning_rate": 9.598663461808953e-06, |
|
"loss": 1.3486, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.89204478263855, |
|
"learning_rate": 9.598038327014263e-06, |
|
"loss": 1.3975, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.5590710639953613, |
|
"learning_rate": 9.597413192219573e-06, |
|
"loss": 1.3822, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.234952449798584, |
|
"learning_rate": 9.596788057424883e-06, |
|
"loss": 1.3942, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.125939130783081, |
|
"learning_rate": 9.596162922630193e-06, |
|
"loss": 1.3573, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.25191593170166, |
|
"learning_rate": 9.595537787835503e-06, |
|
"loss": 1.3742, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.0981853008270264, |
|
"learning_rate": 9.594912653040813e-06, |
|
"loss": 1.3272, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.660688638687134, |
|
"learning_rate": 9.594287518246123e-06, |
|
"loss": 1.3859, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.212889671325684, |
|
"learning_rate": 9.593662383451433e-06, |
|
"loss": 1.3685, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.602475643157959, |
|
"learning_rate": 9.593037248656743e-06, |
|
"loss": 1.3486, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.8037405014038086, |
|
"learning_rate": 9.592412113862053e-06, |
|
"loss": 1.3094, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.735767364501953, |
|
"learning_rate": 9.591786979067363e-06, |
|
"loss": 1.3443, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.10837984085083, |
|
"learning_rate": 9.591161844272672e-06, |
|
"loss": 1.3637, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.339202880859375, |
|
"learning_rate": 9.590536709477982e-06, |
|
"loss": 1.374, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.676008224487305, |
|
"learning_rate": 9.589911574683292e-06, |
|
"loss": 1.3609, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.2127492427825928, |
|
"learning_rate": 9.589286439888602e-06, |
|
"loss": 1.3443, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.552145481109619, |
|
"learning_rate": 9.588661305093912e-06, |
|
"loss": 1.3653, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.267813205718994, |
|
"learning_rate": 9.588036170299222e-06, |
|
"loss": 1.3316, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.004978179931641, |
|
"learning_rate": 9.587411035504532e-06, |
|
"loss": 1.3259, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.433945417404175, |
|
"learning_rate": 9.586785900709842e-06, |
|
"loss": 1.3475, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.3748490810394287, |
|
"learning_rate": 9.586160765915152e-06, |
|
"loss": 1.3442, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.2221007347106934, |
|
"learning_rate": 9.58553563112046e-06, |
|
"loss": 1.3705, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.846968173980713, |
|
"learning_rate": 9.584910496325772e-06, |
|
"loss": 1.3411, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.973281145095825, |
|
"learning_rate": 9.58428536153108e-06, |
|
"loss": 1.3503, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.7122104167938232, |
|
"learning_rate": 9.583660226736392e-06, |
|
"loss": 1.3581, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.12910795211792, |
|
"learning_rate": 9.5830350919417e-06, |
|
"loss": 1.385, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.4491500854492188, |
|
"learning_rate": 9.582409957147011e-06, |
|
"loss": 1.3626, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.059682846069336, |
|
"learning_rate": 9.58178482235232e-06, |
|
"loss": 1.3595, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.2482686042785645, |
|
"learning_rate": 9.581159687557631e-06, |
|
"loss": 1.3339, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.3053741455078125, |
|
"learning_rate": 9.58053455276294e-06, |
|
"loss": 1.3656, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.101283311843872, |
|
"learning_rate": 9.579909417968251e-06, |
|
"loss": 1.3751, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.7894277572631836, |
|
"learning_rate": 9.57928428317356e-06, |
|
"loss": 1.3744, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.6949033737182617, |
|
"learning_rate": 9.578659148378871e-06, |
|
"loss": 1.3434, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.2511017322540283, |
|
"learning_rate": 9.57803401358418e-06, |
|
"loss": 1.3809, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.6631274223327637, |
|
"learning_rate": 9.577408878789489e-06, |
|
"loss": 1.3232, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.0832998752593994, |
|
"learning_rate": 9.576783743994799e-06, |
|
"loss": 1.3785, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.4912238121032715, |
|
"learning_rate": 9.576158609200109e-06, |
|
"loss": 1.3558, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.720935821533203, |
|
"learning_rate": 9.575533474405419e-06, |
|
"loss": 1.3547, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.69688081741333, |
|
"learning_rate": 9.574908339610729e-06, |
|
"loss": 1.3409, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.521012783050537, |
|
"learning_rate": 9.574283204816039e-06, |
|
"loss": 1.3701, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.9866528511047363, |
|
"learning_rate": 9.573658070021349e-06, |
|
"loss": 1.3623, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.300259590148926, |
|
"learning_rate": 9.573032935226659e-06, |
|
"loss": 1.3562, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.585087299346924, |
|
"learning_rate": 9.572407800431969e-06, |
|
"loss": 1.3574, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.3413264751434326, |
|
"learning_rate": 9.571782665637278e-06, |
|
"loss": 1.3586, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.7272746562957764, |
|
"learning_rate": 9.571157530842588e-06, |
|
"loss": 1.3525, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.167235851287842, |
|
"learning_rate": 9.570532396047898e-06, |
|
"loss": 1.3545, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.007138252258301, |
|
"learning_rate": 9.569907261253208e-06, |
|
"loss": 1.3638, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.402449607849121, |
|
"learning_rate": 9.569282126458518e-06, |
|
"loss": 1.3394, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.050807237625122, |
|
"learning_rate": 9.568656991663828e-06, |
|
"loss": 1.3477, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.470465660095215, |
|
"learning_rate": 9.568031856869138e-06, |
|
"loss": 1.3573, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.1874587535858154, |
|
"learning_rate": 9.567406722074448e-06, |
|
"loss": 1.3545, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.022789478302002, |
|
"learning_rate": 9.566781587279758e-06, |
|
"loss": 1.3857, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.696437358856201, |
|
"learning_rate": 9.566156452485068e-06, |
|
"loss": 1.3845, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.3129115104675293, |
|
"learning_rate": 9.565531317690378e-06, |
|
"loss": 1.3342, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.0286476612091064, |
|
"learning_rate": 9.564906182895688e-06, |
|
"loss": 1.3451, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.14780330657959, |
|
"learning_rate": 9.564281048100998e-06, |
|
"loss": 1.3501, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.660125732421875, |
|
"learning_rate": 9.563655913306308e-06, |
|
"loss": 1.3974, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.7873997688293457, |
|
"learning_rate": 9.563030778511617e-06, |
|
"loss": 1.3328, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 4.175543785095215, |
|
"learning_rate": 9.562405643716927e-06, |
|
"loss": 1.3871, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.0854318141937256, |
|
"learning_rate": 9.561780508922237e-06, |
|
"loss": 1.3436, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.430039882659912, |
|
"learning_rate": 9.561155374127547e-06, |
|
"loss": 1.3614, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.1078710556030273, |
|
"learning_rate": 9.560530239332857e-06, |
|
"loss": 1.3788, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.394430160522461, |
|
"learning_rate": 9.559905104538167e-06, |
|
"loss": 1.3534, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.4498708248138428, |
|
"learning_rate": 9.559279969743477e-06, |
|
"loss": 1.3875, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.169480562210083, |
|
"learning_rate": 9.558654834948787e-06, |
|
"loss": 1.3391, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.4374375343322754, |
|
"learning_rate": 9.558029700154097e-06, |
|
"loss": 1.3657, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.0859546661376953, |
|
"learning_rate": 9.557404565359407e-06, |
|
"loss": 1.3765, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.567939281463623, |
|
"learning_rate": 9.556779430564717e-06, |
|
"loss": 1.3432, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.236070156097412, |
|
"learning_rate": 9.556154295770027e-06, |
|
"loss": 1.3402, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.366365432739258, |
|
"learning_rate": 9.555529160975337e-06, |
|
"loss": 1.3736, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.573514461517334, |
|
"learning_rate": 9.554904026180646e-06, |
|
"loss": 1.3581, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.199225664138794, |
|
"learning_rate": 9.554278891385956e-06, |
|
"loss": 1.3452, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.0722098350524902, |
|
"learning_rate": 9.553653756591266e-06, |
|
"loss": 1.3785, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.137385606765747, |
|
"learning_rate": 9.553028621796576e-06, |
|
"loss": 1.3521, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.893807888031006, |
|
"learning_rate": 9.552403487001886e-06, |
|
"loss": 1.3572, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.010082721710205, |
|
"learning_rate": 9.551778352207194e-06, |
|
"loss": 1.3483, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.6857099533081055, |
|
"learning_rate": 9.551153217412506e-06, |
|
"loss": 1.3347, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.510134696960449, |
|
"learning_rate": 9.550528082617814e-06, |
|
"loss": 1.3461, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.9825291633605957, |
|
"learning_rate": 9.549902947823126e-06, |
|
"loss": 1.3415, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.156740427017212, |
|
"learning_rate": 9.549277813028434e-06, |
|
"loss": 1.3305, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.245800256729126, |
|
"learning_rate": 9.548652678233746e-06, |
|
"loss": 1.3573, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.6874351501464844, |
|
"learning_rate": 9.548027543439054e-06, |
|
"loss": 1.3443, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.6892011165618896, |
|
"learning_rate": 9.547402408644366e-06, |
|
"loss": 1.3192, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.505993604660034, |
|
"learning_rate": 9.546777273849674e-06, |
|
"loss": 1.3812, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.5395193099975586, |
|
"learning_rate": 9.546152139054985e-06, |
|
"loss": 1.3604, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.2124781608581543, |
|
"learning_rate": 9.545527004260294e-06, |
|
"loss": 1.312, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.8713743686676025, |
|
"learning_rate": 9.544901869465605e-06, |
|
"loss": 1.3435, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.1610865592956543, |
|
"learning_rate": 9.544276734670914e-06, |
|
"loss": 1.3696, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.323131561279297, |
|
"learning_rate": 9.543651599876223e-06, |
|
"loss": 1.357, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.0851237773895264, |
|
"learning_rate": 9.543026465081533e-06, |
|
"loss": 1.3406, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.637321949005127, |
|
"learning_rate": 9.542401330286843e-06, |
|
"loss": 1.376, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.876664638519287, |
|
"learning_rate": 9.541776195492153e-06, |
|
"loss": 1.4049, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.146031618118286, |
|
"learning_rate": 9.541151060697463e-06, |
|
"loss": 1.3484, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.596341609954834, |
|
"learning_rate": 9.540525925902773e-06, |
|
"loss": 1.3518, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 5.041236400604248, |
|
"learning_rate": 9.539900791108083e-06, |
|
"loss": 1.362, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.9177463054656982, |
|
"learning_rate": 9.539275656313393e-06, |
|
"loss": 1.3586, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.115206003189087, |
|
"learning_rate": 9.538650521518703e-06, |
|
"loss": 1.367, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.846676826477051, |
|
"learning_rate": 9.538025386724013e-06, |
|
"loss": 1.3699, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.625420331954956, |
|
"learning_rate": 9.537400251929323e-06, |
|
"loss": 1.3505, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.315352439880371, |
|
"learning_rate": 9.536775117134633e-06, |
|
"loss": 1.3456, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.249753475189209, |
|
"learning_rate": 9.536149982339943e-06, |
|
"loss": 1.3538, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.9315223693847656, |
|
"learning_rate": 9.535524847545253e-06, |
|
"loss": 1.3459, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.4720170497894287, |
|
"learning_rate": 9.534899712750562e-06, |
|
"loss": 1.3935, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.97334885597229, |
|
"learning_rate": 9.534274577955872e-06, |
|
"loss": 1.3526, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.218647003173828, |
|
"learning_rate": 9.533649443161182e-06, |
|
"loss": 1.3845, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.644829034805298, |
|
"learning_rate": 9.533024308366492e-06, |
|
"loss": 1.3515, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.018405437469482, |
|
"learning_rate": 9.532399173571802e-06, |
|
"loss": 1.3544, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.210761308670044, |
|
"learning_rate": 9.531774038777112e-06, |
|
"loss": 1.3554, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.046523094177246, |
|
"learning_rate": 9.531148903982422e-06, |
|
"loss": 1.3733, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.437032699584961, |
|
"learning_rate": 9.530523769187732e-06, |
|
"loss": 1.347, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.712858200073242, |
|
"learning_rate": 9.529898634393042e-06, |
|
"loss": 1.3856, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.859689474105835, |
|
"learning_rate": 9.529273499598352e-06, |
|
"loss": 1.3312, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.8814845085144043, |
|
"learning_rate": 9.528648364803662e-06, |
|
"loss": 1.3362, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.983851909637451, |
|
"learning_rate": 9.528023230008972e-06, |
|
"loss": 1.3689, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.3227264881134033, |
|
"learning_rate": 9.527398095214282e-06, |
|
"loss": 1.3362, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.825824499130249, |
|
"learning_rate": 9.526772960419591e-06, |
|
"loss": 1.3496, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.376059055328369, |
|
"learning_rate": 9.526147825624901e-06, |
|
"loss": 1.3781, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.2187156677246094, |
|
"learning_rate": 9.525522690830211e-06, |
|
"loss": 1.4142, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.073812246322632, |
|
"learning_rate": 9.524897556035521e-06, |
|
"loss": 1.3185, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.7107346057891846, |
|
"learning_rate": 9.524272421240831e-06, |
|
"loss": 1.322, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.378969669342041, |
|
"learning_rate": 9.523647286446141e-06, |
|
"loss": 1.3438, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.337489604949951, |
|
"learning_rate": 9.523022151651451e-06, |
|
"loss": 1.3326, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.453660488128662, |
|
"learning_rate": 9.522397016856761e-06, |
|
"loss": 1.3624, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.236886501312256, |
|
"learning_rate": 9.521771882062071e-06, |
|
"loss": 1.3704, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.969984531402588, |
|
"learning_rate": 9.52114674726738e-06, |
|
"loss": 1.3605, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.707930326461792, |
|
"learning_rate": 9.52052161247269e-06, |
|
"loss": 1.3272, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.0617573261260986, |
|
"learning_rate": 9.519896477678e-06, |
|
"loss": 1.3779, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.8938345909118652, |
|
"learning_rate": 9.519271342883309e-06, |
|
"loss": 1.3453, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.273656129837036, |
|
"learning_rate": 9.51864620808862e-06, |
|
"loss": 1.3535, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.6416726112365723, |
|
"learning_rate": 9.518021073293929e-06, |
|
"loss": 1.393, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.7089104652404785, |
|
"learning_rate": 9.51739593849924e-06, |
|
"loss": 1.3568, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.872784376144409, |
|
"learning_rate": 9.516770803704549e-06, |
|
"loss": 1.3725, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.3895182609558105, |
|
"learning_rate": 9.51614566890986e-06, |
|
"loss": 1.3347, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.349815845489502, |
|
"learning_rate": 9.515520534115168e-06, |
|
"loss": 1.2965, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.8851418495178223, |
|
"learning_rate": 9.51489539932048e-06, |
|
"loss": 1.3547, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.7153375148773193, |
|
"learning_rate": 9.514270264525788e-06, |
|
"loss": 1.3502, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.9336204528808594, |
|
"learning_rate": 9.5136451297311e-06, |
|
"loss": 1.3727, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.910884141921997, |
|
"learning_rate": 9.513019994936408e-06, |
|
"loss": 1.3643, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.9535582065582275, |
|
"learning_rate": 9.51239486014172e-06, |
|
"loss": 1.3532, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.453658103942871, |
|
"learning_rate": 9.511769725347028e-06, |
|
"loss": 1.3747, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 4.163629055023193, |
|
"learning_rate": 9.511144590552338e-06, |
|
"loss": 1.3686, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.291599988937378, |
|
"learning_rate": 9.510519455757648e-06, |
|
"loss": 1.3195, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 4.140781879425049, |
|
"learning_rate": 9.509894320962958e-06, |
|
"loss": 1.3454, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.2356150150299072, |
|
"learning_rate": 9.509269186168268e-06, |
|
"loss": 1.3656, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.98710298538208, |
|
"learning_rate": 9.508644051373578e-06, |
|
"loss": 1.371, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.949601650238037, |
|
"learning_rate": 9.508018916578888e-06, |
|
"loss": 1.3794, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.5830845832824707, |
|
"learning_rate": 9.507393781784198e-06, |
|
"loss": 1.3665, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.1843700408935547, |
|
"learning_rate": 9.506768646989507e-06, |
|
"loss": 1.3463, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.233429193496704, |
|
"learning_rate": 9.506143512194817e-06, |
|
"loss": 1.3741, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.79780650138855, |
|
"learning_rate": 9.505518377400127e-06, |
|
"loss": 1.3006, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.684920310974121, |
|
"learning_rate": 9.504893242605437e-06, |
|
"loss": 1.3613, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.043038368225098, |
|
"learning_rate": 9.504268107810747e-06, |
|
"loss": 1.3762, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.4520349502563477, |
|
"learning_rate": 9.503642973016057e-06, |
|
"loss": 1.3339, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.5900933742523193, |
|
"learning_rate": 9.503017838221367e-06, |
|
"loss": 1.3507, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.365208625793457, |
|
"learning_rate": 9.502392703426677e-06, |
|
"loss": 1.354, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.7963385581970215, |
|
"learning_rate": 9.501767568631987e-06, |
|
"loss": 1.3377, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.07368803024292, |
|
"learning_rate": 9.501142433837297e-06, |
|
"loss": 1.3544, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.9033076763153076, |
|
"learning_rate": 9.500517299042607e-06, |
|
"loss": 1.3466, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.946506977081299, |
|
"learning_rate": 9.499892164247917e-06, |
|
"loss": 1.3524, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.097044944763184, |
|
"learning_rate": 9.499267029453227e-06, |
|
"loss": 1.3342, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.7137930393218994, |
|
"learning_rate": 9.498641894658536e-06, |
|
"loss": 1.3737, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.2496094703674316, |
|
"learning_rate": 9.498016759863846e-06, |
|
"loss": 1.339, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.326569557189941, |
|
"learning_rate": 9.497391625069156e-06, |
|
"loss": 1.372, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.918201208114624, |
|
"learning_rate": 9.496766490274466e-06, |
|
"loss": 1.341, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.4720118045806885, |
|
"learning_rate": 9.496141355479776e-06, |
|
"loss": 1.3525, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.20745587348938, |
|
"learning_rate": 9.495516220685086e-06, |
|
"loss": 1.3664, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.320747137069702, |
|
"learning_rate": 9.494891085890396e-06, |
|
"loss": 1.326, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.690807342529297, |
|
"learning_rate": 9.494265951095706e-06, |
|
"loss": 1.3689, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.453171253204346, |
|
"learning_rate": 9.493640816301016e-06, |
|
"loss": 1.374, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.375361204147339, |
|
"learning_rate": 9.493015681506326e-06, |
|
"loss": 1.3427, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.053560495376587, |
|
"learning_rate": 9.492390546711636e-06, |
|
"loss": 1.3418, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.028963565826416, |
|
"learning_rate": 9.491765411916946e-06, |
|
"loss": 1.3203, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.746544599533081, |
|
"learning_rate": 9.491140277122256e-06, |
|
"loss": 1.3121, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.1117103099823, |
|
"learning_rate": 9.490515142327566e-06, |
|
"loss": 1.3383, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.2640998363494873, |
|
"learning_rate": 9.489890007532875e-06, |
|
"loss": 1.3384, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.436328172683716, |
|
"learning_rate": 9.489264872738185e-06, |
|
"loss": 1.3587, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.372560977935791, |
|
"learning_rate": 9.488639737943495e-06, |
|
"loss": 1.4054, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.880247116088867, |
|
"learning_rate": 9.488014603148805e-06, |
|
"loss": 1.3166, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.686885356903076, |
|
"learning_rate": 9.487389468354115e-06, |
|
"loss": 1.3629, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.168898820877075, |
|
"learning_rate": 9.486764333559425e-06, |
|
"loss": 1.3868, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.3519859313964844, |
|
"learning_rate": 9.486139198764735e-06, |
|
"loss": 1.3696, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.844688892364502, |
|
"learning_rate": 9.485514063970043e-06, |
|
"loss": 1.3498, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.061849594116211, |
|
"learning_rate": 9.484888929175355e-06, |
|
"loss": 1.3692, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.100019931793213, |
|
"learning_rate": 9.484263794380663e-06, |
|
"loss": 1.3407, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.519801378250122, |
|
"learning_rate": 9.483638659585975e-06, |
|
"loss": 1.3565, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.410887241363525, |
|
"learning_rate": 9.483013524791283e-06, |
|
"loss": 1.3291, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.080322504043579, |
|
"learning_rate": 9.482388389996595e-06, |
|
"loss": 1.3519, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.0409817695617676, |
|
"learning_rate": 9.481763255201903e-06, |
|
"loss": 1.3519, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.2845025062561035, |
|
"learning_rate": 9.481138120407214e-06, |
|
"loss": 1.3436, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.1132304668426514, |
|
"learning_rate": 9.480512985612523e-06, |
|
"loss": 1.3145, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.386362075805664, |
|
"learning_rate": 9.479887850817834e-06, |
|
"loss": 1.3483, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.7619481086730957, |
|
"learning_rate": 9.479262716023143e-06, |
|
"loss": 1.312, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.451927900314331, |
|
"learning_rate": 9.478637581228454e-06, |
|
"loss": 1.3807, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.5724120140075684, |
|
"learning_rate": 9.478012446433762e-06, |
|
"loss": 1.3469, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.330935955047607, |
|
"learning_rate": 9.477387311639072e-06, |
|
"loss": 1.3136, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.6509666442871094, |
|
"learning_rate": 9.476762176844382e-06, |
|
"loss": 1.3645, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.39678692817688, |
|
"learning_rate": 9.476137042049692e-06, |
|
"loss": 1.3229, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.2337393760681152, |
|
"learning_rate": 9.475511907255002e-06, |
|
"loss": 1.3473, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.9486355781555176, |
|
"learning_rate": 9.474886772460312e-06, |
|
"loss": 1.341, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.5861918926239014, |
|
"learning_rate": 9.474261637665622e-06, |
|
"loss": 1.3708, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.8153584003448486, |
|
"learning_rate": 9.473636502870932e-06, |
|
"loss": 1.35, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.0656278133392334, |
|
"learning_rate": 9.473011368076242e-06, |
|
"loss": 1.3564, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.5475146770477295, |
|
"learning_rate": 9.472386233281552e-06, |
|
"loss": 1.3178, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.837975025177002, |
|
"learning_rate": 9.471761098486862e-06, |
|
"loss": 1.3588, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.626478433609009, |
|
"learning_rate": 9.471135963692172e-06, |
|
"loss": 1.3288, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.8399198055267334, |
|
"learning_rate": 9.470510828897481e-06, |
|
"loss": 1.3542, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.478510856628418, |
|
"learning_rate": 9.469885694102791e-06, |
|
"loss": 1.3282, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.0036330223083496, |
|
"learning_rate": 9.469260559308101e-06, |
|
"loss": 1.3135, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.49764084815979, |
|
"learning_rate": 9.468635424513411e-06, |
|
"loss": 1.3216, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.711456298828125, |
|
"learning_rate": 9.468010289718721e-06, |
|
"loss": 1.3746, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.8252532482147217, |
|
"learning_rate": 9.467385154924031e-06, |
|
"loss": 1.3375, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.445317029953003, |
|
"learning_rate": 9.466760020129341e-06, |
|
"loss": 1.3523, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.879566192626953, |
|
"learning_rate": 9.466134885334651e-06, |
|
"loss": 1.3184, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.891055107116699, |
|
"learning_rate": 9.465509750539961e-06, |
|
"loss": 1.3246, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.6852951049804688, |
|
"learning_rate": 9.46488461574527e-06, |
|
"loss": 1.3803, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.9267516136169434, |
|
"learning_rate": 9.46425948095058e-06, |
|
"loss": 1.3254, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.6373448371887207, |
|
"learning_rate": 9.46363434615589e-06, |
|
"loss": 1.3266, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.703024387359619, |
|
"learning_rate": 9.4630092113612e-06, |
|
"loss": 1.3552, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.5792810916900635, |
|
"learning_rate": 9.46238407656651e-06, |
|
"loss": 1.3365, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.6020054817199707, |
|
"learning_rate": 9.46175894177182e-06, |
|
"loss": 1.3287, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.713806390762329, |
|
"learning_rate": 9.46113380697713e-06, |
|
"loss": 1.3537, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.407512187957764, |
|
"learning_rate": 9.46050867218244e-06, |
|
"loss": 1.3835, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.683220863342285, |
|
"learning_rate": 9.45988353738775e-06, |
|
"loss": 1.3206, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.046828031539917, |
|
"learning_rate": 9.45925840259306e-06, |
|
"loss": 1.3251, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.8515381813049316, |
|
"learning_rate": 9.45863326779837e-06, |
|
"loss": 1.3432, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.381223678588867, |
|
"learning_rate": 9.45800813300368e-06, |
|
"loss": 1.355, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.301053762435913, |
|
"learning_rate": 9.45738299820899e-06, |
|
"loss": 1.3406, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.924475908279419, |
|
"learning_rate": 9.4567578634143e-06, |
|
"loss": 1.3444, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.036510705947876, |
|
"learning_rate": 9.45613272861961e-06, |
|
"loss": 1.3519, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.7162649631500244, |
|
"learning_rate": 9.45550759382492e-06, |
|
"loss": 1.3407, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.7335431575775146, |
|
"learning_rate": 9.45488245903023e-06, |
|
"loss": 1.3941, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.4760313034057617, |
|
"learning_rate": 9.45425732423554e-06, |
|
"loss": 1.3481, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.327454090118408, |
|
"learning_rate": 9.45363218944085e-06, |
|
"loss": 1.3313, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.170297861099243, |
|
"learning_rate": 9.453007054646158e-06, |
|
"loss": 1.3222, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.097593307495117, |
|
"learning_rate": 9.45238191985147e-06, |
|
"loss": 1.3615, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.889549493789673, |
|
"learning_rate": 9.451756785056778e-06, |
|
"loss": 1.3447, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.5188488960266113, |
|
"learning_rate": 9.45113165026209e-06, |
|
"loss": 1.3844, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.8199424743652344, |
|
"learning_rate": 9.450506515467397e-06, |
|
"loss": 1.3751, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.9179065227508545, |
|
"learning_rate": 9.449881380672709e-06, |
|
"loss": 1.3483, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.69584584236145, |
|
"learning_rate": 9.449256245878017e-06, |
|
"loss": 1.3613, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.401488780975342, |
|
"learning_rate": 9.448631111083329e-06, |
|
"loss": 1.3625, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.9850871562957764, |
|
"learning_rate": 9.448005976288637e-06, |
|
"loss": 1.3522, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.8156750202178955, |
|
"learning_rate": 9.447380841493949e-06, |
|
"loss": 1.3387, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.664689779281616, |
|
"learning_rate": 9.446755706699257e-06, |
|
"loss": 1.3233, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.119280815124512, |
|
"learning_rate": 9.446130571904569e-06, |
|
"loss": 1.396, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.9794814586639404, |
|
"learning_rate": 9.445505437109877e-06, |
|
"loss": 1.3731, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.943528890609741, |
|
"learning_rate": 9.444880302315187e-06, |
|
"loss": 1.3452, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.734614610671997, |
|
"learning_rate": 9.444255167520497e-06, |
|
"loss": 1.346, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.5047719478607178, |
|
"learning_rate": 9.443630032725807e-06, |
|
"loss": 1.3618, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.040126323699951, |
|
"learning_rate": 9.443004897931117e-06, |
|
"loss": 1.3461, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.951183795928955, |
|
"learning_rate": 9.442379763136426e-06, |
|
"loss": 1.3402, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.8336355686187744, |
|
"learning_rate": 9.441754628341736e-06, |
|
"loss": 1.3211, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.9649171829223633, |
|
"learning_rate": 9.441129493547046e-06, |
|
"loss": 1.3538, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.0716583728790283, |
|
"learning_rate": 9.440504358752356e-06, |
|
"loss": 1.3518, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.94270658493042, |
|
"learning_rate": 9.439879223957666e-06, |
|
"loss": 1.3278, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.8442766666412354, |
|
"learning_rate": 9.439254089162976e-06, |
|
"loss": 1.32, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.8846325874328613, |
|
"learning_rate": 9.438628954368286e-06, |
|
"loss": 1.3391, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.698730230331421, |
|
"learning_rate": 9.438003819573596e-06, |
|
"loss": 1.3777, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.3867924213409424, |
|
"learning_rate": 9.437378684778906e-06, |
|
"loss": 1.3563, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.769615411758423, |
|
"learning_rate": 9.436753549984216e-06, |
|
"loss": 1.3298, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.6002724170684814, |
|
"learning_rate": 9.436128415189526e-06, |
|
"loss": 1.3425, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.228452205657959, |
|
"learning_rate": 9.435503280394836e-06, |
|
"loss": 1.3429, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.423189401626587, |
|
"learning_rate": 9.434878145600146e-06, |
|
"loss": 1.3768, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.5708446502685547, |
|
"learning_rate": 9.434253010805456e-06, |
|
"loss": 1.3125, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.9108211994171143, |
|
"learning_rate": 9.433627876010765e-06, |
|
"loss": 1.3347, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.365302324295044, |
|
"learning_rate": 9.433002741216075e-06, |
|
"loss": 1.3277, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.0356671810150146, |
|
"learning_rate": 9.432377606421385e-06, |
|
"loss": 1.3345, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.6186368465423584, |
|
"learning_rate": 9.431752471626695e-06, |
|
"loss": 1.3059, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.4578585624694824, |
|
"learning_rate": 9.431127336832005e-06, |
|
"loss": 1.3635, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.8224425315856934, |
|
"learning_rate": 9.430502202037315e-06, |
|
"loss": 1.3325, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.635671377182007, |
|
"learning_rate": 9.429877067242625e-06, |
|
"loss": 1.3007, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.900747299194336, |
|
"learning_rate": 9.429251932447935e-06, |
|
"loss": 1.3317, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.8643271923065186, |
|
"learning_rate": 9.428626797653245e-06, |
|
"loss": 1.3489, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.498797655105591, |
|
"learning_rate": 9.428001662858555e-06, |
|
"loss": 1.3263, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.8632445335388184, |
|
"learning_rate": 9.427376528063865e-06, |
|
"loss": 1.3465, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.886178970336914, |
|
"learning_rate": 9.426751393269175e-06, |
|
"loss": 1.3622, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.605872869491577, |
|
"learning_rate": 9.426126258474485e-06, |
|
"loss": 1.3404, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.709196090698242, |
|
"learning_rate": 9.425501123679795e-06, |
|
"loss": 1.3344, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.5497000217437744, |
|
"learning_rate": 9.424875988885104e-06, |
|
"loss": 1.3536, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.165081024169922, |
|
"learning_rate": 9.424250854090414e-06, |
|
"loss": 1.3453, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.4329254627227783, |
|
"learning_rate": 9.423625719295724e-06, |
|
"loss": 1.3044, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.0184082984924316, |
|
"learning_rate": 9.423000584501034e-06, |
|
"loss": 1.344, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.776301383972168, |
|
"learning_rate": 9.422375449706344e-06, |
|
"loss": 1.3315, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.908074140548706, |
|
"learning_rate": 9.421750314911654e-06, |
|
"loss": 1.3613, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.8458778858184814, |
|
"learning_rate": 9.421125180116964e-06, |
|
"loss": 1.3665, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.778986930847168, |
|
"learning_rate": 9.420500045322274e-06, |
|
"loss": 1.3271, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.841845512390137, |
|
"learning_rate": 9.419874910527584e-06, |
|
"loss": 1.3842, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.875431537628174, |
|
"learning_rate": 9.419249775732892e-06, |
|
"loss": 1.3734, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.327831268310547, |
|
"learning_rate": 9.418624640938204e-06, |
|
"loss": 1.3225, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.921052932739258, |
|
"learning_rate": 9.417999506143512e-06, |
|
"loss": 1.3103, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.3352317810058594, |
|
"learning_rate": 9.417374371348824e-06, |
|
"loss": 1.33, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.6515772342681885, |
|
"learning_rate": 9.416749236554132e-06, |
|
"loss": 1.3325, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.6556906700134277, |
|
"learning_rate": 9.416124101759443e-06, |
|
"loss": 1.3537, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.394216775894165, |
|
"learning_rate": 9.415498966964752e-06, |
|
"loss": 1.337, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.2017979621887207, |
|
"learning_rate": 9.414873832170063e-06, |
|
"loss": 1.3697, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.548534393310547, |
|
"learning_rate": 9.414248697375371e-06, |
|
"loss": 1.353, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.0345072746276855, |
|
"learning_rate": 9.413623562580683e-06, |
|
"loss": 1.3475, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.3942067623138428, |
|
"learning_rate": 9.412998427785991e-06, |
|
"loss": 1.293, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.577939033508301, |
|
"learning_rate": 9.412373292991303e-06, |
|
"loss": 1.3267, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.599987745285034, |
|
"learning_rate": 9.411748158196611e-06, |
|
"loss": 1.3248, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.732025623321533, |
|
"learning_rate": 9.411123023401921e-06, |
|
"loss": 1.3331, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.382721185684204, |
|
"learning_rate": 9.410497888607231e-06, |
|
"loss": 1.3492, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.670431613922119, |
|
"learning_rate": 9.409872753812541e-06, |
|
"loss": 1.3651, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.932300329208374, |
|
"learning_rate": 9.409247619017851e-06, |
|
"loss": 1.3176, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.241666316986084, |
|
"learning_rate": 9.40862248422316e-06, |
|
"loss": 1.3056, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.6911585330963135, |
|
"learning_rate": 9.40799734942847e-06, |
|
"loss": 1.3425, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.879465103149414, |
|
"learning_rate": 9.40737221463378e-06, |
|
"loss": 1.3395, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.778740167617798, |
|
"learning_rate": 9.40674707983909e-06, |
|
"loss": 1.3426, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.6691386699676514, |
|
"learning_rate": 9.4061219450444e-06, |
|
"loss": 1.3696, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.0432562828063965, |
|
"learning_rate": 9.40549681024971e-06, |
|
"loss": 1.368, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.7415411472320557, |
|
"learning_rate": 9.40487167545502e-06, |
|
"loss": 1.3368, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.6961042881011963, |
|
"learning_rate": 9.404246540660332e-06, |
|
"loss": 1.3821, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.592819929122925, |
|
"learning_rate": 9.40362140586564e-06, |
|
"loss": 1.3313, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.588106632232666, |
|
"learning_rate": 9.40299627107095e-06, |
|
"loss": 1.3369, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.1717209815979004, |
|
"learning_rate": 9.40237113627626e-06, |
|
"loss": 1.3063, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.9011149406433105, |
|
"learning_rate": 9.40174600148157e-06, |
|
"loss": 1.3371, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.861337184906006, |
|
"learning_rate": 9.40112086668688e-06, |
|
"loss": 1.3511, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.2174508571624756, |
|
"learning_rate": 9.40049573189219e-06, |
|
"loss": 1.3374, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.301086664199829, |
|
"learning_rate": 9.3998705970975e-06, |
|
"loss": 1.3024, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.3187129497528076, |
|
"learning_rate": 9.39924546230281e-06, |
|
"loss": 1.349, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.6953561305999756, |
|
"learning_rate": 9.39862032750812e-06, |
|
"loss": 1.3528, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.3338325023651123, |
|
"learning_rate": 9.39799519271343e-06, |
|
"loss": 1.3511, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.3522443771362305, |
|
"learning_rate": 9.39737005791874e-06, |
|
"loss": 1.3373, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.7400362491607666, |
|
"learning_rate": 9.39674492312405e-06, |
|
"loss": 1.3535, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.793731212615967, |
|
"learning_rate": 9.39611978832936e-06, |
|
"loss": 1.3184, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.759066581726074, |
|
"learning_rate": 9.39549465353467e-06, |
|
"loss": 1.3489, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.7479681968688965, |
|
"learning_rate": 9.39486951873998e-06, |
|
"loss": 1.3498, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.2522835731506348, |
|
"learning_rate": 9.394244383945289e-06, |
|
"loss": 1.3476, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.208197593688965, |
|
"learning_rate": 9.393619249150599e-06, |
|
"loss": 1.3289, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.8332533836364746, |
|
"learning_rate": 9.392994114355909e-06, |
|
"loss": 1.3501, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.675553798675537, |
|
"learning_rate": 9.392368979561219e-06, |
|
"loss": 1.3559, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.7442257404327393, |
|
"learning_rate": 9.391743844766529e-06, |
|
"loss": 1.346, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.129180431365967, |
|
"learning_rate": 9.391118709971839e-06, |
|
"loss": 1.3265, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.1826012134552, |
|
"learning_rate": 9.390493575177149e-06, |
|
"loss": 1.3488, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.8879926204681396, |
|
"learning_rate": 9.389868440382459e-06, |
|
"loss": 1.3814, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.066867828369141, |
|
"learning_rate": 9.389243305587769e-06, |
|
"loss": 1.3454, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.3028340339660645, |
|
"learning_rate": 9.388618170793078e-06, |
|
"loss": 1.3661, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.9503161907196045, |
|
"learning_rate": 9.387993035998388e-06, |
|
"loss": 1.3326, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.030353546142578, |
|
"learning_rate": 9.387367901203698e-06, |
|
"loss": 1.3436, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.6172800064086914, |
|
"learning_rate": 9.386742766409007e-06, |
|
"loss": 1.3545, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.2115883827209473, |
|
"learning_rate": 9.386117631614318e-06, |
|
"loss": 1.3345, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.9105865955352783, |
|
"learning_rate": 9.385492496819626e-06, |
|
"loss": 1.3667, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.278082847595215, |
|
"learning_rate": 9.384867362024938e-06, |
|
"loss": 1.3497, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.113901376724243, |
|
"learning_rate": 9.384242227230246e-06, |
|
"loss": 1.3216, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.9379656314849854, |
|
"learning_rate": 9.383617092435558e-06, |
|
"loss": 1.3469, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.53202748298645, |
|
"learning_rate": 9.382991957640866e-06, |
|
"loss": 1.3488, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.973238468170166, |
|
"learning_rate": 9.382366822846178e-06, |
|
"loss": 1.3387, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.464711904525757, |
|
"learning_rate": 9.381741688051486e-06, |
|
"loss": 1.3472, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.8774147033691406, |
|
"learning_rate": 9.381116553256798e-06, |
|
"loss": 1.3786, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.0821099281311035, |
|
"learning_rate": 9.380491418462106e-06, |
|
"loss": 1.3329, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.228484630584717, |
|
"learning_rate": 9.379866283667417e-06, |
|
"loss": 1.3329, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.113833427429199, |
|
"learning_rate": 9.379241148872726e-06, |
|
"loss": 1.373, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.078624963760376, |
|
"learning_rate": 9.378616014078036e-06, |
|
"loss": 1.3409, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.510251045227051, |
|
"learning_rate": 9.377990879283346e-06, |
|
"loss": 1.3393, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.0739970207214355, |
|
"learning_rate": 9.377365744488655e-06, |
|
"loss": 1.3696, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.1729655265808105, |
|
"learning_rate": 9.376740609693965e-06, |
|
"loss": 1.3229, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.003714084625244, |
|
"learning_rate": 9.376115474899275e-06, |
|
"loss": 1.3309, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.0130865573883057, |
|
"learning_rate": 9.375490340104585e-06, |
|
"loss": 1.3736, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.3182711601257324, |
|
"learning_rate": 9.374865205309895e-06, |
|
"loss": 1.342, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.8089771270751953, |
|
"learning_rate": 9.374240070515205e-06, |
|
"loss": 1.3187, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.321974515914917, |
|
"learning_rate": 9.373614935720515e-06, |
|
"loss": 1.3676, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.114701986312866, |
|
"learning_rate": 9.372989800925825e-06, |
|
"loss": 1.3746, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.1047348976135254, |
|
"learning_rate": 9.372364666131135e-06, |
|
"loss": 1.3437, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.5590476989746094, |
|
"learning_rate": 9.371739531336446e-06, |
|
"loss": 1.3496, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.6819374561309814, |
|
"learning_rate": 9.371114396541755e-06, |
|
"loss": 1.3254, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.0282516479492188, |
|
"learning_rate": 9.370489261747066e-06, |
|
"loss": 1.3568, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.659374475479126, |
|
"learning_rate": 9.369864126952375e-06, |
|
"loss": 1.3331, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.9225759506225586, |
|
"learning_rate": 9.369238992157685e-06, |
|
"loss": 1.3293, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.179713726043701, |
|
"learning_rate": 9.368613857362994e-06, |
|
"loss": 1.3342, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.126467704772949, |
|
"learning_rate": 9.367988722568304e-06, |
|
"loss": 1.3441, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.179965019226074, |
|
"learning_rate": 9.367363587773614e-06, |
|
"loss": 1.3225, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.020696640014648, |
|
"learning_rate": 9.366738452978924e-06, |
|
"loss": 1.3263, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.02736759185791, |
|
"learning_rate": 9.366113318184234e-06, |
|
"loss": 1.3621, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.7809269428253174, |
|
"learning_rate": 9.365488183389544e-06, |
|
"loss": 1.3583, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.92323637008667, |
|
"learning_rate": 9.364863048594854e-06, |
|
"loss": 1.359, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.1310439109802246, |
|
"learning_rate": 9.364237913800164e-06, |
|
"loss": 1.3296, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.9712395668029785, |
|
"learning_rate": 9.363612779005474e-06, |
|
"loss": 1.3118, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.047405481338501, |
|
"learning_rate": 9.362987644210784e-06, |
|
"loss": 1.3441, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.126023292541504, |
|
"learning_rate": 9.362362509416094e-06, |
|
"loss": 1.3484, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.234996318817139, |
|
"learning_rate": 9.361737374621404e-06, |
|
"loss": 1.2947, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.2066574096679688, |
|
"learning_rate": 9.361112239826714e-06, |
|
"loss": 1.3595, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.9832849502563477, |
|
"learning_rate": 9.360487105032023e-06, |
|
"loss": 1.324, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.168886661529541, |
|
"learning_rate": 9.359861970237333e-06, |
|
"loss": 1.3508, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.0027995109558105, |
|
"learning_rate": 9.359236835442643e-06, |
|
"loss": 1.344, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.394458770751953, |
|
"learning_rate": 9.358611700647953e-06, |
|
"loss": 1.3295, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.4304399490356445, |
|
"learning_rate": 9.357986565853263e-06, |
|
"loss": 1.3473, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.8694140911102295, |
|
"learning_rate": 9.357361431058573e-06, |
|
"loss": 1.3052, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.8801755905151367, |
|
"learning_rate": 9.356736296263883e-06, |
|
"loss": 1.3495, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.713139057159424, |
|
"learning_rate": 9.356111161469193e-06, |
|
"loss": 1.3552, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.1576766967773438, |
|
"learning_rate": 9.355486026674503e-06, |
|
"loss": 1.3425, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.07737398147583, |
|
"learning_rate": 9.354860891879813e-06, |
|
"loss": 1.2991, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.1460793018341064, |
|
"learning_rate": 9.354235757085123e-06, |
|
"loss": 1.3528, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.4123237133026123, |
|
"learning_rate": 9.353610622290433e-06, |
|
"loss": 1.3549, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.295971870422363, |
|
"learning_rate": 9.352985487495741e-06, |
|
"loss": 1.32, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.587291240692139, |
|
"learning_rate": 9.352360352701053e-06, |
|
"loss": 1.3205, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.6797661781311035, |
|
"learning_rate": 9.35173521790636e-06, |
|
"loss": 1.3383, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.9257397651672363, |
|
"learning_rate": 9.351110083111672e-06, |
|
"loss": 1.343, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.9473204612731934, |
|
"learning_rate": 9.35048494831698e-06, |
|
"loss": 1.3423, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.4466347694396973, |
|
"learning_rate": 9.349859813522292e-06, |
|
"loss": 1.3261, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.173017978668213, |
|
"learning_rate": 9.3492346787276e-06, |
|
"loss": 1.3222, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.7112643718719482, |
|
"learning_rate": 9.348609543932912e-06, |
|
"loss": 1.3528, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.747380495071411, |
|
"learning_rate": 9.34798440913822e-06, |
|
"loss": 1.343, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.6822452545166016, |
|
"learning_rate": 9.347359274343532e-06, |
|
"loss": 1.3478, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.6231350898742676, |
|
"learning_rate": 9.34673413954884e-06, |
|
"loss": 1.3569, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.985164165496826, |
|
"learning_rate": 9.346109004754152e-06, |
|
"loss": 1.358, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.304058790206909, |
|
"learning_rate": 9.34548386995946e-06, |
|
"loss": 1.3312, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.1674203872680664, |
|
"learning_rate": 9.34485873516477e-06, |
|
"loss": 1.3499, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.0080056190490723, |
|
"learning_rate": 9.34423360037008e-06, |
|
"loss": 1.3268, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.8802080154418945, |
|
"learning_rate": 9.34360846557539e-06, |
|
"loss": 1.3494, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.842288017272949, |
|
"learning_rate": 9.3429833307807e-06, |
|
"loss": 1.3368, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.315469980239868, |
|
"learning_rate": 9.34235819598601e-06, |
|
"loss": 1.3744, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.507584095001221, |
|
"learning_rate": 9.34173306119132e-06, |
|
"loss": 1.3635, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.494532585144043, |
|
"learning_rate": 9.34110792639663e-06, |
|
"loss": 1.3553, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.602483034133911, |
|
"learning_rate": 9.34048279160194e-06, |
|
"loss": 1.3783, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.201229572296143, |
|
"learning_rate": 9.33985765680725e-06, |
|
"loss": 1.3466, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.809846878051758, |
|
"learning_rate": 9.339232522012561e-06, |
|
"loss": 1.3274, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.964759588241577, |
|
"learning_rate": 9.33860738721787e-06, |
|
"loss": 1.3954, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.925959587097168, |
|
"learning_rate": 9.33798225242318e-06, |
|
"loss": 1.3258, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.654022455215454, |
|
"learning_rate": 9.337357117628489e-06, |
|
"loss": 1.3504, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.296046257019043, |
|
"learning_rate": 9.336731982833799e-06, |
|
"loss": 1.3136, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.804032564163208, |
|
"learning_rate": 9.336106848039109e-06, |
|
"loss": 1.3428, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.254333019256592, |
|
"learning_rate": 9.335481713244419e-06, |
|
"loss": 1.3583, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.7955849170684814, |
|
"learning_rate": 9.334856578449729e-06, |
|
"loss": 1.3294, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.444815635681152, |
|
"learning_rate": 9.334231443655039e-06, |
|
"loss": 1.3727, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.5120747089385986, |
|
"learning_rate": 9.333606308860349e-06, |
|
"loss": 1.3454, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.015342950820923, |
|
"learning_rate": 9.332981174065659e-06, |
|
"loss": 1.319, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.021049976348877, |
|
"learning_rate": 9.332356039270968e-06, |
|
"loss": 1.3498, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.354524850845337, |
|
"learning_rate": 9.331730904476278e-06, |
|
"loss": 1.3552, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.649719476699829, |
|
"learning_rate": 9.331105769681588e-06, |
|
"loss": 1.3276, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.1411306858062744, |
|
"learning_rate": 9.330480634886898e-06, |
|
"loss": 1.3616, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.030653238296509, |
|
"learning_rate": 9.329855500092208e-06, |
|
"loss": 1.3253, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.987105369567871, |
|
"learning_rate": 9.329230365297518e-06, |
|
"loss": 1.3133, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.9337832927703857, |
|
"learning_rate": 9.328605230502828e-06, |
|
"loss": 1.3697, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.6805777549743652, |
|
"learning_rate": 9.327980095708138e-06, |
|
"loss": 1.3689, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.582444667816162, |
|
"learning_rate": 9.327354960913448e-06, |
|
"loss": 1.3746, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.040530204772949, |
|
"learning_rate": 9.326729826118758e-06, |
|
"loss": 1.3484, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.6632535457611084, |
|
"learning_rate": 9.326104691324068e-06, |
|
"loss": 1.3191, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.814882278442383, |
|
"learning_rate": 9.325479556529378e-06, |
|
"loss": 1.3331, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.2695939540863037, |
|
"learning_rate": 9.324854421734688e-06, |
|
"loss": 1.3307, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.6386842727661133, |
|
"learning_rate": 9.324229286939998e-06, |
|
"loss": 1.3464, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.7537262439727783, |
|
"learning_rate": 9.323604152145307e-06, |
|
"loss": 1.3177, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.279010057449341, |
|
"learning_rate": 9.322979017350617e-06, |
|
"loss": 1.353, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.230193614959717, |
|
"learning_rate": 9.322353882555927e-06, |
|
"loss": 1.3301, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.851243257522583, |
|
"learning_rate": 9.321728747761237e-06, |
|
"loss": 1.3535, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.865309000015259, |
|
"learning_rate": 9.321103612966547e-06, |
|
"loss": 1.3836, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.171292781829834, |
|
"learning_rate": 9.320478478171855e-06, |
|
"loss": 1.3426, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.774327278137207, |
|
"learning_rate": 9.319853343377167e-06, |
|
"loss": 1.3376, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.775113344192505, |
|
"learning_rate": 9.319228208582475e-06, |
|
"loss": 1.3253, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.0207529067993164, |
|
"learning_rate": 9.318603073787787e-06, |
|
"loss": 1.3176, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.2777695655822754, |
|
"learning_rate": 9.317977938993095e-06, |
|
"loss": 1.3699, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.0100061893463135, |
|
"learning_rate": 9.317352804198407e-06, |
|
"loss": 1.3158, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.428809881210327, |
|
"learning_rate": 9.316727669403715e-06, |
|
"loss": 1.3165, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.833083391189575, |
|
"learning_rate": 9.316102534609027e-06, |
|
"loss": 1.3487, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.81231951713562, |
|
"learning_rate": 9.315477399814335e-06, |
|
"loss": 1.327, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.9721994400024414, |
|
"learning_rate": 9.314852265019646e-06, |
|
"loss": 1.3189, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.969564437866211, |
|
"learning_rate": 9.314227130224955e-06, |
|
"loss": 1.3487, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.1244125366210938, |
|
"learning_rate": 9.313601995430266e-06, |
|
"loss": 1.3391, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.785893201828003, |
|
"learning_rate": 9.312976860635575e-06, |
|
"loss": 1.3017, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.9908628463745117, |
|
"learning_rate": 9.312351725840884e-06, |
|
"loss": 1.3004, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.7877655029296875, |
|
"learning_rate": 9.311726591046194e-06, |
|
"loss": 1.3345, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.73725962638855, |
|
"learning_rate": 9.311101456251504e-06, |
|
"loss": 1.3164, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.6744511127471924, |
|
"learning_rate": 9.310476321456814e-06, |
|
"loss": 1.3362, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.9426522254943848, |
|
"learning_rate": 9.309851186662124e-06, |
|
"loss": 1.3111, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.818319797515869, |
|
"learning_rate": 9.309226051867434e-06, |
|
"loss": 1.3193, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.0991666316986084, |
|
"learning_rate": 9.308600917072744e-06, |
|
"loss": 1.3144, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.6219863891601562, |
|
"learning_rate": 9.307975782278054e-06, |
|
"loss": 1.3397, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.1432971954345703, |
|
"learning_rate": 9.307350647483364e-06, |
|
"loss": 1.3259, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.15132999420166, |
|
"learning_rate": 9.306725512688675e-06, |
|
"loss": 1.3362, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.191103935241699, |
|
"learning_rate": 9.306100377893984e-06, |
|
"loss": 1.3117, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.6365630626678467, |
|
"learning_rate": 9.305475243099295e-06, |
|
"loss": 1.3397, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.899077892303467, |
|
"learning_rate": 9.304850108304604e-06, |
|
"loss": 1.3434, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.006063461303711, |
|
"learning_rate": 9.304224973509915e-06, |
|
"loss": 1.3148, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.2103986740112305, |
|
"learning_rate": 9.303599838715223e-06, |
|
"loss": 1.3313, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.6371185779571533, |
|
"learning_rate": 9.302974703920533e-06, |
|
"loss": 1.3219, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.5810282230377197, |
|
"learning_rate": 9.302349569125843e-06, |
|
"loss": 1.3199, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.7903432846069336, |
|
"learning_rate": 9.301724434331153e-06, |
|
"loss": 1.3604, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.0497376918792725, |
|
"learning_rate": 9.301099299536463e-06, |
|
"loss": 1.3517, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.0921273231506348, |
|
"learning_rate": 9.300474164741773e-06, |
|
"loss": 1.3192, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.081624507904053, |
|
"learning_rate": 9.299849029947083e-06, |
|
"loss": 1.3376, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.9647045135498047, |
|
"learning_rate": 9.299223895152393e-06, |
|
"loss": 1.3501, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.3014702796936035, |
|
"learning_rate": 9.298598760357703e-06, |
|
"loss": 1.3467, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.240477085113525, |
|
"learning_rate": 9.297973625563013e-06, |
|
"loss": 1.3094, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.317046642303467, |
|
"learning_rate": 9.297348490768323e-06, |
|
"loss": 1.3586, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.8633594512939453, |
|
"learning_rate": 9.296723355973633e-06, |
|
"loss": 1.3272, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.435241937637329, |
|
"learning_rate": 9.296098221178943e-06, |
|
"loss": 1.3683, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.955159902572632, |
|
"learning_rate": 9.295473086384252e-06, |
|
"loss": 1.35, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.941067695617676, |
|
"learning_rate": 9.294847951589562e-06, |
|
"loss": 1.3438, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.773413181304932, |
|
"learning_rate": 9.294222816794872e-06, |
|
"loss": 1.3167, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.976818084716797, |
|
"learning_rate": 9.293597682000182e-06, |
|
"loss": 1.3415, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.769272804260254, |
|
"learning_rate": 9.292972547205492e-06, |
|
"loss": 1.3596, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.0079257488250732, |
|
"learning_rate": 9.292347412410802e-06, |
|
"loss": 1.3479, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 6.148379802703857, |
|
"learning_rate": 9.291722277616112e-06, |
|
"loss": 1.3324, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.0416197776794434, |
|
"learning_rate": 9.291097142821422e-06, |
|
"loss": 1.307, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.869318962097168, |
|
"learning_rate": 9.290472008026732e-06, |
|
"loss": 1.3217, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.5434398651123047, |
|
"learning_rate": 9.289846873232042e-06, |
|
"loss": 1.3356, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.5270133018493652, |
|
"learning_rate": 9.289221738437352e-06, |
|
"loss": 1.3206, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 5.0058369636535645, |
|
"learning_rate": 9.288596603642662e-06, |
|
"loss": 1.3386, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.7998316287994385, |
|
"learning_rate": 9.287971468847972e-06, |
|
"loss": 1.2882, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 4.403027534484863, |
|
"learning_rate": 9.287346334053281e-06, |
|
"loss": 1.3318, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.3011553287506104, |
|
"learning_rate": 9.28672119925859e-06, |
|
"loss": 1.3395, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.5392231941223145, |
|
"learning_rate": 9.286096064463901e-06, |
|
"loss": 1.3309, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.0157182216644287, |
|
"learning_rate": 9.28547092966921e-06, |
|
"loss": 1.3212, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.292978048324585, |
|
"learning_rate": 9.284845794874521e-06, |
|
"loss": 1.3419, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.0125534534454346, |
|
"learning_rate": 9.28422066007983e-06, |
|
"loss": 1.3237, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.421067476272583, |
|
"learning_rate": 9.283595525285141e-06, |
|
"loss": 1.343, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.0257959365844727, |
|
"learning_rate": 9.28297039049045e-06, |
|
"loss": 1.3339, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.5511181354522705, |
|
"learning_rate": 9.282345255695761e-06, |
|
"loss": 1.2891, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.5419836044311523, |
|
"learning_rate": 9.281720120901069e-06, |
|
"loss": 1.3189, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.700242757797241, |
|
"learning_rate": 9.28109498610638e-06, |
|
"loss": 1.3302, |
|
"step": 120000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1604655, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 10000, |
|
"total_flos": 2.4386875134941594e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|